diff --git a/bgl_babylon.cc b/bgl_babylon.cc index 6014be89..edfb7b38 100644 --- a/bgl_babylon.cc +++ b/bgl_babylon.cc @@ -306,6 +306,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler ) std::vector alternates; std::string alternate; std::string root; + bool defBodyEnded = false; while( readBlock( block ) ) { @@ -403,6 +404,27 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler ) pos += length + 2; a += length + 1; } + else + if ( block.data[ pos ] == 0x28 && defBodyEnded && + len - a >= 3 ) + { + // 2-byte sized displayed headword + unsigned length = (unsigned char)block.data[ pos + 1 ]; + length <<= 8; + length += (unsigned char)block.data[ pos + 2 ]; + + if ( length > len - a - 3 ) + { + fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() ); + pos += len - a; + break; + } + + displayedHeadword = std::string( block.data + pos + 3, length ); + + pos += length + 3; + a += length + 2; + } else if ( (unsigned char)block.data[pos] == 0x50 && len - a - 1 >= 2 && (unsigned char)block.data[pos + 1 ] == 0x1B ) { @@ -487,52 +509,8 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler ) //break; } else - if ( block.data[ pos ] == 0x14 && len - a >= 5 && - block.data[ pos + 1 ] == 0x06 && - block.data[ pos + 2 ] == 0x5A && - block.data[ pos + 3 ] == 0x18 ) - { - // 1-byte sized displayed headword - // note that this probably overrides the handlers above - unsigned length = (unsigned char)block.data[ pos + 4 ]; - - if ( length > len - a - 5 ) - { - fprintf( stderr, "1-byte sized displayed headword for %s is too large\n", headword.c_str() ); - pos += len - a; - break; - } - - displayedHeadword = std::string( block.data + pos + 5, length ); - - pos += length + 5; - a += length + 4; - } - else - if ( block.data[ pos ] == 0x14 && len - a >= 6 && - block.data[ pos + 1 ] == 0x06 && - block.data[ pos + 2 ] == 0x5A && - block.data[ pos + 3 ] == 0x28 ) - { - // 2-byte sized displayed headword - unsigned length = (unsigned char)block.data[ pos + 4 ]; - length <<= 8; - length += (unsigned char)block.data[ pos + 5 ]; - - if ( length > len - a - 6 ) - { - fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() ); - pos += len - a; - break; - } - - displayedHeadword = std::string( block.data + pos + 6, length ); - - pos += length + 6; - a += length + 5; - } - else if (block.data[pos] == 0x14) { + defBodyEnded = true; // Presumably pos++; } else if ((unsigned char)block.data[pos] == 0x1A){ unsigned length = (unsigned char)block.data[ pos + 1 ]; @@ -573,19 +551,19 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler ) // Try adding displayed headword to the list of alts if ( headword != displayedHeadword ) { - // Only add displayed headword if the normal one has digits. + // Only add displayed headword if the normal one has two or more digits. // This would indicate some irregularity in it (like e.g. if it serves // as some kind of an identifier instead of being an actual headword) - bool hasDigits = false; + int totalDigits = 0; for( char const * p = headword.c_str(); *p; ++p ) if ( *p >= '0' && *p <= '9' ) { - hasDigits = true; - break; + if ( ++totalDigits > 1 ) + break; } - if ( hasDigits ) + if ( totalDigits > 1 ) { // Ok, let's add it. diff --git a/bgl_babylon.hh b/bgl_babylon.hh index 970dec81..5c31d330 100644 --- a/bgl_babylon.hh +++ b/bgl_babylon.hh @@ -193,7 +193,7 @@ public: enum { - ParserVersion = 11 + ParserVersion = 12 }; private: