Support more types of displayed headwords in BGL.

Also convert all displayed headwords to UTF-8 from the target charset instead of the source charset.
This commit is contained in:
Konstantin Isakov 2010-05-16 17:38:06 +04:00
parent ab88fa4867
commit 1b8bb12b37
2 changed files with 50 additions and 3 deletions

View file

@ -485,7 +485,54 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
a += 2; a += 2;
//pos += len - a; //pos += len - a;
//break; //break;
} else if (block.data[pos] == 0x14) { }
else
if ( block.data[ pos ] == 0x14 && len - a >= 5 &&
block.data[ pos + 1 ] == 0x06 &&
block.data[ pos + 2 ] == 0x5A &&
block.data[ pos + 3 ] == 0x18 )
{
// 1-byte sized displayed headword
// note that this probably overrides the handlers above
unsigned length = (unsigned char)block.data[ pos + 4 ];
if ( length > len - a - 5 )
{
fprintf( stderr, "1-byte sized displayed headword for %s is too large\n", headword.c_str() );
pos += len - a;
break;
}
displayedHeadword = std::string( block.data + pos + 5, length );
pos += length + 5;
a += length + 4;
}
else
if ( block.data[ pos ] == 0x14 && len - a >= 6 &&
block.data[ pos + 1 ] == 0x06 &&
block.data[ pos + 2 ] == 0x5A &&
block.data[ pos + 3 ] == 0x28 )
{
// 2-byte sized displayed headword
unsigned length = (unsigned char)block.data[ pos + 4 ];
length <<= 8;
length += (unsigned char)block.data[ pos + 5 ];
if ( length > len - a - 6 )
{
fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
pos += len - a;
break;
}
displayedHeadword = std::string( block.data + pos + 6, length );
pos += length + 6;
a += length + 5;
}
else
if (block.data[pos] == 0x14) {
pos++; pos++;
} else if ((unsigned char)block.data[pos] == 0x1A){ } else if ((unsigned char)block.data[pos] == 0x1A){
unsigned length = (unsigned char)block.data[ pos + 1 ]; unsigned length = (unsigned char)block.data[ pos + 1 ];
@ -510,7 +557,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
convertToUtf8( definition, TARGET_CHARSET ); convertToUtf8( definition, TARGET_CHARSET );
if ( displayedHeadword.size() ) if ( displayedHeadword.size() )
convertToUtf8( displayedHeadword, SOURCE_CHARSET ); convertToUtf8( displayedHeadword, TARGET_CHARSET );
// Alternate forms // Alternate forms
while( pos != block.length ) while( pos != block.length )

View file

@ -193,7 +193,7 @@ public:
enum enum
{ {
ParserVersion = 10 ParserVersion = 11
}; };
private: private: