mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-23 20:14:05 +00:00
More work with Babylon.
1) Support 2-byte sized displayed headwords in a more general way. This version should catch more of them. 2) Only add displayed headword as a headword to the index if the original one contain two digits, not one, as it was previously.
This commit is contained in:
parent
57e416820e
commit
9980863bd8
|
@ -306,6 +306,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
std::vector<std::string> alternates;
|
||||
std::string alternate;
|
||||
std::string root;
|
||||
bool defBodyEnded = false;
|
||||
|
||||
while( readBlock( block ) )
|
||||
{
|
||||
|
@ -403,6 +404,27 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
pos += length + 2;
|
||||
a += length + 1;
|
||||
}
|
||||
else
|
||||
if ( block.data[ pos ] == 0x28 && defBodyEnded &&
|
||||
len - a >= 3 )
|
||||
{
|
||||
// 2-byte sized displayed headword
|
||||
unsigned length = (unsigned char)block.data[ pos + 1 ];
|
||||
length <<= 8;
|
||||
length += (unsigned char)block.data[ pos + 2 ];
|
||||
|
||||
if ( length > len - a - 3 )
|
||||
{
|
||||
fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
||||
displayedHeadword = std::string( block.data + pos + 3, length );
|
||||
|
||||
pos += length + 3;
|
||||
a += length + 2;
|
||||
}
|
||||
else if ( (unsigned char)block.data[pos] == 0x50 && len - a - 1 >= 2 &&
|
||||
(unsigned char)block.data[pos + 1 ] == 0x1B )
|
||||
{
|
||||
|
@ -487,52 +509,8 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
//break;
|
||||
}
|
||||
else
|
||||
if ( block.data[ pos ] == 0x14 && len - a >= 5 &&
|
||||
block.data[ pos + 1 ] == 0x06 &&
|
||||
block.data[ pos + 2 ] == 0x5A &&
|
||||
block.data[ pos + 3 ] == 0x18 )
|
||||
{
|
||||
// 1-byte sized displayed headword
|
||||
// note that this probably overrides the handlers above
|
||||
unsigned length = (unsigned char)block.data[ pos + 4 ];
|
||||
|
||||
if ( length > len - a - 5 )
|
||||
{
|
||||
fprintf( stderr, "1-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
||||
displayedHeadword = std::string( block.data + pos + 5, length );
|
||||
|
||||
pos += length + 5;
|
||||
a += length + 4;
|
||||
}
|
||||
else
|
||||
if ( block.data[ pos ] == 0x14 && len - a >= 6 &&
|
||||
block.data[ pos + 1 ] == 0x06 &&
|
||||
block.data[ pos + 2 ] == 0x5A &&
|
||||
block.data[ pos + 3 ] == 0x28 )
|
||||
{
|
||||
// 2-byte sized displayed headword
|
||||
unsigned length = (unsigned char)block.data[ pos + 4 ];
|
||||
length <<= 8;
|
||||
length += (unsigned char)block.data[ pos + 5 ];
|
||||
|
||||
if ( length > len - a - 6 )
|
||||
{
|
||||
fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
||||
pos += len - a;
|
||||
break;
|
||||
}
|
||||
|
||||
displayedHeadword = std::string( block.data + pos + 6, length );
|
||||
|
||||
pos += length + 6;
|
||||
a += length + 5;
|
||||
}
|
||||
else
|
||||
if (block.data[pos] == 0x14) {
|
||||
defBodyEnded = true; // Presumably
|
||||
pos++;
|
||||
} else if ((unsigned char)block.data[pos] == 0x1A){
|
||||
unsigned length = (unsigned char)block.data[ pos + 1 ];
|
||||
|
@ -573,19 +551,19 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
|||
// Try adding displayed headword to the list of alts
|
||||
if ( headword != displayedHeadword )
|
||||
{
|
||||
// Only add displayed headword if the normal one has digits.
|
||||
// Only add displayed headword if the normal one has two or more digits.
|
||||
// This would indicate some irregularity in it (like e.g. if it serves
|
||||
// as some kind of an identifier instead of being an actual headword)
|
||||
bool hasDigits = false;
|
||||
int totalDigits = 0;
|
||||
|
||||
for( char const * p = headword.c_str(); *p; ++p )
|
||||
if ( *p >= '0' && *p <= '9' )
|
||||
{
|
||||
hasDigits = true;
|
||||
if ( ++totalDigits > 1 )
|
||||
break;
|
||||
}
|
||||
|
||||
if ( hasDigits )
|
||||
if ( totalDigits > 1 )
|
||||
{
|
||||
// Ok, let's add it.
|
||||
|
||||
|
|
|
@ -193,7 +193,7 @@ public:
|
|||
|
||||
enum
|
||||
{
|
||||
ParserVersion = 11
|
||||
ParserVersion = 12
|
||||
};
|
||||
|
||||
private:
|
||||
|
|
Loading…
Reference in a new issue