mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 00:14:06 +00:00
More work with Babylon.
1) Support 2-byte sized displayed headwords in a more general way. This version should catch more of them. 2) Only add displayed headword as a headword to the index if the original one contain two digits, not one, as it was previously.
This commit is contained in:
parent
57e416820e
commit
9980863bd8
|
@ -306,6 +306,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
||||||
std::vector<std::string> alternates;
|
std::vector<std::string> alternates;
|
||||||
std::string alternate;
|
std::string alternate;
|
||||||
std::string root;
|
std::string root;
|
||||||
|
bool defBodyEnded = false;
|
||||||
|
|
||||||
while( readBlock( block ) )
|
while( readBlock( block ) )
|
||||||
{
|
{
|
||||||
|
@ -403,6 +404,27 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
||||||
pos += length + 2;
|
pos += length + 2;
|
||||||
a += length + 1;
|
a += length + 1;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
if ( block.data[ pos ] == 0x28 && defBodyEnded &&
|
||||||
|
len - a >= 3 )
|
||||||
|
{
|
||||||
|
// 2-byte sized displayed headword
|
||||||
|
unsigned length = (unsigned char)block.data[ pos + 1 ];
|
||||||
|
length <<= 8;
|
||||||
|
length += (unsigned char)block.data[ pos + 2 ];
|
||||||
|
|
||||||
|
if ( length > len - a - 3 )
|
||||||
|
{
|
||||||
|
fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
||||||
|
pos += len - a;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
displayedHeadword = std::string( block.data + pos + 3, length );
|
||||||
|
|
||||||
|
pos += length + 3;
|
||||||
|
a += length + 2;
|
||||||
|
}
|
||||||
else if ( (unsigned char)block.data[pos] == 0x50 && len - a - 1 >= 2 &&
|
else if ( (unsigned char)block.data[pos] == 0x50 && len - a - 1 >= 2 &&
|
||||||
(unsigned char)block.data[pos + 1 ] == 0x1B )
|
(unsigned char)block.data[pos + 1 ] == 0x1B )
|
||||||
{
|
{
|
||||||
|
@ -487,52 +509,8 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
||||||
//break;
|
//break;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
if ( block.data[ pos ] == 0x14 && len - a >= 5 &&
|
|
||||||
block.data[ pos + 1 ] == 0x06 &&
|
|
||||||
block.data[ pos + 2 ] == 0x5A &&
|
|
||||||
block.data[ pos + 3 ] == 0x18 )
|
|
||||||
{
|
|
||||||
// 1-byte sized displayed headword
|
|
||||||
// note that this probably overrides the handlers above
|
|
||||||
unsigned length = (unsigned char)block.data[ pos + 4 ];
|
|
||||||
|
|
||||||
if ( length > len - a - 5 )
|
|
||||||
{
|
|
||||||
fprintf( stderr, "1-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
|
||||||
pos += len - a;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
displayedHeadword = std::string( block.data + pos + 5, length );
|
|
||||||
|
|
||||||
pos += length + 5;
|
|
||||||
a += length + 4;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
if ( block.data[ pos ] == 0x14 && len - a >= 6 &&
|
|
||||||
block.data[ pos + 1 ] == 0x06 &&
|
|
||||||
block.data[ pos + 2 ] == 0x5A &&
|
|
||||||
block.data[ pos + 3 ] == 0x28 )
|
|
||||||
{
|
|
||||||
// 2-byte sized displayed headword
|
|
||||||
unsigned length = (unsigned char)block.data[ pos + 4 ];
|
|
||||||
length <<= 8;
|
|
||||||
length += (unsigned char)block.data[ pos + 5 ];
|
|
||||||
|
|
||||||
if ( length > len - a - 6 )
|
|
||||||
{
|
|
||||||
fprintf( stderr, "2-byte sized displayed headword for %s is too large\n", headword.c_str() );
|
|
||||||
pos += len - a;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
displayedHeadword = std::string( block.data + pos + 6, length );
|
|
||||||
|
|
||||||
pos += length + 6;
|
|
||||||
a += length + 5;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
if (block.data[pos] == 0x14) {
|
if (block.data[pos] == 0x14) {
|
||||||
|
defBodyEnded = true; // Presumably
|
||||||
pos++;
|
pos++;
|
||||||
} else if ((unsigned char)block.data[pos] == 0x1A){
|
} else if ((unsigned char)block.data[pos] == 0x1A){
|
||||||
unsigned length = (unsigned char)block.data[ pos + 1 ];
|
unsigned length = (unsigned char)block.data[ pos + 1 ];
|
||||||
|
@ -573,19 +551,19 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
|
||||||
// Try adding displayed headword to the list of alts
|
// Try adding displayed headword to the list of alts
|
||||||
if ( headword != displayedHeadword )
|
if ( headword != displayedHeadword )
|
||||||
{
|
{
|
||||||
// Only add displayed headword if the normal one has digits.
|
// Only add displayed headword if the normal one has two or more digits.
|
||||||
// This would indicate some irregularity in it (like e.g. if it serves
|
// This would indicate some irregularity in it (like e.g. if it serves
|
||||||
// as some kind of an identifier instead of being an actual headword)
|
// as some kind of an identifier instead of being an actual headword)
|
||||||
bool hasDigits = false;
|
int totalDigits = 0;
|
||||||
|
|
||||||
for( char const * p = headword.c_str(); *p; ++p )
|
for( char const * p = headword.c_str(); *p; ++p )
|
||||||
if ( *p >= '0' && *p <= '9' )
|
if ( *p >= '0' && *p <= '9' )
|
||||||
{
|
{
|
||||||
hasDigits = true;
|
if ( ++totalDigits > 1 )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( hasDigits )
|
if ( totalDigits > 1 )
|
||||||
{
|
{
|
||||||
// Ok, let's add it.
|
// Ok, let's add it.
|
||||||
|
|
||||||
|
|
|
@ -193,7 +193,7 @@ public:
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
ParserVersion = 11
|
ParserVersion = 12
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Reference in a new issue