mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 00:14:06 +00:00
+ Skip weird synonym entries with slashes and dollars in StarDict
dictionaries. Those were introduced by bad conversions from Babylon, are superfluous and no one needs them. The filter is quite conservative and it shouldn't be dropping any legitimate synonym entries.
This commit is contained in:
parent
08c1222f9d
commit
6207fef4c6
|
@ -76,7 +76,7 @@ struct Ifo
|
|||
enum
|
||||
{
|
||||
Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
|
||||
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
CurrentFormatVersion = 7 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
};
|
||||
|
||||
struct IdxHeader
|
||||
|
@ -957,6 +957,25 @@ static void handleIdxSynFile( string const & fileName,
|
|||
throw exIncorrectOffset( fileName );
|
||||
|
||||
offset = (*articleOffsets)[ offsetInIndex ];
|
||||
|
||||
// Some StarDict dictionaries are in fact badly converted Babylon ones.
|
||||
// They contain a lot of superfluous slashed entries with dollar signs.
|
||||
// We try to filter them out here, since those entries become much more
|
||||
// apparent in GoldenDict than they were in StarDict because of
|
||||
// punctuation folding. Hopefully there are not a whole lot of valid
|
||||
// synonyms which really start with a slash and contain dollar signs, or
|
||||
// end with a dollar sign and contain slashes.
|
||||
if ( *word == '/' )
|
||||
{
|
||||
if ( strchr( word, '$' ) )
|
||||
continue; // Skip this entry
|
||||
}
|
||||
else
|
||||
if ( wordLen && word[ wordLen - 1 ] == '$' )
|
||||
{
|
||||
if ( strchr( word, '/' ) )
|
||||
continue; // Skip this entry
|
||||
}
|
||||
}
|
||||
|
||||
// Insert new entry into an index
|
||||
|
|
Loading…
Reference in a new issue