From 6207fef4c6809f5e5933164e7c3597027ff741a7 Mon Sep 17 00:00:00 2001 From: Konstantin Isakov Date: Mon, 19 Oct 2009 23:05:28 +0000 Subject: [PATCH] + Skip weird synonym entries with slashes and dollars in StarDict dictionaries. Those were introduced by bad conversions from Babylon, are superfluous and no one needs them. The filter is quite conservative and it shouldn't be dropping any legitimate synonym entries. --- src/stardict.cc | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/stardict.cc b/src/stardict.cc index 2904cc64..4084d1e8 100644 --- a/src/stardict.cc +++ b/src/stardict.cc @@ -76,7 +76,7 @@ struct Ifo enum { Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian - CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version + CurrentFormatVersion = 7 + BtreeIndexing::FormatVersion + Folding::Version }; struct IdxHeader @@ -957,6 +957,25 @@ static void handleIdxSynFile( string const & fileName, throw exIncorrectOffset( fileName ); offset = (*articleOffsets)[ offsetInIndex ]; + + // Some StarDict dictionaries are in fact badly converted Babylon ones. + // They contain a lot of superfluous slashed entries with dollar signs. + // We try to filter them out here, since those entries become much more + // apparent in GoldenDict than they were in StarDict because of + // punctuation folding. Hopefully there are not a whole lot of valid + // synonyms which really start with a slash and contain dollar signs, or + // end with a dollar sign and contain slashes. + if ( *word == '/' ) + { + if ( strchr( word, '$' ) ) + continue; // Skip this entry + } + else + if ( wordLen && word[ wordLen - 1 ] == '$' ) + { + if ( strchr( word, '/' ) ) + continue; // Skip this entry + } } // Insert new entry into an index