Merge pull request #935 from xiaoyifang/fix/stardict-syn

opt: limit the number of extra StarDict synonyms (from `.syn`) that are searched
This commit is contained in:
xiaoyifang 2023-07-08 10:44:18 +08:00 committed by GitHub
commit aeb98e2e57
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 14 deletions

View file

@ -64,7 +64,8 @@ void BtreeIndex::openIndex( IndexInfo const & indexInfo, File::Class & file, QMu
rootNode.clear();
}
vector< WordArticleLink > BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics )
vector< WordArticleLink >
BtreeIndex::findArticles( wstring const & search_word, bool ignoreDiacritics, uint32_t maxMatchCount )
{
//First trim ending zero
wstring word = gd::removeTrailingZero( search_word );
@ -89,7 +90,7 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & search_word,
if ( chainOffset && exactMatch )
{
result = readChain( chainOffset );
result = readChain( chainOffset, maxMatchCount );
antialias( word, result, ignoreDiacritics );
}
@ -108,6 +109,7 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & search_word,
return result;
}
BtreeWordSearchRequest::BtreeWordSearchRequest( BtreeDictionary & dict_,
wstring const & str_,
unsigned minLength_,
@ -752,7 +754,7 @@ char const * BtreeIndex::findChainOffsetExactOrPrefix( wstring const & target,
}
}
vector< WordArticleLink > BtreeIndex::readChain( char const * & ptr )
vector< WordArticleLink > BtreeIndex::readChain( char const *& ptr, uint32_t maxMatchCount )
{
uint32_t chainSize;
@ -762,8 +764,7 @@ vector< WordArticleLink > BtreeIndex::readChain( char const * & ptr )
vector< WordArticleLink > result;
while( chainSize )
{
while ( chainSize && ( maxMatchCount < 0 || result.size() < maxMatchCount ) ) {
string str = ptr;
ptr += str.size() + 1;

View file

@ -83,7 +83,7 @@ public:
/// Finds articles that match the given string. A case-insensitive search
/// is performed.
vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false );
vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false, uint32_t maxMatchCount = -1 );
/// Find all unique article links in the index
void findAllArticleLinks( QVector< WordArticleLink > & articleLinks );
@ -133,7 +133,7 @@ protected:
/// Reads the word-article links' chain at the given offset. The pointer
/// is updated to point to the next chain, if there's any.
vector< WordArticleLink > readChain( char const * & );
vector< WordArticleLink > readChain( char const *&, uint32_t maxMatchCount = -1 );
/// Drops any aliases which arose due to folding. Only case-folded aliases
/// are left.

View file

@ -1258,7 +1258,8 @@ void StardictHeadwordsRequest::run()
try
{
vector< WordArticleLink > chain = dict.findArticles( word );
// Limit the synonyms to at most 10 entries
vector< WordArticleLink > chain = dict.findArticles( word, false, 10 );
wstring caseFolded = Folding::applySimpleCaseOnly( word );
@ -1295,11 +1296,9 @@ void StardictHeadwordsRequest::run()
finish();
}
sptr< Dictionary::WordSearchRequest >
StardictDictionary::findHeadwordsForSynonym( wstring const & word )
sptr< Dictionary::WordSearchRequest > StardictDictionary::findHeadwordsForSynonym( wstring const & word )
{
return synonymSearchEnabled ? std::make_shared<StardictHeadwordsRequest>( word, *this ) :
return synonymSearchEnabled ? std::make_shared< StardictHeadwordsRequest >( word, *this ) :
Class::findHeadwordsForSynonym( word );
}
@ -1371,10 +1370,9 @@ void StardictArticleRequest::run()
}
}
multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
set< uint32_t > articlesIncluded; // Some synonms make it that the articles
set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
// appear several times. We combat this
// by only allowing them to appear once.