fix: some zim dictionaries cannot be parsed correctly

This commit is contained in:
xiaoyifang 2022-04-04 10:27:47 +08:00
parent 2100ec8e5d
commit 7dae5186ab

31
zim.cc
View file

@ -1612,14 +1612,31 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
{
wstring word;
if( !title.empty() )
word = Utf8::decode( title );
else
word = Utf8::decode( url );
{
word = Utf8::decode( title );
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
indexedWords.addSingleWord( word, n );
else
indexedWords.addWord( word, n );
}
if( !url.empty() )
{
word = Utf8::decode( url );
// begin, the same process order as ZimDictionary::convert before findArticle's invocation
QString qword = QString::fromStdU32String( word );
QRegularExpression htmlRx( "\\.(s|)htm(l|)$", QRegularExpression::CaseInsensitiveOption );
qword.remove( htmlRx ).replace( "_", " " ).remove( QRegularExpression( ".*/" ) );
//end
word = qword.toStdU32String();
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
indexedWords.addSingleWord( word, n );
else
indexedWords.addWord( word, n );
}
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
indexedWords.addSingleWord( word, n );
else
indexedWords.addWord( word, n );
wordCount++;
}
else