Handle decoding errors in some bgl dictionaries

This commit is contained in:
Abs62 2012-12-19 21:34:56 +04:00
parent 82b6844d4f
commit 193eb556ea

31
bgl.cc
View file

@ -482,9 +482,16 @@ void BglHeadwordsRequest::run()
dict.loadArticle( chain[ x ].articleOffset, dict.loadArticle( chain[ x ].articleOffset,
headword, displayedHeadword, articleText ); headword, displayedHeadword, articleText );
wstring headwordDecoded = Utf8::decode( removePostfix( headword ) ); wstring headwordDecoded;
try
{
headwordDecoded = Utf8::decode( removePostfix( headword ) );
}
catch( Utf8::exCantDecode )
{
}
if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) ) if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) && !headwordDecoded.empty() )
{ {
// The headword seems to differ from the input word, which makes the // The headword seems to differ from the input word, which makes the
// input word its synonym. // input word its synonym.
@ -603,7 +610,17 @@ void BglArticleRequestRunnable::run()
void BglArticleRequest::fixHebString(string & hebStr) // Hebrew support - convert non-unicode to unicode void BglArticleRequest::fixHebString(string & hebStr) // Hebrew support - convert non-unicode to unicode
{ {
wstring hebWStr=Utf8::decode(hebStr); wstring hebWStr;
try
{
hebWStr = Utf8::decode(hebStr);
}
catch( Utf8::exCantDecode )
{
hebStr = "Utf-8 decoding error";
return;
}
for (unsigned int i=0; i<hebWStr.size();i++) for (unsigned int i=0; i<hebWStr.size();i++)
{ {
if (hebWStr[i]>=224 && hebWStr[i]<=250) // Hebrew chars encoded as windows-1255 or ISO-8859-8 if (hebWStr[i]>=224 && hebWStr[i]<=250) // Hebrew chars encoded as windows-1255 or ISO-8859-8
@ -665,6 +682,9 @@ void BglArticleRequest::run()
return; return;
} }
try
{
if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() ) if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
continue; // We already have this article in the body. continue; // We already have this article in the body.
@ -711,6 +731,11 @@ void BglArticleRequest::run()
pair< string, string >( targetHeadword, articleText ) ) ); pair< string, string >( targetHeadword, articleText ) ) );
articlesIncluded.insert( chain[ x ].articleOffset ); articlesIncluded.insert( chain[ x ].articleOffset );
} // try
catch( Utf8::exCantDecode )
{
}
} }
if ( mainArticles.empty() && alternateArticles.empty() ) if ( mainArticles.empty() && alternateArticles.empty() )