mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
improve: handle invalid tag soup, related to old issue #271
This commit is contained in:
parent
ccc0f275ba
commit
37d22bc412
9
aard.cc
9
aard.cc
|
@ -524,11 +524,10 @@ void AardDictionary::loadArticle( quint32 address,
|
||||||
articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );
|
articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );
|
||||||
|
|
||||||
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>"
|
string cleaner = "";
|
||||||
"</a></a></a></a></a></a></a></a>";
|
|
||||||
|
|
||||||
string prefix( "<div class=\"aard\"" );
|
string prefix( "<div class=\"aard\"" );
|
||||||
if( isToLanguageRTL() )
|
if( isToLanguageRTL() )
|
||||||
|
|
|
@ -47,8 +47,7 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word,
|
||||||
bool expandOptionalParts ) const
|
bool expandOptionalParts ) const
|
||||||
{
|
{
|
||||||
string result =
|
string result =
|
||||||
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
|
"<!DOCTYPE html>"
|
||||||
"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"
|
|
||||||
"<html><head>"
|
"<html><head>"
|
||||||
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">";
|
"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">";
|
||||||
|
|
||||||
|
|
8
bgl.cc
8
bgl.cc
|
@ -858,10 +858,10 @@ void BglArticleRequest::run()
|
||||||
|
|
||||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||||
|
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>";
|
string cleaner = "";
|
||||||
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
|
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
|
||||||
{
|
{
|
||||||
if (dict.isFromLanguageRTL() ) // RTL support
|
if (dict.isFromLanguageRTL() ) // RTL support
|
||||||
|
|
9
mdx.cc
9
mdx.cc
|
@ -707,11 +707,10 @@ void MdxArticleRequest::run()
|
||||||
}
|
}
|
||||||
|
|
||||||
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>"
|
string cleaner = "";
|
||||||
"</a></a></a></a></a></a></a></a>";
|
|
||||||
articleText += "<div class=\"mdict\">" + articleBody + cleaner + "</div>\n";
|
articleText += "<div class=\"mdict\">" + articleBody + cleaner + "</div>\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
9
slob.cc
9
slob.cc
|
@ -799,11 +799,10 @@ void SlobDictionary::loadArticle( quint32 address,
|
||||||
articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );
|
articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );
|
||||||
|
|
||||||
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>"
|
string cleaner = "";
|
||||||
"</a></a></a></a></a></a></a></a>";
|
|
||||||
|
|
||||||
string prefix( "<div class=\"slobdict\"" );
|
string prefix( "<div class=\"slobdict\"" );
|
||||||
if( isToLanguageRTL() )
|
if( isToLanguageRTL() )
|
||||||
|
|
|
@ -1464,11 +1464,10 @@ void StardictArticleRequest::run()
|
||||||
|
|
||||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||||
|
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>";
|
string cleaner = "";
|
||||||
|
|
||||||
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
|
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
|
||||||
{
|
{
|
||||||
result += dict.isFromLanguageRTL() ? "<h3 class=\"sdct_headwords\" dir=\"rtl\">" : "<h3 class=\"sdct_headwords\">";
|
result += dict.isFromLanguageRTL() ? "<h3 class=\"sdct_headwords\" dir=\"rtl\">" : "<h3 class=\"sdct_headwords\">";
|
||||||
|
|
|
@ -281,11 +281,9 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
|
||||||
}
|
}
|
||||||
|
|
||||||
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
||||||
articleString += "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>"
|
|
||||||
"</a></a></a></a></a></a></a></a>";
|
|
||||||
|
|
||||||
QByteArray articleBody = articleString.toUtf8();
|
QByteArray articleBody = articleString.toUtf8();
|
||||||
|
|
||||||
|
|
8
xdxf.cc
8
xdxf.cc
|
@ -584,10 +584,10 @@ void XdxfArticleRequest::run()
|
||||||
|
|
||||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||||
|
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>";
|
string cleaner = "";
|
||||||
|
|
||||||
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
|
for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
|
||||||
{
|
{
|
||||||
|
|
9
zim.cc
9
zim.cc
|
@ -1286,11 +1286,10 @@ void ZimArticleRequest::run()
|
||||||
string result;
|
string result;
|
||||||
|
|
||||||
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
// See Issue #271: A mechanism to clean-up invalid HTML cards.
|
||||||
string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
|
// Leave the invalid tags at the mercy of modern browsers (WebEngine/Chrome).
|
||||||
"</font>""</font>""</font>""</font>""</font>""</font>"
|
// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
||||||
"</b></b></b></b></b></b></b></b>"
|
// https://en.wikipedia.org/wiki/Tag_soup#HTML5
|
||||||
"</i></i></i></i></i></i></i></i>"
|
string cleaner = "";
|
||||||
"</a></a></a></a></a></a></a></a>";
|
|
||||||
|
|
||||||
multimap< wstring, pair< string, string > >::const_iterator i;
|
multimap< wstring, pair< string, string > >::const_iterator i;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue