Revert "improve:handle invalid tag soup improvement, related to old issue #271"

This reverts commit 37d22bc412.
This commit is contained in:
xiaoyifang 2022-04-10 20:07:00 +08:00
parent d12f070ee6
commit 30ce781e6a
9 changed files with 41 additions and 32 deletions

View file

@@ -524,10 +524,11 @@ void AardDictionary::loadArticle( quint32 address,
 articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );
 // See Issue #271: A mechanism to clean-up invalid HTML cards.
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>"
+"</a></a></a></a></a></a></a></a>";
 string prefix( "<div class=\"aard\"" );
 if( isToLanguageRTL() )

View file

@@ -47,7 +47,8 @@ std::string ArticleMaker::makeHtmlHeader( QString const & word,
 bool expandOptionalParts ) const
 {
 string result =
-"<!DOCTYPE html>"
+"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" "
+"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">"
 "<html><head>"
 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">";

8
bgl.cc
View file

@@ -850,10 +850,10 @@ void BglArticleRequest::run()
 multimap< wstring, pair< string, string > >::const_iterator i;
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>";
 for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
 {
 if (dict.isFromLanguageRTL() ) // RTL support

9
mdx.cc
View file

@@ -757,10 +757,11 @@ void MdxArticleRequest::run()
 }
 // See Issue #271: A mechanism to clean-up invalid HTML cards.
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>"
+"</a></a></a></a></a></a></a></a>";
 articleText += "<div class=\"mdict\">" + articleBody + cleaner + "</div>\n";
 }

View file

@@ -802,10 +802,11 @@ void SlobDictionary::loadArticle( quint32 address,
 articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );
 // See Issue #271: A mechanism to clean-up invalid HTML cards.
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>"
+"</a></a></a></a></a></a></a></a>";
 string prefix( "<div class=\"slobdict\"" );
 if( isToLanguageRTL() )

View file

@@ -1468,10 +1468,11 @@ void StardictArticleRequest::run()
 multimap< wstring, pair< string, string > >::const_iterator i;
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>";
 for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
 {
 result += dict.isFromLanguageRTL() ? "<h3 class=\"sdct_headwords\" dir=\"rtl\">" : "<h3 class=\"sdct_headwords\">";

View file

@@ -264,9 +264,11 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r )
 }
 // See Issue #271: A mechanism to clean-up invalid HTML cards.
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
+articleString += "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>"
+"</a></a></a></a></a></a></a></a>";
 QByteArray articleBody = articleString.toUtf8();

View file

@@ -586,10 +586,10 @@ void XdxfArticleRequest::run()
 multimap< wstring, pair< string, string > >::const_iterator i;
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>";
 for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
 {

10
zim.cc
View file

@@ -1284,10 +1284,12 @@ void ZimArticleRequest::run()
 string result;
-// leave the invalid tags at the mercy of modern browsers.(webengine chrome)
-// https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
-// https://en.wikipedia.org/wiki/Tag_soup#HTML5
-string cleaner = "";
+// See Issue #271: A mechanism to clean-up invalid HTML cards.
+string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
+"</font>""</font>""</font>""</font>""</font>""</font>"
+"</b></b></b></b></b></b></b></b>"
+"</i></i></i></i></i></i></i></i>"
+"</a></a></a></a></a></a></a></a>";
 multimap< wstring, pair< string, string > >::const_iterator i;