From 30ce781e6a9dbfa61501a3314c9fbb5e997807ca Mon Sep 17 00:00:00 2001 From: xiaoyifang Date: Sun, 10 Apr 2022 20:07:00 +0800 Subject: [PATCH] Revert "improve:handle invalid tag soup improvement, related to old issue #271" This reverts commit 37d22bc4127fc632a69dd88e187eaba40cd44f75. --- aard.cc | 9 +++++---- article_maker.cc | 3 ++- bgl.cc | 8 ++++---- mdx.cc | 9 +++++---- slob.cc | 9 +++++---- stardict.cc | 9 +++++---- website.cc | 8 +++++--- xdxf.cc | 8 ++++---- zim.cc | 10 ++++++---- 9 files changed, 41 insertions(+), 32 deletions(-) diff --git a/aard.cc b/aard.cc index ef0ee50e..5fe9e694 100644 --- a/aard.cc +++ b/aard.cc @@ -524,10 +524,11 @@ void AardDictionary::loadArticle( quint32 address, articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() ); // See Issue #271: A mechanism to clean-up invalid HTML cards. - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + string cleaner = """""""""""" + """""""""""" + "" + "" + ""; string prefix( "
" + "" "" ""; diff --git a/bgl.cc b/bgl.cc index ec11ba0b..569c2916 100644 --- a/bgl.cc +++ b/bgl.cc @@ -850,10 +850,10 @@ void BglArticleRequest::run() multimap< wstring, pair< string, string > >::const_iterator i; - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + string cleaner = """""""""""" + """""""""""" + "" + ""; for( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { if (dict.isFromLanguageRTL() ) // RTL support diff --git a/mdx.cc b/mdx.cc index c40663e5..4b512227 100644 --- a/mdx.cc +++ b/mdx.cc @@ -757,10 +757,11 @@ void MdxArticleRequest::run() } // See Issue #271: A mechanism to clean-up invalid HTML cards. - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + string cleaner = """""""""""" + """""""""""" + "" + "" + ""; articleText += "
" + articleBody + cleaner + "
\n"; } diff --git a/slob.cc b/slob.cc index 08144b4e..a18ac2ba 100644 --- a/slob.cc +++ b/slob.cc @@ -802,10 +802,11 @@ void SlobDictionary::loadArticle( quint32 address, articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() ); // See Issue #271: A mechanism to clean-up invalid HTML cards. - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + string cleaner = """""""""""" + """""""""""" + "" + "" + ""; string prefix( "
>::const_iterator i; - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + string cleaner = """""""""""" + """""""""""" + "" + ""; + for( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { result += dict.isFromLanguageRTL() ? "

" : "

"; diff --git a/website.cc b/website.cc index 0a766969..8c57a3d9 100644 --- a/website.cc +++ b/website.cc @@ -264,9 +264,11 @@ void WebSiteArticleRequest::requestFinished( QNetworkReply * r ) } // See Issue #271: A mechanism to clean-up invalid HTML cards. - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 + articleString += """""""""""" + """""""""""" + "" + "" + ""; QByteArray articleBody = articleString.toUtf8(); diff --git a/xdxf.cc b/xdxf.cc index 1d8d757e..585e8abe 100644 --- a/xdxf.cc +++ b/xdxf.cc @@ -586,10 +586,10 @@ void XdxfArticleRequest::run() multimap< wstring, pair< string, string > >::const_iterator i; - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + string cleaner = """""""""""" + """""""""""" + "" + ""; for( i = mainArticles.begin(); i != mainArticles.end(); ++i ) { diff --git a/zim.cc b/zim.cc index b153f324..15e8a5be 100644 --- a/zim.cc +++ b/zim.cc @@ -1284,10 +1284,12 @@ void ZimArticleRequest::run() string result; - // leave the invalid tags at the mercy of modern browsers.(webengine chrome) - // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser - // https://en.wikipedia.org/wiki/Tag_soup#HTML5 - string cleaner = ""; + // See Issue #271: A mechanism to clean-up invalid HTML cards. + string cleaner = """""""""""" + """""""""""" + "" + "" + ""; multimap< wstring, pair< string, string > >::const_iterator i;