From 4ce03e9415bee694c2b0d8ab745077c6a29bb65e Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Wed, 8 Jun 2022 21:13:07 +0800 Subject: [PATCH] html unescape --- htmlescape.cc | 29 +++++++++++++++++++++++++++++ htmlescape.hh | 3 +++ mdictparser.cc | 2 +- utils.hh | 7 ------- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/htmlescape.cc b/htmlescape.cc index 59086e8c..bb2c65e6 100644 --- a/htmlescape.cc +++ b/htmlescape.cc @@ -157,6 +157,35 @@ QString unescape( QString const & str, bool saveFormat ) return str; } +QString fromHtmlEscaped( QString const & str){ + QString retVal = str; + QRegularExpression regExp("(?<lt>\\&lt\\;)|(?<gt>\\&gt\\;)|(?<amp>\\&amp\\;)|(?<quot>\\&quot\\;)", QRegularExpression::PatternOption::CaseInsensitiveOption); + auto match = regExp.match(str, 0); + + while (match.hasMatch()) + { + if (!match.captured("lt").isEmpty()) + { + retVal.replace(match.capturedStart("lt"), match.capturedLength("lt"), "<"); + } + else if (!match.captured("gt").isEmpty()) + { + retVal.replace(match.capturedStart("gt"), match.capturedLength("gt"), ">"); + } + else if (!match.captured("amp").isEmpty()) + { + retVal.replace(match.capturedStart("amp"), match.capturedLength("amp"), "&"); + } + else if (!match.captured("quot").isEmpty()) + { + retVal.replace(match.capturedStart("quot"), match.capturedLength("quot"), "\""); + } + match = regExp.match(retVal, match.capturedStart() + 1); + } + + return retVal; +} + string unescapeUtf8( const string &str, bool saveFormat ) { return string( unescape( QString::fromUtf8( str.c_str(), str.size() ) ).toUtf8().data(), saveFormat ); diff --git a/htmlescape.hh b/htmlescape.hh index f86e4136..39f3d161 100644 --- a/htmlescape.hh +++ b/htmlescape.hh @@ -4,6 +4,7 @@ #ifndef __HTMLESCAPE_HH_INCLUDED__ #define __HTMLESCAPE_HH_INCLUDED__ +#include <QString> #include <string> namespace Html { @@ -24,6 +25,8 @@ string escapeForJavaScript( string const & ); // Replace html entities QString unescape( QString const & str, bool saveFormat = false ); + +QString fromHtmlEscaped( 
QString const & str); string unescapeUtf8( string const & str, bool saveFormat = false ); } diff --git a/mdictparser.cc b/mdictparser.cc index f3124a46..a4ec7142 100644 --- a/mdictparser.cc +++ b/mdictparser.cc @@ -374,7 +374,7 @@ bool MdictParser::readHeader( QDataStream & in ) { #if( QT_VERSION >= QT_VERSION_CHECK( 6, 0, 0 ) ) styleSheets_[ lines[ i ].toInt() ] = - pair< QString, QString >( Html::unescape( lines[ i + 1 ] ), Html::unescape( lines[ i + 2 ] ) ); + pair< QString, QString >( Html::fromHtmlEscaped( lines[ i + 1 ] ), Html::fromHtmlEscaped( lines[ i + 2 ] ) ); #else styleSheets_[ lines[ i ].toInt() ] = pair< QString, QString >( lines[ i + 1 ], lines[ i + 2 ] ); #endif diff --git a/utils.hh b/utils.hh index 6f4b0b1b..a12175af 100644 --- a/utils.hh +++ b/utils.hh @@ -78,13 +78,6 @@ inline QString rstripnull(const QString &str) { return ""; } -inline QString unescapeHtml(const QString &str) { - QTextDocument text; - text.setHtml(str); - return text.toPlainText(); -} - - inline bool isExternalLink(QUrl const &url) { return url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "ftp" || url.scheme() == "mailto" || url.scheme() == "file" || url.toString().startsWith( "//" );