html unescape

This commit is contained in:
Xiao YiFang 2022-06-08 21:13:07 +08:00
parent 3e6c85b404
commit 4ce03e9415
4 changed files with 33 additions and 8 deletions

View file

@ -157,6 +157,35 @@ QString unescape( QString const & str, bool saveFormat )
return str; return str;
} }
/// Decodes the four basic HTML entities in @p str: "&lt;" -> '<', "&gt;" -> '>',
/// "&amp;" -> '&' and "&quot;" -> '"'. Entity names are matched case-insensitively.
/// Any other text, including other entities, is copied through unchanged.
///
/// The input is scanned once and decoded characters are never rescanned, so
/// "&amp;lt;" becomes "&lt;" rather than "<".
///
/// @param str text that may contain the entities listed above
/// @return a copy of @p str with those entities replaced by their characters
QString fromHtmlEscaped( QString const & str )
{
  // Compiled once and reused; the pattern never changes between calls.
  static const QRegularExpression entityRx(
    "(?<lt>\\&lt\\;)|(?<gt>\\&gt\\;)|(?<amp>\\&amp\\;)|(?<quot>\\&quot\\;)",
    QRegularExpression::CaseInsensitiveOption );

  // Named capture group -> the character it stands for.
  struct Entity
  {
    const char * group;
    QLatin1Char plain;
  };
  static const Entity entities[] = {
    { "lt", QLatin1Char( '<' ) },
    { "gt", QLatin1Char( '>' ) },
    { "amp", QLatin1Char( '&' ) },
    { "quot", QLatin1Char( '"' ) },
  };

  QRegularExpressionMatchIterator it = entityRx.globalMatch( str );
  if ( !it.hasNext() )
    return str; // nothing to decode

  QString result;
  result.reserve( str.size() );

  // Index in str of the first character not yet copied into result.
  decltype( str.size() ) copiedUpTo = 0;

  while ( it.hasNext() )
  {
    const QRegularExpressionMatch m = it.next();

    // Literal text between the previous entity and this one.
    result += str.mid( copiedUpTo, m.capturedStart() - copiedUpTo );

    // Exactly one alternative of the pattern matched; find which and emit its character.
    for ( const Entity & e : entities )
    {
      if ( !m.captured( e.group ).isEmpty() )
      {
        result += e.plain;
        break;
      }
    }

    copiedUpTo = m.capturedEnd();
  }

  // Trailing text after the last entity.
  result += str.mid( copiedUpTo );
  return result;
}
string unescapeUtf8( const string &str, bool saveFormat ) string unescapeUtf8( const string &str, bool saveFormat )
{ {
return string( unescape( QString::fromUtf8( str.c_str(), str.size() ) ).toUtf8().data(), saveFormat ); return string( unescape( QString::fromUtf8( str.c_str(), str.size() ) ).toUtf8().data(), saveFormat );

View file

@ -4,6 +4,7 @@
#ifndef __HTMLESCAPE_HH_INCLUDED__ #ifndef __HTMLESCAPE_HH_INCLUDED__
#define __HTMLESCAPE_HH_INCLUDED__ #define __HTMLESCAPE_HH_INCLUDED__
#include <QString>
#include <string> #include <string>
namespace Html { namespace Html {
@ -24,6 +25,8 @@ string escapeForJavaScript( string const & );
// Replace html entities // Replace html entities
QString unescape( QString const & str, bool saveFormat = false ); QString unescape( QString const & str, bool saveFormat = false );
QString fromHtmlEscaped( QString const & str);
string unescapeUtf8( string const & str, bool saveFormat = false ); string unescapeUtf8( string const & str, bool saveFormat = false );
} }

View file

@ -374,7 +374,7 @@ bool MdictParser::readHeader( QDataStream & in )
{ {
#if( QT_VERSION >= QT_VERSION_CHECK( 6, 0, 0 ) ) #if( QT_VERSION >= QT_VERSION_CHECK( 6, 0, 0 ) )
styleSheets_[ lines[ i ].toInt() ] = styleSheets_[ lines[ i ].toInt() ] =
pair< QString, QString >( Html::unescape( lines[ i + 1 ] ), Html::unescape( lines[ i + 2 ] ) ); pair< QString, QString >( Html::fromHtmlEscaped( lines[ i + 1 ] ), Html::fromHtmlEscaped( lines[ i + 2 ] ) );
#else #else
styleSheets_[ lines[ i ].toInt() ] = pair< QString, QString >( lines[ i + 1 ], lines[ i + 2 ] ); styleSheets_[ lines[ i ].toInt() ] = pair< QString, QString >( lines[ i + 1 ], lines[ i + 2 ] );
#endif #endif

View file

@ -78,13 +78,6 @@ inline QString rstripnull(const QString &str) {
return ""; return "";
} }
// Loads str into a temporary QTextDocument as HTML and returns that
// document's plain-text representation.
inline QString unescapeHtml(const QString &str) {
  QTextDocument htmlDoc;
  htmlDoc.setHtml(str);
  return htmlDoc.toPlainText();
}
inline bool isExternalLink(QUrl const &url) { inline bool isExternalLink(QUrl const &url) {
return url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "ftp" || url.scheme() == "mailto" || return url.scheme() == "http" || url.scheme() == "https" || url.scheme() == "ftp" || url.scheme() == "mailto" ||
url.scheme() == "file" || url.toString().startsWith( "//" ); url.scheme() == "file" || url.toString().startsWith( "//" );