Merge pull request #950 from xiaoyifang/fix/reg-unicode

fix: unicode regex option support
This commit is contained in:
xiaoyifang 2023-07-10 09:45:22 +08:00 committed by GitHub
commit fc7a67d788
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 25 additions and 24 deletions

View file

@ -15,10 +15,8 @@ QRegularExpression Ftx::setsRegExp( R"(\[[^\]]+\])", QRegularExpression::CaseIns
QRegularExpression Ftx::regexRegExp( R"(\\[afnrtvdDwWsSbB]|\\x([0-9A-Fa-f]{4})|\\0([0-7]{3}))",
QRegularExpression::CaseInsensitiveOption );
QRegularExpression Ftx::handleRoundBracket( R"([^\w\(\)\p{M}]+)" ,
QRegularExpression::UseUnicodePropertiesOption );
QRegularExpression Ftx::noRoundBracket( "[^\\w\\p{M}]+",
QRegularExpression::UseUnicodePropertiesOption );
QRegularExpression Ftx::handleRoundBracket( R"([^\w\(\)\p{M}]+)", QRegularExpression::UseUnicodePropertiesOption );
QRegularExpression Ftx::noRoundBracket( R"([^\w\p{M}]+)", QRegularExpression::UseUnicodePropertiesOption );
QRegularExpression Ftx::tokenBoundary( R"([\*\?\+]|\bAnd\b|\bOR\b)", QRegularExpression::CaseInsensitiveOption );
QRegularExpression Ftx::token(R"((".*?")|([\w\W\+\-]+))",QRegularExpression::DotMatchesEverythingOption|QRegularExpression::CaseInsensitiveOption);
@ -47,7 +45,7 @@ QRegularExpression Mdx::stylesRe2(
QRegularExpression::CaseInsensitiveOption );
QRegularExpression Mdx::inlineScriptRe( R"(<\s*script(?:(?=\s)(?:(?![\s"']src\s*=)[^>])+|\s*)>)",
QRegularExpression::CaseInsensitiveOption );
QRegularExpression Mdx::closeScriptTagRe( "<\\s*/script\\s*>", QRegularExpression::CaseInsensitiveOption );
QRegularExpression Mdx::closeScriptTagRe( R"(<\s*/script\s*>)", QRegularExpression::CaseInsensitiveOption );
QRegularExpression Mdx::srcRe(
R"(([\s"'](?:src|srcset)\s*=)\s*(["'])(?!\s*\b(?:(?:bres|https?|ftp)://|(?:data|javascript):))(?:file://)?[\x00-\x1f\x7f]*\.*/?([^">]+)\2)",
QRegularExpression::CaseInsensitiveOption );

View file

@ -66,11 +66,11 @@ const static QRegularExpression emptyXmlTag(R"(<(?!(br|hr)\b)([^/ >]*)\s*/>)");
bool containHtmlEntity( std::string const & text );
}
const static QRegularExpression accentMark( R"(\p{M})" );
const static QRegularExpression accentMark( R"(\p{M})", QRegularExpression::UseUnicodePropertiesOption );
// Matches any Unicode mark, separator (space), or punctuation character.
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{P}])" );
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{P}])", QRegularExpression::UseUnicodePropertiesOption );
// Matches any Unicode mark or separator (space) character.
const static QRegularExpression markSpace( R"([\p{M}\p{Z}])" );
const static QRegularExpression markSpace( R"([\p{M}\p{Z}])", QRegularExpression::UseUnicodePropertiesOption );
} // namespace RX

View file

@ -1106,7 +1106,8 @@ void EpwingBook::fixHeadword( QString & headword )
headword.remove( QChar( 0x30FB ) ); // Used in Japan transcription
// Replace every Unicode number, symbol, punctuation, or mark character with a space.
headword.replace( QRegularExpression( R"([\p{N}\p{S}\p{P}\p{M}])" ), " " );
headword.replace( QRegularExpression( R"([\p{N}\p{S}\p{P}\p{M}])", QRegularExpression::UseUnicodePropertiesOption ),
" " );
//if( isHeadwordCorrect( headword) )
// return;

View file

@ -354,7 +354,7 @@ bool MdictParser::readHeader( QDataStream & in )
}
// Remove control characters first; with them present, Qt 6.x cannot parse the attribute values.
headerText.remove(QRegularExpression("\\p{C}"));
headerText.remove( QRegularExpression( "\\p{C}", QRegularExpression::UseUnicodePropertiesOption ) );
QDomNamedNodeMap headerAttributes = parseHeaderAttributes( headerText );

View file

@ -30,7 +30,7 @@ void IframeSchemeHandler::requestStarted(QWebEngineUrlRequestJob *requestJob)
codecName = ct.mid( index + 8 );
}
}
QBuffer * buffer = new QBuffer( requestJob );
auto buffer = new QBuffer( requestJob );
QByteArray replyData = reply->readAll();
QString articleString;
@ -61,24 +61,25 @@ void IframeSchemeHandler::requestStarted(QWebEngineUrlRequestJob *requestJob)
QString root = reply->url().scheme() + "://" + reply->url().host();
QString base = root + reply->url().path();
QRegularExpression baseTag( "<base\\s+.*?>",
QRegularExpression::CaseInsensitiveOption | QRegularExpression::DotMatchesEverythingOption );
QString baseTagHtml = "<base href=\"" + base + "\">";
QRegularExpression baseTag( R"(<base\s+.*?>)",
QRegularExpression::CaseInsensitiveOption
| QRegularExpression::DotMatchesEverythingOption );
QString baseTagHtml = QString( R"(<base href="%1">)" ).arg( base );
QString depressionFocus =
R"(<script type="application/javascript"> HTMLElement.prototype.focus=function(){console.log("focus() has been disabled.");}</script>
<script type="text/javascript" src="qrc:///scripts/iframeResizer.contentWindow.min.js">
</script><script type="text/javascript" src="qrc:///scripts/iframe-defer.js"></script>)";
QString depressionFocus ="<script type=\"application/javascript\"> HTMLElement.prototype.focus=function(){console.log(\"focus() has been disabled.\");}</script>"
"<script type=\"text/javascript\" src=\"qrc:///scripts/iframeResizer.contentWindow.min.js\"></script>"
"<script type=\"text/javascript\" src=\"qrc:///scripts/iframe-defer.js\"></script>";
// Remove any existing <base> tag.
articleString.remove( baseTag ) ;
articleString.remove( baseTag );
QRegularExpression headTag( "<head\\b.*?>",
QRegularExpression headTag( R"(<head\b.*?>)",
QRegularExpression::CaseInsensitiveOption
| QRegularExpression::DotMatchesEverythingOption );
auto match = headTag.match( articleString, 0 );
if( match.hasMatch() )
{
if ( match.hasMatch() ) {
articleString.insert( match.capturedEnd(), baseTagHtml );
articleString.insert( match.capturedEnd(), depressionFocus );
}

View file

@ -2290,7 +2290,8 @@ void ArticleView::highlightFTSResults()
}
// Split on punctuation to strip possible wildcard characters.
auto cleaned = firstAvailableText.split( QRegularExpression( "\\p{P}" ) );
auto cleaned =
firstAvailableText.split( QRegularExpression( "\\p{P}", QRegularExpression::UseUnicodePropertiesOption ) );
if ( cleaned.empty() )
return;