mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
Merge pull request #950 from xiaoyifang/fix/reg-unicode
fix: unicode regex option support
This commit is contained in:
commit
fc7a67d788
|
@ -15,10 +15,8 @@ QRegularExpression Ftx::setsRegExp( R"(\[[^\]]+\])", QRegularExpression::CaseIns
|
|||
QRegularExpression Ftx::regexRegExp( R"(\\[afnrtvdDwWsSbB]|\\x([0-9A-Fa-f]{4})|\\0([0-7]{3}))",
|
||||
QRegularExpression::CaseInsensitiveOption );
|
||||
|
||||
QRegularExpression Ftx::handleRoundBracket( R"([^\w\(\)\p{M}]+)" ,
|
||||
QRegularExpression::UseUnicodePropertiesOption );
|
||||
QRegularExpression Ftx::noRoundBracket( "[^\\w\\p{M}]+",
|
||||
QRegularExpression::UseUnicodePropertiesOption );
|
||||
QRegularExpression Ftx::handleRoundBracket( R"([^\w\(\)\p{M}]+)", QRegularExpression::UseUnicodePropertiesOption );
|
||||
QRegularExpression Ftx::noRoundBracket( R"([^\w\p{M}]+)", QRegularExpression::UseUnicodePropertiesOption );
|
||||
|
||||
QRegularExpression Ftx::tokenBoundary( R"([\*\?\+]|\bAnd\b|\bOR\b)", QRegularExpression::CaseInsensitiveOption );
|
||||
QRegularExpression Ftx::token(R"((".*?")|([\w\W\+\-]+))",QRegularExpression::DotMatchesEverythingOption|QRegularExpression::CaseInsensitiveOption);
|
||||
|
@ -47,7 +45,7 @@ QRegularExpression Mdx::stylesRe2(
|
|||
QRegularExpression::CaseInsensitiveOption );
|
||||
QRegularExpression Mdx::inlineScriptRe( R"(<\s*script(?:(?=\s)(?:(?![\s"']src\s*=)[^>])+|\s*)>)",
|
||||
QRegularExpression::CaseInsensitiveOption );
|
||||
QRegularExpression Mdx::closeScriptTagRe( "<\\s*/script\\s*>", QRegularExpression::CaseInsensitiveOption );
|
||||
QRegularExpression Mdx::closeScriptTagRe( R"(<\s*/script\s*>)", QRegularExpression::CaseInsensitiveOption );
|
||||
QRegularExpression Mdx::srcRe(
|
||||
R"(([\s"'](?:src|srcset)\s*=)\s*(["'])(?!\s*\b(?:(?:bres|https?|ftp)://|(?:data|javascript):))(?:file://)?[\x00-\x1f\x7f]*\.*/?([^">]+)\2)",
|
||||
QRegularExpression::CaseInsensitiveOption );
|
||||
|
|
|
@ -66,11 +66,11 @@ const static QRegularExpression emptyXmlTag(R"(<(?!(br|hr)\b)([^/ >]*)\s*/>)");
|
|||
bool containHtmlEntity( std::string const & text );
|
||||
}
|
||||
|
||||
const static QRegularExpression accentMark( R"(\p{M})" );
|
||||
const static QRegularExpression accentMark( R"(\p{M})", QRegularExpression::UseUnicodePropertiesOption );
|
||||
//contain unicode space mark and punctuation
|
||||
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{P}])" );
|
||||
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{P}])", QRegularExpression::UseUnicodePropertiesOption );
|
||||
//contain unicode space and mark.
|
||||
const static QRegularExpression markSpace( R"([\p{M}\p{Z}])" );
|
||||
const static QRegularExpression markSpace( R"([\p{M}\p{Z}])", QRegularExpression::UseUnicodePropertiesOption );
|
||||
|
||||
} // namespace RX
|
||||
|
||||
|
|
|
@ -1106,7 +1106,8 @@ void EpwingBook::fixHeadword( QString & headword )
|
|||
headword.remove( QChar( 0x30FB ) ); // Used in Japan transcription
|
||||
|
||||
//replace any unicode Number ,Symbol ,Punctuation ,Mark character to whitespace
|
||||
headword.replace( QRegularExpression( R"([\p{N}\p{S}\p{P}\p{M}])" ), " " );
|
||||
headword.replace( QRegularExpression( R"([\p{N}\p{S}\p{P}\p{M}])", QRegularExpression::UseUnicodePropertiesOption ),
|
||||
" " );
|
||||
|
||||
//if( isHeadwordCorrect( headword) )
|
||||
// return;
|
||||
|
|
|
@ -354,7 +354,7 @@ bool MdictParser::readHeader( QDataStream & in )
|
|||
}
|
||||
|
||||
//with this control character ,qt6.x can not parse attribute value.
|
||||
headerText.remove(QRegularExpression("\\p{C}"));
|
||||
headerText.remove( QRegularExpression( "\\p{C}", QRegularExpression::UseUnicodePropertiesOption ) );
|
||||
|
||||
QDomNamedNodeMap headerAttributes = parseHeaderAttributes( headerText );
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ void IframeSchemeHandler::requestStarted(QWebEngineUrlRequestJob *requestJob)
|
|||
codecName = ct.mid( index + 8 );
|
||||
}
|
||||
}
|
||||
QBuffer * buffer = new QBuffer( requestJob );
|
||||
auto buffer = new QBuffer( requestJob );
|
||||
|
||||
QByteArray replyData = reply->readAll();
|
||||
QString articleString;
|
||||
|
@ -61,24 +61,25 @@ void IframeSchemeHandler::requestStarted(QWebEngineUrlRequestJob *requestJob)
|
|||
QString root = reply->url().scheme() + "://" + reply->url().host();
|
||||
QString base = root + reply->url().path();
|
||||
|
||||
QRegularExpression baseTag( "<base\\s+.*?>",
|
||||
QRegularExpression::CaseInsensitiveOption | QRegularExpression::DotMatchesEverythingOption );
|
||||
|
||||
QString baseTagHtml = "<base href=\"" + base + "\">";
|
||||
QRegularExpression baseTag( R"(<base\s+.*?>)",
|
||||
QRegularExpression::CaseInsensitiveOption
|
||||
| QRegularExpression::DotMatchesEverythingOption );
|
||||
|
||||
QString baseTagHtml = QString( R"(<base href="%1">)" ).arg( base );
|
||||
|
||||
QString depressionFocus =
|
||||
R"(<script type="application/javascript"> HTMLElement.prototype.focus=function(){console.log("focus() has been disabled.");}</script>
|
||||
<script type="text/javascript" src="qrc:///scripts/iframeResizer.contentWindow.min.js">
|
||||
</script><script type="text/javascript" src="qrc:///scripts/iframe-defer.js"></script>)";
|
||||
|
||||
QString depressionFocus ="<script type=\"application/javascript\"> HTMLElement.prototype.focus=function(){console.log(\"focus() has been disabled.\");}</script>"
|
||||
"<script type=\"text/javascript\" src=\"qrc:///scripts/iframeResizer.contentWindow.min.js\"></script>"
|
||||
"<script type=\"text/javascript\" src=\"qrc:///scripts/iframe-defer.js\"></script>";
|
||||
|
||||
// remove existed base tag
|
||||
articleString.remove( baseTag ) ;
|
||||
articleString.remove( baseTag );
|
||||
|
||||
QRegularExpression headTag( "<head\\b.*?>",
|
||||
QRegularExpression headTag( R"(<head\b.*?>)",
|
||||
QRegularExpression::CaseInsensitiveOption
|
||||
| QRegularExpression::DotMatchesEverythingOption );
|
||||
auto match = headTag.match( articleString, 0 );
|
||||
if( match.hasMatch() )
|
||||
{
|
||||
if ( match.hasMatch() ) {
|
||||
articleString.insert( match.capturedEnd(), baseTagHtml );
|
||||
articleString.insert( match.capturedEnd(), depressionFocus );
|
||||
}
|
||||
|
|
|
@ -2290,7 +2290,8 @@ void ArticleView::highlightFTSResults()
|
|||
}
|
||||
|
||||
//remove possible wildcard character.
|
||||
auto cleaned = firstAvailableText.split( QRegularExpression( "\\p{P}" ) );
|
||||
auto cleaned =
|
||||
firstAvailableText.split( QRegularExpression( "\\p{P}", QRegularExpression::UseUnicodePropertiesOption ) );
|
||||
|
||||
if ( cleaned.empty() )
|
||||
return;
|
||||
|
|
Loading…
Reference in a new issue