2022-06-05 02:44:40 +00:00
|
|
|
#include "globalregex.hh"
|
|
|
|
#include "fulltextsearch.hh"
|
|
|
|
|
|
|
|
using namespace RX;
|
|
|
|
|
|
|
|
// Five capture groups, in order: an optional parenthesised run, a mandatory
// bare run, an optional parenthesised run, an optional bare run, and an
// optional trailing parenthesised run. A "run" is one or more word characters
// or Unicode marks (\p{M}); \w is Unicode-aware because of
// UseUnicodePropertiesOption.
QRegularExpression Ftx::regBrackets{
  R"((\([\w\p{M}]+\)){0,1}([\w\p{M}]+)(\([\w\p{M}]+\)){0,1}([\w\p{M}]+){0,1}(\([\w\p{M}]+\)){0,1})",
  QRegularExpression::UseUnicodePropertiesOption
};
|
|
|
|
// Matches a run of one or more characters that are neither word characters
// nor Unicode marks (\p{M}); \w is Unicode-aware here.
QRegularExpression Ftx::regSplit{
  "[^\\w\\p{M}]+",
  QRegularExpression::UseUnicodePropertiesOption
};
|
|
|
|
|
|
|
|
// Matches a run of one or more non-word characters (\W, Unicode-aware).
QRegularExpression Ftx::spacesRegExp{
  "\\W+",
  QRegularExpression::UseUnicodePropertiesOption
};
|
|
|
|
// Matches a run of word characters (Unicode-aware) whose minimum length is
// FTS::MinimumWordSize; the pattern text "\w{N,}" is assembled at
// initialisation time from that constant.
QRegularExpression Ftx::wordRegExp{
  QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}",
  QRegularExpression::UseUnicodePropertiesOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a bracketed set: '[', then one or more characters other than ']',
// then the closing ']'.
QRegularExpression Ftx::setsRegExp{
  R"(\[[^\]]+\])",
  QRegularExpression::CaseInsensitiveOption
};
|
|
|
|
// Matches a backslash escape sequence in one of three forms:
//   - a backslash followed by one of the letters a f n r t v d D w W s S b B;
//   - "\x" followed by exactly four hex digits (captured in group 1);
//   - "\0" followed by exactly three octal digits (captured in group 2).
// Matching is case-insensitive.
QRegularExpression Ftx::regexRegExp{
  R"(\\[afnrtvdDwWsSbB]|\\x([0-9A-Fa-f]{4})|\\0([0-7]{3}))",
  QRegularExpression::CaseInsensitiveOption
};
|
|
|
|
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a run of one or more characters that are not word characters,
// not round brackets and not Unicode marks (\p{M}). Unlike the pattern
// without bracket handling, '(' and ')' are excluded from the match.
QRegularExpression Ftx::handleRoundBracket{
  R"([^\w\(\)\p{M}]+)",
  QRegularExpression::UseUnicodePropertiesOption
};
|
|
|
|
// Matches a run of one or more characters that are neither word characters
// nor Unicode marks (\p{M}); round brackets are therefore part of the match.
QRegularExpression Ftx::noRoundBracket{
  "[^\\w\\p{M}]+",
  QRegularExpression::UseUnicodePropertiesOption
};
|
|
|
|
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches either a single '*', '?' or '+' character, or the whole word
// "and" / "or" (word-boundary delimited, any letter case).
QRegularExpression Ftx::tokenBoundary{
  R"([\*\?\+]|\bAnd\b|\bOR\b)",
  QRegularExpression::CaseInsensitiveOption
};
|
|
|
|
// Two alternatives: group 1 is a double-quoted span (shortest possible,
// quotes included); group 2 is any non-empty run of characters.
// DotMatchesEverythingOption lets '.' inside the quoted span cross newlines.
QRegularExpression Ftx::token{
  R"((".*?")|([\w\W\+\-]+))",
  QRegularExpression::DotMatchesEverythingOption | QRegularExpression::CaseInsensitiveOption
};
|
2022-06-05 02:44:40 +00:00
|
|
|
// Mdx: patterns applied to HTML/CSS markup
|
|
|
|
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a complete opening tag for one of: a, area, img, link, script,
// source. Group 1 captures the tag name; attributes (anything up to '>')
// are part of the match but are not captured.
QRegularExpression Mdx::allLinksRe{
  R"((?:<\s*(a(?:rea)?|img|link|script|source)(?:\s+[^>]+|\s*)>))",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a quoted href attribute whose value starts with "entry://".
//   group 1: the leading separator plus "href=" (with optional spaces)
//   group 2: the opening quote character; \2 requires the same closing quote
//   group 3: the target text before any '#'
//   group 4: the optional "#fragment" part (may be empty)
QRegularExpression Mdx::wordCrossLink{
  R"(([\s"']href\s*=)\s*(["'])entry://([^>#]*?)((?:#[^>]*?)?)\2)",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches the start of a quoted name=/id= attribute, up to (but excluding)
// the first non-space character of the value.
//   group 1: the leading separator plus "name=" or "id="
//   group 2: the opening quote character
QRegularExpression Mdx::anchorIdRe{
  R"(([\s"'](?:name|id)\s*=)\s*(["'])\s*(?=\S))",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Same prefix as the quoted name=/id= matcher, additionally capturing the
// value text.
//   group 1: the leading separator plus "name=" or "id="
//   group 2: the opening quote character
//   group 3: everything from the first non-space value character up to the
//            next double quote (note: only '"' terminates this group)
QRegularExpression Mdx::anchorIdReWord{
  R"(([\s"'](?:name|id)\s*=)\s*(["'])\s*(?=\S)([^"]*))",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches an unquoted name=/id= attribute: the value must not begin with a
// quote character.
//   group 1: the leading separator plus "name=" or "id="
//   group 2: the value, i.e. a run without whitespace, '"' or '>'
QRegularExpression Mdx::anchorIdRe2{
  R"(([\s"'](?:name|id)\s*=)\s*(?=[^"'])([^\s">]+))",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a quoted href whose value begins with "entry://#".
// Group 1 captures everything up to and including the opening quote, so the
// "entry://#" prefix itself lies outside the capture.
QRegularExpression Mdx::anchorLinkRe{
  R"(([\s"']href\s*=\s*["'])entry://#)",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a quoted href whose value starts with "sound://".
//   group 1: the leading separator plus "href="
//   group 2: the opening quote character; \2 requires the same closing quote
//   group 3: the text after "sound://" (no '"' or '>')
// InvertedGreedinessOption flips quantifier greediness, so the plain '*' and
// '+' quantifiers in this pattern match lazily.
QRegularExpression Mdx::audioRe{
  R"(([\s"']href\s*=)\s*(["'])sound://([^">]+)\2)",
  QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption
};
|
|
|
|
// Matches a quoted href whose value does NOT start with bres://, http://,
// https://, ftp://, data: or javascript: (negative lookahead).
// After the opening quote the pattern skips an optional "file://", any
// control characters (0x00-0x1f, 0x7f), any leading dots and one optional '/'.
//   group 1: the leading separator plus "href="
//   group 2: the opening quote character; \2 requires the same closing quote
//   group 3: the remaining value text (no '"' or '>')
QRegularExpression Mdx::stylesRe{
  "([\\s\"']href\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://"
  "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
  QRegularExpression::CaseInsensitiveOption
};
|
|
|
|
// Unquoted-href counterpart: the value must not begin with whitespace or a
// quote, nor with bres://, http://, https://, ftp://, data: or javascript:.
// An optional "file://", control characters (0x00-0x1f, 0x7f), leading dots
// and one optional '/' are skipped before the capture.
//   group 1: the leading separator plus "href="
//   group 2: the remaining value text (no whitespace, '"' or '>')
QRegularExpression Mdx::stylesRe2{
  "([\\s\"']href\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://"
  "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches an opening script tag that carries no src= attribute: each
// attribute character is consumed only while it does not begin a
// separator-prefixed "src=" (per-character negative lookahead).
QRegularExpression Mdx::inlineScriptRe{
  R"(<\s*script(?:(?=\s)(?:(?![\s"']src\s*=)[^>])+|\s*)>)",
  QRegularExpression::CaseInsensitiveOption
};
|
|
|
|
// Matches a closing script tag, tolerating whitespace after '<' and
// before '>'; case-insensitive.
QRegularExpression Mdx::closeScriptTagRe{
  "<\\s*/script\\s*>",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-09-19 12:13:00 +00:00
|
|
|
// Matches a quoted src=/srcset= attribute whose value does NOT start with
// bres://, http://, https://, ftp://, data: or javascript:.
// After the opening quote the pattern skips an optional "file://", any
// control characters (0x00-0x1f, 0x7f), any leading dots and one optional '/'.
//   group 1: the leading separator plus "src=" or "srcset="
//   group 2: the opening quote character; \2 requires the same closing quote
//   group 3: the remaining value text (no '"' or '>')
QRegularExpression Mdx::srcRe{
  "([\\s\"'](?:src|srcset)\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://"
  "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-09-19 12:13:00 +00:00
|
|
|
// Unquoted src=/srcset= counterpart: the value must not begin with
// whitespace or a quote, nor with bres://, http://, https://, ftp://, data:
// or javascript:. An optional "file://", control characters (0x00-0x1f,
// 0x7f), leading dots and one optional '/' are skipped before the capture.
//   group 1: the leading separator plus "src=" or "srcset="
//   group 2: the remaining value text (no whitespace, '"' or '>')
QRegularExpression Mdx::srcRe2{
  "([\\s\"'](?:src|srcset)\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://"
  "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-06-18 10:16:37 +00:00
|
|
|
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a CSS-style url( ... ) reference.
//   group 1: optional opening quote (' or ")
//   group 2: the URL text (no quote characters)
//   group 3: optional closing quote (' or ")
QRegularExpression Mdx::links{
  R"(url\(\s*(['"]?)([^'"]*)(['"]?)\s*\))",
  QRegularExpression::CaseInsensitiveOption
};
|
2022-10-03 12:28:16 +00:00
|
|
|
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches url( "..." ) with a double-quoted argument; group 1 captures the
// text between the quotes (shortest match). DotMatchesEverythingOption lets
// that text span newlines.
QRegularExpression Mdx::fontFace{
  R"((?:url\s*\(\s*\"(.*?)\"\s*)\))",
  QRegularExpression::CaseInsensitiveOption | QRegularExpression::DotMatchesEverythingOption
};
|
2022-10-07 02:17:44 +00:00
|
|
|
|
2022-12-24 22:01:50 +00:00
|
|
|
// Matches a whole style element.
//   group 1: the opening tag including its attributes
//   group 2: the element body (shortest match, may span lines via [\w\W])
//   group 3: the closing tag
QRegularExpression Mdx::styleElment{
  R"((<style[^>]*>)([\w\W]*?)(<\/style>))",
  QRegularExpression::CaseInsensitiveOption
};
|
|
|
|
|
2022-10-07 02:17:44 +00:00
|
|
|
|
2022-11-19 08:34:31 +00:00
|
|
|
// Matches a single '.' or '/' character; default pattern options.
QRegularExpression Zim::linkSpecialChar{ "[\\.\\/]" };
|