mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 16:54:08 +00:00
Merge branch 'staged' into dev
This commit is contained in:
commit
e2d470d6dc
4
.github/workflows/macos-PR-check.yml
vendored
4
.github/workflows/macos-PR-check.yml
vendored
|
@ -1,5 +1,7 @@
|
||||||
name: macos-PR-check
|
name: macos-PR-check
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
on:
|
on:
|
||||||
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
4
.github/workflows/ubuntu-PR-check.yml
vendored
4
.github/workflows/ubuntu-PR-check.yml
vendored
|
@ -1,5 +1,7 @@
|
||||||
name: Ubuntu-PR-check
|
name: Ubuntu-PR-check
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
on:
|
on:
|
||||||
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
4
.github/workflows/windows-PR-check.yml
vendored
4
.github/workflows/windows-PR-check.yml
vendored
|
@ -1,5 +1,7 @@
|
||||||
name: Windows-PR-check
|
name: Windows-PR-check
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.workflow }}-${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
on:
|
on:
|
||||||
|
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
|
@ -24,24 +24,19 @@ a:hover
|
||||||
background: white;
|
background: white;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Dictionary's name heading */
|
|
||||||
.gddictname
|
|
||||||
{
|
|
||||||
border: 1px dotted black; padding: 0.2em; padding-left: 0.5em;
|
|
||||||
margin-top: 1.2em; margin-bottom: 0.1em; font-weight: bold; font-size: 14px;
|
|
||||||
background: #87CEEB;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The 'From ' string which precedes dictionary name in the heading */
|
/* The 'From ' string which precedes dictionary name in the heading */
|
||||||
.gdfromprefix
|
.gdfromprefix
|
||||||
{
|
{
|
||||||
display: none;
|
display: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Dictionary's name heading */
|
||||||
.gddictname
|
.gddictname
|
||||||
{
|
{
|
||||||
|
padding: 0.2em; padding-left: 0.5em;
|
||||||
|
margin-bottom: 0.1em;
|
||||||
|
font-size: 14px;
|
||||||
font-weight: normal;
|
font-weight: normal;
|
||||||
|
|
||||||
float: right;
|
float: right;
|
||||||
border: 1px solid white;
|
border: 1px solid white;
|
||||||
margin-top: 7px;
|
margin-top: 7px;
|
||||||
|
|
|
@ -42,6 +42,11 @@ pre
|
||||||
/*background: #ffffdd;*/
|
/*background: #ffffdd;*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.gddicttitle
|
||||||
|
{
|
||||||
|
user-select: none;
|
||||||
|
}
|
||||||
|
|
||||||
.gddictnamebodyseparator
|
.gddictnamebodyseparator
|
||||||
{
|
{
|
||||||
clear: both;
|
clear: both;
|
||||||
|
|
50
base/globalregex.cc
Normal file
50
base/globalregex.cc
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#include "globalregex.hh"
|
||||||
|
#include "fulltextsearch.hh"
|
||||||
|
|
||||||
|
using namespace RX;
|
||||||
|
|
||||||
|
QRegularExpression Ftx::regBrackets(
|
||||||
|
"(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}",
|
||||||
|
QRegularExpression::UseUnicodePropertiesOption );
|
||||||
|
QRegularExpression Ftx::regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption );
|
||||||
|
|
||||||
|
QRegularExpression Ftx::spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption );
|
||||||
|
QRegularExpression Ftx::wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}",
|
||||||
|
QRegularExpression::UseUnicodePropertiesOption );
|
||||||
|
QRegularExpression Ftx::setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Ftx::regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
|
||||||
|
|
||||||
|
//mdx
|
||||||
|
|
||||||
|
QRegularExpression Mdx::allLinksRe( "(?:<\\s*(a(?:rea)?|img|link|script|source)(?:\\s+[^>]+|\\s*)>)",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::wordCrossLink( "([\\s\"']href\\s*=)\\s*([\"'])entry://([^>#]*?)((?:#[^>]*?)?)\\2",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::anchorIdRe( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::anchorIdReWord( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)([^\"]*)",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::anchorIdRe2( "([\\s\"'](?:name|id)\\s*=)\\s*(?=[^\"'])([^\\s\">]+)",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::anchorLinkRe( "([\\s\"']href\\s*=\\s*[\"'])entry://#",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::audioRe( "([\\s\"']href\\s*=)\\s*([\"'])sound://([^\">]+)\\2",
|
||||||
|
QRegularExpression::CaseInsensitiveOption
|
||||||
|
| QRegularExpression::InvertedGreedinessOption );
|
||||||
|
QRegularExpression Mdx::stylesRe( "([\\s\"']href\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://"
|
||||||
|
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::stylesRe2( "([\\s\"']href\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://"
|
||||||
|
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::inlineScriptRe( "<\\s*script(?:(?=\\s)(?:(?![\\s\"']src\\s*=)[^>])+|\\s*)>",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::closeScriptTagRe( "<\\s*/script\\s*>", QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::srcRe( "([\\s\"']src\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://"
|
||||||
|
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
||||||
|
QRegularExpression Mdx::srcRe2( "([\\s\"']src\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://"
|
||||||
|
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
|
||||||
|
QRegularExpression::CaseInsensitiveOption );
|
40
base/globalregex.hh
Normal file
40
base/globalregex.hh
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
#ifndef GLOBALREGEX_HH
|
||||||
|
#define GLOBALREGEX_HH
|
||||||
|
|
||||||
|
#include <QRegularExpression>
|
||||||
|
|
||||||
|
namespace RX
|
||||||
|
{
|
||||||
|
class Ftx
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static QRegularExpression regBrackets;
|
||||||
|
static QRegularExpression regSplit;
|
||||||
|
static QRegularExpression spacesRegExp;
|
||||||
|
static QRegularExpression wordRegExp;
|
||||||
|
static QRegularExpression setsRegExp;
|
||||||
|
static QRegularExpression regexRegExp;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class Mdx
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static QRegularExpression allLinksRe;
|
||||||
|
static QRegularExpression wordCrossLink;
|
||||||
|
static QRegularExpression anchorIdRe;
|
||||||
|
static QRegularExpression anchorIdReWord;
|
||||||
|
static QRegularExpression anchorIdRe2;
|
||||||
|
static QRegularExpression anchorLinkRe;
|
||||||
|
static QRegularExpression audioRe;
|
||||||
|
static QRegularExpression stylesRe;
|
||||||
|
static QRegularExpression stylesRe2;
|
||||||
|
static QRegularExpression inlineScriptRe;
|
||||||
|
static QRegularExpression closeScriptTagRe;
|
||||||
|
static QRegularExpression srcRe;
|
||||||
|
static QRegularExpression srcRe2;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace RX
|
||||||
|
|
||||||
|
#endif // GLOBALREGEX_HH
|
217
ftshelpers.cc
217
ftshelpers.cc
|
@ -17,6 +17,8 @@
|
||||||
#include <QRegularExpression>
|
#include <QRegularExpression>
|
||||||
|
|
||||||
#include "wildcard.hh"
|
#include "wildcard.hh"
|
||||||
|
#include <QtConcurrent>
|
||||||
|
#include "base/globalregex.hh"
|
||||||
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::string;
|
using std::string;
|
||||||
|
@ -147,36 +149,36 @@ bool parseSearchString( QString const & str, QStringList & indexWords,
|
||||||
{
|
{
|
||||||
searchWords.clear();
|
searchWords.clear();
|
||||||
indexWords.clear();
|
indexWords.clear();
|
||||||
QRegularExpression spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption );
|
// QRegularExpression spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption );
|
||||||
QRegularExpression wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}", QRegularExpression::UseUnicodePropertiesOption );
|
// QRegularExpression wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}", QRegularExpression::UseUnicodePropertiesOption );
|
||||||
QRegularExpression setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption );
|
// QRegularExpression setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption );
|
||||||
QRegularExpression regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", QRegularExpression::CaseInsensitiveOption);
|
// QRegularExpression regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", QRegularExpression::CaseInsensitiveOption);
|
||||||
|
|
||||||
hasCJK = containCJK( str );
|
hasCJK = containCJK( str );
|
||||||
|
|
||||||
if( searchMode == FTS::WholeWords || searchMode == FTS::PlainText )
|
if( searchMode == FTS::WholeWords || searchMode == FTS::PlainText )
|
||||||
{
|
{
|
||||||
// Make words list for search in article text
|
// Make words list for search in article text
|
||||||
searchWords = str.normalized( QString::NormalizationForm_C ).split( spacesRegExp, Qt::SkipEmptyParts );
|
searchWords = str.normalized( QString::NormalizationForm_C ).split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
|
||||||
// Make words list for index search
|
// Make words list for index search
|
||||||
QStringList list =
|
QStringList list =
|
||||||
str.normalized( QString::NormalizationForm_C ).toLower().split( spacesRegExp, Qt::SkipEmptyParts );
|
str.normalized( QString::NormalizationForm_C ).toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
|
||||||
|
|
||||||
QString searchString;
|
QString searchString;
|
||||||
if( hasCJK )
|
if( hasCJK )
|
||||||
{
|
{
|
||||||
tokenizeCJK( indexWords, wordRegExp, list );
|
tokenizeCJK( indexWords, RX::Ftx::wordRegExp, list );
|
||||||
// QStringList allWords = str.split( spacesRegExp, Qt::SkipEmptyParts );
|
// QStringList allWords = str.split( spacesRegExp, Qt::SkipEmptyParts );
|
||||||
searchString = makeHiliteRegExpString( list, searchMode, distanceBetweenWords, hasCJK , ignoreWordsOrder);
|
searchString = makeHiliteRegExpString( list, searchMode, distanceBetweenWords, hasCJK , ignoreWordsOrder);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
indexWords = list.filter( wordRegExp );
|
indexWords = list.filter( RX::Ftx::wordRegExp );
|
||||||
indexWords.removeDuplicates();
|
indexWords.removeDuplicates();
|
||||||
|
|
||||||
// Make regexp for results hilite
|
// Make regexp for results hilite
|
||||||
|
|
||||||
QStringList allWords = str.split( spacesRegExp, Qt::SkipEmptyParts );
|
QStringList allWords = str.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
|
||||||
searchString = makeHiliteRegExpString( allWords, searchMode, distanceBetweenWords,false, ignoreWordsOrder );
|
searchString = makeHiliteRegExpString( allWords, searchMode, distanceBetweenWords,false, ignoreWordsOrder );
|
||||||
}
|
}
|
||||||
searchRegExp = QRegExp( searchString, matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive, QRegExp::RegExp2 );
|
searchRegExp = QRegExp( searchString, matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive, QRegExp::RegExp2 );
|
||||||
|
@ -191,21 +193,21 @@ bool parseSearchString( QString const & str, QStringList & indexWords,
|
||||||
|
|
||||||
// Remove RegExp commands
|
// Remove RegExp commands
|
||||||
if( searchMode == FTS::RegExp )
|
if( searchMode == FTS::RegExp )
|
||||||
tmp.replace( regexRegExp, " " );
|
tmp.replace( RX::Ftx::regexRegExp, " " );
|
||||||
|
|
||||||
// Remove all symbol sets
|
// Remove all symbol sets
|
||||||
tmp.replace( setsRegExp, " " );
|
tmp.replace( RX::Ftx::setsRegExp, " " );
|
||||||
|
|
||||||
QStringList list = tmp.normalized( QString::NormalizationForm_C )
|
QStringList list = tmp.normalized( QString::NormalizationForm_C )
|
||||||
.toLower().split( spacesRegExp, Qt::SkipEmptyParts );
|
.toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
|
||||||
|
|
||||||
if( hasCJK )
|
if( hasCJK )
|
||||||
{
|
{
|
||||||
tokenizeCJK( indexWords, wordRegExp, list );
|
tokenizeCJK( indexWords, RX::Ftx::wordRegExp, list );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
indexWords = list.filter( wordRegExp );
|
indexWords = list.filter( RX::Ftx::wordRegExp );
|
||||||
indexWords.removeDuplicates();
|
indexWords.removeDuplicates();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -224,9 +226,9 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
|
||||||
if( articleText.isEmpty() )
|
if( articleText.isEmpty() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
QRegularExpression regBrackets( "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}",
|
// QRegularExpression regBrackets( "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}",
|
||||||
QRegularExpression::UseUnicodePropertiesOption);
|
// QRegularExpression::UseUnicodePropertiesOption);
|
||||||
QRegularExpression regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption );
|
// QRegularExpression regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption );
|
||||||
|
|
||||||
QStringList articleWords = articleText.normalized( QString::NormalizationForm_C )
|
QStringList articleWords = articleText.normalized( QString::NormalizationForm_C )
|
||||||
.split( QRegularExpression( handleRoundBrackets ? "[^\\w\\(\\)\\p{M}]+" : "[^\\w\\p{M}]+",
|
.split( QRegularExpression( handleRoundBrackets ? "[^\\w\\(\\)\\p{M}]+" : "[^\\w\\p{M}]+",
|
||||||
|
@ -275,12 +277,12 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
|
||||||
// Special handle for words with round brackets - DSL feature
|
// Special handle for words with round brackets - DSL feature
|
||||||
QStringList list;
|
QStringList list;
|
||||||
|
|
||||||
QStringList oldVariant = word.split( regSplit, Qt::SkipEmptyParts );
|
QStringList oldVariant = word.split( RX::Ftx::regSplit, Qt::SkipEmptyParts );
|
||||||
for( QStringList::iterator it = oldVariant.begin(); it != oldVariant.end(); ++it )
|
for( QStringList::iterator it = oldVariant.begin(); it != oldVariant.end(); ++it )
|
||||||
if( it->size() >= FTS::MinimumWordSize && !list.contains( *it ) )
|
if( it->size() >= FTS::MinimumWordSize && !list.contains( *it ) )
|
||||||
list.append( *it );
|
list.append( *it );
|
||||||
|
|
||||||
QRegularExpressionMatch match = regBrackets.match( word );
|
QRegularExpressionMatch match = RX::Ftx::regBrackets.match( word );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
QStringList parts = match.capturedTexts();
|
QStringList parts = match.capturedTexts();
|
||||||
|
@ -445,21 +447,20 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
QStringList const & words,
|
QStringList const & words,
|
||||||
QRegExp const & searchRegexp )
|
QRegExp const & searchRegexp )
|
||||||
{
|
{
|
||||||
int results = 0;
|
QtConcurrent::blockingMap( offsets, [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } );
|
||||||
|
}
|
||||||
|
|
||||||
|
void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
||||||
|
QStringList const & words,
|
||||||
|
QRegExp const & searchRegexp )
|
||||||
|
{
|
||||||
|
qDebug()<<"checking"<<offset<<QThread::currentThreadId();
|
||||||
|
// int results = 0;
|
||||||
QString headword, articleText;
|
QString headword, articleText;
|
||||||
QList< uint32_t > offsetsForHeadwords;
|
QList< uint32_t > offsetsForHeadwords;
|
||||||
QVector< QStringList > hiliteRegExps;
|
QVector< QStringList > hiliteRegExps;
|
||||||
|
|
||||||
QString id = QString::fromUtf8( dict.getId().c_str() );
|
QString id = QString::fromUtf8( dict.getId().c_str() );
|
||||||
bool needHandleBrackets;
|
|
||||||
{
|
|
||||||
QString name = QString::fromUtf8( dict.getDictionaryFilenames()[ 0 ].c_str() ).toLower();
|
|
||||||
needHandleBrackets = name.endsWith( ".dsl" ) || name.endsWith( ".dsl.dz" );
|
|
||||||
}
|
|
||||||
|
|
||||||
QRegularExpression regBrackets( "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}",
|
|
||||||
QRegularExpression::UseUnicodePropertiesOption);
|
|
||||||
QRegularExpression regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption );
|
|
||||||
|
|
||||||
// RegExp mode
|
// RegExp mode
|
||||||
QRegularExpression searchRegularExpression;
|
QRegularExpression searchRegularExpression;
|
||||||
|
@ -478,12 +479,13 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
|
|
||||||
if( searchMode == FTS::Wildcards || searchMode == FTS::RegExp )
|
if( searchMode == FTS::Wildcards || searchMode == FTS::RegExp )
|
||||||
{
|
{
|
||||||
for( int i = 0; i < offsets.size(); i++ )
|
// for( int i = 0; i < offsets.size(); i++ )
|
||||||
{
|
{
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||||
break;
|
return;
|
||||||
|
|
||||||
dict.getArticleText( offsets.at( i ), headword, articleText );
|
// auto article_address = offsets.at( i );
|
||||||
|
dict.getArticleText( offset, headword, articleText );
|
||||||
articleText = articleText.normalized( QString::NormalizationForm_C );
|
articleText = articleText.normalized( QString::NormalizationForm_C );
|
||||||
|
|
||||||
if( ignoreDiacritics )
|
if( ignoreDiacritics )
|
||||||
|
@ -492,13 +494,13 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
if( articleText.contains( searchRegularExpression ) )
|
if( articleText.contains( searchRegularExpression ) )
|
||||||
{
|
{
|
||||||
if( headword.isEmpty() )
|
if( headword.isEmpty() )
|
||||||
offsetsForHeadwords.append( offsets.at( i ) );
|
offsetsForHeadwords.append( offset );
|
||||||
else
|
else
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||||
|
|
||||||
results++;
|
++results;
|
||||||
if( maxResults > 0 && results >= maxResults )
|
if( maxResults > 0 && results >= maxResults )
|
||||||
break;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -506,10 +508,6 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
{
|
{
|
||||||
// Words mode
|
// Words mode
|
||||||
|
|
||||||
QRegularExpression splitWithBrackets( "[^\\w\\(\\)\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption );
|
|
||||||
QRegularExpression splitWithoutBrackets( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption );
|
|
||||||
|
|
||||||
Qt::CaseSensitivity cs = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive;
|
|
||||||
QVector< QPair< QString, bool > > wordsList;
|
QVector< QPair< QString, bool > > wordsList;
|
||||||
if( ignoreWordsOrder )
|
if( ignoreWordsOrder )
|
||||||
{
|
{
|
||||||
|
@ -517,18 +515,10 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
wordsList.append( QPair< QString, bool >( *it, true ) );
|
wordsList.append( QPair< QString, bool >( *it, true ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
for( int i = 0; i < offsets.size(); i++ )
|
// for( int i = 0; i < offsets.size(); i++ )
|
||||||
{
|
{
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||||
break;
|
return;
|
||||||
|
|
||||||
int pos = 0;
|
|
||||||
int matchWordNom = 0;
|
|
||||||
int unmatchWordNom = 0;
|
|
||||||
int nextNotFoundPos = 0;
|
|
||||||
|
|
||||||
QVector< QStringList > allOrders;
|
|
||||||
QStringList order;
|
|
||||||
|
|
||||||
if( ignoreWordsOrder )
|
if( ignoreWordsOrder )
|
||||||
{
|
{
|
||||||
|
@ -536,17 +526,14 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
wordsList[ i ].second = true;
|
wordsList[ i ].second = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
dict.getArticleText( offsets.at( i ), headword, articleText );
|
dict.getArticleText( offset, headword, articleText );
|
||||||
|
|
||||||
articleText = articleText.normalized( QString::NormalizationForm_C );
|
articleText = articleText.normalized( QString::NormalizationForm_C );
|
||||||
|
|
||||||
if( ignoreDiacritics )
|
if( ignoreDiacritics )
|
||||||
articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) );
|
articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) );
|
||||||
|
|
||||||
//QStringList articleWords = articleText.split( needHandleBrackets ? splitWithBrackets : splitWithoutBrackets,
|
if( ignoreWordsOrder )
|
||||||
// Qt::SkipEmptyParts );
|
|
||||||
|
|
||||||
if(ignoreWordsOrder)
|
|
||||||
{
|
{
|
||||||
bool allMatch = true;
|
bool allMatch = true;
|
||||||
foreach( QString word, words )
|
foreach( QString word, words )
|
||||||
|
@ -559,75 +546,78 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( searchMode == FTS::WholeWords)
|
else if( searchMode == FTS::WholeWords )
|
||||||
{
|
{
|
||||||
QRegularExpression tmpReg( QString( "\b%1\b" ).arg( word ),QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption );
|
QRegularExpression tmpReg( QString( "\b%1\b" ).arg( word ),
|
||||||
if( !articleText.contains( tmpReg) )
|
QRegularExpression::CaseInsensitiveOption
|
||||||
|
| QRegularExpression::UseUnicodePropertiesOption );
|
||||||
|
if( !articleText.contains( tmpReg ) )
|
||||||
{
|
{
|
||||||
allMatch = false;
|
allMatch = false;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!allMatch)
|
if( !allMatch )
|
||||||
{
|
{
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( distanceBetweenWords >= 0 )
|
if( distanceBetweenWords >= 0 )
|
||||||
{
|
{
|
||||||
// the article text contains all the needed words.
|
// the article text contains all the needed words.
|
||||||
// determine if distance restriction is met
|
// determine if distance restriction is met
|
||||||
QRegularExpression replaceReg( QString( "(%1)" ).arg( words.join( '|' ) ),
|
const QRegularExpression replaceReg( QString( "(%1)" ).arg( words.join( '|' ) ),
|
||||||
QRegularExpression::CaseInsensitiveOption |
|
QRegularExpression::CaseInsensitiveOption
|
||||||
QRegularExpression::UseUnicodePropertiesOption );
|
| QRegularExpression::UseUnicodePropertiesOption );
|
||||||
// use a string that could not be present in the article.
|
// use a string that could not be present in the article.
|
||||||
articleText = articleText.replace( replaceReg, "=@XXXXX@=" );
|
articleText = articleText.replace( replaceReg, "=@XXXXX@=" );
|
||||||
|
|
||||||
auto hasCJK = false;
|
auto hasCJK = false;
|
||||||
foreach(QString word,words)
|
foreach( QString word, words )
|
||||||
{
|
{
|
||||||
if(containCJK( word ))
|
if( containCJK( word ) )
|
||||||
{
|
{
|
||||||
hasCJK = true;
|
hasCJK = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//hascjk value ,perhaps should depend on each word
|
// hascjk value ,perhaps should depend on each word
|
||||||
auto searchRegStr = makeHiliteRegExpString( Utils::repeat( "=@XXXXX@=", words.size() ), searchMode, distanceBetweenWords,hasCJK );
|
const auto searchRegStr = makeHiliteRegExpString( Utils::repeat( "=@XXXXX@=", words.size() ),
|
||||||
QRegularExpression distanceOrderReg( searchRegStr,
|
searchMode,
|
||||||
QRegularExpression::CaseInsensitiveOption |
|
distanceBetweenWords,
|
||||||
QRegularExpression::UseUnicodePropertiesOption );
|
hasCJK );
|
||||||
|
const QRegularExpression distanceOrderReg( searchRegStr,
|
||||||
|
QRegularExpression::CaseInsensitiveOption
|
||||||
|
| QRegularExpression::UseUnicodePropertiesOption );
|
||||||
// use a string that could not be present in the article.
|
// use a string that could not be present in the article.
|
||||||
if(articleText.contains(distanceOrderReg))
|
if( articleText.contains( distanceOrderReg ) )
|
||||||
{
|
{
|
||||||
if( headword.isEmpty() )
|
if( headword.isEmpty() )
|
||||||
offsetsForHeadwords.append( offsets.at( i ) );
|
offsetsForHeadwords.append( offset );
|
||||||
else
|
else
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||||
|
|
||||||
results++;
|
++results;
|
||||||
if( maxResults > 0 && results >= maxResults )
|
if( maxResults > 0 && results >= maxResults )
|
||||||
break;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if( articleText.contains( searchRegularExpression ) )
|
if( articleText.contains( searchRegularExpression ) )
|
||||||
{
|
{
|
||||||
if( headword.isEmpty() )
|
if( headword.isEmpty() )
|
||||||
offsetsForHeadwords.append( offsets.at( i ) );
|
offsetsForHeadwords.append( offset );
|
||||||
else
|
else
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||||
|
|
||||||
results++;
|
++results;
|
||||||
if( maxResults > 0 && results >= maxResults )
|
if( maxResults > 0 && results >= maxResults )
|
||||||
break;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -637,7 +627,10 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
QVector< QString > headwords;
|
QVector< QString > headwords;
|
||||||
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
|
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
|
||||||
for( int x = 0; x < headwords.size(); x++ )
|
for( int x = 0; x < headwords.size(); x++ )
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), id, x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ),
|
||||||
|
id,
|
||||||
|
x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(),
|
||||||
|
matchCase ) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -648,27 +641,28 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
||||||
{
|
{
|
||||||
// Find articles which contains all requested words
|
// Find articles which contains all requested words
|
||||||
|
|
||||||
vector< BtreeIndexing::WordArticleLink > links;
|
QSet< uint32_t > setOfOffsets;
|
||||||
QSet< uint32_t > setOfOffsets, tmp;
|
|
||||||
uint32_t size;
|
|
||||||
|
|
||||||
if( indexWords.isEmpty() )
|
if( indexWords.isEmpty() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int n = indexWords.length();
|
QList< QSet< uint32_t > > addressLists;
|
||||||
for( int i = 0; i < n; i++ )
|
|
||||||
|
auto findLinks = [ & ]( const QString & word )
|
||||||
{
|
{
|
||||||
|
QSet< uint32_t > tmp;
|
||||||
|
uint32_t size;
|
||||||
|
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||||
return;
|
addressLists<< tmp;
|
||||||
|
|
||||||
tmp.clear();
|
vector< BtreeIndexing::WordArticleLink > links =
|
||||||
|
ftsIndex.findArticles( gd::toWString( word ), ignoreDiacritics );
|
||||||
links = ftsIndex.findArticles( gd::toWString( indexWords.at( i ) ), ignoreDiacritics );
|
|
||||||
for( unsigned x = 0; x < links.size(); x++ )
|
for( unsigned x = 0; x < links.size(); x++ )
|
||||||
{
|
{
|
||||||
|
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||||
return;
|
addressLists<< tmp;
|
||||||
|
|
||||||
vector< char > chunk;
|
vector< char > chunk;
|
||||||
char * linksPtr;
|
char * linksPtr;
|
||||||
|
@ -677,24 +671,31 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
||||||
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
|
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy( &size, linksPtr, sizeof(uint32_t) );
|
memcpy( &size, linksPtr, sizeof( uint32_t ) );
|
||||||
linksPtr += sizeof(uint32_t);
|
linksPtr += sizeof( uint32_t );
|
||||||
for( uint32_t y = 0; y < size; y++ )
|
for( uint32_t y = 0; y < size; y++ )
|
||||||
{
|
{
|
||||||
tmp.insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) );
|
tmp.insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) );
|
||||||
linksPtr += sizeof(uint32_t);
|
linksPtr += sizeof( uint32_t );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
links.clear();
|
links.clear();
|
||||||
|
|
||||||
if( i == 0 )
|
addressLists<< tmp;
|
||||||
setOfOffsets = tmp;
|
};
|
||||||
|
// int n = indexWords.length();
|
||||||
|
QtConcurrent::blockingMap( indexWords, findLinks );
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
for( auto & elem : addressLists )
|
||||||
|
{
|
||||||
|
if( i++ == 0 )
|
||||||
|
setOfOffsets = elem;
|
||||||
else
|
else
|
||||||
setOfOffsets = setOfOffsets.intersect( tmp );
|
setOfOffsets = setOfOffsets.intersect( elem );
|
||||||
}
|
}
|
||||||
|
|
||||||
tmp.clear();
|
|
||||||
|
|
||||||
if( setOfOffsets.isEmpty() )
|
if( setOfOffsets.isEmpty() )
|
||||||
return;
|
return;
|
||||||
|
@ -757,17 +758,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
||||||
|
|
||||||
if( !hieroglyphsList.empty() )
|
if( !hieroglyphsList.empty() )
|
||||||
{
|
{
|
||||||
QSet< uint32_t > tmp;
|
QList< QSet< uint32_t > > sets;
|
||||||
vector< BtreeIndexing::WordArticleLink > links;
|
auto fn_wordLink = [ & ](const QString & word )
|
||||||
|
|
||||||
for( int i = 0; i < hieroglyphsList.size(); i++ )
|
|
||||||
{
|
{
|
||||||
links = ftsIndex.findArticles( gd::toWString( hieroglyphsList.at( i ) ) );
|
QSet< uint32_t > tmp;
|
||||||
|
vector< BtreeIndexing::WordArticleLink > links = ftsIndex.findArticles( gd::toWString( word ) );
|
||||||
for( unsigned x = 0; x < links.size(); x++ )
|
for( unsigned x = 0; x < links.size(); x++ )
|
||||||
{
|
{
|
||||||
|
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||||
return;
|
sets<< tmp;
|
||||||
|
|
||||||
vector< char > chunk;
|
vector< char > chunk;
|
||||||
char * linksPtr;
|
char * linksPtr;
|
||||||
|
@ -786,11 +785,17 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
||||||
}
|
}
|
||||||
|
|
||||||
links.clear();
|
links.clear();
|
||||||
|
sets<< tmp;
|
||||||
|
};
|
||||||
|
QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink );
|
||||||
|
|
||||||
if( i == 0 )
|
int i = 0;
|
||||||
setOfOffsets = tmp;
|
for( auto & elem : sets )
|
||||||
|
{
|
||||||
|
if( i++ == 0 )
|
||||||
|
setOfOffsets = elem;
|
||||||
else
|
else
|
||||||
setOfOffsets = setOfOffsets.intersect( tmp );
|
setOfOffsets = setOfOffsets.intersect( elem );
|
||||||
}
|
}
|
||||||
|
|
||||||
allWordsLinks[ wordNom ] = setOfOffsets;
|
allWordsLinks[ wordNom ] = setOfOffsets;
|
||||||
|
|
|
@ -82,12 +82,16 @@ class FTSResultsRequest : public Dictionary::DataRequest
|
||||||
|
|
||||||
QAtomicInt isCancelled;
|
QAtomicInt isCancelled;
|
||||||
|
|
||||||
|
QAtomicInt results;
|
||||||
|
|
||||||
QList< FTS::FtsHeadword > * foundHeadwords;
|
QList< FTS::FtsHeadword > * foundHeadwords;
|
||||||
|
|
||||||
void checkArticles( QVector< uint32_t > const & offsets,
|
void checkArticles( QVector< uint32_t > const & offsets,
|
||||||
QStringList const & words,
|
QStringList const & words,
|
||||||
QRegExp const & searchRegexp = QRegExp() );
|
QRegExp const & searchRegexp = QRegExp() );
|
||||||
|
|
||||||
|
void checkSingleArticle( uint32_t offset, QStringList const & words, QRegExp const & searchRegexp = QRegExp() );
|
||||||
|
|
||||||
void indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
void indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
||||||
sptr< ChunkedStorage::Reader > chunks,
|
sptr< ChunkedStorage::Reader > chunks,
|
||||||
QStringList & indexWords,
|
QStringList & indexWords,
|
||||||
|
@ -127,6 +131,7 @@ public:
|
||||||
searchString = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( searchString_ ) ) );
|
searchString = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( searchString_ ) ) );
|
||||||
|
|
||||||
foundHeadwords = new QList< FTS::FtsHeadword >;
|
foundHeadwords = new QList< FTS::FtsHeadword >;
|
||||||
|
results = 0;
|
||||||
QThreadPool::globalInstance()->start( [ this ]() { this->run(); }, -100 );
|
QThreadPool::globalInstance()->start( [ this ]() { this->run(); }, -100 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,8 @@ QT += core \
|
||||||
webenginewidgets\
|
webenginewidgets\
|
||||||
webchannel\
|
webchannel\
|
||||||
printsupport \
|
printsupport \
|
||||||
help
|
help \
|
||||||
|
concurrent
|
||||||
|
|
||||||
greaterThan(QT_MAJOR_VERSION, 5): QT += webenginecore core5compat
|
greaterThan(QT_MAJOR_VERSION, 5): QT += webenginecore core5compat
|
||||||
|
|
||||||
|
@ -242,6 +243,7 @@ HEADERS += folding.hh \
|
||||||
ankiconnector.h \
|
ankiconnector.h \
|
||||||
article_inspect.h \
|
article_inspect.h \
|
||||||
articlewebpage.h \
|
articlewebpage.h \
|
||||||
|
base/globalregex.hh \
|
||||||
globalbroadcaster.h \
|
globalbroadcaster.h \
|
||||||
iframeschemehandler.h \
|
iframeschemehandler.h \
|
||||||
inc_case_folding.hh \
|
inc_case_folding.hh \
|
||||||
|
@ -384,6 +386,7 @@ SOURCES += folding.cc \
|
||||||
ankiconnector.cpp \
|
ankiconnector.cpp \
|
||||||
article_inspect.cpp \
|
article_inspect.cpp \
|
||||||
articlewebpage.cpp \
|
articlewebpage.cpp \
|
||||||
|
base/globalregex.cc \
|
||||||
globalbroadcaster.cpp \
|
globalbroadcaster.cpp \
|
||||||
iframeschemehandler.cpp \
|
iframeschemehandler.cpp \
|
||||||
main.cc \
|
main.cc \
|
||||||
|
|
81
mdx.cc
81
mdx.cc
|
@ -42,6 +42,7 @@
|
||||||
|
|
||||||
#include "tiff.hh"
|
#include "tiff.hh"
|
||||||
#include "utils.hh"
|
#include "utils.hh"
|
||||||
|
#include "base/globalregex.hh"
|
||||||
|
|
||||||
namespace Mdx
|
namespace Mdx
|
||||||
{
|
{
|
||||||
|
@ -192,51 +193,6 @@ public:
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MdxRegex
|
|
||||||
{
|
|
||||||
MdxRegex() :
|
|
||||||
allLinksRe( "(?:<\\s*(a(?:rea)?|img|link|script|source)(?:\\s+[^>]+|\\s*)>)",
|
|
||||||
QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
wordCrossLink( "([\\s\"']href\\s*=)\\s*([\"'])entry://([^>#]*?)((?:#[^>]*?)?)\\2",
|
|
||||||
QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
anchorIdRe( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)", QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
anchorIdReWord( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)([^\"]*)", QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
anchorIdRe2( "([\\s\"'](?:name|id)\\s*=)\\s*(?=[^\"'])([^\\s\">]+)", QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
anchorLinkRe( "([\\s\"']href\\s*=\\s*[\"'])entry://#", QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
audioRe( "([\\s\"']href\\s*=)\\s*([\"'])sound://([^\">]+)\\2",
|
|
||||||
QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption ),
|
|
||||||
stylesRe( "([\\s\"']href\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://"
|
|
||||||
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
|
|
||||||
QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
stylesRe2( "([\\s\"']href\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://"
|
|
||||||
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
|
|
||||||
QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
inlineScriptRe( "<\\s*script(?:(?=\\s)(?:(?![\\s\"']src\\s*=)[^>])+|\\s*)>",
|
|
||||||
QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
closeScriptTagRe( "<\\s*/script\\s*>", QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
srcRe( "([\\s\"']src\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://"
|
|
||||||
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2",
|
|
||||||
QRegularExpression::CaseInsensitiveOption ),
|
|
||||||
srcRe2( "([\\s\"']src\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://"
|
|
||||||
"|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)",
|
|
||||||
QRegularExpression::CaseInsensitiveOption )
|
|
||||||
{
|
|
||||||
}
|
|
||||||
QRegularExpression allLinksRe;
|
|
||||||
QRegularExpression wordCrossLink;
|
|
||||||
QRegularExpression anchorIdRe;
|
|
||||||
QRegularExpression anchorIdReWord;
|
|
||||||
QRegularExpression anchorIdRe2;
|
|
||||||
QRegularExpression anchorLinkRe;
|
|
||||||
QRegularExpression audioRe;
|
|
||||||
QRegularExpression stylesRe;
|
|
||||||
QRegularExpression stylesRe2;
|
|
||||||
QRegularExpression inlineScriptRe;
|
|
||||||
QRegularExpression closeScriptTagRe;
|
|
||||||
QRegularExpression srcRe;
|
|
||||||
QRegularExpression srcRe2;
|
|
||||||
};
|
|
||||||
|
|
||||||
class MdxDictionary: public BtreeIndexing::BtreeDictionary
|
class MdxDictionary: public BtreeIndexing::BtreeDictionary
|
||||||
{
|
{
|
||||||
Mutex idxMutex;
|
Mutex idxMutex;
|
||||||
|
@ -256,8 +212,6 @@ class MdxDictionary: public BtreeIndexing::BtreeDictionary
|
||||||
string initError;
|
string initError;
|
||||||
QString cacheDirName;
|
QString cacheDirName;
|
||||||
|
|
||||||
static MdxRegex mdxRx;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
MdxDictionary( string const & id, string const & indexFile, vector<string> const & dictionaryFiles );
|
MdxDictionary( string const & id, string const & indexFile, vector<string> const & dictionaryFiles );
|
||||||
|
@ -347,8 +301,6 @@ private:
|
||||||
friend class MddResourceRequest;
|
friend class MddResourceRequest;
|
||||||
};
|
};
|
||||||
|
|
||||||
MdxRegex MdxDictionary::mdxRx;
|
|
||||||
|
|
||||||
MdxDictionary::MdxDictionary( string const & id, string const & indexFile,
|
MdxDictionary::MdxDictionary( string const & id, string const & indexFile,
|
||||||
vector<string> const & dictionaryFiles ):
|
vector<string> const & dictionaryFiles ):
|
||||||
BtreeDictionary( id, dictionaryFiles ),
|
BtreeDictionary( id, dictionaryFiles ),
|
||||||
|
@ -972,10 +924,11 @@ void MdxDictionary::loadArticle( uint32_t offset, string & articleText, bool noF
|
||||||
decompressed.constData() + recordInfo.recordOffset,
|
decompressed.constData() + recordInfo.recordOffset,
|
||||||
recordInfo.recordSize );
|
recordInfo.recordSize );
|
||||||
|
|
||||||
article = MdictParser::substituteStylesheet( article, styleSheets );
|
|
||||||
|
|
||||||
if( !noFilter )
|
if( !noFilter )
|
||||||
|
{
|
||||||
|
article = MdictParser::substituteStylesheet( article, styleSheets );
|
||||||
article = filterResource( articleId, article );
|
article = filterResource( articleId, article );
|
||||||
|
}
|
||||||
|
|
||||||
articleText = article.toStdString();
|
articleText = article.toStdString();
|
||||||
}
|
}
|
||||||
|
@ -987,7 +940,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
|
|
||||||
QString articleNewText;
|
QString articleNewText;
|
||||||
int linkPos = 0;
|
int linkPos = 0;
|
||||||
QRegularExpressionMatchIterator it = mdxRx.allLinksRe.globalMatch( article );
|
QRegularExpressionMatchIterator it = RX::Mdx::allLinksRe.globalMatch( article );
|
||||||
QMap<QString,QString> idMap;
|
QMap<QString,QString> idMap;
|
||||||
while( it.hasNext() )
|
while( it.hasNext() )
|
||||||
{
|
{
|
||||||
|
@ -1005,10 +958,10 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
|
|
||||||
if( !linkType.isEmpty() && linkType.at( 0 ) == 'a' )
|
if( !linkType.isEmpty() && linkType.at( 0 ) == 'a' )
|
||||||
{
|
{
|
||||||
QRegularExpressionMatch match = mdxRx.anchorIdRe.match( linkTxt );
|
QRegularExpressionMatch match = RX::Mdx::anchorIdRe.match( linkTxt );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
auto wordMatch = mdxRx.anchorIdReWord.match( linkTxt );
|
auto wordMatch = RX::Mdx::anchorIdReWord.match( linkTxt );
|
||||||
if( wordMatch.hasMatch() )
|
if( wordMatch.hasMatch() )
|
||||||
{
|
{
|
||||||
idMap.insert( wordMatch.captured( 3 ), uniquePrefix + wordMatch.captured( 3 ) );
|
idMap.insert( wordMatch.captured( 3 ), uniquePrefix + wordMatch.captured( 3 ) );
|
||||||
|
@ -1017,11 +970,11 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText );
|
newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
newLink = linkTxt.replace( mdxRx.anchorIdRe2, "\\1\"" + uniquePrefix + "\\2\"" );
|
newLink = linkTxt.replace( RX::Mdx::anchorIdRe2, "\\1\"" + uniquePrefix + "\\2\"" );
|
||||||
|
|
||||||
newLink = newLink.replace( mdxRx.anchorLinkRe, "\\1#" + uniquePrefix );
|
newLink = newLink.replace( RX::Mdx::anchorLinkRe, "\\1#" + uniquePrefix );
|
||||||
|
|
||||||
match = mdxRx.audioRe.match( newLink );
|
match = RX::Mdx::audioRe.match( newLink );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
// sounds and audio link script
|
// sounds and audio link script
|
||||||
|
@ -1032,7 +985,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
+ newLink.replace( match.capturedStart(), match.capturedLength(), newTxt );
|
+ newLink.replace( match.capturedStart(), match.capturedLength(), newTxt );
|
||||||
}
|
}
|
||||||
|
|
||||||
match = mdxRx.wordCrossLink.match( newLink );
|
match = RX::Mdx::wordCrossLink.match( newLink );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
QString newTxt = match.captured( 1 ) + match.captured( 2 )
|
QString newTxt = match.captured( 1 ) + match.captured( 2 )
|
||||||
|
@ -1050,7 +1003,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
if( linkType.compare( "link" ) == 0 )
|
if( linkType.compare( "link" ) == 0 )
|
||||||
{
|
{
|
||||||
// stylesheets
|
// stylesheets
|
||||||
QRegularExpressionMatch match = mdxRx.stylesRe.match( linkTxt );
|
QRegularExpressionMatch match = RX::Mdx::stylesRe.match( linkTxt );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
QString newText = match.captured( 1 ) + match.captured( 2 )
|
QString newText = match.captured( 1 ) + match.captured( 2 )
|
||||||
|
@ -1059,7 +1012,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText );
|
newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
newLink = linkTxt.replace( mdxRx.stylesRe2,
|
newLink = linkTxt.replace( RX::Mdx::stylesRe2,
|
||||||
"\\1\"bres://" + id + "/\\2\"" );
|
"\\1\"bres://" + id + "/\\2\"" );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -1067,13 +1020,13 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
|| linkType.compare( "source" ) == 0 )
|
|| linkType.compare( "source" ) == 0 )
|
||||||
{
|
{
|
||||||
// javascripts and images
|
// javascripts and images
|
||||||
QRegularExpressionMatch match = mdxRx.inlineScriptRe.match( linkTxt );
|
QRegularExpressionMatch match = RX::Mdx::inlineScriptRe.match( linkTxt );
|
||||||
if( linkType.at( 1 ) == 'c' // "script" tag
|
if( linkType.at( 1 ) == 'c' // "script" tag
|
||||||
&& match.hasMatch() && match.capturedLength() == linkTxt.length() )
|
&& match.hasMatch() && match.capturedLength() == linkTxt.length() )
|
||||||
{
|
{
|
||||||
// skip inline scripts
|
// skip inline scripts
|
||||||
articleNewText += linkTxt;
|
articleNewText += linkTxt;
|
||||||
match = mdxRx.closeScriptTagRe.match( article, linkPos );
|
match = RX::Mdx::closeScriptTagRe.match( article, linkPos );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
articleNewText += article.mid( linkPos, match.capturedEnd() - linkPos );
|
articleNewText += article.mid( linkPos, match.capturedEnd() - linkPos );
|
||||||
|
@ -1083,7 +1036,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
match = mdxRx.srcRe.match( linkTxt );
|
match = RX::Mdx::srcRe.match( linkTxt );
|
||||||
if( match.hasMatch() )
|
if( match.hasMatch() )
|
||||||
{
|
{
|
||||||
QString newText;
|
QString newText;
|
||||||
|
@ -1104,7 +1057,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar
|
||||||
newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText );
|
newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
newLink = linkTxt.replace( mdxRx.srcRe2,
|
newLink = linkTxt.replace( RX::Mdx::srcRe2,
|
||||||
"\\1\"bres://" + id + "/\\2\"" );
|
"\\1\"bres://" + id + "/\\2\"" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue