From 880f2df1b01c48d8eb46636fbe3088254f00c51d Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sat, 4 Jun 2022 22:13:29 +0800 Subject: [PATCH 01/10] clean code: remove useless code --- ftshelpers.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/ftshelpers.cc b/ftshelpers.cc index e155e06f..aa053dd3 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -522,11 +522,6 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, if( Utils::AtomicInt::loadAcquire( isCancelled ) ) break; - int pos = 0; - int matchWordNom = 0; - int unmatchWordNom = 0; - int nextNotFoundPos = 0; - QVector< QStringList > allOrders; QStringList order; @@ -543,9 +538,6 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, if( ignoreDiacritics ) articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); - //QStringList articleWords = articleText.split( needHandleBrackets ? splitWithBrackets : splitWithoutBrackets, - // Qt::SkipEmptyParts ); - if(ignoreWordsOrder) { bool allMatch = true; From dcaebf4948a8632ac2bd723988b5575fa1e9bdcc Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sat, 4 Jun 2022 23:22:14 +0800 Subject: [PATCH 02/10] imp. add QtConcurrent to checkArticle --- ftshelpers.cc | 197 ++++++++++++++++++++++++++++++++++++++++++++++++- ftshelpers.hh | 5 ++ goldendict.pro | 5 +- 3 files changed, 203 insertions(+), 4 deletions(-) diff --git a/ftshelpers.cc b/ftshelpers.cc index aa053dd3..65d0bc1a 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -17,6 +17,7 @@ #include #include "wildcard.hh" +#include using std::vector; using std::string; @@ -445,7 +446,7 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, QStringList const & words, QRegExp const & searchRegexp ) { - int results = 0; + // int results = 0; QString headword, articleText; QList< uint32_t > offsetsForHeadwords; QVector< QStringList > hiliteRegExps; @@ -617,7 +618,7 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, else foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); - results++; + ++results; if( maxResults > 0 && results >= maxResults ) break; } @@ -633,6 +634,195 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, } } +void FTSResultsRequest::checkSingleArticle( uint32_t offset, + QStringList const & words, + QRegExp const & searchRegexp ) +{ + qDebug()<<"checking"< offsetsForHeadwords; + QVector< QStringList > hiliteRegExps; + + QString id = QString::fromUtf8( dict.getId().c_str() ); + bool needHandleBrackets; + { + QString name = QString::fromUtf8( dict.getDictionaryFilenames()[ 0 ].c_str() ).toLower(); + needHandleBrackets = name.endsWith( ".dsl" ) || name.endsWith( ".dsl.dz" ); + } + + // RegExp mode + QRegularExpression searchRegularExpression; + if( searchMode == FTS::Wildcards ) + searchRegularExpression.setPattern( wildcardsToRegexp( searchRegexp.pattern() ) ); + else + searchRegularExpression.setPattern( searchRegexp.pattern() ); + QRegularExpression::PatternOptions patternOptions = + QRegularExpression::DotMatchesEverythingOption | QRegularExpression::UseUnicodePropertiesOption + | QRegularExpression::MultilineOption | QRegularExpression::InvertedGreedinessOption; + if( searchRegexp.caseSensitivity() == Qt::CaseInsensitive ) + patternOptions |= QRegularExpression::CaseInsensitiveOption; + searchRegularExpression.setPatternOptions( patternOptions ); + if( !searchRegularExpression.isValid() ) + searchRegularExpression.setPattern( "" ); + + if( searchMode == FTS::Wildcards || searchMode == FTS::RegExp ) + { + // for( int i = 0; i < offsets.size(); i++ ) + { + if( Utils::AtomicInt::loadAcquire( isCancelled ) ) + return; + + // auto article_address = offsets.at( i ); + dict.getArticleText( offset, headword, articleText ); + articleText = articleText.normalized( QString::NormalizationForm_C ); + + if( ignoreDiacritics ) + articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); + + if( articleText.contains( searchRegularExpression ) ) + { + if( headword.isEmpty() ) + offsetsForHeadwords.append( offset ); + else + foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); + + ++results; + if( maxResults > 0 && results >= maxResults ) + return; + } + } + } + else + { + // Words mode + + QVector< QPair< QString, bool > > wordsList; + if( ignoreWordsOrder ) + { + for( QStringList::const_iterator it = words.begin(); it != words.end(); ++it ) + wordsList.append( QPair< QString, bool >( *it, true ) ); + } + + // for( int i = 0; i < offsets.size(); i++ ) + { + if( Utils::AtomicInt::loadAcquire( isCancelled ) ) + return; + + if( ignoreWordsOrder ) + { + for( int i = 0; i < wordsList.size(); i++ ) + wordsList[ i ].second = true; + } + + dict.getArticleText( offset, headword, articleText ); + + articleText = articleText.normalized( QString::NormalizationForm_C ); + + if( ignoreDiacritics ) + articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); + + if( ignoreWordsOrder ) + { + bool allMatch = true; + foreach( QString word, words ) + { + if( containCJK( word ) || searchMode == FTS::PlainText ) + { + if( !articleText.contains( word ) ) + { + allMatch = false; + break; + } + } + else if( searchMode == FTS::WholeWords ) + { + QRegularExpression tmpReg( QString( "\b%1\b" ).arg( word ), + QRegularExpression::CaseInsensitiveOption + | QRegularExpression::UseUnicodePropertiesOption ); + if( !articleText.contains( tmpReg ) ) + { + allMatch = false; + break; + } + } + } + + if( !allMatch ) + { + return; + } + + if( distanceBetweenWords >= 0 ) + { + // the article text contains all the needed words. + // determine if distance restriction is meet + QRegularExpression replaceReg( QString( "(%1)" ).arg( words.join( '|' ) ), + QRegularExpression::CaseInsensitiveOption + | QRegularExpression::UseUnicodePropertiesOption ); + // use a string that could not be presented in the article. + articleText = articleText.replace( replaceReg, "=@XXXXX@=" ); + + auto hasCJK = false; + foreach( QString word, words ) + { + if( containCJK( word ) ) + { + hasCJK = true; + break; + } + } + + // hascjk value ,perhaps should depend on each word + auto searchRegStr = makeHiliteRegExpString( Utils::repeat( "=@XXXXX@=", words.size() ), + searchMode, + distanceBetweenWords, + hasCJK ); + QRegularExpression distanceOrderReg( searchRegStr, + QRegularExpression::CaseInsensitiveOption + | QRegularExpression::UseUnicodePropertiesOption ); + // use a string that could not be presented in the article. + if( articleText.contains( distanceOrderReg ) ) + { + if( headword.isEmpty() ) + offsetsForHeadwords.append( offset ); + else + foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); + + ++results; + if( maxResults > 0 && results >= maxResults ) + return; + } + } + } + else + { + if( articleText.contains( searchRegularExpression ) ) + { + if( headword.isEmpty() ) + offsetsForHeadwords.append( offset ); + else + foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); + + ++results; + if( maxResults > 0 && results >= maxResults ) + return; + } + } + } + } + if( !offsetsForHeadwords.isEmpty() ) + { + QVector< QString > headwords; + dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled ); + for( int x = 0; x < headwords.size(); x++ ) + foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), + id, + x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(), + matchCase ) ); + } +} + void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, sptr< ChunkedStorage::Reader > chunks, QStringList & indexWords, @@ -706,7 +896,8 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, dict.sortArticlesOffsetsForFTS( offsets, isCancelled ); - checkArticles( offsets, searchWords, regexp ); + // checkArticles( offsets, searchWords, regexp ); + QtConcurrent::blockingMapped(offsets,[&](uint32_t offset){checkSingleArticle(offset,searchWords,regexp); return 0;}); } void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex, diff --git a/ftshelpers.hh b/ftshelpers.hh index e5c712b9..4bd309ac 100644 --- a/ftshelpers.hh +++ b/ftshelpers.hh @@ -82,12 +82,16 @@ class FTSResultsRequest : public Dictionary::DataRequest QAtomicInt isCancelled; + QAtomicInt results; + QList< FTS::FtsHeadword > * foundHeadwords; void checkArticles( QVector< uint32_t > const & offsets, QStringList const & words, QRegExp const & searchRegexp = QRegExp() ); + void checkSingleArticle( uint32_t offset, QStringList const & words, QRegExp const & searchRegexp = QRegExp() ); + void indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, sptr< ChunkedStorage::Reader > chunks, QStringList & indexWords, @@ -127,6 +131,7 @@ public: searchString = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( searchString_ ) ) ); foundHeadwords = new QList< FTS::FtsHeadword >; + results = 0; QThreadPool::globalInstance()->start( [ this ]() { this->run(); }, -100 ); } diff --git a/goldendict.pro b/goldendict.pro index 440dda9a..7ab55f53 100644 --- a/goldendict.pro +++ b/goldendict.pro @@ -47,7 +47,8 @@ QT += core \ webenginewidgets\ webchannel\ printsupport \ - help + help \ + concurrent greaterThan(QT_MAJOR_VERSION, 5): QT += webenginecore core5compat @@ -242,6 +243,7 @@ HEADERS += folding.hh \ ankiconnector.h \ article_inspect.h \ articlewebpage.h \ + base/globalregex.hh \ globalbroadcaster.h \ iframeschemehandler.h \ inc_case_folding.hh \ @@ -384,6 +386,7 @@ SOURCES += folding.cc \ ankiconnector.cpp \ article_inspect.cpp \ articlewebpage.cpp \ + base/globalregex.cc \ globalbroadcaster.cpp \ iframeschemehandler.cpp \ main.cc \ From 6c82bf71b6110bc0dfac49ac0d6cfc16b7e9abea Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sun, 5 Jun 2022 10:44:40 +0800 Subject: [PATCH 03/10] improve: move temporary to global static --- base/globalregex.cc | 50 +++++++++++++++++++++++++++++ base/globalregex.hh | 40 +++++++++++++++++++++++ ftshelpers.cc | 78 ++++++++++++++++++--------------------------- mdx.cc | 76 ++++++++----------------------------------- 4 files changed, 135 insertions(+), 109 deletions(-) create mode 100644 base/globalregex.cc create mode 100644 base/globalregex.hh diff --git a/base/globalregex.cc b/base/globalregex.cc new file mode 100644 index 00000000..fab12fe5 --- /dev/null +++ b/base/globalregex.cc @@ -0,0 +1,50 @@ +#include "globalregex.hh" +#include "fulltextsearch.hh" + +using namespace RX; + +QRegularExpression Ftx::regBrackets( + "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}", + QRegularExpression::UseUnicodePropertiesOption ); +QRegularExpression Ftx::regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption ); + +QRegularExpression Ftx::spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption ); +QRegularExpression Ftx::wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}", + QRegularExpression::UseUnicodePropertiesOption ); +QRegularExpression Ftx::setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Ftx::regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", + QRegularExpression::CaseInsensitiveOption ); + + +//mdx + +QRegularExpression Mdx::allLinksRe( "(?:<\\s*(a(?:rea)?|img|link|script|source)(?:\\s+[^>]+|\\s*)>)", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::wordCrossLink( "([\\s\"']href\\s*=)\\s*([\"'])entry://([^>#]*?)((?:#[^>]*?)?)\\2", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::anchorIdRe( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::anchorIdReWord( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)([^\"]*)", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::anchorIdRe2( "([\\s\"'](?:name|id)\\s*=)\\s*(?=[^\"'])([^\\s\">]+)", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::anchorLinkRe( "([\\s\"']href\\s*=\\s*[\"'])entry://#", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::audioRe( "([\\s\"']href\\s*=)\\s*([\"'])sound://([^\">]+)\\2", + QRegularExpression::CaseInsensitiveOption + | QRegularExpression::InvertedGreedinessOption ); +QRegularExpression Mdx::stylesRe( "([\\s\"']href\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://" + "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::stylesRe2( "([\\s\"']href\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://" + "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::inlineScriptRe( "<\\s*script(?:(?=\\s)(?:(?![\\s\"']src\\s*=)[^>])+|\\s*)>", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::closeScriptTagRe( "<\\s*/script\\s*>", QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::srcRe( "([\\s\"']src\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://" + "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2", + QRegularExpression::CaseInsensitiveOption ); +QRegularExpression Mdx::srcRe2( "([\\s\"']src\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://" + "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)", + QRegularExpression::CaseInsensitiveOption ); diff --git a/base/globalregex.hh b/base/globalregex.hh new file mode 100644 index 00000000..012aba0d --- /dev/null +++ b/base/globalregex.hh @@ -0,0 +1,40 @@ +#ifndef GLOBALREGEX_HH +#define GLOBALREGEX_HH + +#include + +namespace RX +{ +class Ftx +{ +public: + static QRegularExpression regBrackets; + static QRegularExpression regSplit; + static QRegularExpression spacesRegExp; + static QRegularExpression wordRegExp; + static QRegularExpression setsRegExp; + static QRegularExpression regexRegExp; +}; + + +class Mdx +{ +public: + static QRegularExpression allLinksRe; + static QRegularExpression wordCrossLink; + static QRegularExpression anchorIdRe; + static QRegularExpression anchorIdReWord; + static QRegularExpression anchorIdRe2; + static QRegularExpression anchorLinkRe; + static QRegularExpression audioRe; + static QRegularExpression stylesRe; + static QRegularExpression stylesRe2; + static QRegularExpression inlineScriptRe; + static QRegularExpression closeScriptTagRe; + static QRegularExpression srcRe; + static QRegularExpression srcRe2; +}; + +} // namespace RX + +#endif // GLOBALREGEX_HH diff --git a/ftshelpers.cc b/ftshelpers.cc index 65d0bc1a..6b3942e3 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -18,6 +18,7 @@ #include "wildcard.hh" #include +#include "base/globalregex.hh" using std::vector; using std::string; @@ -148,36 +149,36 @@ bool parseSearchString( QString const & str, QStringList & indexWords, { searchWords.clear(); indexWords.clear(); - QRegularExpression spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption ); - QRegularExpression wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}", QRegularExpression::UseUnicodePropertiesOption ); - QRegularExpression setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption ); - QRegularExpression regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", QRegularExpression::CaseInsensitiveOption); + // QRegularExpression spacesRegExp( "\\W+", QRegularExpression::UseUnicodePropertiesOption ); + // QRegularExpression wordRegExp( QString( "\\w{" ) + QString::number( FTS::MinimumWordSize ) + ",}", QRegularExpression::UseUnicodePropertiesOption ); + // QRegularExpression setsRegExp( "\\[[^\\]]+\\]", QRegularExpression::CaseInsensitiveOption ); + // QRegularExpression regexRegExp( "\\\\[afnrtvdDwWsSbB]|\\\\x([0-9A-Fa-f]{4})|\\\\0([0-7]{3})", QRegularExpression::CaseInsensitiveOption); hasCJK = containCJK( str ); if( searchMode == FTS::WholeWords || searchMode == FTS::PlainText ) { // Make words list for search in article text - searchWords = str.normalized( QString::NormalizationForm_C ).split( spacesRegExp, Qt::SkipEmptyParts ); + searchWords = str.normalized( QString::NormalizationForm_C ).split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts ); // Make words list for index search QStringList list = - str.normalized( QString::NormalizationForm_C ).toLower().split( spacesRegExp, Qt::SkipEmptyParts ); + str.normalized( QString::NormalizationForm_C ).toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts ); QString searchString; if( hasCJK ) { - tokenizeCJK( indexWords, wordRegExp, list ); + tokenizeCJK( indexWords, RX::Ftx::wordRegExp, list ); // QStringList allWords = str.split( spacesRegExp, Qt::SkipEmptyParts ); searchString = makeHiliteRegExpString( list, searchMode, distanceBetweenWords, hasCJK , ignoreWordsOrder); } else { - indexWords = list.filter( wordRegExp ); + indexWords = list.filter( RX::Ftx::wordRegExp ); indexWords.removeDuplicates(); // Make regexp for results hilite - QStringList allWords = str.split( spacesRegExp, Qt::SkipEmptyParts ); + QStringList allWords = str.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts ); searchString = makeHiliteRegExpString( allWords, searchMode, distanceBetweenWords,false, ignoreWordsOrder ); } searchRegExp = QRegExp( searchString, matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive, QRegExp::RegExp2 ); @@ -192,21 +193,21 @@ bool parseSearchString( QString const & str, QStringList & indexWords, // Remove RegExp commands if( searchMode == FTS::RegExp ) - tmp.replace( regexRegExp, " " ); + tmp.replace( RX::Ftx::regexRegExp, " " ); // Remove all symbol sets - tmp.replace( setsRegExp, " " ); + tmp.replace( RX::Ftx::setsRegExp, " " ); QStringList list = tmp.normalized( QString::NormalizationForm_C ) - .toLower().split( spacesRegExp, Qt::SkipEmptyParts ); + .toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts ); if( hasCJK ) { - tokenizeCJK( indexWords, wordRegExp, list ); + tokenizeCJK( indexWords, RX::Ftx::wordRegExp, list ); } else { - indexWords = list.filter( wordRegExp ); + indexWords = list.filter( RX::Ftx::wordRegExp ); indexWords.removeDuplicates(); } @@ -225,9 +226,9 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText, if( articleText.isEmpty() ) return; - QRegularExpression regBrackets( "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}", - QRegularExpression::UseUnicodePropertiesOption); - QRegularExpression regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption ); + // QRegularExpression regBrackets( "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}", + // QRegularExpression::UseUnicodePropertiesOption); + // QRegularExpression regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption ); QStringList articleWords = articleText.normalized( QString::NormalizationForm_C ) .split( QRegularExpression( handleRoundBrackets ? "[^\\w\\(\\)\\p{M}]+" : "[^\\w\\p{M}]+", @@ -276,12 +277,12 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText, // Special handle for words with round brackets - DSL feature QStringList list; - QStringList oldVariant = word.split( regSplit, Qt::SkipEmptyParts ); + QStringList oldVariant = word.split( RX::Ftx::regSplit, Qt::SkipEmptyParts ); for( QStringList::iterator it = oldVariant.begin(); it != oldVariant.end(); ++it ) if( it->size() >= FTS::MinimumWordSize && !list.contains( *it ) ) list.append( *it ); - QRegularExpressionMatch match = regBrackets.match( word ); + QRegularExpressionMatch match = RX::Ftx::regBrackets.match( word ); if( match.hasMatch() ) { QStringList parts = match.capturedTexts(); @@ -452,15 +453,6 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, QVector< QStringList > hiliteRegExps; QString id = QString::fromUtf8( dict.getId().c_str() ); - bool needHandleBrackets; - { - QString name = QString::fromUtf8( dict.getDictionaryFilenames()[ 0 ].c_str() ).toLower(); - needHandleBrackets = name.endsWith( ".dsl" ) || name.endsWith( ".dsl.dz" ); - } - - QRegularExpression regBrackets( "(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+)(\\([\\w\\p{M}]+\\)){0,1}([\\w\\p{M}]+){0,1}(\\([\\w\\p{M}]+\\)){0,1}", - QRegularExpression::UseUnicodePropertiesOption); - QRegularExpression regSplit( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption ); // RegExp mode QRegularExpression searchRegularExpression; @@ -507,9 +499,6 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, { // Words mode - QRegularExpression splitWithBrackets( "[^\\w\\(\\)\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption ); - QRegularExpression splitWithoutBrackets( "[^\\w\\p{M}]+", QRegularExpression::UseUnicodePropertiesOption ); - Qt::CaseSensitivity cs = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive; QVector< QPair< QString, bool > > wordsList; if( ignoreWordsOrder ) @@ -602,7 +591,7 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, else foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); - results++; + ++results; if( maxResults > 0 && results >= maxResults ) break; } @@ -645,11 +634,6 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, QVector< QStringList > hiliteRegExps; QString id = QString::fromUtf8( dict.getId().c_str() ); - bool needHandleBrackets; - { - QString name = QString::fromUtf8( dict.getDictionaryFilenames()[ 0 ].c_str() ).toLower(); - needHandleBrackets = name.endsWith( ".dsl" ) || name.endsWith( ".dsl.dz" ); - } // RegExp mode QRegularExpression searchRegularExpression; @@ -757,9 +741,9 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, { // the article text contains all the needed words. // determine if distance restriction is meet - QRegularExpression replaceReg( QString( "(%1)" ).arg( words.join( '|' ) ), - QRegularExpression::CaseInsensitiveOption - | QRegularExpression::UseUnicodePropertiesOption ); + const QRegularExpression replaceReg( QString( "(%1)" ).arg( words.join( '|' ) ), + QRegularExpression::CaseInsensitiveOption + | QRegularExpression::UseUnicodePropertiesOption ); // use a string that could not be presented in the article. articleText = articleText.replace( replaceReg, "=@XXXXX@=" ); @@ -774,13 +758,13 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, } // hascjk value ,perhaps should depend on each word - auto searchRegStr = makeHiliteRegExpString( Utils::repeat( "=@XXXXX@=", words.size() ), - searchMode, - distanceBetweenWords, - hasCJK ); - QRegularExpression distanceOrderReg( searchRegStr, - QRegularExpression::CaseInsensitiveOption - | QRegularExpression::UseUnicodePropertiesOption ); + const auto searchRegStr = makeHiliteRegExpString( Utils::repeat( "=@XXXXX@=", words.size() ), + searchMode, + distanceBetweenWords, + hasCJK ); + const QRegularExpression distanceOrderReg( searchRegStr, + QRegularExpression::CaseInsensitiveOption + | QRegularExpression::UseUnicodePropertiesOption ); // use a string that could not be presented in the article. if( articleText.contains( distanceOrderReg ) ) { diff --git a/mdx.cc b/mdx.cc index caa3ec0f..fdada233 100644 --- a/mdx.cc +++ b/mdx.cc @@ -42,6 +42,7 @@ #include "tiff.hh" #include "utils.hh" +#include "base/globalregex.hh" namespace Mdx { @@ -192,51 +193,6 @@ public: }; -struct MdxRegex -{ - MdxRegex() : - allLinksRe( "(?:<\\s*(a(?:rea)?|img|link|script|source)(?:\\s+[^>]+|\\s*)>)", - QRegularExpression::CaseInsensitiveOption ), - wordCrossLink( "([\\s\"']href\\s*=)\\s*([\"'])entry://([^>#]*?)((?:#[^>]*?)?)\\2", - QRegularExpression::CaseInsensitiveOption ), - anchorIdRe( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)", QRegularExpression::CaseInsensitiveOption ), - anchorIdReWord( "([\\s\"'](?:name|id)\\s*=)\\s*([\"'])\\s*(?=\\S)([^\"]*)", QRegularExpression::CaseInsensitiveOption ), - anchorIdRe2( "([\\s\"'](?:name|id)\\s*=)\\s*(?=[^\"'])([^\\s\">]+)", QRegularExpression::CaseInsensitiveOption ), - anchorLinkRe( "([\\s\"']href\\s*=\\s*[\"'])entry://#", QRegularExpression::CaseInsensitiveOption ), - audioRe( "([\\s\"']href\\s*=)\\s*([\"'])sound://([^\">]+)\\2", - QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption ), - stylesRe( "([\\s\"']href\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://" - "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2", - QRegularExpression::CaseInsensitiveOption ), - stylesRe2( "([\\s\"']href\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://" - "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)", - QRegularExpression::CaseInsensitiveOption ), - inlineScriptRe( "<\\s*script(?:(?=\\s)(?:(?![\\s\"']src\\s*=)[^>])+|\\s*)>", - QRegularExpression::CaseInsensitiveOption ), - closeScriptTagRe( "<\\s*/script\\s*>", QRegularExpression::CaseInsensitiveOption ), - srcRe( "([\\s\"']src\\s*=)\\s*([\"'])(?!\\s*\\b(?:(?:bres|https?|ftp)://" - "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\">]+)\\2", - QRegularExpression::CaseInsensitiveOption ), - srcRe2( "([\\s\"']src\\s*=)\\s*(?![\\s\"']|\\b(?:(?:bres|https?|ftp)://" - "|(?:data|javascript):))(?:file://)?[\\x00-\\x1f\\x7f]*\\.*/?([^\\s\">]+)", - QRegularExpression::CaseInsensitiveOption ) - { - } - QRegularExpression allLinksRe; - QRegularExpression wordCrossLink; - QRegularExpression anchorIdRe; - QRegularExpression anchorIdReWord; - QRegularExpression anchorIdRe2; - QRegularExpression anchorLinkRe; - QRegularExpression audioRe; - QRegularExpression stylesRe; - QRegularExpression stylesRe2; - QRegularExpression inlineScriptRe; - QRegularExpression closeScriptTagRe; - QRegularExpression srcRe; - QRegularExpression srcRe2; -}; - class MdxDictionary: public BtreeIndexing::BtreeDictionary { Mutex idxMutex; @@ -256,8 +212,6 @@ class MdxDictionary: public BtreeIndexing::BtreeDictionary string initError; QString cacheDirName; - static MdxRegex mdxRx; - public: MdxDictionary( string const & id, string const & indexFile, vector const & dictionaryFiles ); @@ -347,8 +301,6 @@ private: friend class MddResourceRequest; }; -MdxRegex MdxDictionary::mdxRx; - MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector const & dictionaryFiles ): BtreeDictionary( id, dictionaryFiles ), @@ -987,7 +939,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar QString articleNewText; int linkPos = 0; - QRegularExpressionMatchIterator it = mdxRx.allLinksRe.globalMatch( article ); + QRegularExpressionMatchIterator it = RX::Mdx::allLinksRe.globalMatch( article ); QMap idMap; while( it.hasNext() ) { @@ -1005,10 +957,10 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar if( !linkType.isEmpty() && linkType.at( 0 ) == 'a' ) { - QRegularExpressionMatch match = mdxRx.anchorIdRe.match( linkTxt ); + QRegularExpressionMatch match = RX::Mdx::anchorIdRe.match( linkTxt ); if( match.hasMatch() ) { - auto wordMatch = mdxRx.anchorIdReWord.match( linkTxt ); + auto wordMatch = RX::Mdx::anchorIdReWord.match( linkTxt ); if( wordMatch.hasMatch() ) { idMap.insert( wordMatch.captured( 3 ), uniquePrefix + wordMatch.captured( 3 ) ); @@ -1017,11 +969,11 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText ); } else - newLink = linkTxt.replace( mdxRx.anchorIdRe2, "\\1\"" + uniquePrefix + "\\2\"" ); + newLink = linkTxt.replace( RX::Mdx::anchorIdRe2, "\\1\"" + uniquePrefix + "\\2\"" ); - newLink = newLink.replace( mdxRx.anchorLinkRe, "\\1#" + uniquePrefix ); + newLink = newLink.replace( RX::Mdx::anchorLinkRe, "\\1#" + uniquePrefix ); - match = mdxRx.audioRe.match( newLink ); + match = RX::Mdx::audioRe.match( newLink ); if( match.hasMatch() ) { // sounds and audio link script @@ -1032,7 +984,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar + newLink.replace( match.capturedStart(), match.capturedLength(), newTxt ); } - match = mdxRx.wordCrossLink.match( newLink ); + match = RX::Mdx::wordCrossLink.match( newLink ); if( match.hasMatch() ) { QString newTxt = match.captured( 1 ) + match.captured( 2 ) @@ -1050,7 +1002,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar if( linkType.compare( "link" ) == 0 ) { // stylesheets - QRegularExpressionMatch match = mdxRx.stylesRe.match( linkTxt ); + QRegularExpressionMatch match = RX::Mdx::stylesRe.match( linkTxt ); if( match.hasMatch() ) { QString newText = match.captured( 1 ) + match.captured( 2 ) @@ -1059,7 +1011,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText ); } else - newLink = linkTxt.replace( mdxRx.stylesRe2, + newLink = linkTxt.replace( RX::Mdx::stylesRe2, "\\1\"bres://" + id + "/\\2\"" ); } else @@ -1067,13 +1019,13 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar || linkType.compare( "source" ) == 0 ) { // javascripts and images - QRegularExpressionMatch match = mdxRx.inlineScriptRe.match( linkTxt ); + QRegularExpressionMatch match = RX::Mdx::inlineScriptRe.match( linkTxt ); if( linkType.at( 1 ) == 'c' // "script" tag && match.hasMatch() && match.capturedLength() == linkTxt.length() ) { // skip inline scripts articleNewText += linkTxt; - match = mdxRx.closeScriptTagRe.match( article, linkPos ); + match = RX::Mdx::closeScriptTagRe.match( article, linkPos ); if( match.hasMatch() ) { articleNewText += article.mid( linkPos, match.capturedEnd() - linkPos ); @@ -1083,7 +1035,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar } else { - match = mdxRx.srcRe.match( linkTxt ); + match = RX::Mdx::srcRe.match( linkTxt ); if( match.hasMatch() ) { QString newText; @@ -1104,7 +1056,7 @@ QString & MdxDictionary::filterResource( QString const & articleId, QString & ar newLink = linkTxt.replace( match.capturedStart(), match.capturedLength(), newText ); } else - newLink = linkTxt.replace( mdxRx.srcRe2, + newLink = linkTxt.replace( RX::Mdx::srcRe2, "\\1\"bres://" + id + "/\\2\"" ); } } From 33b3a95e347c9d56b0e1bf636d3d3475a1da5166 Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sun, 5 Jun 2022 12:48:52 +0800 Subject: [PATCH 04/10] add qt5.15.2 compatibility with qtconcurrent --- ftshelpers.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ftshelpers.cc b/ftshelpers.cc index 6b3942e3..788ac757 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -880,8 +880,11 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, dict.sortArticlesOffsetsForFTS( offsets, isCancelled ); - // checkArticles( offsets, searchWords, regexp ); - QtConcurrent::blockingMapped(offsets,[&](uint32_t offset){checkSingleArticle(offset,searchWords,regexp); return 0;}); +//#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0)) + QtConcurrent::blockingMap(offsets,[&](uint32_t offset){checkSingleArticle(offset,searchWords,regexp);}); +//#else +// checkArticles( offsets, searchWords, regexp ); +//#endif } void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex, From 886aa7f25dfa1463237f3ea0cf02cf126c1ba772 Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sun, 5 Jun 2022 13:11:01 +0800 Subject: [PATCH 05/10] github: PR check add concurrency restriction --- .github/workflows/macos-PR-check.yml | 4 +++- .github/workflows/ubuntu-PR-check.yml | 4 +++- .github/workflows/windows-PR-check.yml | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/macos-PR-check.yml b/.github/workflows/macos-PR-check.yml index def35f86..3fb902b7 100644 --- a/.github/workflows/macos-PR-check.yml +++ b/.github/workflows/macos-PR-check.yml @@ -1,5 +1,7 @@ name: macos-PR-check - +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true on: workflow_dispatch: diff --git a/.github/workflows/ubuntu-PR-check.yml b/.github/workflows/ubuntu-PR-check.yml index 5fad1eb0..781c5225 100644 --- a/.github/workflows/ubuntu-PR-check.yml +++ b/.github/workflows/ubuntu-PR-check.yml @@ -1,5 +1,7 @@ name: Ubuntu-PR-check - +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true on: workflow_dispatch: diff --git a/.github/workflows/windows-PR-check.yml b/.github/workflows/windows-PR-check.yml index ea7348bb..9a0724d6 100644 --- a/.github/workflows/windows-PR-check.yml +++ b/.github/workflows/windows-PR-check.yml @@ -1,5 +1,7 @@ name: Windows-PR-check - +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true on: workflow_dispatch: From 76969aa49d760ea5aa72efac2a9b7c1e6841cb3d Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sun, 5 Jun 2022 16:31:44 +0800 Subject: [PATCH 06/10] fulltext search add concurrent support --- ftshelpers.cc | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/ftshelpers.cc b/ftshelpers.cc index 788ac757..e3b3ae41 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -814,27 +814,26 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, { // Find articles which contains all requested words - vector< BtreeIndexing::WordArticleLink > links; - QSet< uint32_t > setOfOffsets, tmp; - uint32_t size; + QSet< uint32_t > setOfOffsets; if( indexWords.isEmpty() ) return; - int n = indexWords.length(); - for( int i = 0; i < n; i++ ) + auto findLinks = [ & ]( const QString & word ) -> QSet< uint32_t > { + QSet< uint32_t > tmp; + uint32_t size; + if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - return; + return tmp; - tmp.clear(); - - links = ftsIndex.findArticles( gd::toWString( indexWords.at( i ) ), ignoreDiacritics ); + vector< BtreeIndexing::WordArticleLink > links = + ftsIndex.findArticles( gd::toWString( word ), ignoreDiacritics ); for( unsigned x = 0; x < links.size(); x++ ) { if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - return; + return tmp; vector< char > chunk; char * linksPtr; @@ -843,24 +842,32 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk ); } - memcpy( &size, linksPtr, sizeof(uint32_t) ); - linksPtr += sizeof(uint32_t); + memcpy( &size, linksPtr, sizeof( uint32_t ) ); + linksPtr += sizeof( uint32_t ); for( uint32_t y = 0; y < size; y++ ) { tmp.insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) ); - linksPtr += sizeof(uint32_t); + linksPtr += sizeof( uint32_t ); } } links.clear(); - if( i == 0 ) - setOfOffsets = tmp; + return tmp; + }; + // int n = indexWords.length(); + auto sets = QtConcurrent::blockingMapped( indexWords, + findLinks ); + + int i = 0; + for( auto & elem : sets ) + { + if( i++ == 0 ) + setOfOffsets = elem; else - setOfOffsets = setOfOffsets.intersect( tmp ); + setOfOffsets = setOfOffsets.intersect( elem ); } - tmp.clear(); if( setOfOffsets.isEmpty() ) return; From 837dcfbf63db75c1e22243a483e3d6e4bf524187 Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Wed, 8 Jun 2022 21:38:30 +0800 Subject: [PATCH 07/10] qt5.15.2 does not work with qtconcurrent::blockingmapped --- ftshelpers.cc | 220 ++++++-------------------------------------------- 1 file changed, 24 insertions(+), 196 deletions(-) diff --git a/ftshelpers.cc b/ftshelpers.cc index e3b3ae41..997c05c0 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -447,180 +447,7 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, QStringList const & words, QRegExp const & searchRegexp ) { - // int results = 0; - QString headword, articleText; - QList< uint32_t > offsetsForHeadwords; - QVector< QStringList > hiliteRegExps; - - QString id = QString::fromUtf8( dict.getId().c_str() ); - - // RegExp mode - QRegularExpression searchRegularExpression; - if( searchMode == FTS::Wildcards ) - searchRegularExpression.setPattern( wildcardsToRegexp( searchRegexp.pattern() ) ); - else - searchRegularExpression.setPattern( searchRegexp.pattern() ); - QRegularExpression::PatternOptions patternOptions = - QRegularExpression::DotMatchesEverythingOption | QRegularExpression::UseUnicodePropertiesOption - | QRegularExpression::MultilineOption | QRegularExpression::InvertedGreedinessOption; - if( searchRegexp.caseSensitivity() == Qt::CaseInsensitive ) - patternOptions |= QRegularExpression::CaseInsensitiveOption; - searchRegularExpression.setPatternOptions( patternOptions ); - if( !searchRegularExpression.isValid() ) - searchRegularExpression.setPattern( "" ); - - if( searchMode == FTS::Wildcards || searchMode == FTS::RegExp ) - { - for( int i = 0; i < offsets.size(); i++ ) - { - if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - break; - - dict.getArticleText( offsets.at( i ), headword, articleText ); - articleText = articleText.normalized( QString::NormalizationForm_C ); - - if( ignoreDiacritics ) - articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); - - if( articleText.contains( searchRegularExpression ) ) - { - if( headword.isEmpty() ) - offsetsForHeadwords.append( offsets.at( i ) ); - else - foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); - - results++; - if( maxResults > 0 && results >= maxResults ) - break; - } - } - } - else - { - // Words mode - - Qt::CaseSensitivity cs = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive; - QVector< QPair< QString, bool > > wordsList; - if( ignoreWordsOrder ) - { - for( QStringList::const_iterator it = words.begin(); it != words.end(); ++it ) - wordsList.append( QPair< QString, bool >( *it, true ) ); - } - - for( int i = 0; i < offsets.size(); i++ ) - { - if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - break; - - QVector< QStringList > allOrders; - QStringList order; - - if( ignoreWordsOrder ) - { - for( int i = 0; i < wordsList.size(); i++ ) - wordsList[ i ].second = true; - } - - dict.getArticleText( offsets.at( i ), headword, articleText ); - - articleText = articleText.normalized( QString::NormalizationForm_C ); - - if( ignoreDiacritics ) - articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); - - if(ignoreWordsOrder) - { - bool allMatch = true; - foreach( QString word, words ) - { - if( containCJK( word ) || searchMode == FTS::PlainText ) - { - if( !articleText.contains( word ) ) - { - allMatch = false; - break; - } - } - else if( searchMode == FTS::WholeWords) - { - QRegularExpression tmpReg( QString( "\b%1\b" ).arg( word ),QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption ); - if( !articleText.contains( tmpReg) ) - { - allMatch = false; - break; - } - } - - } - - if(!allMatch) - { - continue; - } - - if( distanceBetweenWords >= 0 ) - { - // the article text contains all the needed words. - // determine if distance restriction is meet - QRegularExpression replaceReg( QString( "(%1)" ).arg( words.join( '|' ) ), - QRegularExpression::CaseInsensitiveOption | - QRegularExpression::UseUnicodePropertiesOption ); - // use a string that could not be presented in the article. - articleText = articleText.replace( replaceReg, "=@XXXXX@=" ); - - auto hasCJK = false; - foreach(QString word,words) - { - if(containCJK( word )) - { - hasCJK = true; - break; - } - } - - //hascjk value ,perhaps should depend on each word - auto searchRegStr = makeHiliteRegExpString( Utils::repeat( "=@XXXXX@=", words.size() ), searchMode, distanceBetweenWords,hasCJK ); - QRegularExpression distanceOrderReg( searchRegStr, - QRegularExpression::CaseInsensitiveOption | - QRegularExpression::UseUnicodePropertiesOption ); - // use a string that could not be presented in the article. - if(articleText.contains(distanceOrderReg)) - { - if( headword.isEmpty() ) - offsetsForHeadwords.append( offsets.at( i ) ); - else - foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); - - ++results; - if( maxResults > 0 && results >= maxResults ) - break; - } - } - - } - else - { - if( articleText.contains( searchRegularExpression ) ) - { - if( headword.isEmpty() ) - offsetsForHeadwords.append( offsets.at( i ) ); - else - foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); - - ++results; - if( maxResults > 0 && results >= maxResults ) - break; - } - } - } - } - if( !offsetsForHeadwords.isEmpty() ) - { - QVector< QString > headwords; - dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled ); - for( int x = 0; x < headwords.size(); x++ ) - foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), id, x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(), matchCase ) ); - } + QtConcurrent::blockingMap( offsets, [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } ); } void FTSResultsRequest::checkSingleArticle( uint32_t offset, @@ -819,13 +646,15 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, if( indexWords.isEmpty() ) return; - auto findLinks = [ & ]( const QString & word ) -> QSet< uint32_t > + QList< QSet< uint32_t > > addressLists; + + auto findLinks = [ & ]( const QString & word ) { QSet< uint32_t > tmp; uint32_t size; if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - return tmp; + addressLists<< tmp; vector< BtreeIndexing::WordArticleLink > links = ftsIndex.findArticles( gd::toWString( word ), ignoreDiacritics ); @@ -833,7 +662,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, { if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - return tmp; + addressLists<< tmp; vector< char > chunk; char * linksPtr; @@ -853,14 +682,13 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, links.clear(); - return tmp; + addressLists<< tmp; }; // int n = indexWords.length(); - auto sets = QtConcurrent::blockingMapped( indexWords, - findLinks ); + QtConcurrent::blockingMap( indexWords, findLinks ); int i = 0; - for( auto & elem : sets ) + for( auto & elem : addressLists ) { if( i++ == 0 ) setOfOffsets = elem; @@ -887,11 +715,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, dict.sortArticlesOffsetsForFTS( offsets, isCancelled ); -//#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0)) - QtConcurrent::blockingMap(offsets,[&](uint32_t offset){checkSingleArticle(offset,searchWords,regexp);}); -//#else -// checkArticles( offsets, searchWords, regexp ); -//#endif + checkArticles( offsets, searchWords, regexp ); } void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex, @@ -934,17 +758,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde if( !hieroglyphsList.empty() ) { - QSet< uint32_t > tmp; - vector< BtreeIndexing::WordArticleLink > links; - - for( int i = 0; i < hieroglyphsList.size(); i++ ) + QList< QSet< uint32_t > > sets; + auto fn_wordLink = [ & ](const QString & word ) { - links = ftsIndex.findArticles( gd::toWString( hieroglyphsList.at( i ) ) ); + QSet< uint32_t > tmp; + vector< BtreeIndexing::WordArticleLink > links = ftsIndex.findArticles( gd::toWString( word ) ); for( unsigned x = 0; x < links.size(); x++ ) { - if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - return; + sets<< tmp; vector< char > chunk; char * linksPtr; @@ -963,11 +785,17 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde } links.clear(); + sets<< tmp; + }; + QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink ); - if( i == 0 ) - setOfOffsets = tmp; + int i = 0; + for( auto & elem : sets ) + { + if( i++ == 0 ) + setOfOffsets = elem; else - setOfOffsets = setOfOffsets.intersect( tmp ); + setOfOffsets = setOfOffsets.intersect( elem ); } allWordsLinks[ wordNom ] = setOfOffsets; From 058ba392fd661c78d9915ff321fdc9e8774d7a27 Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sat, 11 Jun 2022 10:29:21 +0800 Subject: [PATCH 08/10] opt: fullindex need not to care about stylesheet --- mdx.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mdx.cc b/mdx.cc index fdada233..8998bfa5 100644 --- a/mdx.cc +++ b/mdx.cc @@ -924,10 +924,11 @@ void MdxDictionary::loadArticle( uint32_t offset, string & articleText, bool noF decompressed.constData() + recordInfo.recordOffset, recordInfo.recordSize ); - article = MdictParser::substituteStylesheet( article, styleSheets ); - if( !noFilter ) + { + article = MdictParser::substituteStylesheet( article, styleSheets ); article = filterResource( articleId, article ); + } articleText = article.toStdString(); } From ec5d2b12db49edcba13463eca69e4d61b29d19be Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sat, 11 Jun 2022 23:31:33 +0800 Subject: [PATCH 09/10] style:do not allow dictionary title to be selected --- article-style.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/article-style.css b/article-style.css index 959edbaa..4f2f871e 100644 --- a/article-style.css +++ b/article-style.css @@ -42,6 +42,11 @@ pre /*background: #ffffdd;*/ } +.gddicttitle +{ + user-select: none; +} + .gddictnamebodyseparator { clear: both; From e8bcaa225266601afbd8e4ebc2bfd5e12dec4fac Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Sat, 11 Jun 2022 23:37:58 +0800 Subject: [PATCH 10/10] style: move duplicate class together --- article-style-st-lingvo.css | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/article-style-st-lingvo.css b/article-style-st-lingvo.css index 8da59179..20974124 100644 --- a/article-style-st-lingvo.css +++ b/article-style-st-lingvo.css @@ -24,24 +24,19 @@ a:hover background: white; } -/* Dictionary's name heading */ -.gddictname -{ - border: 1px dotted black; padding: 0.2em; padding-left: 0.5em; - margin-top: 1.2em; margin-bottom: 0.1em; font-weight: bold; font-size: 14px; - background: #87CEEB; -} - /* The 'From ' string which preceeds dictionary name in the heading */ .gdfromprefix { display: none; } +/* Dictionary's name heading */ .gddictname { + padding: 0.2em; padding-left: 0.5em; + margin-bottom: 0.1em; + font-size: 14px; font-weight: normal; - float: right; border: 1px solid white; margin-top: 7px;