From e5f91f6a3e8bbf0cef556192a6609a497d636d40 Mon Sep 17 00:00:00 2001 From: xiaoyifang <105986+xiaoyifang@users.noreply.github.com> Date: Thu, 20 Jul 2023 22:50:32 +0800 Subject: [PATCH] feat: use parallel thread to create the fts (#980) * feat: use parallel thread to create the fts * [autofix.ci] apply automated fixes * feat: use parallel thread to create the fts * feat: add an option to control the thread count when create the fulltext * [autofix.ci] apply automated fixes * feat: set default parallel thread count to half the cpu numbers * feat: initialize the default parallel threads to 1/3 of cpu cores --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- src/config.cc | 7 +++ src/config.hh | 2 + src/ftshelpers.cc | 51 +----------------- src/fulltextsearch.cc | 34 ++++++------ src/ui/preferences.cc | 5 ++ src/ui/preferences.ui | 118 ++++++++++++++++++++++++------------------ 6 files changed, 101 insertions(+), 116 deletions(-) diff --git a/src/config.cc b/src/config.cc index 8faf5a7a..6843b5d6 100644 --- a/src/config.cc +++ b/src/config.cc @@ -1074,6 +1074,9 @@ Class load() if ( !fts.namedItem( "maxDictionarySize" ).isNull() ) c.preferences.fts.maxDictionarySize = fts.namedItem( "maxDictionarySize" ).toElement().text().toUInt(); + + if ( !fts.namedItem( "parallelThreads" ).isNull() ) + c.preferences.fts.parallelThreads = fts.namedItem( "parallelThreads" ).toElement().text().toUInt(); } } @@ -2052,6 +2055,10 @@ void save( Class const & c ) opt = dd.createElement( "maxDictionarySize" ); opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.maxDictionarySize ) ) ); hd.appendChild( opt ); + + opt = dd.createElement( "parallelThreads" ); + opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.parallelThreads ) ) ); + hd.appendChild( opt ); } } diff --git a/src/config.hh b/src/config.hh index 73ff4298..7680e50c 100644 --- a/src/config.hh +++ b/src/config.hh @@ -16,6 +16,7 @@ #include 
#include #include +#include /// GoldenDict's configuration namespace Config { @@ -202,6 +203,7 @@ struct FullTextSearch bool enablePosition = false; quint32 maxDictionarySize; + quint32 parallelThreads = QThread::idealThreadCount() / 3 + 1; QByteArray dialogGeometry; QString disabledTypes; diff --git a/src/ftshelpers.cc b/src/ftshelpers.cc index 3465b813..7f948f38 100644 --- a/src/ftshelpers.cc +++ b/src/ftshelpers.cc @@ -1,5 +1,6 @@ /* This file is (c) 2014 Abs62 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ +//xapian.h must be the first header included, to avoid collisions with macro definitions from other headers. #include "xapian.h" #include #include "fulltextsearch.hh" @@ -13,13 +14,9 @@ #include #include -#include #include -#include "wildcard.hh" -#include "globalregex.hh" -#include using std::vector; using std::string; @@ -52,52 +49,6 @@ bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict ) } } -static QString makeHiliteRegExpString( QStringList const & words, - int searchMode, - int distanceBetweenWords, - bool hasCJK = false, - bool ignoreWordsOrder = false ) -{ - QString searchString( "(" ); - - QString stripWords( "(?:\\W+\\w+){0," ); - - if ( hasCJK ) { - stripWords = "(?:[\\W\\w]){0,"; - } - - if ( distanceBetweenWords >= 0 ) - stripWords += QString::number( distanceBetweenWords ); - stripWords += "}"; - - if ( !hasCJK ) { - stripWords += "\\W+"; - } - - QString boundWord( searchMode == FTS::WholeWords ? 
"\\b" : "(?:\\w*)" ); - if ( hasCJK ) { - //no boundary for CJK - boundWord.clear(); - } - - for ( int x = 0; x < words.size(); x++ ) { - if ( x ) { - searchString += stripWords; - if ( ignoreWordsOrder ) - searchString += "("; - } - - searchString += boundWord + words[ x ] + boundWord; - - if ( x ) { - if ( ignoreWordsOrder ) - searchString += ")?"; - } - } - - searchString += ")"; - return searchString; -} void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list ) { diff --git a/src/fulltextsearch.cc b/src/fulltextsearch.cc index 93ba5067..06d1f48a 100644 --- a/src/fulltextsearch.cc +++ b/src/fulltextsearch.cc @@ -25,28 +25,30 @@ void Indexing::run() { try { timerThread->start(); - // First iteration - dictionaries with no more MaxDictionarySizeForFastSearch articles + const int parallel_count = GlobalBroadcaster::instance()->getPreference()->fts.parallelThreads; + QSemaphore sem( parallel_count < 1 ? 1 : parallel_count ); + + QFutureSynchronizer< void > synchronizer; + qDebug() << "starting create the fts with thread:" << parallel_count; for ( const auto & dictionary : dictionaries ) { - if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) + if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) { + // synchronizer.setCancelOnWait( true ); break; + } if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) { - emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) ); - dictionary->makeFTSIndex( isCancelled, true ); + sem.acquire(); + QFuture< void > const f = QtConcurrent::run( [ this, &sem, &dictionary ]() { + QSemaphoreReleaser const _( sem ); + emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) ); + dictionary->makeFTSIndex( isCancelled, false ); + } ); + synchronizer.addFuture( f ); } } - - // Second iteration - all remaining dictionaries - for ( const auto & dictionary : dictionaries ) { - if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) - break; - - if ( 
dictionary->canFTS() && !dictionary->haveFTSIndex() ) { - emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) ); - dictionary->makeFTSIndex( isCancelled, false ); - } - } - + qDebug() << "waiting for all the fts creation to finish."; + synchronizer.waitForFinished(); + qDebug() << "finished/cancel all the fts creation"; timerThread->quit(); timerThread->wait(); } diff --git a/src/ui/preferences.cc b/src/ui/preferences.cc index 6e2f3995..2d2bdea1 100644 --- a/src/ui/preferences.cc +++ b/src/ui/preferences.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -373,6 +374,9 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ): ui.allowEpwing->hide(); #endif ui.maxDictionarySize->setValue( p.fts.maxDictionarySize ); + + ui.parallelThreads->setMaximum( QThread::idealThreadCount() ); + ui.parallelThreads->setValue( p.fts.parallelThreads ); } void Preferences::buildDisabledTypes( QString & disabledTypes, bool is_checked, QString name ) @@ -506,6 +510,7 @@ Config::Preferences Preferences::getPreferences() p.fts.enabled = ui.ftsGroupBox->isChecked(); p.fts.maxDictionarySize = ui.maxDictionarySize->value(); + p.fts.parallelThreads = ui.parallelThreads->value(); p.fts.enablePosition = ui.enablePosition->isChecked(); buildDisabledTypes( p.fts.disabledTypes, ui.allowAard->isChecked(), "AARD" ); diff --git a/src/ui/preferences.ui b/src/ui/preferences.ui index f54c4c31..a49fde24 100644 --- a/src/ui/preferences.ui +++ b/src/ui/preferences.ui @@ -1409,6 +1409,37 @@ download page. true + + + + Zim + + + + + + + MDict + + + + + + + Positional information is required to use Xapian's phrase searching and NEAR operator, but the database size will be much bigger. Applies only to new incoming dictionaries. + + + Enable index with positional information + + + + + + + BGL + + + @@ -1423,24 +1454,10 @@ download page. - - + + - Slob - - - - - - - GLS - - - - - - - Aard + SDict @@ -1451,27 +1468,6 @@ download page. 
- - - - MDict - - - - - - - Zim - - - - - - - Stardict - - - @@ -1479,17 +1475,24 @@ download page. - - + + - BGL + GLS - - + + - SDict + Slob + + + + + + + Stardict @@ -1534,16 +1537,31 @@ download page. - - - - Positional information is required to use Xapian's phrase searching and NEAR operator, but the database size will be much bigger. Applies only to new incoming dictionaries. - + + - Enable index with positional information + Aard + + + + + + Create fulltext index with parallel threads + + + + + + + 1 + + + + +