mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-23 20:14:05 +00:00
feat: use parallel threads to create the FTS index (#980)
* feat: use parallel threads to create the FTS index * [autofix.ci] apply automated fixes * feat: use parallel threads to create the FTS index * feat: add an option to control the thread count when creating the full-text index * [autofix.ci] apply automated fixes * feat: set the default parallel thread count to half the number of CPU cores * feat: initialize the default parallel thread count to 1/3 of the CPU cores --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
df4bc68248
commit
e5f91f6a3e
|
@ -1074,6 +1074,9 @@ Class load()
|
|||
|
||||
if ( !fts.namedItem( "maxDictionarySize" ).isNull() )
|
||||
c.preferences.fts.maxDictionarySize = fts.namedItem( "maxDictionarySize" ).toElement().text().toUInt();
|
||||
|
||||
if ( !fts.namedItem( "parallelThreads" ).isNull() )
|
||||
c.preferences.fts.parallelThreads = fts.namedItem( "parallelThreads" ).toElement().text().toUInt();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2052,6 +2055,10 @@ void save( Class const & c )
|
|||
opt = dd.createElement( "maxDictionarySize" );
|
||||
opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.maxDictionarySize ) ) );
|
||||
hd.appendChild( opt );
|
||||
|
||||
opt = dd.createElement( "parallelThreads" );
|
||||
opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.parallelThreads ) ) );
|
||||
hd.appendChild( opt );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <QDomDocument>
|
||||
#include <QLocale>
|
||||
#include <optional>
|
||||
#include <QThread>
|
||||
|
||||
/// GoldenDict's configuration
|
||||
namespace Config {
|
||||
|
@ -202,6 +203,7 @@ struct FullTextSearch
|
|||
bool enablePosition = false;
|
||||
|
||||
quint32 maxDictionarySize;
|
||||
quint32 parallelThreads = QThread::idealThreadCount() / 3 + 1;
|
||||
QByteArray dialogGeometry;
|
||||
QString disabledTypes;
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
/* This file is (c) 2014 Abs62
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
// xapian.h must be the first header included, to avoid collisions with macro definitions from other headers.
|
||||
#include "xapian.h"
|
||||
#include <cstdlib>
|
||||
#include "fulltextsearch.hh"
|
||||
|
@ -13,13 +14,9 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <QVector>
|
||||
|
||||
#include <QRegularExpression>
|
||||
|
||||
#include "wildcard.hh"
|
||||
#include "globalregex.hh"
|
||||
#include <QSemaphoreReleaser>
|
||||
|
||||
using std::vector;
|
||||
using std::string;
|
||||
|
@ -52,52 +49,6 @@ bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict )
|
|||
}
|
||||
}
|
||||
|
||||
/// Assembles the source text of a regular expression that matches the given
/// search words in sequence.
///
/// @param words                 Pattern fragments, one per search word; they are
///                              inserted verbatim (no escaping is done here).
/// @param searchMode            When equal to FTS::WholeWords each word is
///                              wrapped in "\b"; otherwise in "(?:\w*)".
/// @param distanceBetweenWords  Upper bound of the "{0,N}" gap quantifier placed
///                              between words; a negative value leaves the bound
///                              open ("{0,}").
/// @param hasCJK                Use a per-character gap and no word boundaries.
/// @param ignoreWordsOrder      Wrap every word after the first in an optional
///                              group "(...)?".
/// @return The pattern, enclosed in one outer capturing group.
static QString makeHiliteRegExpString( QStringList const & words,
                                       int searchMode,
                                       int distanceBetweenWords,
                                       bool hasCJK = false,
                                       bool ignoreWordsOrder = false )
{
  // Separator emitted before every word except the first one.
  QString gap = hasCJK ? QString( "(?:[\\W\\w]){0," ) : QString( "(?:\\W+\\w+){0," );
  if ( distanceBetweenWords >= 0 ) {
    gap += QString::number( distanceBetweenWords );
  }
  gap += "}";
  if ( !hasCJK ) {
    gap += "\\W+";
  }

  // Text placed on both sides of each word; stays empty for CJK input.
  QString edge;
  if ( !hasCJK ) {
    edge = ( searchMode == FTS::WholeWords ) ? QString( "\\b" ) : QString( "(?:\\w*)" );
  }

  QString pattern( "(" );
  bool isFirstWord = true;
  for ( const QString & word : words ) {
    const bool needsGap      = !isFirstWord;
    const bool optionalGroup = needsGap && ignoreWordsOrder;
    isFirstWord              = false;

    if ( needsGap ) {
      pattern += gap;
    }
    if ( optionalGroup ) {
      pattern += "(";
    }

    pattern += edge + word + edge;

    if ( optionalGroup ) {
      pattern += ")?";
    }
  }
  pattern += ")";

  return pattern;
}
|
||||
|
||||
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
|
||||
{
|
||||
|
|
|
@ -25,28 +25,30 @@ void Indexing::run()
|
|||
{
|
||||
try {
|
||||
timerThread->start();
|
||||
// First iteration - dictionaries with no more than MaxDictionarySizeForFastSearch articles
|
||||
for ( const auto & dictionary : dictionaries ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||
break;
|
||||
const int parallel_count = GlobalBroadcaster::instance()->getPreference()->fts.parallelThreads;
|
||||
QSemaphore sem( parallel_count < 1 ? 1 : parallel_count );
|
||||
|
||||
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
|
||||
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
|
||||
dictionary->makeFTSIndex( isCancelled, true );
|
||||
}
|
||||
QFutureSynchronizer< void > synchronizer;
|
||||
qDebug() << "starting create the fts with thread:" << parallel_count;
|
||||
for ( const auto & dictionary : dictionaries ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
|
||||
// synchronizer.setCancelOnWait( true );
|
||||
break;
|
||||
}
|
||||
|
||||
// Second iteration - all remaining dictionaries
|
||||
for ( const auto & dictionary : dictionaries ) {
|
||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||
break;
|
||||
|
||||
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
|
||||
sem.acquire();
|
||||
QFuture< void > const f = QtConcurrent::run( [ this, &sem, &dictionary ]() {
|
||||
QSemaphoreReleaser const _( sem );
|
||||
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
|
||||
dictionary->makeFTSIndex( isCancelled, false );
|
||||
} );
|
||||
synchronizer.addFuture( f );
|
||||
}
|
||||
}
|
||||
|
||||
qDebug() << "waiting for all the fts creation to finish.";
|
||||
synchronizer.waitForFinished();
|
||||
qDebug() << "finished/cancel all the fts creation";
|
||||
timerThread->quit();
|
||||
timerThread->wait();
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <QDir>
|
||||
#include <QFontDatabase>
|
||||
#include <QMessageBox>
|
||||
#include <QThread>
|
||||
#include <QWebEngineProfile>
|
||||
#include <QWebEngineSettings>
|
||||
#include <QStyleFactory>
|
||||
|
@ -373,6 +374,9 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ):
|
|||
ui.allowEpwing->hide();
|
||||
#endif
|
||||
ui.maxDictionarySize->setValue( p.fts.maxDictionarySize );
|
||||
|
||||
ui.parallelThreads->setMaximum( QThread::idealThreadCount() );
|
||||
ui.parallelThreads->setValue( p.fts.parallelThreads );
|
||||
}
|
||||
|
||||
void Preferences::buildDisabledTypes( QString & disabledTypes, bool is_checked, QString name )
|
||||
|
@ -506,6 +510,7 @@ Config::Preferences Preferences::getPreferences()
|
|||
|
||||
p.fts.enabled = ui.ftsGroupBox->isChecked();
|
||||
p.fts.maxDictionarySize = ui.maxDictionarySize->value();
|
||||
p.fts.parallelThreads = ui.parallelThreads->value();
|
||||
p.fts.enablePosition = ui.enablePosition->isChecked();
|
||||
|
||||
buildDisabledTypes( p.fts.disabledTypes, ui.allowAard->isChecked(), "AARD" );
|
||||
|
|
|
@ -1409,6 +1409,37 @@ download page.</string>
|
|||
<bool>true</bool>
|
||||
</property>
|
||||
<layout class="QGridLayout" name="gridLayout_4">
|
||||
<item row="3" column="1">
|
||||
<widget class="QCheckBox" name="allowZim">
|
||||
<property name="text">
|
||||
<string notr="true">Zim</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="0">
|
||||
<widget class="QCheckBox" name="allowMDict">
|
||||
<property name="text">
|
||||
<string notr="true">MDict</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="7" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="enablePosition">
|
||||
<property name="toolTip">
|
||||
<string>Positional information is required to use Xapian's phrase searching and NEAR operator, but the database size will be much bigger. Applies only to new incoming dictionaries.</string>
|
||||
</property>
|
||||
<property name="text">
|
||||
<string>Enable index with positional information</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="allowBGL">
|
||||
<property name="text">
|
||||
<string notr="true">BGL</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0">
|
||||
<widget class="QCheckBox" name="allowDictD">
|
||||
<property name="text">
|
||||
|
@ -1423,24 +1454,10 @@ download page.</string>
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QCheckBox" name="allowSlob">
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="allowSDict">
|
||||
<property name="text">
|
||||
<string notr="true">Slob</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="1">
|
||||
<widget class="QCheckBox" name="allowGls">
|
||||
<property name="text">
|
||||
<string notr="true">GLS</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="allowAard">
|
||||
<property name="text">
|
||||
<string notr="true">Aard</string>
|
||||
<string notr="true">SDict</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -1451,27 +1468,6 @@ download page.</string>
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="4" column="0">
|
||||
<widget class="QCheckBox" name="allowMDict">
|
||||
<property name="text">
|
||||
<string notr="true">MDict</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QCheckBox" name="allowZim">
|
||||
<property name="text">
|
||||
<string notr="true">Zim</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QCheckBox" name="allowStardict">
|
||||
<property name="text">
|
||||
<string notr="true">Stardict</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="3" column="0">
|
||||
<widget class="QCheckBox" name="allowDSL">
|
||||
<property name="text">
|
||||
|
@ -1479,17 +1475,24 @@ download page.</string>
|
|||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="0">
|
||||
<widget class="QCheckBox" name="allowBGL">
|
||||
<item row="5" column="1">
|
||||
<widget class="QCheckBox" name="allowGls">
|
||||
<property name="text">
|
||||
<string notr="true">BGL</string>
|
||||
<string notr="true">GLS</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="0">
|
||||
<widget class="QCheckBox" name="allowSDict">
|
||||
<item row="0" column="1">
|
||||
<widget class="QCheckBox" name="allowSlob">
|
||||
<property name="text">
|
||||
<string notr="true">SDict</string>
|
||||
<string notr="true">Slob</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QCheckBox" name="allowStardict">
|
||||
<property name="text">
|
||||
<string notr="true">Stardict</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
|
@ -1534,16 +1537,31 @@ download page.</string>
|
|||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
<item row="7" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="enablePosition">
|
||||
<property name="toolTip">
|
||||
<string>Positional information is required to use Xapian's phrase searching and NEAR operator, but the database size will be much bigger. Applies only to new incoming dictionaries.</string>
|
||||
</property>
|
||||
<item row="0" column="0">
|
||||
<widget class="QCheckBox" name="allowAard">
|
||||
<property name="text">
|
||||
<string>Enable index with positional information</string>
|
||||
<string notr="true">Aard</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="8" column="0">
|
||||
<layout class="QHBoxLayout" name="horizontalLayout_18">
|
||||
<item>
|
||||
<widget class="QLabel" name="label_7">
|
||||
<property name="text">
|
||||
<string>Create fulltext index with parallel threads </string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item>
|
||||
<widget class="QSpinBox" name="parallelThreads">
|
||||
<property name="minimum">
|
||||
<number>1</number>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
</item>
|
||||
|
|
Loading…
Reference in a new issue