mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 00:14:06 +00:00
feat: use parallel thread to create the fts (#980)
* feat: use parallel thread to create the fts * [autofix.ci] apply automated fixes * feat: use parallel thread to create the fts * feat: add an option to control the thread count when create the fulltext * [autofix.ci] apply automated fixes * feat: set default parallel thread count to half the cpu numbers * feat: initialize the default parallel threads to 1/3 of cpu cores --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
df4bc68248
commit
e5f91f6a3e
|
@ -1074,6 +1074,9 @@ Class load()
|
||||||
|
|
||||||
if ( !fts.namedItem( "maxDictionarySize" ).isNull() )
|
if ( !fts.namedItem( "maxDictionarySize" ).isNull() )
|
||||||
c.preferences.fts.maxDictionarySize = fts.namedItem( "maxDictionarySize" ).toElement().text().toUInt();
|
c.preferences.fts.maxDictionarySize = fts.namedItem( "maxDictionarySize" ).toElement().text().toUInt();
|
||||||
|
|
||||||
|
if ( !fts.namedItem( "parallelThreads" ).isNull() )
|
||||||
|
c.preferences.fts.parallelThreads = fts.namedItem( "parallelThreads" ).toElement().text().toUInt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2052,6 +2055,10 @@ void save( Class const & c )
|
||||||
opt = dd.createElement( "maxDictionarySize" );
|
opt = dd.createElement( "maxDictionarySize" );
|
||||||
opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.maxDictionarySize ) ) );
|
opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.maxDictionarySize ) ) );
|
||||||
hd.appendChild( opt );
|
hd.appendChild( opt );
|
||||||
|
|
||||||
|
opt = dd.createElement( "parallelThreads" );
|
||||||
|
opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.parallelThreads ) ) );
|
||||||
|
hd.appendChild( opt );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include <QDomDocument>
|
#include <QDomDocument>
|
||||||
#include <QLocale>
|
#include <QLocale>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
#include <QThread>
|
||||||
|
|
||||||
/// GoldenDict's configuration
|
/// GoldenDict's configuration
|
||||||
namespace Config {
|
namespace Config {
|
||||||
|
@ -202,6 +203,7 @@ struct FullTextSearch
|
||||||
bool enablePosition = false;
|
bool enablePosition = false;
|
||||||
|
|
||||||
quint32 maxDictionarySize;
|
quint32 maxDictionarySize;
|
||||||
|
quint32 parallelThreads = QThread::idealThreadCount() / 3 + 1;
|
||||||
QByteArray dialogGeometry;
|
QByteArray dialogGeometry;
|
||||||
QString disabledTypes;
|
QString disabledTypes;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/* This file is (c) 2014 Abs62
|
/* This file is (c) 2014 Abs62
|
||||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||||
|
//xapian.h must at the first in the include header files to avoid collision with other macro definition.
|
||||||
#include "xapian.h"
|
#include "xapian.h"
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include "fulltextsearch.hh"
|
#include "fulltextsearch.hh"
|
||||||
|
@ -13,13 +14,9 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
#include <QVector>
|
|
||||||
|
|
||||||
#include <QRegularExpression>
|
#include <QRegularExpression>
|
||||||
|
|
||||||
#include "wildcard.hh"
|
|
||||||
#include "globalregex.hh"
|
|
||||||
#include <QSemaphoreReleaser>
|
|
||||||
|
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::string;
|
using std::string;
|
||||||
|
@ -52,52 +49,6 @@ bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static QString makeHiliteRegExpString( QStringList const & words,
|
|
||||||
int searchMode,
|
|
||||||
int distanceBetweenWords,
|
|
||||||
bool hasCJK = false,
|
|
||||||
bool ignoreWordsOrder = false )
|
|
||||||
{
|
|
||||||
QString searchString( "(" );
|
|
||||||
|
|
||||||
QString stripWords( "(?:\\W+\\w+){0," );
|
|
||||||
|
|
||||||
if ( hasCJK ) {
|
|
||||||
stripWords = "(?:[\\W\\w]){0,";
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( distanceBetweenWords >= 0 )
|
|
||||||
stripWords += QString::number( distanceBetweenWords );
|
|
||||||
stripWords += "}";
|
|
||||||
|
|
||||||
if ( !hasCJK ) {
|
|
||||||
stripWords += "\\W+";
|
|
||||||
}
|
|
||||||
|
|
||||||
QString boundWord( searchMode == FTS::WholeWords ? "\\b" : "(?:\\w*)" );
|
|
||||||
if ( hasCJK ) {
|
|
||||||
//no boundary for CJK
|
|
||||||
boundWord.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( int x = 0; x < words.size(); x++ ) {
|
|
||||||
if ( x ) {
|
|
||||||
searchString += stripWords;
|
|
||||||
if ( ignoreWordsOrder )
|
|
||||||
searchString += "(";
|
|
||||||
}
|
|
||||||
|
|
||||||
searchString += boundWord + words[ x ] + boundWord;
|
|
||||||
|
|
||||||
if ( x ) {
|
|
||||||
if ( ignoreWordsOrder )
|
|
||||||
searchString += ")?";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
searchString += ")";
|
|
||||||
return searchString;
|
|
||||||
}
|
|
||||||
|
|
||||||
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
|
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
|
||||||
{
|
{
|
||||||
|
|
|
@ -25,28 +25,30 @@ void Indexing::run()
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
timerThread->start();
|
timerThread->start();
|
||||||
// First iteration - dictionaries with no more MaxDictionarySizeForFastSearch articles
|
const int parallel_count = GlobalBroadcaster::instance()->getPreference()->fts.parallelThreads;
|
||||||
for ( const auto & dictionary : dictionaries ) {
|
QSemaphore sem( parallel_count < 1 ? 1 : parallel_count );
|
||||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
|
||||||
break;
|
|
||||||
|
|
||||||
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
|
QFutureSynchronizer< void > synchronizer;
|
||||||
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
|
qDebug() << "starting create the fts with thread:" << parallel_count;
|
||||||
dictionary->makeFTSIndex( isCancelled, true );
|
for ( const auto & dictionary : dictionaries ) {
|
||||||
}
|
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
|
||||||
|
// synchronizer.setCancelOnWait( true );
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Second iteration - all remaining dictionaries
|
|
||||||
for ( const auto & dictionary : dictionaries ) {
|
|
||||||
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
|
||||||
break;
|
|
||||||
|
|
||||||
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
|
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
|
||||||
|
sem.acquire();
|
||||||
|
QFuture< void > const f = QtConcurrent::run( [ this, &sem, &dictionary ]() {
|
||||||
|
QSemaphoreReleaser const _( sem );
|
||||||
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
|
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
|
||||||
dictionary->makeFTSIndex( isCancelled, false );
|
dictionary->makeFTSIndex( isCancelled, false );
|
||||||
|
} );
|
||||||
|
synchronizer.addFuture( f );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
qDebug() << "waiting for all the fts creation to finish.";
|
||||||
|
synchronizer.waitForFinished();
|
||||||
|
qDebug() << "finished/cancel all the fts creation";
|
||||||
timerThread->quit();
|
timerThread->quit();
|
||||||
timerThread->wait();
|
timerThread->wait();
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <QDir>
|
#include <QDir>
|
||||||
#include <QFontDatabase>
|
#include <QFontDatabase>
|
||||||
#include <QMessageBox>
|
#include <QMessageBox>
|
||||||
|
#include <QThread>
|
||||||
#include <QWebEngineProfile>
|
#include <QWebEngineProfile>
|
||||||
#include <QWebEngineSettings>
|
#include <QWebEngineSettings>
|
||||||
#include <QStyleFactory>
|
#include <QStyleFactory>
|
||||||
|
@ -373,6 +374,9 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ):
|
||||||
ui.allowEpwing->hide();
|
ui.allowEpwing->hide();
|
||||||
#endif
|
#endif
|
||||||
ui.maxDictionarySize->setValue( p.fts.maxDictionarySize );
|
ui.maxDictionarySize->setValue( p.fts.maxDictionarySize );
|
||||||
|
|
||||||
|
ui.parallelThreads->setMaximum( QThread::idealThreadCount() );
|
||||||
|
ui.parallelThreads->setValue( p.fts.parallelThreads );
|
||||||
}
|
}
|
||||||
|
|
||||||
void Preferences::buildDisabledTypes( QString & disabledTypes, bool is_checked, QString name )
|
void Preferences::buildDisabledTypes( QString & disabledTypes, bool is_checked, QString name )
|
||||||
|
@ -506,6 +510,7 @@ Config::Preferences Preferences::getPreferences()
|
||||||
|
|
||||||
p.fts.enabled = ui.ftsGroupBox->isChecked();
|
p.fts.enabled = ui.ftsGroupBox->isChecked();
|
||||||
p.fts.maxDictionarySize = ui.maxDictionarySize->value();
|
p.fts.maxDictionarySize = ui.maxDictionarySize->value();
|
||||||
|
p.fts.parallelThreads = ui.parallelThreads->value();
|
||||||
p.fts.enablePosition = ui.enablePosition->isChecked();
|
p.fts.enablePosition = ui.enablePosition->isChecked();
|
||||||
|
|
||||||
buildDisabledTypes( p.fts.disabledTypes, ui.allowAard->isChecked(), "AARD" );
|
buildDisabledTypes( p.fts.disabledTypes, ui.allowAard->isChecked(), "AARD" );
|
||||||
|
|
|
@ -1409,6 +1409,37 @@ download page.</string>
|
||||||
<bool>true</bool>
|
<bool>true</bool>
|
||||||
</property>
|
</property>
|
||||||
<layout class="QGridLayout" name="gridLayout_4">
|
<layout class="QGridLayout" name="gridLayout_4">
|
||||||
|
<item row="3" column="1">
|
||||||
|
<widget class="QCheckBox" name="allowZim">
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">Zim</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="4" column="0">
|
||||||
|
<widget class="QCheckBox" name="allowMDict">
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">MDict</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="7" column="0" colspan="2">
|
||||||
|
<widget class="QCheckBox" name="enablePosition">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string>Positional information is required to use Xapian's phrase searching and NEAR operator, but the database size will be much bigger. Applies only to new incoming dictionaries.</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>Enable index with positional information</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="1" column="0">
|
||||||
|
<widget class="QCheckBox" name="allowBGL">
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">BGL</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item row="2" column="0">
|
<item row="2" column="0">
|
||||||
<widget class="QCheckBox" name="allowDictD">
|
<widget class="QCheckBox" name="allowDictD">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
|
@ -1423,24 +1454,10 @@ download page.</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="0" column="1">
|
<item row="5" column="0">
|
||||||
<widget class="QCheckBox" name="allowSlob">
|
<widget class="QCheckBox" name="allowSDict">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string notr="true">Slob</string>
|
<string notr="true">SDict</string>
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="5" column="1">
|
|
||||||
<widget class="QCheckBox" name="allowGls">
|
|
||||||
<property name="text">
|
|
||||||
<string notr="true">GLS</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="0" column="0">
|
|
||||||
<widget class="QCheckBox" name="allowAard">
|
|
||||||
<property name="text">
|
|
||||||
<string notr="true">Aard</string>
|
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -1451,27 +1468,6 @@ download page.</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="4" column="0">
|
|
||||||
<widget class="QCheckBox" name="allowMDict">
|
|
||||||
<property name="text">
|
|
||||||
<string notr="true">MDict</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="3" column="1">
|
|
||||||
<widget class="QCheckBox" name="allowZim">
|
|
||||||
<property name="text">
|
|
||||||
<string notr="true">Zim</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="1" column="1">
|
|
||||||
<widget class="QCheckBox" name="allowStardict">
|
|
||||||
<property name="text">
|
|
||||||
<string notr="true">Stardict</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item row="3" column="0">
|
<item row="3" column="0">
|
||||||
<widget class="QCheckBox" name="allowDSL">
|
<widget class="QCheckBox" name="allowDSL">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
|
@ -1479,17 +1475,24 @@ download page.</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="1" column="0">
|
<item row="5" column="1">
|
||||||
<widget class="QCheckBox" name="allowBGL">
|
<widget class="QCheckBox" name="allowGls">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string notr="true">BGL</string>
|
<string notr="true">GLS</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="5" column="0">
|
<item row="0" column="1">
|
||||||
<widget class="QCheckBox" name="allowSDict">
|
<widget class="QCheckBox" name="allowSlob">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string notr="true">SDict</string>
|
<string notr="true">Slob</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="1" column="1">
|
||||||
|
<widget class="QCheckBox" name="allowStardict">
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">Stardict</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
@ -1534,16 +1537,31 @@ download page.</string>
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
<item row="7" column="0" colspan="2">
|
<item row="0" column="0">
|
||||||
<widget class="QCheckBox" name="enablePosition">
|
<widget class="QCheckBox" name="allowAard">
|
||||||
<property name="toolTip">
|
|
||||||
<string>Positional information is required to use Xapian's phrase searching and NEAR operator, but the database size will be much bigger. Applies only to new incoming dictionaries.</string>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Enable index with positional information</string>
|
<string notr="true">Aard</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="8" column="0">
|
||||||
|
<layout class="QHBoxLayout" name="horizontalLayout_18">
|
||||||
|
<item>
|
||||||
|
<widget class="QLabel" name="label_7">
|
||||||
|
<property name="text">
|
||||||
|
<string>Create fulltext index with parallel threads </string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QSpinBox" name="parallelThreads">
|
||||||
|
<property name="minimum">
|
||||||
|
<number>1</number>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
|
Loading…
Reference in a new issue