mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 04:24:09 +00:00
opt: optimize cjk fulltext search
1, add concurrent parallel task limitaion
This commit is contained in:
parent
f52251d640
commit
6f9c1067a3
|
@ -454,9 +454,11 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||||
return;
|
return;
|
||||||
sem.acquire();
|
sem.acquire();
|
||||||
QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
|
QFuture<void> f =QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
|
||||||
sem.release();
|
sem.release();
|
||||||
} );
|
} );
|
||||||
|
f.waitForFinished();
|
||||||
|
|
||||||
// QtConcurrent::blockingMap( offsets,
|
// QtConcurrent::blockingMap( offsets,
|
||||||
// [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } );
|
// [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } );
|
||||||
}
|
}
|
||||||
|
@ -466,7 +468,6 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
||||||
QStringList const & words,
|
QStringList const & words,
|
||||||
QRegExp const & searchRegexp )
|
QRegExp const & searchRegexp )
|
||||||
{
|
{
|
||||||
qDebug()<<"checking"<<offset<<QThread::currentThreadId();
|
|
||||||
// int results = 0;
|
// int results = 0;
|
||||||
QString headword, articleText;
|
QString headword, articleText;
|
||||||
QList< uint32_t > offsetsForHeadwords;
|
QList< uint32_t > offsetsForHeadwords;
|
||||||
|
@ -508,7 +509,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
||||||
if( headword.isEmpty() )
|
if( headword.isEmpty() )
|
||||||
offsetsForHeadwords.append( offset );
|
offsetsForHeadwords.append( offset );
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
Mutex::Lock _( dataMutex );
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||||
|
}
|
||||||
|
|
||||||
++results;
|
++results;
|
||||||
if( maxResults > 0 && results >= maxResults )
|
if( maxResults > 0 && results >= maxResults )
|
||||||
|
@ -610,7 +614,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
||||||
if( headword.isEmpty() )
|
if( headword.isEmpty() )
|
||||||
offsetsForHeadwords.append( offset );
|
offsetsForHeadwords.append( offset );
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
Mutex::Lock _( dataMutex );
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||||
|
}
|
||||||
|
|
||||||
++results;
|
++results;
|
||||||
if( maxResults > 0 && results >= maxResults )
|
if( maxResults > 0 && results >= maxResults )
|
||||||
|
@ -625,7 +632,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
||||||
if( headword.isEmpty() )
|
if( headword.isEmpty() )
|
||||||
offsetsForHeadwords.append( offset );
|
offsetsForHeadwords.append( offset );
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
Mutex::Lock _( dataMutex );
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||||
|
}
|
||||||
|
|
||||||
++results;
|
++results;
|
||||||
if( maxResults > 0 && results >= maxResults )
|
if( maxResults > 0 && results >= maxResults )
|
||||||
|
@ -639,10 +649,13 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
||||||
QVector< QString > headwords;
|
QVector< QString > headwords;
|
||||||
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
|
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
|
||||||
for( int x = 0; x < headwords.size(); x++ )
|
for( int x = 0; x < headwords.size(); x++ )
|
||||||
|
{
|
||||||
|
Mutex::Lock _( dataMutex );
|
||||||
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ),
|
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ),
|
||||||
id,
|
id,
|
||||||
x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(),
|
x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(),
|
||||||
matchCase ) );
|
matchCase ) );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -765,12 +778,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
||||||
|
|
||||||
int n = wordsList.size();
|
int n = wordsList.size();
|
||||||
if( !hieroglyphsList.isEmpty() )
|
if( !hieroglyphsList.isEmpty() )
|
||||||
|
{
|
||||||
|
wordsList += hieroglyphsList;
|
||||||
n += 1;
|
n += 1;
|
||||||
|
}
|
||||||
|
|
||||||
allWordsLinks.resize( n );
|
allWordsLinks.resize( n );
|
||||||
int wordNom = 0;
|
int wordNom = 0;
|
||||||
|
|
||||||
if( !hieroglyphsList.empty() )
|
if( !wordsList.empty() )
|
||||||
{
|
{
|
||||||
QList< QSet< uint32_t > > sets;
|
QList< QSet< uint32_t > > sets;
|
||||||
auto fn_wordLink = [ & ](const QString & word )
|
auto fn_wordLink = [ & ](const QString & word )
|
||||||
|
@ -805,7 +821,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
||||||
sets << tmp;
|
sets << tmp;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink );
|
QtConcurrent::blockingMap( wordsList, fn_wordLink );
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for( auto & elem : sets )
|
for( auto & elem : sets )
|
||||||
|
@ -816,63 +832,11 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
||||||
setOfOffsets = setOfOffsets.intersect( elem );
|
setOfOffsets = setOfOffsets.intersect( elem );
|
||||||
}
|
}
|
||||||
|
|
||||||
allWordsLinks[ wordNom ] = setOfOffsets;
|
// allWordsLinks[ wordNom ] = setOfOffsets;
|
||||||
setOfOffsets.clear();
|
// setOfOffsets.clear();
|
||||||
wordNom += 1;
|
wordNom += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !wordsList.isEmpty() )
|
|
||||||
{
|
|
||||||
QVector< BtreeIndexing::WordArticleLink > links;
|
|
||||||
links.reserve( wordsInIndex );
|
|
||||||
ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled );
|
|
||||||
|
|
||||||
for( int x = 0; x < links.size(); x++ )
|
|
||||||
{
|
|
||||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
|
||||||
return;
|
|
||||||
|
|
||||||
QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() );
|
|
||||||
|
|
||||||
if( ignoreDiacritics )
|
|
||||||
word = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( word ) ) );
|
|
||||||
|
|
||||||
for( int i = 0; i < wordsList.size(); i++ )
|
|
||||||
{
|
|
||||||
if( word.length() >= wordsList.at( i ).length() && word.contains( wordsList.at( i ) ) )
|
|
||||||
{
|
|
||||||
vector< char > chunk;
|
|
||||||
char * linksPtr;
|
|
||||||
{
|
|
||||||
// Mutex::Lock _( dict.getFtsMutex() );
|
|
||||||
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy( &size, linksPtr, sizeof(uint32_t) );
|
|
||||||
linksPtr += sizeof(uint32_t);
|
|
||||||
for( uint32_t y = 0; y < size; y++ )
|
|
||||||
{
|
|
||||||
allWordsLinks[ wordNom ].insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) );
|
|
||||||
linksPtr += sizeof(uint32_t);
|
|
||||||
}
|
|
||||||
wordNom += 1;
|
|
||||||
if( searchMode == FTS::PlainText || searchMode == FTS::WholeWords )
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
links.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
for( int i = 0; i < allWordsLinks.size(); i++ )
|
|
||||||
{
|
|
||||||
if( i == 0 )
|
|
||||||
setOfOffsets = allWordsLinks.at( i );
|
|
||||||
else
|
|
||||||
setOfOffsets = setOfOffsets.intersect( allWordsLinks.at( i ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
if( setOfOffsets.isEmpty() )
|
if( setOfOffsets.isEmpty() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue