mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
opt: optimize cjk fulltext search
1, add concurrent parallel task limitaion
This commit is contained in:
parent
f52251d640
commit
6f9c1067a3
|
@ -454,9 +454,11 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
|
|||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||
return;
|
||||
sem.acquire();
|
||||
QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
|
||||
QFuture<void> f =QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
|
||||
sem.release();
|
||||
} );
|
||||
f.waitForFinished();
|
||||
|
||||
// QtConcurrent::blockingMap( offsets,
|
||||
// [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } );
|
||||
}
|
||||
|
@ -466,7 +468,6 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
|||
QStringList const & words,
|
||||
QRegExp const & searchRegexp )
|
||||
{
|
||||
qDebug()<<"checking"<<offset<<QThread::currentThreadId();
|
||||
// int results = 0;
|
||||
QString headword, articleText;
|
||||
QList< uint32_t > offsetsForHeadwords;
|
||||
|
@ -508,7 +509,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
|||
if( headword.isEmpty() )
|
||||
offsetsForHeadwords.append( offset );
|
||||
else
|
||||
{
|
||||
Mutex::Lock _( dataMutex );
|
||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||
}
|
||||
|
||||
++results;
|
||||
if( maxResults > 0 && results >= maxResults )
|
||||
|
@ -610,7 +614,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
|||
if( headword.isEmpty() )
|
||||
offsetsForHeadwords.append( offset );
|
||||
else
|
||||
{
|
||||
Mutex::Lock _( dataMutex );
|
||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||
}
|
||||
|
||||
++results;
|
||||
if( maxResults > 0 && results >= maxResults )
|
||||
|
@ -625,7 +632,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
|||
if( headword.isEmpty() )
|
||||
offsetsForHeadwords.append( offset );
|
||||
else
|
||||
{
|
||||
Mutex::Lock _( dataMutex );
|
||||
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
||||
}
|
||||
|
||||
++results;
|
||||
if( maxResults > 0 && results >= maxResults )
|
||||
|
@ -639,12 +649,15 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
|
|||
QVector< QString > headwords;
|
||||
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
|
||||
for( int x = 0; x < headwords.size(); x++ )
|
||||
{
|
||||
Mutex::Lock _( dataMutex );
|
||||
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ),
|
||||
id,
|
||||
x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(),
|
||||
matchCase ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
||||
sptr< ChunkedStorage::Reader > chunks,
|
||||
|
@ -765,12 +778,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
|||
|
||||
int n = wordsList.size();
|
||||
if( !hieroglyphsList.isEmpty() )
|
||||
{
|
||||
wordsList += hieroglyphsList;
|
||||
n += 1;
|
||||
}
|
||||
|
||||
allWordsLinks.resize( n );
|
||||
int wordNom = 0;
|
||||
|
||||
if( !hieroglyphsList.empty() )
|
||||
if( !wordsList.empty() )
|
||||
{
|
||||
QList< QSet< uint32_t > > sets;
|
||||
auto fn_wordLink = [ & ](const QString & word )
|
||||
|
@ -805,7 +821,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
|||
sets << tmp;
|
||||
}
|
||||
};
|
||||
QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink );
|
||||
QtConcurrent::blockingMap( wordsList, fn_wordLink );
|
||||
|
||||
int i = 0;
|
||||
for( auto & elem : sets )
|
||||
|
@ -816,63 +832,11 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
|
|||
setOfOffsets = setOfOffsets.intersect( elem );
|
||||
}
|
||||
|
||||
allWordsLinks[ wordNom ] = setOfOffsets;
|
||||
setOfOffsets.clear();
|
||||
// allWordsLinks[ wordNom ] = setOfOffsets;
|
||||
// setOfOffsets.clear();
|
||||
wordNom += 1;
|
||||
}
|
||||
|
||||
if( !wordsList.isEmpty() )
|
||||
{
|
||||
QVector< BtreeIndexing::WordArticleLink > links;
|
||||
links.reserve( wordsInIndex );
|
||||
ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled );
|
||||
|
||||
for( int x = 0; x < links.size(); x++ )
|
||||
{
|
||||
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
|
||||
return;
|
||||
|
||||
QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() );
|
||||
|
||||
if( ignoreDiacritics )
|
||||
word = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( word ) ) );
|
||||
|
||||
for( int i = 0; i < wordsList.size(); i++ )
|
||||
{
|
||||
if( word.length() >= wordsList.at( i ).length() && word.contains( wordsList.at( i ) ) )
|
||||
{
|
||||
vector< char > chunk;
|
||||
char * linksPtr;
|
||||
{
|
||||
// Mutex::Lock _( dict.getFtsMutex() );
|
||||
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
|
||||
}
|
||||
|
||||
memcpy( &size, linksPtr, sizeof(uint32_t) );
|
||||
linksPtr += sizeof(uint32_t);
|
||||
for( uint32_t y = 0; y < size; y++ )
|
||||
{
|
||||
allWordsLinks[ wordNom ].insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) );
|
||||
linksPtr += sizeof(uint32_t);
|
||||
}
|
||||
wordNom += 1;
|
||||
if( searchMode == FTS::PlainText || searchMode == FTS::WholeWords )
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
links.clear();
|
||||
}
|
||||
|
||||
for( int i = 0; i < allWordsLinks.size(); i++ )
|
||||
{
|
||||
if( i == 0 )
|
||||
setOfOffsets = allWordsLinks.at( i );
|
||||
else
|
||||
setOfOffsets = setOfOffsets.intersect( allWordsLinks.at( i ) );
|
||||
}
|
||||
|
||||
if( setOfOffsets.isEmpty() )
|
||||
return;
|
||||
|
||||
|
|
Loading…
Reference in a new issue