opt: optimize cjk fulltext search

1, add concurrent parallel task limitaion
This commit is contained in:
Xiao YiFang 2022-06-14 21:47:51 +08:00
parent f52251d640
commit 6f9c1067a3

View file

@ -454,9 +454,11 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
sem.acquire();
QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
QFuture<void> f =QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
sem.release();
} );
f.waitForFinished();
// QtConcurrent::blockingMap( offsets,
// [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } );
}
@ -466,7 +468,6 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
QStringList const & words,
QRegExp const & searchRegexp )
{
qDebug()<<"checking"<<offset<<QThread::currentThreadId();
// int results = 0;
QString headword, articleText;
QList< uint32_t > offsetsForHeadwords;
@ -508,7 +509,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
if( headword.isEmpty() )
offsetsForHeadwords.append( offset );
else
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
++results;
if( maxResults > 0 && results >= maxResults )
@ -610,7 +614,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
if( headword.isEmpty() )
offsetsForHeadwords.append( offset );
else
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
++results;
if( maxResults > 0 && results >= maxResults )
@ -625,7 +632,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
if( headword.isEmpty() )
offsetsForHeadwords.append( offset );
else
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
++results;
if( maxResults > 0 && results >= maxResults )
@ -639,12 +649,15 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
QVector< QString > headwords;
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
for( int x = 0; x < headwords.size(); x++ )
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ),
id,
x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(),
matchCase ) );
}
}
}
void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
sptr< ChunkedStorage::Reader > chunks,
@ -765,12 +778,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
int n = wordsList.size();
if( !hieroglyphsList.isEmpty() )
{
wordsList += hieroglyphsList;
n += 1;
}
allWordsLinks.resize( n );
int wordNom = 0;
if( !hieroglyphsList.empty() )
if( !wordsList.empty() )
{
QList< QSet< uint32_t > > sets;
auto fn_wordLink = [ & ](const QString & word )
@ -805,7 +821,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
sets << tmp;
}
};
QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink );
QtConcurrent::blockingMap( wordsList, fn_wordLink );
int i = 0;
for( auto & elem : sets )
@ -816,63 +832,11 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
setOfOffsets = setOfOffsets.intersect( elem );
}
allWordsLinks[ wordNom ] = setOfOffsets;
setOfOffsets.clear();
// allWordsLinks[ wordNom ] = setOfOffsets;
// setOfOffsets.clear();
wordNom += 1;
}
if( !wordsList.isEmpty() )
{
QVector< BtreeIndexing::WordArticleLink > links;
links.reserve( wordsInIndex );
ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled );
for( int x = 0; x < links.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() );
if( ignoreDiacritics )
word = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( word ) ) );
for( int i = 0; i < wordsList.size(); i++ )
{
if( word.length() >= wordsList.at( i ).length() && word.contains( wordsList.at( i ) ) )
{
vector< char > chunk;
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof(uint32_t) );
linksPtr += sizeof(uint32_t);
for( uint32_t y = 0; y < size; y++ )
{
allWordsLinks[ wordNom ].insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) );
linksPtr += sizeof(uint32_t);
}
wordNom += 1;
if( searchMode == FTS::PlainText || searchMode == FTS::WholeWords )
break;
}
}
}
links.clear();
}
for( int i = 0; i < allWordsLinks.size(); i++ )
{
if( i == 0 )
setOfOffsets = allWordsLinks.at( i );
else
setOfOffsets = setOfOffsets.intersect( allWordsLinks.at( i ) );
}
if( setOfOffsets.isEmpty() )
return;