opt: optimize CJK full-text search

1. Add a limit on the number of concurrent parallel tasks
This commit is contained in:
Xiao YiFang 2022-06-14 21:47:51 +08:00
parent f52251d640
commit 6f9c1067a3

View file

@ -454,9 +454,11 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
if( Utils::AtomicInt::loadAcquire( isCancelled ) ) if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return; return;
sem.acquire(); sem.acquire();
QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp ); QFuture<void> f =QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp );
sem.release(); sem.release();
} ); } );
f.waitForFinished();
// QtConcurrent::blockingMap( offsets, // QtConcurrent::blockingMap( offsets,
// [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } ); // [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } );
} }
@ -466,7 +468,6 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
QStringList const & words, QStringList const & words,
QRegExp const & searchRegexp ) QRegExp const & searchRegexp )
{ {
qDebug()<<"checking"<<offset<<QThread::currentThreadId();
// int results = 0; // int results = 0;
QString headword, articleText; QString headword, articleText;
QList< uint32_t > offsetsForHeadwords; QList< uint32_t > offsetsForHeadwords;
@ -508,7 +509,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
if( headword.isEmpty() ) if( headword.isEmpty() )
offsetsForHeadwords.append( offset ); offsetsForHeadwords.append( offset );
else else
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
++results; ++results;
if( maxResults > 0 && results >= maxResults ) if( maxResults > 0 && results >= maxResults )
@ -610,7 +614,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
if( headword.isEmpty() ) if( headword.isEmpty() )
offsetsForHeadwords.append( offset ); offsetsForHeadwords.append( offset );
else else
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
++results; ++results;
if( maxResults > 0 && results >= maxResults ) if( maxResults > 0 && results >= maxResults )
@ -625,7 +632,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
if( headword.isEmpty() ) if( headword.isEmpty() )
offsetsForHeadwords.append( offset ); offsetsForHeadwords.append( offset );
else else
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
++results; ++results;
if( maxResults > 0 && results >= maxResults ) if( maxResults > 0 && results >= maxResults )
@ -639,12 +649,15 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
QVector< QString > headwords; QVector< QString > headwords;
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled ); dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
for( int x = 0; x < headwords.size(); x++ ) for( int x = 0; x < headwords.size(); x++ )
{
Mutex::Lock _( dataMutex );
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ),
id, id,
x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(), x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(),
matchCase ) ); matchCase ) );
} }
} }
}
void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
sptr< ChunkedStorage::Reader > chunks, sptr< ChunkedStorage::Reader > chunks,
@ -765,12 +778,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
int n = wordsList.size(); int n = wordsList.size();
if( !hieroglyphsList.isEmpty() ) if( !hieroglyphsList.isEmpty() )
{
wordsList += hieroglyphsList;
n += 1; n += 1;
}
allWordsLinks.resize( n ); allWordsLinks.resize( n );
int wordNom = 0; int wordNom = 0;
if( !hieroglyphsList.empty() ) if( !wordsList.empty() )
{ {
QList< QSet< uint32_t > > sets; QList< QSet< uint32_t > > sets;
auto fn_wordLink = [ & ](const QString & word ) auto fn_wordLink = [ & ](const QString & word )
@ -805,7 +821,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
sets << tmp; sets << tmp;
} }
}; };
QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink ); QtConcurrent::blockingMap( wordsList, fn_wordLink );
int i = 0; int i = 0;
for( auto & elem : sets ) for( auto & elem : sets )
@ -816,63 +832,11 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
setOfOffsets = setOfOffsets.intersect( elem ); setOfOffsets = setOfOffsets.intersect( elem );
} }
allWordsLinks[ wordNom ] = setOfOffsets; // allWordsLinks[ wordNom ] = setOfOffsets;
setOfOffsets.clear(); // setOfOffsets.clear();
wordNom += 1; wordNom += 1;
} }
if( !wordsList.isEmpty() )
{
QVector< BtreeIndexing::WordArticleLink > links;
links.reserve( wordsInIndex );
ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled );
for( int x = 0; x < links.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() );
if( ignoreDiacritics )
word = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( word ) ) );
for( int i = 0; i < wordsList.size(); i++ )
{
if( word.length() >= wordsList.at( i ).length() && word.contains( wordsList.at( i ) ) )
{
vector< char > chunk;
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof(uint32_t) );
linksPtr += sizeof(uint32_t);
for( uint32_t y = 0; y < size; y++ )
{
allWordsLinks[ wordNom ].insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) );
linksPtr += sizeof(uint32_t);
}
wordNom += 1;
if( searchMode == FTS::PlainText || searchMode == FTS::WholeWords )
break;
}
}
}
links.clear();
}
for( int i = 0; i < allWordsLinks.size(); i++ )
{
if( i == 0 )
setOfOffsets = allWordsLinks.at( i );
else
setOfOffsets = setOfOffsets.intersect( allWordsLinks.at( i ) );
}
if( setOfOffsets.isEmpty() ) if( setOfOffsets.isEmpty() )
return; return;