From 6f9c1067a3d33132af2dd8d9f28e4c593c4e0ce7 Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Tue, 14 Jun 2022 21:47:51 +0800 Subject: [PATCH 1/2] opt: optimize cjk fulltext search 1, add concurrent parallel task limitation --- ftshelpers.cc | 80 ++++++++++++++------------------------------------- 1 file changed, 22 insertions(+), 58 deletions(-) diff --git a/ftshelpers.cc b/ftshelpers.cc index 6f86673d..724f2498 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -454,9 +454,11 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, if( Utils::AtomicInt::loadAcquire( isCancelled ) ) return; sem.acquire(); - QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp ); + QFuture f =QtConcurrent::run( [ & ]() { checkSingleArticle( address, words, searchRegexp ); sem.release(); } ); + f.waitForFinished(); + // QtConcurrent::blockingMap( offsets, // [ & ]( uint32_t offset ) { checkSingleArticle( offset, words, searchRegexp ); } ); } @@ -466,7 +468,6 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, QStringList const & words, QRegExp const & searchRegexp ) { - qDebug()<<"checking"< offsetsForHeadwords; @@ -508,7 +509,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, if( headword.isEmpty() ) offsetsForHeadwords.append( offset ); else + { + Mutex::Lock _( dataMutex ); foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); + } ++results; if( maxResults > 0 && results >= maxResults ) @@ -610,7 +614,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, if( headword.isEmpty() ) offsetsForHeadwords.append( offset ); else + { + Mutex::Lock _( dataMutex ); foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); + } ++results; if( maxResults > 0 && results >= maxResults ) @@ -625,7 +632,10 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, if( headword.isEmpty() ) offsetsForHeadwords.append( offset ); else + { + Mutex::Lock 
_( dataMutex ); foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) ); + } ++results; if( maxResults > 0 && results >= maxResults ) @@ -639,10 +649,13 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset, QVector< QString > headwords; dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled ); for( int x = 0; x < headwords.size(); x++ ) + { + Mutex::Lock _( dataMutex ); foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), id, x < hiliteRegExps.size() ? hiliteRegExps.at( x ) : QStringList(), matchCase ) ); + } } } @@ -765,12 +778,15 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde int n = wordsList.size(); if( !hieroglyphsList.isEmpty() ) + { + wordsList += hieroglyphsList; n += 1; + } allWordsLinks.resize( n ); int wordNom = 0; - if( !hieroglyphsList.empty() ) + if( !wordsList.empty() ) { QList< QSet< uint32_t > > sets; auto fn_wordLink = [ & ](const QString & word ) @@ -805,7 +821,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde sets << tmp; } }; - QtConcurrent::blockingMap( hieroglyphsList, fn_wordLink ); + QtConcurrent::blockingMap( wordsList, fn_wordLink ); int i = 0; for( auto & elem : sets ) @@ -816,63 +832,11 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde setOfOffsets = setOfOffsets.intersect( elem ); } - allWordsLinks[ wordNom ] = setOfOffsets; - setOfOffsets.clear(); + // allWordsLinks[ wordNom ] = setOfOffsets; + // setOfOffsets.clear(); wordNom += 1; } - if( !wordsList.isEmpty() ) - { - QVector< BtreeIndexing::WordArticleLink > links; - links.reserve( wordsInIndex ); - ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled ); - - for( int x = 0; x < links.size(); x++ ) - { - if( Utils::AtomicInt::loadAcquire( isCancelled ) ) - return; - - QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() ); - - if( ignoreDiacritics ) - word = gd::toQString( 
Folding::applyDiacriticsOnly( gd::toWString( word ) ) ); - - for( int i = 0; i < wordsList.size(); i++ ) - { - if( word.length() >= wordsList.at( i ).length() && word.contains( wordsList.at( i ) ) ) - { - vector< char > chunk; - char * linksPtr; - { - // Mutex::Lock _( dict.getFtsMutex() ); - linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk ); - } - - memcpy( &size, linksPtr, sizeof(uint32_t) ); - linksPtr += sizeof(uint32_t); - for( uint32_t y = 0; y < size; y++ ) - { - allWordsLinks[ wordNom ].insert( *( reinterpret_cast< uint32_t * >( linksPtr ) ) ); - linksPtr += sizeof(uint32_t); - } - wordNom += 1; - if( searchMode == FTS::PlainText || searchMode == FTS::WholeWords ) - break; - } - } - } - - links.clear(); - } - - for( int i = 0; i < allWordsLinks.size(); i++ ) - { - if( i == 0 ) - setOfOffsets = allWordsLinks.at( i ); - else - setOfOffsets = setOfOffsets.intersect( allWordsLinks.at( i ) ); - } - if( setOfOffsets.isEmpty() ) return; From 4b9d71376df19d0e4b65f33d6b43d348345ecbcc Mon Sep 17 00:00:00 2001 From: Xiao YiFang Date: Thu, 16 Jun 2022 07:53:14 +0800 Subject: [PATCH 2/2] remove javascript highlight logic webengine's findtext(highlight) has fewer features than webkit's --- articleview.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/articleview.cc b/articleview.cc index 6effd956..7fe9d9f4 100644 --- a/articleview.cc +++ b/articleview.cc @@ -2558,10 +2558,10 @@ void ArticleView::highlightFTSResults() // highlightAllFtsOccurences( flags ); ui.definition->findText( allMatches.at( 0 ), flags ); // if( ui.definition->findText( allMatches.at( 0 ), flags ) ) - { - ui.definition->page()->runJavaScript( - QString( "%1=window.getSelection().getRangeAt(0);_=0;" ).arg( rangeVarName ) ); - } + // { + // ui.definition->page()->runJavaScript( + // QString( "%1=window.getSelection().getRangeAt(0);_=0;" ).arg( rangeVarName ) ); + // } } ui.ftsSearchFrame->show(); @@ -2673,9 +2673,9 @@ void 
ArticleView::performFtsFindOperation( bool backwards ) } #endif // Store new highlighted selection - ui.definition->page()-> - runJavaScript( QString( "%1=window.getSelection().getRangeAt(0);_=0;" ) - .arg( rangeVarName ) ); + // ui.definition->page()-> + // runJavaScript( QString( "%1=window.getSelection().getRangeAt(0);_=0;" ) + // .arg( rangeVarName ) ); } void ArticleView::on_ftsSearchPrevious_clicked()