opt: mdx fulltext lock separated from normal search (#759)

* opt: refactor mdx fullindex creation

* opt: incremental fulltext creation logic change

* opt: incremental fulltext creation logic change

* opt: progress of fulltext creation logic

* opt: code smell

* fix: code smell

* fix: code smell

* fix: code smell

* fix: code smell

* fix: code smell

* 🎨 apply clang-format changes

* fix: code smell

* fix: code smell

---------

Co-authored-by: xiaoyifang <xiaoyifang@users.noreply.github.com>
This commit is contained in:
xiaoyifang 2023-05-29 00:01:21 +08:00 committed by GitHub
parent 5d15ffbc14
commit 4eb8374a35
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 210 additions and 230 deletions

View file

@ -34,7 +34,7 @@ public:
unsigned currentGroupId;
QString translateLineText{};
//hold the dictionary id;
QSet<QString> collapsedDicts;
QSet< QString > collapsedDicts;
QMap< QString, QSet< QString > > folderFavoritesMap;
QMap< unsigned, QString > groupFolderMap;
@ -42,6 +42,8 @@ public:
signals:
void dictionaryChanges( ActiveDictIds ad );
void dictionaryClear( ActiveDictIds ad );
void indexingDictionary( QString );
};
#endif // GLOBAL_GLOBALBROADCASTER_H

View file

@ -17,6 +17,7 @@
#include "config.hh"
#include "utils.hh"
#include <QString>
#include "globalbroadcaster.hh"
/// Abstract dictionary-related stuff
namespace Dictionary {
@ -261,12 +262,16 @@ Q_DECLARE_FLAGS( Features, Feature )
Q_DECLARE_OPERATORS_FOR_FLAGS( Features )
/// A dictionary. Can be used to query words.
class Class
class Class: public QObject
{
Q_OBJECT
string id;
vector< string > dictionaryFiles;
long indexedFtsDoc;
long lastProgress = 0;
protected:
QString dictionaryDescription;
QIcon dictionaryIcon, dictionaryNativeIcon;
@ -339,8 +344,16 @@ public:
/// Returns the number of articles in the dictionary.
virtual unsigned long getArticleCount() noexcept=0;
void setIndexedFtsDoc(long _indexedFtsDoc){
void setIndexedFtsDoc(long _indexedFtsDoc)
{
indexedFtsDoc = _indexedFtsDoc;
auto newProgress = getIndexingFtsProgress();
if ( newProgress != lastProgress ) {
lastProgress = newProgress;
emit GlobalBroadcaster::instance()->indexingDictionary(
QString( "%1......%%2" ).arg( QString::fromStdString( getName() ) ).arg( newProgress ) );
}
}
int getIndexingFtsProgress(){

View file

@ -23,11 +23,8 @@
#include <map>
#include <set>
#include <list>
#include <ctype.h>
#include <stdlib.h>
#ifdef _MSC_VER
#include <stub_msvc.h>
#include <stub_msvc.h>
#endif
#include "globalregex.hh"
@ -37,9 +34,7 @@
#include <QCryptographicHash>
#include <QDir>
#include <QRegularExpression>
#include <QSemaphore>
#include <QString>
#include <QTextDocument>
#include <QThreadPool>
#include <QtConcurrent>
@ -198,10 +193,11 @@ public:
};
class MdxDictionary: public QObject, public BtreeIndexing::BtreeDictionary
class MdxDictionary: public BtreeIndexing::BtreeDictionary
{
Mutex idxMutex;
File::Class idx;
string idxFileName;
IdxHeader idxHeader;
string encoding;
ChunkedStorage::Reader chunks;
@ -220,7 +216,7 @@ public:
MdxDictionary( string const & id, string const & indexFile, vector<string> const & dictionaryFiles );
~MdxDictionary();
~MdxDictionary() override;
void deferredInit() override;
@ -231,7 +227,7 @@ public:
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
return {};
}
unsigned long getArticleCount() noexcept override
@ -273,7 +269,7 @@ public:
void setFTSParameters( Config::FullTextSearch const & fts ) override
{
if( ensureInitDone().size() )
if ( !ensureInitDone().empty() )
return;
can_FTS = fts.enabled
@ -305,16 +301,15 @@ private:
void removeDirectory( QString const & directory );
friend class MdxHeadwordsRequest;
friend class MdxArticleRequest;
friend class MddResourceRequest;
void loadResourceFile( const wstring & resourceName, vector< char > & data );
};
MdxDictionary::MdxDictionary( string const & id, string const & indexFile,
vector<string> const & dictionaryFiles ):
MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idxFileName( indexFile ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
deferredInitRunnableStarted( false )
@ -479,8 +474,8 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration
if( haveFTSIndex() )
return;
if( ensureInitDone().size() )
return;
// if( !ensureInitDone().empty() )
// return;
if( firstIteration && getArticleCount() > FTS::MaxDictionarySizeForFastSearch )
return;
@ -490,7 +485,10 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration
try
{
FtsHelpers::makeFTSIndex( this, isCancelled );
auto _dict = std::make_shared< MdxDictionary >( this->getId(), idxFileName, this->getDictionaryFilenames() );
if( !_dict->ensureInitDone().empty() )
return;
FtsHelpers::makeFTSIndex( _dict.get(), isCancelled );
FTS_index_completed.ref();
}
catch( std::exception &ex )
@ -559,7 +557,7 @@ public:
isCancelled.ref();
}
~MdxArticleRequest()
~MdxArticleRequest() override
{
isCancelled.ref();
f.waitForFinished();
@ -575,8 +573,7 @@ void MdxArticleRequest::run()
return;
}
if ( dict.ensureInitDone().size() )
{
if ( !dict.ensureInitDone().empty() ) {
setErrorString( QString::fromUtf8( dict.ensureInitDone().c_str() ) );
finish();
return;
@ -584,10 +581,9 @@ void MdxArticleRequest::run()
vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );
for ( unsigned x = 0; x < alts.size(); ++x )
{
for ( const auto & alt : alts ) {
/// Make an additional query for each alt
vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
@ -1345,30 +1341,27 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
{
vector< sptr< Dictionary::Class > > dictionaries;
for ( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end(); ++i )
{
for ( const auto & fileName : fileNames ) {
// Skip files with the extensions different to .mdx to speed up the
// scanning
if ( i->size() < 4 || strcasecmp( i->c_str() + ( i->size() - 4 ), ".mdx" ) != 0 )
if ( fileName.size() < 4 || strcasecmp( fileName.c_str() + ( fileName.size() - 4 ), ".mdx" ) != 0 )
continue;
vector< string > dictFiles( 1, *i );
findResourceFiles( *i, dictFiles );
vector< string > dictFiles( 1, fileName );
findResourceFiles( fileName, dictFiles );
string dictId = Dictionary::makeDictionaryId( dictFiles );
string dictId = Dictionary::makeDictionaryId( dictFiles );
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
indexIsOldOrBad( dictFiles, indexFile ) )
{
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( dictFiles, indexFile ) ) {
// Building the index
gdDebug( "MDict: Building the index for dictionary: %s\n", i->c_str() );
gdDebug( "MDict: Building the index for dictionary: %s\n", fileName.c_str() );
MdictParser parser;
list< sptr< MdictParser > > mddParsers;
if ( !parser.open( i->c_str() ) )
if ( !parser.open( fileName.c_str() ) )
continue;
string title = parser.title().toStdString();
@ -1470,52 +1463,46 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Save dictionary stylesheets
{
MdictParser::StyleSheets const & styleSheets = parser.styleSheets();
idxHeader.styleSheetAddress = idx.tell();
idxHeader.styleSheetCount = styleSheets.size();
idxHeader.styleSheetAddress = idx.tell();
idxHeader.styleSheetCount = styleSheets.size();
for ( MdictParser::StyleSheets::const_iterator iter = styleSheets.begin();
iter != styleSheets.end(); ++iter )
{
string styleBegin(iter->second.first.toStdString());
string styleEnd( iter->second.second.toStdString() );
for ( auto const & [ key, value ] : styleSheets ) {
string const styleBegin( value.first.toStdString() );
string const styleEnd( value.second.toStdString() );
// key
idx.write<qint32>( iter->first );
idx.write< qint32 >( key );
// styleBegin
idx.write<quint32>( ( quint32 )styleBegin.size() + 1 );
idx.write< quint32 >( (quint32)styleBegin.size() + 1 );
idx.write( styleBegin.c_str(), styleBegin.size() + 1 );
// styleEnd
idx.write<quint32>( ( quint32 )styleEnd.size() + 1 );
idx.write< quint32 >( (quint32)styleEnd.size() + 1 );
idx.write( styleEnd.c_str(), styleEnd.size() + 1 );
}
}
// read languages
QPair<quint32, quint32> langs = LangCoder::findIdsForFilename( QString::fromStdString( *i ) );
QPair< quint32, quint32 > langs = LangCoder::findIdsForFilename( QString::fromStdString( fileName ) );
// if no languages found, try dictionary's name
if ( langs.first == 0 || langs.second == 0 )
{
if ( langs.first == 0 || langs.second == 0 ) {
langs = LangCoder::findIdsForFilename( parser.title() );
}
idxHeader.langFrom = langs.first;
idxHeader.langTo = langs.second;
idxHeader.langTo = langs.second;
// Build index info for each mdd file
vector< IndexInfo > mddIndexInfos;
for ( vector< sptr< IndexedWords > >::const_iterator mddIndexIter = mddIndices.begin();
mddIndexIter != mddIndices.end(); ++mddIndexIter )
{
IndexInfo resourceIdxInfo = BtreeIndexing::buildIndex( *( *mddIndexIter ), idx );
for ( const auto & mddIndice : mddIndices ) {
IndexInfo const resourceIdxInfo = BtreeIndexing::buildIndex( *mddIndice, idx );
mddIndexInfos.push_back( resourceIdxInfo );
}
// Save address of IndexInfos for resource files
idxHeader.mddIndexInfosOffset = idx.tell();
idxHeader.mddIndexInfosCount = mddIndexInfos.size();
for ( uint32_t mi = 0; mi < mddIndexInfos.size(); mi++ )
{
idxHeader.mddIndexInfosCount = mddIndexInfos.size();
for ( uint32_t mi = 0; mi < mddIndexInfos.size(); mi++ ) {
const string & mddfile = mddFileNames[ mi ];
idx.write<quint32>( ( quint32 )mddfile.size() + 1 );

View file

@ -495,7 +495,7 @@ void ZimDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration
getName().c_str() );
try
{
return FtsHelpers::makeFTSIndexXapian(this,isCancelled);
return FtsHelpers::makeFTSIndex(this,isCancelled);
}
catch( std::exception &ex )
{

View file

@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "xapian.h"
#include <stdlib.h>
#include <cstdlib>
#include "fulltextsearch.hh"
#include "ftshelpers.hh"
#include "wstring_qt.hh"
@ -18,9 +18,7 @@
#include <QRegularExpression>
#include "wildcard.hh"
#include <QtConcurrent>
#include "globalregex.hh"
#include <QFutureSynchronizer>
#include <QSemaphoreReleaser>
using std::vector;
@ -44,7 +42,7 @@ bool ftsIndexIsOldOrBad( string const & indexFile,
qDebug()<<document.get_data().c_str();
//use a special document to mark the end of the index.
return document.get_data().compare(finish_mark)!=0;
return document.get_data()!=finish_mark;
}
catch( Xapian::Error & e )
{
@ -114,10 +112,8 @@ static QString makeHiliteRegExpString( QStringList const & words,
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
{
QStringList wordList, hieroglyphList;
for( int i = 0; i < list.size(); i ++ )
for(auto word : list)
{
QString word = list.at( i );
// Check for CJK symbols in word
bool parsed = false;
QString hieroglyph;
@ -150,8 +146,8 @@ void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStri
bool containCJK( QString const & str)
{
bool hasCJK = false;
for( int x = 0; x < str.size(); x++ )
if( isCJKChar( str.at( x ).unicode() ) )
for(auto x : str)
if( isCJKChar( x.unicode() ) )
{
hasCJK = true;
break;
@ -255,9 +251,9 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
QVector< QString > setOfWords;
setOfWords.reserve( articleWords.size() );
for( int x = 0; x < articleWords.size(); x++ )
for(const auto & articleWord : articleWords)
{
QString word = articleWords.at( x ).toLower();
QString word = articleWord.toLower();
bool hasCJK = false;
QString hieroglyph;
@ -295,9 +291,9 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
QStringList list;
QStringList oldVariant = word.split( RX::Ftx::regSplit, Qt::SkipEmptyParts );
for( QStringList::iterator it = oldVariant.begin(); it != oldVariant.end(); ++it )
if( it->size() >= FTS::MinimumWordSize && !list.contains( *it ) )
list.append( *it );
for ( auto const & it : oldVariant )
if ( it.size() >= FTS::MinimumWordSize && !list.contains( it ) )
list.append( it );
QRegularExpressionMatch match = RX::Ftx::regBrackets.match( word );
if( match.hasMatch() )
@ -321,11 +317,10 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
list.append( parsedWord );
}
for( QStringList::iterator it = list.begin(); it != list.end(); ++it )
{
for ( auto const & it : list ) {
//if( !setOfWords.contains( *it ) )
{
setOfWords.push_back( *it );
setOfWords.push_back( it );
/*Mutex::Lock _( _mapLock );
words[ *it ].push_back( articleAddress );*/
}
@ -352,118 +347,115 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
}
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
return makeFTSIndexXapian(dict,isCancelled);
}
// use xapian to create the index
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
Mutex::Lock _( dict->getFtsMutex() );
//check the index again.
if ( dict->haveFTSIndex() )
return;
try {
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
// Open the database for update, creating a new database if necessary.
Xapian::WritableDatabase db(dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN);
// Open the database for update, creating a new database if necessary.
Xapian::WritableDatabase db( dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN );
Xapian::TermGenerator indexer;
// Xapian::Stem stemmer("english");
// indexer.set_stemmer(stemmer);
// indexer.set_stemming_strategy(indexer.STEM_SOME_FULL_POS);
indexer.set_flags(Xapian::TermGenerator::FLAG_CJK_NGRAM);
Xapian::TermGenerator indexer;
// Xapian::Stem stemmer("english");
// indexer.set_stemmer(stemmer);
// indexer.set_stemming_strategy(indexer.STEM_SOME_FULL_POS);
indexer.set_flags( Xapian::TermGenerator::FLAG_CJK_NGRAM );
BtreeIndexing::IndexedWords indexedWords;
BtreeIndexing::IndexedWords indexedWords;
QSet< uint32_t > setOfOffsets;
setOfOffsets.reserve( dict->getArticleCount() );
QSet< uint32_t > setOfOffsets;
setOfOffsets.reserve( dict->getArticleCount() );
dict->findArticleLinks( 0, &setOfOffsets, 0, &isCancelled );
dict->findArticleLinks( nullptr, &setOfOffsets, nullptr, &isCancelled );
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
QVector< uint32_t > offsets;
offsets.resize( setOfOffsets.size() );
uint32_t * ptr = &offsets.front();
QVector< uint32_t > offsets;
offsets.resize( setOfOffsets.size() );
uint32_t * ptr = &offsets.front();
for( QSet< uint32_t >::ConstIterator it = setOfOffsets.constBegin();
it != setOfOffsets.constEnd(); ++it )
{
*ptr = *it;
ptr++;
}
for ( QSet< uint32_t >::ConstIterator it = setOfOffsets.constBegin(); it != setOfOffsets.constEnd(); ++it ) {
*ptr = *it;
ptr++;
}
// Free memory
setOfOffsets.clear();
// Free memory
setOfOffsets.clear();
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
dict->sortArticlesOffsetsForFTS( offsets, isCancelled );
dict->sortArticlesOffsetsForFTS( offsets, isCancelled );
// incremental build the index.
// get the last address.
bool skip = true;
uint32_t lastAddress = -1;
try
{
Xapian::Document lastDoc = db.get_document( db.get_lastdocid() );
lastAddress = atoi( lastDoc.get_data().c_str() );
}
catch( Xapian::Error & e )
{
qDebug() << e.get_description().c_str();
skip = false;
}
long indexedDoc=0L;
for( auto & address : offsets )
{
indexedDoc++;
if(address==lastAddress){
// incremental build the index.
// get the last address.
bool skip = true;
uint32_t lastAddress = -1;
try {
if ( db.get_lastdocid() > 0 ) {
Xapian::Document lastDoc = db.get_document( db.get_lastdocid() );
lastAddress = atoi( lastDoc.get_data().c_str() );
}
else {
skip = false;
}
}
catch ( Xapian::Error & e ) {
qDebug() << "get last doc failed: " << e.get_description().c_str();
skip = false;
}
//skip until to the lastAddress;
if((address!=lastAddress)&&skip){
continue;
long indexedDoc = 0L;
for ( auto const & address : offsets ) {
indexedDoc++;
if ( address > lastAddress && skip ) {
skip = false;
}
//skip until to the lastAddress;
if ( skip ) {
continue;
}
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
return;
}
QString headword, articleStr;
dict->getArticleText( address, headword, articleStr );
Xapian::Document doc;
indexer.set_document( doc );
indexer.index_text_without_positions( articleStr.toStdString() );
doc.set_data( std::to_string( address ) );
// Add the document to the database.
db.add_document( doc );
dict->setIndexedFtsDoc( indexedDoc );
}
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
return;
}
QString headword, articleStr;
dict->getArticleText( address, headword, articleStr );
//add a special document to mark the end of the index.
Xapian::Document doc;
indexer.set_document( doc );
indexer.index_text_without_positions( articleStr.toStdString() );
doc.set_data( std::to_string( address ) );
doc.set_data( finish_mark );
// Add the document to the database.
db.add_document( doc );
dict->setIndexedFtsDoc(indexedDoc);
// Free memory
offsets.clear();
db.commit();
}
//add a special document to mark the end of the index.
Xapian::Document doc;
doc.set_data( finish_mark );
// Add the document to the database.
db.add_document( doc );
// Free memory
offsets.clear();
db.commit();
} catch (Xapian::Error & e) {
qWarning()<<QString::fromStdString(e.get_description());
catch ( Xapian::Error & e ) {
qWarning() << "create xapian index:" << QString::fromStdString( e.get_description() );
}
}
@ -492,7 +484,7 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
}
}
QRegularExpression FTSResultsRequest::createMatchRegex( QRegExp const & searchRegexp )
QRegularExpression FTSResultsRequest::createMatchRegex( QRegExp const & searchRegexp ) const
{
QRegularExpression searchRegularExpression;
@ -559,18 +551,17 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
QVector< QPair< QString, bool > > wordsList;
if( ignoreWordsOrder )
{
for( QStringList::const_iterator it = words.begin(); it != words.end(); ++it )
wordsList.append( QPair< QString, bool >( *it, true ) );
for(const auto & word : words)
wordsList.append( QPair< QString, bool >( word, true ) );
}
// for( int i = 0; i < offsets.size(); i++ )
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
if( ignoreWordsOrder )
{
for( int i = 0; i < wordsList.size(); i++ )
wordsList[ i ].second = true;
if ( ignoreWordsOrder ) {
for ( auto & [ fst, snd ] : wordsList )
snd = true;
}
dict.getArticleText( offset, headword, articleText );
@ -717,10 +708,8 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
vector< BtreeIndexing::WordArticleLink > links =
ftsIndex.findArticles( gd::removeTrailingZero( word ), ignoreDiacritics );
for( unsigned x = 0; x < links.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
for ( auto const & link : links ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
addressLists << tmp;
return;
}
@ -729,7 +718,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
linksPtr = chunks->getBlock( link.articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof( uint32_t ) );
@ -750,7 +739,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
// int n = indexWords.length();
// QtConcurrent::blockingMap( indexWords, findLinks );
for(QString word:indexWords)
for(const QString& word:indexWords)
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
@ -812,9 +801,8 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
QStringList wordsList, hieroglyphsList;
for( int x = 0; x < indexWords.size(); x++ )
for(const auto & word : indexWords)
{
QString const & word = indexWords.at( x );
if( isCJKChar( word[ 0 ].unicode() ) )
hieroglyphsList.append( word );
else
@ -839,10 +827,8 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
{
QSet< uint32_t > tmp;
vector< BtreeIndexing::WordArticleLink > links = ftsIndex.findArticles( gd::removeTrailingZero( word ) );
for( unsigned x = 0; x < links.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
for ( auto const & link : links ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
Mutex::Lock _( dataMutex );
sets << tmp;
return;
@ -852,7 +838,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
linksPtr = chunks->getBlock( link.articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof( uint32_t ) );
@ -945,17 +931,17 @@ void FTSResultsRequest::fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
return;
links.reserve( wordsInIndex );
ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled );
ftsIndex.findArticleLinks( &links, nullptr, nullptr, &isCancelled );
QVector< QSet< uint32_t > > allWordsLinks;
allWordsLinks.resize( indexWords.size() );
for( int x = 0; x < links.size(); x++ )
for(auto & link : links)
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() );
QString word = QString::fromUtf8( link.word.data(), link.word.size() );
if( ignoreDiacritics )
word = QString::fromStdU32String( Folding::applyDiacriticsOnly( gd::toWString( word ) ) );
@ -968,7 +954,7 @@ void FTSResultsRequest::fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
linksPtr = chunks->getBlock( link.articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof(uint32_t) );
@ -1026,7 +1012,7 @@ void FTSResultsRequest::fullSearch( QStringList & searchWords, QRegExp & regexp
QSet< uint32_t > setOfOffsets;
setOfOffsets.reserve( dict.getArticleCount() );
dict.findArticleLinks( 0, &setOfOffsets, 0, &isCancelled );
dict.findArticleLinks( nullptr, &setOfOffsets, nullptr, &isCancelled );
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
@ -1119,9 +1105,9 @@ void FTSResultsRequest::runXapian()
Mutex::Lock _( dataMutex );
QString id = QString::fromUtf8( dict.getId().c_str() );
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
for( int x = 0; x < headwords.size(); x++ )
for(const auto & headword : headwords)
{
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), id, QStringList(), matchCase ) );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
}
}
@ -1143,7 +1129,7 @@ void FTSResultsRequest::runXapian()
Mutex::Lock _( dataMutex );
data.resize( sizeof( foundHeadwords ) );
memcpy( &data.front(), &foundHeadwords, sizeof( foundHeadwords ) );
foundHeadwords = 0;
foundHeadwords = nullptr;
hasAnyData = true;
}
}

View file

@ -64,7 +64,6 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
bool handleRoundBrackets = false );
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
bool isCJKChar( ushort ch );
class FTSResultsRequest : public Dictionary::DataRequest
@ -91,7 +90,7 @@ class FTSResultsRequest : public Dictionary::DataRequest
void checkArticles( QVector< uint32_t > const & offsets,
QStringList const & words,
QRegExp const & searchRegexp = QRegExp() );
QRegularExpression createMatchRegex( QRegExp const & searchRegexp );
QRegularExpression createMatchRegex( QRegExp const & searchRegexp ) const;
void checkSingleArticle( uint32_t offset,
QStringList const & words,

View file

@ -22,44 +22,30 @@
namespace FTS
{
enum
{
MinDistanceBetweenWords = 0,
MaxDistanceBetweenWords = 15,
MinArticlesPerDictionary = 1,
MaxArticlesPerDictionary = 10000
};
void Indexing::run()
{
try
{
timerThread->start();
// First iteration - dictionaries with no more MaxDictionarySizeForFastSearch articles
for( size_t x = 0; x < dictionaries.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
for ( const auto & dictionary : dictionaries ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
break;
if( dictionaries.at( x )->canFTS()
&&!dictionaries.at( x )->haveFTSIndex() )
{
emit sendNowIndexingName( QString::fromUtf8( dictionaries.at( x )->getName().c_str() ) );
dictionaries.at( x )->makeFTSIndex( isCancelled, true );
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
dictionary->makeFTSIndex( isCancelled, true );
}
}
// Second iteration - all remaining dictionaries
for( size_t x = 0; x < dictionaries.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
for ( const auto & dictionary : dictionaries ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
break;
if( dictionaries.at( x )->canFTS()
&&!dictionaries.at( x )->haveFTSIndex() )
{
emit sendNowIndexingName( QString::fromUtf8( dictionaries.at( x )->getName().c_str() ) );
dictionaries.at( x )->makeFTSIndex( isCancelled, false );
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
dictionary->makeFTSIndex( isCancelled, false );
}
}
@ -68,26 +54,27 @@ void Indexing::run()
}
catch( std::exception &ex )
{
gdWarning( "Exception occured while full-text search: %s", ex.what() );
gdWarning( "Exception occurred while full-text search: %s", ex.what() );
}
emit sendNowIndexingName( QString() );
}
void Indexing::timeout(){
for( size_t x = 0; x < dictionaries.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
void Indexing::timeout()
{
//Displaying every dictionary's name in the following loop may result in only one name being seen,
//as the interval is so small.
for ( const auto & dictionary : dictionaries ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
break;
auto progress = dictionaries.at( x )->getIndexingFtsProgress();
if( progress>0&&progress<100)
{
emit sendNowIndexingName( QString::fromUtf8( dictionaries.at( x )->getName().c_str() )+QString("......%1%2").arg("%").arg(progress) );
auto newProgress = dictionary->getIndexingFtsProgress();
if ( newProgress > 0 && newProgress < 100 ) {
emit sendNowIndexingName(
QString( "%1......%%2" ).arg( QString::fromStdString( dictionary->getName() ) ).arg( newProgress ) );
}
}
}
FtsIndexing::FtsIndexing( std::vector< sptr< Dictionary::Class > > const & dicts):
dictionaries( dicts ),
started( false )
@ -128,7 +115,7 @@ void FtsIndexing::stopIndexing()
}
}
void FtsIndexing::setNowIndexedName( QString name )
void FtsIndexing::setNowIndexedName( const QString & name )
{
{
Mutex::Lock _( nameMutex );
@ -235,6 +222,10 @@ FullTextSearchDialog::FullTextSearchDialog( QWidget * parent,
setNewIndexingName( ftsIdx.nowIndexingName() );
connect( &ftsIdx, &FtsIndexing::newIndexingName, this, &FullTextSearchDialog::setNewIndexingName );
connect( GlobalBroadcaster::instance(),
&GlobalBroadcaster::indexingDictionary,
this,
&FullTextSearchDialog::setNewIndexingName );
ui.searchMode->addItem( tr( "Whole words" ), WholeWords );
ui.searchMode->addItem( tr( "Plain text"), PlainText );

View file

@ -97,8 +97,6 @@ public:
~Indexing()
{
emit sendNowIndexingName( QString() );
hasExited.release();
}
@ -146,7 +144,7 @@ protected:
Mutex nameMutex;
private slots:
void setNowIndexedName( QString name );
void setNowIndexedName( const QString & name );
signals:
void newIndexingName( QString name );

View file

@ -691,6 +691,10 @@ MainWindow::MainWindow( Config::Class & cfg_ ):
groupListInToolbar->installEventFilter( this );
connect( &ftsIndexing, &FTS::FtsIndexing::newIndexingName, this, &MainWindow::showFTSIndexingName );
connect( GlobalBroadcaster::instance(),
&GlobalBroadcaster::indexingDictionary,
this,
&MainWindow::showFTSIndexingName );
applyProxySettings();