opt: mdx fulltext lock separated from normal search (#759)
* opt: refactor mdx fullindex creation
* opt: incremental fulltext creation logic change
* opt: incremental fulltext creation logic change
* opt: progress of fulltext creation logic
* opt: code smell
* fix: code smell
* fix: code smell
* fix: code smell
* fix: code smell
* fix: code smell
* 🎨 apply clang-format changes
* fix: code smell
* fix: code smell
---------
Co-authored-by: xiaoyifang <xiaoyifang@users.noreply.github.com>
This commit is contained in: parent 5d15ffbc14, commit 4eb8374a35
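
Before the diff itself, the incremental-indexing idea the bullets above describe can be summarized in isolation. The following is a minimal, self-contained sketch rather than the project's actual code: it assumes a sorted list of article offsets, a caller-supplied way to fetch article text, and a `finish_mark` sentinel string like the one used later in the ftshelpers changes; the resume logic mirrors the new loop in `makeFTSIndexXapian` further down.

```cpp
#include <xapian.h>

#include <cstdint>
#include <cstdlib>
#include <string>
#include <vector>

// Sentinel written as the last document of a finished index (assumed value).
static const std::string finish_mark = "FINISH_MARK";

// Sketch: build (or resume) a full-text index over sorted article offsets.
void buildFtsIndexIncrementally( const std::string & dbPath,
                                 const std::vector< uint32_t > & sortedOffsets,
                                 std::string ( *articleTextAt )( uint32_t ) )
{
  Xapian::WritableDatabase db( dbPath, Xapian::DB_CREATE_OR_OPEN );

  Xapian::TermGenerator indexer;
  indexer.set_flags( Xapian::TermGenerator::FLAG_CJK_NGRAM );

  // Where did the previous (possibly interrupted) run stop?
  bool skip = true;
  uint32_t lastAddress = 0;
  if ( db.get_lastdocid() > 0 )
    lastAddress = std::strtoul( db.get_document( db.get_lastdocid() ).get_data().c_str(), nullptr, 10 );
  else
    skip = false; // empty database, index everything

  for ( uint32_t address : sortedOffsets ) {
    if ( skip && address > lastAddress )
      skip = false; // we are past the last article already indexed
    if ( skip )
      continue;     // this article was indexed by the previous run

    Xapian::Document doc;
    indexer.set_document( doc );
    indexer.index_text_without_positions( articleTextAt( address ) );
    doc.set_data( std::to_string( address ) ); // remember which article this document covers
    db.add_document( doc );
  }

  // A trailing marker document distinguishes a complete index from an aborted one.
  Xapian::Document end;
  end.set_data( finish_mark );
  db.add_document( end );
  db.commit();
}
```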
@@ -34,7 +34,7 @@ public:
unsigned currentGroupId;
QString translateLineText{};
//hold the dictionary id;
QSet<QString> collapsedDicts;
QSet< QString > collapsedDicts;
QMap< QString, QSet< QString > > folderFavoritesMap;
QMap< unsigned, QString > groupFolderMap;

@@ -42,6 +42,8 @@ public:
signals:
void dictionaryChanges( ActiveDictIds ad );
void dictionaryClear( ActiveDictIds ad );
void indexingDictionary( QString );
};
#endif // GLOBAL_GLOBALBROADCASTER_H

@@ -17,6 +17,7 @@
#include "config.hh"
#include "utils.hh"
#include <QString>
#include "globalbroadcaster.hh"
/// Abstract dictionary-related stuff
namespace Dictionary {

@@ -261,12 +262,16 @@ Q_DECLARE_FLAGS( Features, Feature )
Q_DECLARE_OPERATORS_FOR_FLAGS( Features )
/// A dictionary. Can be used to query words.
class Class
class Class: public QObject
{
Q_OBJECT
string id;
vector< string > dictionaryFiles;
long indexedFtsDoc;
long lastProgress = 0;
protected:
QString dictionaryDescription;
QIcon dictionaryIcon, dictionaryNativeIcon;

@@ -339,8 +344,16 @@ public:
/// Returns the number of articles in the dictionary.
virtual unsigned long getArticleCount() noexcept=0;
void setIndexedFtsDoc(long _indexedFtsDoc){
void setIndexedFtsDoc(long _indexedFtsDoc)
{
indexedFtsDoc = _indexedFtsDoc;
auto newProgress = getIndexingFtsProgress();
if ( newProgress != lastProgress ) {
lastProgress = newProgress;
emit GlobalBroadcaster::instance()->indexingDictionary(
QString( "%1......%%2" ).arg( QString::fromStdString( getName() ) ).arg( newProgress ) );
}
}
int getIndexingFtsProgress(){
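The body of `getIndexingFtsProgress()` is cut off by the hunk boundary above. Judging from how the setter uses it, it presumably converts the indexed-document counter into a percentage of the article count; a guessed sketch, not the actual implementation:

```cpp
int getIndexingFtsProgress()
{
  // Assumed: percentage of articles indexed so far, guarded against an empty dictionary.
  auto total = getArticleCount();
  return total == 0 ? 0 : static_cast< int >( indexedFtsDoc * 100 / total );
}
```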
@@ -23,11 +23,8 @@
#include <map>
#include <set>
#include <list>
#include <ctype.h>
#include <stdlib.h>
#ifdef _MSC_VER
#include <stub_msvc.h>
#include <stub_msvc.h>
#endif
#include "globalregex.hh"

@@ -37,9 +34,7 @@
#include <QCryptographicHash>
#include <QDir>
#include <QRegularExpression>
#include <QSemaphore>
#include <QString>
#include <QTextDocument>
#include <QThreadPool>
#include <QtConcurrent>

@@ -198,10 +193,11 @@ public:
};
class MdxDictionary: public QObject, public BtreeIndexing::BtreeDictionary
class MdxDictionary: public BtreeIndexing::BtreeDictionary
{
Mutex idxMutex;
File::Class idx;
string idxFileName;
IdxHeader idxHeader;
string encoding;
ChunkedStorage::Reader chunks;

@@ -220,7 +216,7 @@ public:
MdxDictionary( string const & id, string const & indexFile, vector<string> const & dictionaryFiles );
~MdxDictionary();
~MdxDictionary() override;
void deferredInit() override;

@@ -231,7 +227,7 @@ public:
map< Dictionary::Property, string > getProperties() noexcept override
{
return map< Dictionary::Property, string >();
return {};
}
unsigned long getArticleCount() noexcept override

@@ -273,7 +269,7 @@ public:
void setFTSParameters( Config::FullTextSearch const & fts ) override
{
if( ensureInitDone().size() )
if ( !ensureInitDone().empty() )
return;
can_FTS = fts.enabled

@@ -305,16 +301,15 @@ private:
void removeDirectory( QString const & directory );
friend class MdxHeadwordsRequest;
friend class MdxArticleRequest;
friend class MddResourceRequest;
void loadResourceFile( const wstring & resourceName, vector< char > & data );
};
MdxDictionary::MdxDictionary( string const & id, string const & indexFile,
vector<string> const & dictionaryFiles ):
MdxDictionary::MdxDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ):
BtreeDictionary( id, dictionaryFiles ),
idx( indexFile, "rb" ),
idxFileName( indexFile ),
idxHeader( idx.read< IdxHeader >() ),
chunks( idx, idxHeader.chunksOffset ),
deferredInitRunnableStarted( false )

@@ -479,8 +474,8 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration
if( haveFTSIndex() )
return;
if( ensureInitDone().size() )
return;
// if( !ensureInitDone().empty() )
// return;
if( firstIteration && getArticleCount() > FTS::MaxDictionarySizeForFastSearch )
return;

@@ -490,7 +485,10 @@ void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration
try
{
FtsHelpers::makeFTSIndex( this, isCancelled );
auto _dict = std::make_shared< MdxDictionary >( this->getId(), idxFileName, this->getDictionaryFilenames() );
if( !_dict->ensureInitDone().empty() )
return;
FtsHelpers::makeFTSIndex( _dict.get(), isCancelled );
FTS_index_completed.ref();
}
catch( std::exception &ex )
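The hunk above is the core of the "lock separated" change in the title: instead of building the full-text index on `this` (whose locks the UI needs for ordinary lookups), a throwaway `MdxDictionary` is constructed over the same index file and handed to `FtsHelpers::makeFTSIndex`. Roughly, the new flow looks like this (a condensed paraphrase of the diff, early-outs and error handling elided):

```cpp
// Condensed paraphrase of the new MdxDictionary::makeFTSIndex body.
void MdxDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration )
{
  // ... early-outs: index already present, init failed, dictionary too large on the first pass ...

  // Index a private copy so the live object's mutexes stay free for normal searches.
  auto _dict = std::make_shared< MdxDictionary >( this->getId(), idxFileName, this->getDictionaryFilenames() );
  if ( !_dict->ensureInitDone().empty() )
    return;

  FtsHelpers::makeFTSIndex( _dict.get(), isCancelled ); // the heavy work runs on the copy
  FTS_index_completed.ref();                            // the completion flag still lives on *this*
}
```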
@@ -559,7 +557,7 @@ public:
isCancelled.ref();
}
~MdxArticleRequest()
~MdxArticleRequest() override
{
isCancelled.ref();
f.waitForFinished();

@@ -575,8 +573,7 @@ void MdxArticleRequest::run()
return;
}
if ( dict.ensureInitDone().size() )
{
if ( !dict.ensureInitDone().empty() ) {
setErrorString( QString::fromUtf8( dict.ensureInitDone().c_str() ) );
finish();
return;

@@ -584,10 +581,9 @@ void MdxArticleRequest::run()
vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );
for ( unsigned x = 0; x < alts.size(); ++x )
{
for ( const auto & alt : alts ) {
/// Make an additional query for each alt
vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}

@@ -1345,30 +1341,27 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
{
vector< sptr< Dictionary::Class > > dictionaries;
for ( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end(); ++i )
{
for ( const auto & fileName : fileNames ) {
// Skip files with the extensions different to .mdx to speed up the
// scanning
if ( i->size() < 4 || strcasecmp( i->c_str() + ( i->size() - 4 ), ".mdx" ) != 0 )
if ( fileName.size() < 4 || strcasecmp( fileName.c_str() + ( fileName.size() - 4 ), ".mdx" ) != 0 )
continue;
vector< string > dictFiles( 1, *i );
findResourceFiles( *i, dictFiles );
vector< string > dictFiles( 1, fileName );
findResourceFiles( fileName, dictFiles );
string dictId = Dictionary::makeDictionaryId( dictFiles );
string dictId = Dictionary::makeDictionaryId( dictFiles );
string indexFile = indicesDir + dictId;
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
indexIsOldOrBad( dictFiles, indexFile ) )
{
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || indexIsOldOrBad( dictFiles, indexFile ) ) {
// Building the index
gdDebug( "MDict: Building the index for dictionary: %s\n", i->c_str() );
gdDebug( "MDict: Building the index for dictionary: %s\n", fileName.c_str() );
MdictParser parser;
list< sptr< MdictParser > > mddParsers;
if ( !parser.open( i->c_str() ) )
if ( !parser.open( fileName.c_str() ) )
continue;
string title = parser.title().toStdString();

@@ -1470,52 +1463,46 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
// Save dictionary stylesheets
{
MdictParser::StyleSheets const & styleSheets = parser.styleSheets();
idxHeader.styleSheetAddress = idx.tell();
idxHeader.styleSheetCount = styleSheets.size();
idxHeader.styleSheetAddress = idx.tell();
idxHeader.styleSheetCount = styleSheets.size();
for ( MdictParser::StyleSheets::const_iterator iter = styleSheets.begin();
iter != styleSheets.end(); ++iter )
{
string styleBegin(iter->second.first.toStdString());
string styleEnd( iter->second.second.toStdString() );
for ( auto const & [ key, value ] : styleSheets ) {
string const styleBegin( value.first.toStdString() );
string const styleEnd( value.second.toStdString() );
// key
idx.write<qint32>( iter->first );
idx.write< qint32 >( key );
// styleBegin
idx.write<quint32>( ( quint32 )styleBegin.size() + 1 );
idx.write< quint32 >( (quint32)styleBegin.size() + 1 );
idx.write( styleBegin.c_str(), styleBegin.size() + 1 );
// styleEnd
idx.write<quint32>( ( quint32 )styleEnd.size() + 1 );
idx.write< quint32 >( (quint32)styleEnd.size() + 1 );
idx.write( styleEnd.c_str(), styleEnd.size() + 1 );
}
}
// read languages
QPair<quint32, quint32> langs = LangCoder::findIdsForFilename( QString::fromStdString( *i ) );
QPair< quint32, quint32 > langs = LangCoder::findIdsForFilename( QString::fromStdString( fileName ) );
// if no languages found, try dictionary's name
if ( langs.first == 0 || langs.second == 0 )
{
if ( langs.first == 0 || langs.second == 0 ) {
langs = LangCoder::findIdsForFilename( parser.title() );
}
idxHeader.langFrom = langs.first;
idxHeader.langTo = langs.second;
idxHeader.langTo = langs.second;
// Build index info for each mdd file
vector< IndexInfo > mddIndexInfos;
for ( vector< sptr< IndexedWords > >::const_iterator mddIndexIter = mddIndices.begin();
mddIndexIter != mddIndices.end(); ++mddIndexIter )
{
IndexInfo resourceIdxInfo = BtreeIndexing::buildIndex( *( *mddIndexIter ), idx );
for ( const auto & mddIndice : mddIndices ) {
IndexInfo const resourceIdxInfo = BtreeIndexing::buildIndex( *mddIndice, idx );
mddIndexInfos.push_back( resourceIdxInfo );
}
// Save address of IndexInfos for resource files
idxHeader.mddIndexInfosOffset = idx.tell();
idxHeader.mddIndexInfosCount = mddIndexInfos.size();
for ( uint32_t mi = 0; mi < mddIndexInfos.size(); mi++ )
{
idxHeader.mddIndexInfosCount = mddIndexInfos.size();
for ( uint32_t mi = 0; mi < mddIndexInfos.size(); mi++ ) {
const string & mddfile = mddFileNames[ mi ];
idx.write<quint32>( ( quint32 )mddfile.size() + 1 );

@@ -495,7 +495,7 @@ void ZimDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration
getName().c_str() );
try
{
return FtsHelpers::makeFTSIndexXapian(this,isCancelled);
return FtsHelpers::makeFTSIndex(this,isCancelled);
}
catch( std::exception &ex )
{

@@ -1,7 +1,7 @@
/* This file is (c) 2014 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "xapian.h"
#include <stdlib.h>
#include <cstdlib>
#include "fulltextsearch.hh"
#include "ftshelpers.hh"
#include "wstring_qt.hh"

@@ -18,9 +18,7 @@
#include <QRegularExpression>
#include "wildcard.hh"
#include <QtConcurrent>
#include "globalregex.hh"
#include <QFutureSynchronizer>
#include <QSemaphoreReleaser>
using std::vector;

@@ -44,7 +42,7 @@ bool ftsIndexIsOldOrBad( string const & indexFile,
qDebug()<<document.get_data().c_str();
//use a special document to mark the end of the index.
return document.get_data().compare(finish_mark)!=0;
return document.get_data()!=finish_mark;
}
catch( Xapian::Error & e )
{
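The completeness check above relies on the same finish-mark convention: an index is trusted only if its very last document carries the sentinel. A minimal standalone sketch of that check (the helper name and single-argument signature are assumptions; the real `ftsIndexIsOldOrBad` takes more parameters):

```cpp
#include <xapian.h>
#include <string>

// Hypothetical helper: true when the index ends with the finish-mark document.
bool ftsIndexLooksComplete( const std::string & indexFile, const std::string & finish_mark )
{
  try {
    Xapian::Database db( indexFile );
    Xapian::Document last = db.get_document( db.get_lastdocid() );
    return last.get_data() == finish_mark; // an interrupted build never wrote the mark
  }
  catch ( Xapian::Error & ) {
    return false; // missing or unreadable index: treat as incomplete and rebuild
  }
}
```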
@@ -114,10 +112,8 @@ static QString makeHiliteRegExpString( QStringList const & words,
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
{
QStringList wordList, hieroglyphList;
for( int i = 0; i < list.size(); i ++ )
for(auto word : list)
{
QString word = list.at( i );
// Check for CJK symbols in word
bool parsed = false;
QString hieroglyph;

@@ -150,8 +146,8 @@ void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStri
bool containCJK( QString const & str)
{
bool hasCJK = false;
for( int x = 0; x < str.size(); x++ )
if( isCJKChar( str.at( x ).unicode() ) )
for(auto x : str)
if( isCJKChar( x.unicode() ) )
{
hasCJK = true;
break;

@@ -255,9 +251,9 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
QVector< QString > setOfWords;
setOfWords.reserve( articleWords.size() );
for( int x = 0; x < articleWords.size(); x++ )
for(const auto & articleWord : articleWords)
{
QString word = articleWords.at( x ).toLower();
QString word = articleWord.toLower();
bool hasCJK = false;
QString hieroglyph;

@@ -295,9 +291,9 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
QStringList list;
QStringList oldVariant = word.split( RX::Ftx::regSplit, Qt::SkipEmptyParts );
for( QStringList::iterator it = oldVariant.begin(); it != oldVariant.end(); ++it )
if( it->size() >= FTS::MinimumWordSize && !list.contains( *it ) )
list.append( *it );
for ( auto const & it : oldVariant )
if ( it.size() >= FTS::MinimumWordSize && !list.contains( it ) )
list.append( it );
QRegularExpressionMatch match = RX::Ftx::regBrackets.match( word );
if( match.hasMatch() )

@@ -321,11 +317,10 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
list.append( parsedWord );
}
for( QStringList::iterator it = list.begin(); it != list.end(); ++it )
{
for ( auto const & it : list ) {
//if( !setOfWords.contains( *it ) )
{
setOfWords.push_back( *it );
setOfWords.push_back( it );
/*Mutex::Lock _( _mapLock );
words[ *it ].push_back( articleAddress );*/
}

@@ -352,118 +347,115 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
}
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
return makeFTSIndexXapian(dict,isCancelled);
}
// use xapian to create the index
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
Mutex::Lock _( dict->getFtsMutex() );
//check the index again.
if ( dict->haveFTSIndex() )
return;
try {
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
// Open the database for update, creating a new database if necessary.
Xapian::WritableDatabase db(dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN);
// Open the database for update, creating a new database if necessary.
Xapian::WritableDatabase db( dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN );
Xapian::TermGenerator indexer;
// Xapian::Stem stemmer("english");
// indexer.set_stemmer(stemmer);
// indexer.set_stemming_strategy(indexer.STEM_SOME_FULL_POS);
indexer.set_flags(Xapian::TermGenerator::FLAG_CJK_NGRAM);
Xapian::TermGenerator indexer;
// Xapian::Stem stemmer("english");
// indexer.set_stemmer(stemmer);
// indexer.set_stemming_strategy(indexer.STEM_SOME_FULL_POS);
indexer.set_flags( Xapian::TermGenerator::FLAG_CJK_NGRAM );
BtreeIndexing::IndexedWords indexedWords;
BtreeIndexing::IndexedWords indexedWords;
QSet< uint32_t > setOfOffsets;
setOfOffsets.reserve( dict->getArticleCount() );
QSet< uint32_t > setOfOffsets;
setOfOffsets.reserve( dict->getArticleCount() );
dict->findArticleLinks( 0, &setOfOffsets, 0, &isCancelled );
dict->findArticleLinks( nullptr, &setOfOffsets, nullptr, &isCancelled );
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
QVector< uint32_t > offsets;
offsets.resize( setOfOffsets.size() );
uint32_t * ptr = &offsets.front();
QVector< uint32_t > offsets;
offsets.resize( setOfOffsets.size() );
uint32_t * ptr = &offsets.front();
for( QSet< uint32_t >::ConstIterator it = setOfOffsets.constBegin();
it != setOfOffsets.constEnd(); ++it )
{
*ptr = *it;
ptr++;
}
for ( QSet< uint32_t >::ConstIterator it = setOfOffsets.constBegin(); it != setOfOffsets.constEnd(); ++it ) {
*ptr = *it;
ptr++;
}
// Free memory
setOfOffsets.clear();
// Free memory
setOfOffsets.clear();
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
throw exUserAbort();
dict->sortArticlesOffsetsForFTS( offsets, isCancelled );
dict->sortArticlesOffsetsForFTS( offsets, isCancelled );
// incremental build the index.
// get the last address.
bool skip = true;
uint32_t lastAddress = -1;
try
{
Xapian::Document lastDoc = db.get_document( db.get_lastdocid() );
lastAddress = atoi( lastDoc.get_data().c_str() );
}
catch( Xapian::Error & e )
{
qDebug() << e.get_description().c_str();
skip = false;
}
long indexedDoc=0L;
for( auto & address : offsets )
{
indexedDoc++;
if(address==lastAddress){
// incremental build the index.
// get the last address.
bool skip = true;
uint32_t lastAddress = -1;
try {
if ( db.get_lastdocid() > 0 ) {
Xapian::Document lastDoc = db.get_document( db.get_lastdocid() );
lastAddress = atoi( lastDoc.get_data().c_str() );
}
else {
skip = false;
}
}
catch ( Xapian::Error & e ) {
qDebug() << "get last doc failed: " << e.get_description().c_str();
skip = false;
}
//skip until to the lastAddress;
if((address!=lastAddress)&&skip){
continue;
long indexedDoc = 0L;
for ( auto const & address : offsets ) {
indexedDoc++;
if ( address > lastAddress && skip ) {
skip = false;
}
//skip until to the lastAddress;
if ( skip ) {
continue;
}
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
return;
}
QString headword, articleStr;
dict->getArticleText( address, headword, articleStr );
Xapian::Document doc;
indexer.set_document( doc );
indexer.index_text_without_positions( articleStr.toStdString() );
doc.set_data( std::to_string( address ) );
// Add the document to the database.
db.add_document( doc );
dict->setIndexedFtsDoc( indexedDoc );
}
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
return;
}
QString headword, articleStr;
dict->getArticleText( address, headword, articleStr );
//add a special document to mark the end of the index.
Xapian::Document doc;
indexer.set_document( doc );
indexer.index_text_without_positions( articleStr.toStdString() );
doc.set_data( std::to_string( address ) );
doc.set_data( finish_mark );
// Add the document to the database.
db.add_document( doc );
dict->setIndexedFtsDoc(indexedDoc);
// Free memory
offsets.clear();
db.commit();
}
//add a special document to mark the end of the index.
Xapian::Document doc;
doc.set_data( finish_mark );
// Add the document to the database.
db.add_document( doc );
// Free memory
offsets.clear();
db.commit();
} catch (Xapian::Error & e) {
qWarning()<<QString::fromStdString(e.get_description());
catch ( Xapian::Error & e ) {
qWarning() << "create xapian index:" << QString::fromStdString( e.get_description() );
}
}

@@ -492,7 +484,7 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets,
}
}
QRegularExpression FTSResultsRequest::createMatchRegex( QRegExp const & searchRegexp )
QRegularExpression FTSResultsRequest::createMatchRegex( QRegExp const & searchRegexp ) const
{
QRegularExpression searchRegularExpression;

@@ -559,18 +551,17 @@ void FTSResultsRequest::checkSingleArticle( uint32_t offset,
QVector< QPair< QString, bool > > wordsList;
if( ignoreWordsOrder )
{
for( QStringList::const_iterator it = words.begin(); it != words.end(); ++it )
wordsList.append( QPair< QString, bool >( *it, true ) );
for(const auto & word : words)
wordsList.append( QPair< QString, bool >( word, true ) );
}
// for( int i = 0; i < offsets.size(); i++ )
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
if( ignoreWordsOrder )
{
for( int i = 0; i < wordsList.size(); i++ )
wordsList[ i ].second = true;
if ( ignoreWordsOrder ) {
for ( auto & [ fst, snd ] : wordsList )
snd = true;
}
dict.getArticleText( offset, headword, articleText );

@@ -717,10 +708,8 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
vector< BtreeIndexing::WordArticleLink > links =
ftsIndex.findArticles( gd::removeTrailingZero( word ), ignoreDiacritics );
for( unsigned x = 0; x < links.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
for ( auto const & link : links ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
addressLists << tmp;
return;
}

@@ -729,7 +718,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
linksPtr = chunks->getBlock( link.articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof( uint32_t ) );

@@ -750,7 +739,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
// int n = indexWords.length();
// QtConcurrent::blockingMap( indexWords, findLinks );
for(QString word:indexWords)
for(const QString& word:indexWords)
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{

@@ -812,9 +801,8 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
QStringList wordsList, hieroglyphsList;
for( int x = 0; x < indexWords.size(); x++ )
for(const auto & word : indexWords)
{
QString const & word = indexWords.at( x );
if( isCJKChar( word[ 0 ].unicode() ) )
hieroglyphsList.append( word );
else

@@ -839,10 +827,8 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
{
QSet< uint32_t > tmp;
vector< BtreeIndexing::WordArticleLink > links = ftsIndex.findArticles( gd::removeTrailingZero( word ) );
for( unsigned x = 0; x < links.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
for ( auto const & link : links ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
Mutex::Lock _( dataMutex );
sets << tmp;
return;

@@ -852,7 +838,7 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
linksPtr = chunks->getBlock( link.articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof( uint32_t ) );

@@ -945,17 +931,17 @@ void FTSResultsRequest::fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
return;
links.reserve( wordsInIndex );
ftsIndex.findArticleLinks( &links, 0, 0, &isCancelled );
ftsIndex.findArticleLinks( &links, nullptr, nullptr, &isCancelled );
QVector< QSet< uint32_t > > allWordsLinks;
allWordsLinks.resize( indexWords.size() );
for( int x = 0; x < links.size(); x++ )
for(auto & link : links)
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;
QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() );
QString word = QString::fromUtf8( link.word.data(), link.word.size() );
if( ignoreDiacritics )
word = QString::fromStdU32String( Folding::applyDiacriticsOnly( gd::toWString( word ) ) );

@@ -968,7 +954,7 @@ void FTSResultsRequest::fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
char * linksPtr;
{
// Mutex::Lock _( dict.getFtsMutex() );
linksPtr = chunks->getBlock( links[ x ].articleOffset, chunk );
linksPtr = chunks->getBlock( link.articleOffset, chunk );
}
memcpy( &size, linksPtr, sizeof(uint32_t) );

@@ -1026,7 +1012,7 @@ void FTSResultsRequest::fullSearch( QStringList & searchWords, QRegExp & regexp
QSet< uint32_t > setOfOffsets;
setOfOffsets.reserve( dict.getArticleCount() );
dict.findArticleLinks( 0, &setOfOffsets, 0, &isCancelled );
dict.findArticleLinks( nullptr, &setOfOffsets, nullptr, &isCancelled );
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
return;

@@ -1119,9 +1105,9 @@ void FTSResultsRequest::runXapian()
Mutex::Lock _( dataMutex );
QString id = QString::fromUtf8( dict.getId().c_str() );
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
for( int x = 0; x < headwords.size(); x++ )
for(const auto & headword : headwords)
{
foundHeadwords->append( FTS::FtsHeadword( headwords.at( x ), id, QStringList(), matchCase ) );
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
}
}
}

@@ -1143,7 +1129,7 @@ void FTSResultsRequest::runXapian()
Mutex::Lock _( dataMutex );
data.resize( sizeof( foundHeadwords ) );
memcpy( &data.front(), &foundHeadwords, sizeof( foundHeadwords ) );
foundHeadwords = 0;
foundHeadwords = nullptr;
hasAnyData = true;
}
}

@@ -64,7 +64,6 @@ void parseArticleForFts( uint32_t articleAddress, QString & articleText,
bool handleRoundBrackets = false );
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
bool isCJKChar( ushort ch );
class FTSResultsRequest : public Dictionary::DataRequest

@@ -91,7 +90,7 @@ class FTSResultsRequest : public Dictionary::DataRequest
void checkArticles( QVector< uint32_t > const & offsets,
QStringList const & words,
QRegExp const & searchRegexp = QRegExp() );
QRegularExpression createMatchRegex( QRegExp const & searchRegexp );
QRegularExpression createMatchRegex( QRegExp const & searchRegexp ) const;
void checkSingleArticle( uint32_t offset,
QStringList const & words,

@@ -22,44 +22,30 @@
namespace FTS
{
enum
{
MinDistanceBetweenWords = 0,
MaxDistanceBetweenWords = 15,
MinArticlesPerDictionary = 1,
MaxArticlesPerDictionary = 10000
};
void Indexing::run()
{
try
{
timerThread->start();
// First iteration - dictionaries with no more MaxDictionarySizeForFastSearch articles
for( size_t x = 0; x < dictionaries.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
for ( const auto & dictionary : dictionaries ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
break;
if( dictionaries.at( x )->canFTS()
&&!dictionaries.at( x )->haveFTSIndex() )
{
emit sendNowIndexingName( QString::fromUtf8( dictionaries.at( x )->getName().c_str() ) );
dictionaries.at( x )->makeFTSIndex( isCancelled, true );
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
dictionary->makeFTSIndex( isCancelled, true );
}
}
// Second iteration - all remaining dictionaries
for( size_t x = 0; x < dictionaries.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
for ( const auto & dictionary : dictionaries ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
break;
if( dictionaries.at( x )->canFTS()
&&!dictionaries.at( x )->haveFTSIndex() )
{
emit sendNowIndexingName( QString::fromUtf8( dictionaries.at( x )->getName().c_str() ) );
dictionaries.at( x )->makeFTSIndex( isCancelled, false );
if ( dictionary->canFTS() && !dictionary->haveFTSIndex() ) {
emit sendNowIndexingName( QString::fromUtf8( dictionary->getName().c_str() ) );
dictionary->makeFTSIndex( isCancelled, false );
}
}

@@ -68,26 +54,27 @@ void Indexing::run()
}
catch( std::exception &ex )
{
gdWarning( "Exception occured while full-text search: %s", ex.what() );
gdWarning( "Exception occurred while full-text search: %s", ex.what() );
}
emit sendNowIndexingName( QString() );
}
void Indexing::timeout(){
for( size_t x = 0; x < dictionaries.size(); x++ )
{
if( Utils::AtomicInt::loadAcquire( isCancelled ) )
void Indexing::timeout()
{
//display all the dictionary name in the following loop ,may result only one dictionary name been seen.
//as the interval is so small.
for ( const auto & dictionary : dictionaries ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
break;
auto progress = dictionaries.at( x )->getIndexingFtsProgress();
if( progress>0&&progress<100)
{
emit sendNowIndexingName( QString::fromUtf8( dictionaries.at( x )->getName().c_str() )+QString("......%1%2").arg("%").arg(progress) );
auto newProgress = dictionary->getIndexingFtsProgress();
if ( newProgress > 0 && newProgress < 100 ) {
emit sendNowIndexingName(
QString( "%1......%%2" ).arg( QString::fromStdString( dictionary->getName() ) ).arg( newProgress ) );
}
}
}
FtsIndexing::FtsIndexing( std::vector< sptr< Dictionary::Class > > const & dicts):
dictionaries( dicts ),
started( false )

@@ -128,7 +115,7 @@ void FtsIndexing::stopIndexing()
}
}
void FtsIndexing::setNowIndexedName( QString name )
void FtsIndexing::setNowIndexedName( const QString & name )
{
{
Mutex::Lock _( nameMutex );

@@ -235,6 +222,10 @@ FullTextSearchDialog::FullTextSearchDialog( QWidget * parent,
setNewIndexingName( ftsIdx.nowIndexingName() );
connect( &ftsIdx, &FtsIndexing::newIndexingName, this, &FullTextSearchDialog::setNewIndexingName );
connect( GlobalBroadcaster::instance(),
&GlobalBroadcaster::indexingDictionary,
this,
&FullTextSearchDialog::setNewIndexingName );
ui.searchMode->addItem( tr( "Whole words" ), WholeWords );
ui.searchMode->addItem( tr( "Plain text"), PlainText );

@@ -97,8 +97,6 @@ public:
~Indexing()
{
emit sendNowIndexingName( QString() );
hasExited.release();
}

@@ -146,7 +144,7 @@ protected:
Mutex nameMutex;
private slots:
void setNowIndexedName( QString name );
void setNowIndexedName( const QString & name );
signals:
void newIndexingName( QString name );

@@ -691,6 +691,10 @@ MainWindow::MainWindow( Config::Class & cfg_ ):
groupListInToolbar->installEventFilter( this );
connect( &ftsIndexing, &FTS::FtsIndexing::newIndexingName, this, &MainWindow::showFTSIndexingName );
connect( GlobalBroadcaster::instance(),
&GlobalBroadcaster::indexingDictionary,
this,
&MainWindow::showFTSIndexingName );
applyProxySettings();