2014-04-16 16:18:28 +00:00
|
|
|
#ifndef __FTSHELPERS_HH_INCLUDED__
|
|
|
|
#define __FTSHELPERS_HH_INCLUDED__
|
|
|
|
|
|
|
|
#include <QString>
|
2022-02-27 05:17:37 +00:00
|
|
|
#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0))
|
2022-02-27 14:42:40 +00:00
|
|
|
#include <QtCore5Compat/QRegExp>
|
|
|
|
#else
|
|
|
|
#include <QRegExp>
|
2022-02-27 05:17:37 +00:00
|
|
|
#endif
|
2014-04-16 16:18:28 +00:00
|
|
|
#include <QRunnable>
|
|
|
|
#include <QSemaphore>
|
|
|
|
#include <QList>
|
2022-06-19 12:24:34 +00:00
|
|
|
#include <QtConcurrent>
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
#include "dictionary.hh"
|
|
|
|
#include "btreeidx.hh"
|
|
|
|
#include "fulltextsearch.hh"
|
|
|
|
#include "chunkedstorage.hh"
|
2018-04-10 14:49:52 +00:00
|
|
|
#include "folding.hh"
|
|
|
|
#include "wstring_qt.hh"
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
#include <string>
|
|
|
|
|
|
|
|
namespace FtsHelpers
|
|
|
|
{
|
|
|
|
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
FtsSignature = 0x58535446, // FTSX on little-endian, XSTF on big-endian
|
2014-05-08 12:38:00 +00:00
|
|
|
CurrentFtsFormatVersion = 2 + BtreeIndexing::FormatVersion,
|
2014-04-16 16:18:28 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
#pragma pack(push,1)

/// Header record of an FTS index.
///
/// The structure is declared with 1-byte packing (via #pragma pack and, on
/// non-MSVC compilers, the packed attribute) so that it consists of exactly
/// six consecutive 32-bit fields with no padding.
struct FtsIdxHeader
{
  uint32_t signature;             // First comes the signature, FTSX
  uint32_t formatVersion;         // File format version
  uint32_t chunksOffset;          // The offset to chunks' storage
  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
  uint32_t indexRootOffset;       // (second of the two IndexInfo fields)
  uint32_t wordCount;             // Number of unique words this dictionary has
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;

#pragma pack(pop)

// Lock the packed layout in at compile time: if a field is added, removed or
// padded by a compiler that ignores the packing directives, fail the build
// instead of silently changing the record size.
static_assert( sizeof( FtsIdxHeader ) == 6 * sizeof( uint32_t ),
               "FtsIdxHeader must stay tightly packed (six 32-bit fields, 24 bytes)" );
|
|
|
|
|
2014-11-22 14:22:04 +00:00
|
|
|
bool ftsIndexIsOldOrBad( std::string const & indexFile,
|
|
|
|
BtreeIndexing::BtreeDictionary * dict );
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
bool parseSearchString( QString const & str, QStringList & IndexWords,
|
|
|
|
QStringList & searchWords,
|
|
|
|
QRegExp & searchRegExp, int searchMode,
|
2014-04-22 13:47:02 +00:00
|
|
|
bool matchCase,
|
2014-05-08 12:38:00 +00:00
|
|
|
int distanceBetweenWords,
|
2022-06-03 12:07:14 +00:00
|
|
|
bool & hasCJK,
|
|
|
|
bool ignoreWordsOrder = false );
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
void parseArticleForFts( uint32_t articleAddress, QString & articleText,
|
2017-07-25 15:28:56 +00:00
|
|
|
QMap< QString, QVector< uint32_t > > & words,
|
|
|
|
bool handleRoundBrackets = false );
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
|
2022-10-01 13:57:55 +00:00
|
|
|
#ifdef USE_XAPIAN
|
|
|
|
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
|
|
|
|
#endif
|
2014-05-08 12:38:00 +00:00
|
|
|
bool isCJKChar( ushort ch );
|
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
class FTSResultsRequest : public Dictionary::DataRequest
|
|
|
|
{
|
|
|
|
BtreeIndexing::BtreeDictionary & dict;
|
|
|
|
|
|
|
|
QString searchString;
|
|
|
|
int searchMode;
|
|
|
|
bool matchCase;
|
|
|
|
int distanceBetweenWords;
|
|
|
|
int maxResults;
|
2014-05-08 12:38:00 +00:00
|
|
|
bool hasCJK;
|
2017-07-25 15:28:29 +00:00
|
|
|
bool ignoreWordsOrder;
|
2018-04-10 14:49:52 +00:00
|
|
|
bool ignoreDiacritics;
|
2018-03-01 15:07:22 +00:00
|
|
|
int wordsInIndex;
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
QAtomicInt isCancelled;
|
|
|
|
|
2022-06-04 15:22:14 +00:00
|
|
|
QAtomicInt results;
|
2022-06-19 12:24:34 +00:00
|
|
|
QFuture< void > f;
|
2022-06-04 15:22:14 +00:00
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
QList< FTS::FtsHeadword > * foundHeadwords;
|
|
|
|
|
|
|
|
void checkArticles( QVector< uint32_t > const & offsets,
|
|
|
|
QStringList const & words,
|
|
|
|
QRegExp const & searchRegexp = QRegExp() );
|
2022-06-16 12:17:07 +00:00
|
|
|
QRegularExpression createMatchRegex( QRegExp const & searchRegexp );
|
2014-04-16 16:18:28 +00:00
|
|
|
|
2022-06-16 12:17:07 +00:00
|
|
|
void checkSingleArticle( uint32_t offset,
|
|
|
|
QStringList const & words,
|
|
|
|
QRegularExpression const & searchRegexp = QRegularExpression() );
|
2022-06-04 15:22:14 +00:00
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
void indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
|
|
|
sptr< ChunkedStorage::Reader > chunks,
|
|
|
|
QStringList & indexWords,
|
2022-06-04 15:55:04 +00:00
|
|
|
QStringList & searchWords, QRegExp & regexp );
|
2014-04-16 16:18:28 +00:00
|
|
|
|
2014-05-08 12:38:00 +00:00
|
|
|
void combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
|
|
|
sptr< ChunkedStorage::Reader > chunks,
|
|
|
|
QStringList & indexWords,
|
|
|
|
QStringList & searchWords,
|
|
|
|
QRegExp & regexp );
|
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
void fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
|
|
|
|
sptr< ChunkedStorage::Reader > chunks,
|
|
|
|
QStringList & indexWords,
|
|
|
|
QStringList & searchWords,
|
|
|
|
QRegExp & regexp );
|
|
|
|
|
|
|
|
void fullSearch( QStringList & searchWords, QRegExp & regexp );
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
FTSResultsRequest( BtreeIndexing::BtreeDictionary & dict_, QString const & searchString_,
|
2017-07-25 15:28:29 +00:00
|
|
|
int searchMode_, bool matchCase_, int distanceBetweenWords_, int maxResults_,
|
2018-04-10 14:49:52 +00:00
|
|
|
bool ignoreWordsOrder_, bool ignoreDiacritics_ ):
|
2014-04-16 16:18:28 +00:00
|
|
|
dict( dict_ ),
|
|
|
|
searchString( searchString_ ),
|
|
|
|
searchMode( searchMode_ ),
|
|
|
|
matchCase( matchCase_ ),
|
|
|
|
distanceBetweenWords( distanceBetweenWords_ ),
|
2014-05-08 12:38:00 +00:00
|
|
|
maxResults( maxResults_ ),
|
2017-07-25 15:28:29 +00:00
|
|
|
hasCJK( false ),
|
2018-03-01 15:07:22 +00:00
|
|
|
ignoreWordsOrder( ignoreWordsOrder_ ),
|
2018-04-10 14:49:52 +00:00
|
|
|
ignoreDiacritics( ignoreDiacritics_ ),
|
2018-03-01 15:07:22 +00:00
|
|
|
wordsInIndex( 0 )
|
2014-04-16 16:18:28 +00:00
|
|
|
{
|
2018-04-10 14:49:52 +00:00
|
|
|
if( ignoreDiacritics_ )
|
|
|
|
searchString = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( searchString_ ) ) );
|
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
foundHeadwords = new QList< FTS::FtsHeadword >;
|
2022-06-04 15:22:14 +00:00
|
|
|
results = 0;
|
2022-06-19 12:24:34 +00:00
|
|
|
f = QtConcurrent::run( [ this ]() { this->run(); } );
|
|
|
|
// QThreadPool::globalInstance()->start( [ this ]() { this->run(); }, -100 );
|
2014-04-16 16:18:28 +00:00
|
|
|
}
|
|
|
|
|
2022-05-30 12:21:44 +00:00
|
|
|
void run();
|
2022-10-01 13:57:55 +00:00
|
|
|
#ifdef USE_XAPIAN
|
|
|
|
void runXapian();
|
|
|
|
#endif
|
2014-04-16 16:18:28 +00:00
|
|
|
virtual void cancel()
|
|
|
|
{
|
|
|
|
isCancelled.ref();
|
|
|
|
}
|
|
|
|
|
|
|
|
~FTSResultsRequest()
|
|
|
|
{
|
|
|
|
isCancelled.ref();
|
2022-06-19 12:24:34 +00:00
|
|
|
f.waitForFinished();
|
2014-04-16 16:18:28 +00:00
|
|
|
if( foundHeadwords )
|
|
|
|
delete foundHeadwords;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
#endif // __FTSHELPERS_HH_INCLUDED__
|