// goldendict-ng/src/ftshelpers.hh
//
// 162 lines
// 4.8 KiB
// C++
// Raw Normal View History
//
// 2014-04-16 16:18:28 +00:00
#ifndef __FTSHELPERS_HH_INCLUDED__
#define __FTSHELPERS_HH_INCLUDED__
#include <QString>
2022-02-27 05:17:37 +00:00
#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0))
2022-02-27 14:42:40 +00:00
#include <QtCore5Compat/QRegExp>
#else
#include <QRegExp>
2022-02-27 05:17:37 +00:00
#endif
2014-04-16 16:18:28 +00:00
#include <QRunnable>
#include <QSemaphore>
#include <QList>
2022-06-19 12:24:34 +00:00
#include <QtConcurrent>
2014-04-16 16:18:28 +00:00
#include "dict/dictionary.hh"
2014-04-16 16:18:28 +00:00
#include "btreeidx.hh"
#include "fulltextsearch.hh"
#include "chunkedstorage.hh"
#include "folding.hh"
#include "wstring_qt.hh"
2014-04-16 16:18:28 +00:00
#include <string>
namespace FtsHelpers
{
/// Constants that identify an FTS index file.
enum
{
  /// Magic number stored in the `signature` field of the index header.
  FtsSignature = 0x58535446, // FTSX on little-endian, XSTF on big-endian

  /// Format version of the FTS index. It is computed on top of
  /// BtreeIndexing::FormatVersion, so this value changes whenever the
  /// btree format version changes.
  CurrentFtsFormatVersion = 2 + BtreeIndexing::FormatVersion,
};
/// Header record of an FTS index file.
///
/// The struct is byte-packed (via `#pragma pack` and, on non-MSVC compilers,
/// `__attribute__((packed))`) so no padding is inserted between the fields.
#pragma pack(push,1)
struct FtsIdxHeader
{
  uint32_t signature; // First comes the signature, FTSX
  uint32_t formatVersion; // File format version
  uint32_t chunksOffset; // The offset to chunks' storage
  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
  uint32_t indexRootOffset;
  uint32_t wordCount; // Number of unique words this dictionary has
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;
#pragma pack(pop)

// Lock the packed layout in at compile time: six 32-bit fields, no padding.
static_assert( sizeof( FtsIdxHeader ) == 6 * sizeof( uint32_t ),
               "FtsIdxHeader must be packed without padding" );
// Declaration only; the definition is not in this header.
// NOTE(review): judging by the name, this reports whether the FTS index file
// `indexFile` belonging to `dict` is outdated or unusable and needs to be
// rebuilt - confirm against the definition.
bool ftsIndexIsOldOrBad( std::string const & indexFile,
BtreeIndexing::BtreeDictionary * dict );
2014-04-16 16:18:28 +00:00
// Declaration only; the definition is not in this header.
// Takes the search string `str` plus the search options (searchMode,
// matchCase, distanceBetweenWords, ignoreWordsOrder - the latter defaults to
// false) and has four non-const reference parameters: IndexWords,
// searchWords, searchRegExp and hasCJK.
// NOTE(review): presumably those four are outputs filled from `str`, and the
// bool result signals whether parsing produced a usable query - confirm
// against the definition.
bool parseSearchString( QString const & str, QStringList & IndexWords,
QStringList & searchWords,
QRegExp & searchRegExp, int searchMode,
bool matchCase,
int distanceBetweenWords,
bool & hasCJK,
bool ignoreWordsOrder = false );
2014-04-16 16:18:28 +00:00
// Declaration only; the definition is not in this header.
// `words` maps a QString key to a vector of uint32_t values and is passed by
// non-const reference, as is `articleText`. `handleRoundBrackets` defaults to
// false.
// NOTE(review): presumably splits `articleText` into words and records
// `articleAddress` under each of them in `words` - confirm against the
// definition, including whether `articleText` is modified.
void parseArticleForFts( uint32_t articleAddress, QString & articleText,
QMap< QString, QVector< uint32_t > > & words,
bool handleRoundBrackets = false );
2014-04-16 16:18:28 +00:00
// Declarations only; the definitions are not in this header.
// Both take the dictionary to work on and an atomic flag by reference.
// NOTE(review): presumably these build the full-text-search index for `dict`
// (the second one using the Xapian backend) and poll `isCancelled` to abort
// early - confirm against the definitions.
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
// Declaration only; the definition is not in this header.
// NOTE(review): presumably returns true when the UTF-16 code unit `ch` is a
// CJK character; the exact ranges are set by the definition - confirm there.
bool isCJKChar( ushort ch );
2014-04-16 16:18:28 +00:00
class FTSResultsRequest : public Dictionary::DataRequest
{
BtreeIndexing::BtreeDictionary & dict;
QString searchString;
int searchMode;
bool matchCase;
int distanceBetweenWords;
int maxResults;
bool hasCJK;
bool ignoreWordsOrder;
bool ignoreDiacritics;
int wordsInIndex;
2014-04-16 16:18:28 +00:00
QAtomicInt isCancelled;
2022-06-04 15:22:14 +00:00
QAtomicInt results;
2022-06-19 12:24:34 +00:00
QFuture< void > f;
2022-06-04 15:22:14 +00:00
2014-04-16 16:18:28 +00:00
QList< FTS::FtsHeadword > * foundHeadwords;
void checkArticles( QVector< uint32_t > const & offsets,
QStringList const & words,
QRegExp const & searchRegexp = QRegExp() );
QRegularExpression createMatchRegex( QRegExp const & searchRegexp );
2014-04-16 16:18:28 +00:00
void checkSingleArticle( uint32_t offset,
QStringList const & words,
QRegularExpression const & searchRegexp = QRegularExpression() );
2022-06-04 15:22:14 +00:00
2014-04-16 16:18:28 +00:00
void indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
sptr< ChunkedStorage::Reader > chunks,
QStringList & indexWords,
QStringList & searchWords, QRegExp & regexp );
2014-04-16 16:18:28 +00:00
void combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
sptr< ChunkedStorage::Reader > chunks,
QStringList & indexWords,
QStringList & searchWords,
QRegExp & regexp );
2014-04-16 16:18:28 +00:00
void fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
sptr< ChunkedStorage::Reader > chunks,
QStringList & indexWords,
QStringList & searchWords,
QRegExp & regexp );
void fullSearch( QStringList & searchWords, QRegExp & regexp );
public:
FTSResultsRequest( BtreeIndexing::BtreeDictionary & dict_, QString const & searchString_,
int searchMode_, bool matchCase_, int distanceBetweenWords_, int maxResults_,
bool ignoreWordsOrder_, bool ignoreDiacritics_ ):
2014-04-16 16:18:28 +00:00
dict( dict_ ),
searchString( searchString_ ),
searchMode( searchMode_ ),
matchCase( matchCase_ ),
distanceBetweenWords( distanceBetweenWords_ ),
maxResults( maxResults_ ),
hasCJK( false ),
ignoreWordsOrder( ignoreWordsOrder_ ),
ignoreDiacritics( ignoreDiacritics_ ),
wordsInIndex( 0 )
2014-04-16 16:18:28 +00:00
{
if( ignoreDiacritics_ )
2023-04-17 12:55:39 +00:00
searchString = QString::fromStdU32String( Folding::applyDiacriticsOnly( gd::removeTrailingZero( searchString_ ) ) );
2014-04-16 16:18:28 +00:00
foundHeadwords = new QList< FTS::FtsHeadword >;
2022-06-04 15:22:14 +00:00
results = 0;
2022-06-19 12:24:34 +00:00
f = QtConcurrent::run( [ this ]() { this->run(); } );
2014-04-16 16:18:28 +00:00
}
2022-05-30 12:21:44 +00:00
void run();
void runXapian();
2014-04-16 16:18:28 +00:00
virtual void cancel()
{
isCancelled.ref();
}
~FTSResultsRequest()
{
isCancelled.ref();
2022-06-19 12:24:34 +00:00
f.waitForFinished();
2014-04-16 16:18:28 +00:00
if( foundHeadwords )
delete foundHeadwords;
}
};
} // namespace
#endif // __FTSHELPERS_HH_INCLUDED__