goldendict-ng/src/ftshelpers.hh
xiaoyifang d174dc16ba
opt:remove non-xapian fulltext creation logic (#643)
* opt!:remove non-xapian fulltext creation logic

BREAKING CHANGE: after this PR, the xapian package will have to be installed
before compilation. On Linux, this should be the libxapian-dev package.

For common users: any existing full-text index that was not created by xapian
will be recreated using the new
engine.

* fix: remove old fulltext search ui

* 🎨 apply clang-format changes

---------

Co-authored-by: xiaoyifang <xiaoyifang@users.noreply.github.com>
2023-05-17 22:29:56 +08:00

162 lines
4.8 KiB
C++

#ifndef __FTSHELPERS_HH_INCLUDED__
#define __FTSHELPERS_HH_INCLUDED__
#include <QString>
#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0))
#include <QtCore5Compat/QRegExp>
#else
#include <QRegExp>
#endif
#include <QRunnable>
#include <QSemaphore>
#include <QList>
#include <QtConcurrent>
#include "dict/dictionary.hh"
#include "btreeidx.hh"
#include "fulltextsearch.hh"
#include "chunkedstorage.hh"
#include "folding.hh"
#include "wstring_qt.hh"
#include <string>
namespace FtsHelpers
{
// Constants identifying an FTS index file and its format revision.
enum
{
  // Stored as a 32-bit value: reads "FTSX" byte-wise on little-endian
  // machines and "XSTF" on big-endian ones.
  FtsSignature = 0x58535446,

  // Derived from the btree format version, so a change to
  // BtreeIndexing::FormatVersion changes this value as well.
  CurrentFtsFormatVersion = BtreeIndexing::FormatVersion + 2
};
#pragma pack( push, 1 )
/// Header record of an FTS index, beginning with the FTSX signature.
///
/// The struct is byte-packed (both via #pragma pack and, on non-MSVC
/// compilers, the packed attribute) so that it consists of exactly six
/// consecutive 32-bit fields with no padding.
struct FtsIdxHeader
{
  uint32_t signature;             // First comes the signature, FTSX
  uint32_t formatVersion;         // File format version
  uint32_t chunksOffset;          // The offset to chunks' storage
  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
  uint32_t indexRootOffset;
  uint32_t wordCount;             // Number of unique words this dictionary has
}
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;
#pragma pack( pop )

// Fail the build if a field is added/removed or packing stops applying, rather
// than silently changing the header's size.
static_assert( sizeof( FtsIdxHeader ) == 6 * sizeof( uint32_t ),
               "FtsIdxHeader must be exactly six packed 32-bit fields" );
// Declaration only; the definition is not part of this header.
// Takes the path of an index file and the dictionary it belongs to.
// NOTE(review): judging by the name, presumably returns true when the index
// file is unusable (outdated format or damaged) and must be rebuilt — confirm
// against the definition.
bool ftsIndexIsOldOrBad( std::string const & indexFile,
BtreeIndexing::BtreeDictionary * dict );
// Declaration only; the definition is not part of this header.
// Inputs passed by value/const reference: str, searchMode, matchCase,
// distanceBetweenWords and ignoreWordsOrder (defaults to false).
// IndexWords, searchWords, searchRegExp and hasCJK are non-const references,
// so the function is able to write to them.
// NOTE(review): presumably these four are outputs derived from str, and the
// bool result signals whether parsing succeeded — confirm against the
// definition.
bool parseSearchString( QString const & str, QStringList & IndexWords,
QStringList & searchWords,
QRegExp & searchRegExp, int searchMode,
bool matchCase,
int distanceBetweenWords,
bool & hasCJK,
bool ignoreWordsOrder = false );
// Declaration only; the definition is not part of this header.
// articleText and words are non-const references, so either may be modified
// by the call; handleRoundBrackets defaults to false.
// NOTE(review): presumably splits articleText into words and records
// articleAddress under each word in `words` — confirm against the definition.
void parseArticleForFts( uint32_t articleAddress, QString & articleText,
QMap< QString, QVector< uint32_t > > & words,
bool handleRoundBrackets = false );
// Declaration only; the definition is not part of this header.
// isCancelled is passed by non-const reference.
// NOTE(review): presumably builds the full-text index for `dict` and polls
// isCancelled to stop early — confirm against the definition.
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
// Declaration only; the definition is not part of this header.
// isCancelled is passed by non-const reference.
// NOTE(review): presumably the Xapian-backed full-text index builder for
// `dict`, polling isCancelled to stop early — confirm against the definition.
void makeFTSIndexXapian( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably reports whether the 16-bit code unit `ch` is a CJK
// character; the exact ranges are not visible here — confirm against the
// definition.
bool isCJKChar( ushort ch );
/// A full-text search request against a single BtreeDictionary.
///
/// Constructing the object stores the search parameters and immediately
/// schedules run() through QtConcurrent::run(). The destructor raises the
/// cancellation flag and then waits on the returned future, so the task that
/// captured `this` has finished before the members are destroyed.
class FTSResultsRequest: public Dictionary::DataRequest
{
  BtreeIndexing::BtreeDictionary & dict;

  // Search parameters, copied from the constructor arguments.
  QString searchString; // replaced by its diacritics-folded form when ignoreDiacritics is set
  int searchMode;
  bool matchCase;
  int distanceBetweenWords;
  int maxResults;
  bool hasCJK; // starts as false
  bool ignoreWordsOrder;
  bool ignoreDiacritics;
  int wordsInIndex; // starts at 0

  QAtomicInt isCancelled; // incremented by cancel() and by the destructor
  QAtomicInt results;     // starts at 0

  QFuture< void > f; // handle of the task started in the constructor

  // Owned by this object: allocated in the constructor, deleted in the
  // destructor. Default-initialised so it is never indeterminate.
  QList< FTS::FtsHeadword > * foundHeadwords = nullptr;

  // Search helpers; their definitions are not part of this header.
  void checkArticles( QVector< uint32_t > const & offsets,
                      QStringList const & words,
                      QRegExp const & searchRegexp = QRegExp() );

  QRegularExpression createMatchRegex( QRegExp const & searchRegexp );

  void checkSingleArticle( uint32_t offset,
                           QStringList const & words,
                           QRegularExpression const & searchRegexp = QRegularExpression() );

  void indexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
                    sptr< ChunkedStorage::Reader > chunks,
                    QStringList & indexWords,
                    QStringList & searchWords,
                    QRegExp & regexp );

  void combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
                            sptr< ChunkedStorage::Reader > chunks,
                            QStringList & indexWords,
                            QStringList & searchWords,
                            QRegExp & regexp );

  void fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex,
                        sptr< ChunkedStorage::Reader > chunks,
                        QStringList & indexWords,
                        QStringList & searchWords,
                        QRegExp & regexp );

  void fullSearch( QStringList & searchWords, QRegExp & regexp );

public:

  /// Stores the search parameters and starts run() on a QtConcurrent task.
  ///
  /// When ignoreDiacritics_ is true, the stored search string is
  /// Folding::applyDiacriticsOnly() applied to
  /// gd::removeTrailingZero( searchString_ ) instead of searchString_ itself.
  FTSResultsRequest( BtreeIndexing::BtreeDictionary & dict_,
                     QString const & searchString_,
                     int searchMode_,
                     bool matchCase_,
                     int distanceBetweenWords_,
                     int maxResults_,
                     bool ignoreWordsOrder_,
                     bool ignoreDiacritics_ ):
    dict( dict_ ),
    searchString( searchString_ ),
    searchMode( searchMode_ ),
    matchCase( matchCase_ ),
    distanceBetweenWords( distanceBetweenWords_ ),
    maxResults( maxResults_ ),
    hasCJK( false ),
    ignoreWordsOrder( ignoreWordsOrder_ ),
    ignoreDiacritics( ignoreDiacritics_ ),
    wordsInIndex( 0 ),
    results( 0 )
  {
    if ( ignoreDiacritics_ )
      searchString = QString::fromStdU32String( Folding::applyDiacriticsOnly( gd::removeTrailingZero( searchString_ ) ) );

    foundHeadwords = new QList< FTS::FtsHeadword >;

    // Start the task last: it captures `this`, so every member it may touch
    // has to be set up before it can begin running.
    f = QtConcurrent::run( [ this ]() { this->run(); } );
  }

  // Defined out of line.
  void run();
  void runXapian();

  /// Raises the cancellation flag; does not wait for the task to stop.
  void cancel() override
  {
    isCancelled.ref();
  }

  /// Raises the cancellation flag, waits for the task started in the
  /// constructor, then releases the headword list.
  ~FTSResultsRequest() override
  {
    isCancelled.ref();
    f.waitForFinished();
    delete foundHeadwords; // deleting a null pointer is a no-op, so no check is needed
  }
};
} // namespace
#endif // __FTSHELPERS_HH_INCLUDED__