2014-04-16 16:18:28 +00:00
|
|
|
/* This file is (c) 2014 Abs62
|
|
|
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
2022-10-01 13:57:55 +00:00
|
|
|
#include "xapian.h"
|
2023-05-28 16:01:21 +00:00
|
|
|
#include <cstdlib>
|
2014-04-16 16:18:28 +00:00
|
|
|
#include "fulltextsearch.hh"
|
|
|
|
#include "ftshelpers.hh"
|
|
|
|
#include "wstring_qt.hh"
|
|
|
|
#include "file.hh"
|
|
|
|
#include "gddebug.hh"
|
2018-04-10 14:49:52 +00:00
|
|
|
#include "folding.hh"
|
2021-11-27 07:17:33 +00:00
|
|
|
#include "utils.hh"
|
2014-04-16 16:18:28 +00:00
|
|
|
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
|
2017-07-25 15:28:29 +00:00
|
|
|
#include <QVector>
|
|
|
|
|
2018-02-21 14:43:35 +00:00
|
|
|
#include <QRegularExpression>
|
2022-06-03 12:07:14 +00:00
|
|
|
|
2018-02-21 14:43:35 +00:00
|
|
|
#include "wildcard.hh"
|
2023-04-18 00:41:47 +00:00
|
|
|
#include "globalregex.hh"
|
2022-06-16 12:34:32 +00:00
|
|
|
#include <QSemaphoreReleaser>
|
2018-02-21 14:43:35 +00:00
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
using std::vector;
|
|
|
|
using std::string;
|
|
|
|
|
|
|
|
DEF_EX( exUserAbort, "User abort", Dictionary::Ex )
|
|
|
|
|
|
|
|
namespace FtsHelpers
|
|
|
|
{
|
2022-10-07 02:34:27 +00:00
|
|
|
// Marker stored as the data of the index's final document: the word "finished" spelled backwards ("dehsinif").
|
|
|
|
const static std::string finish_mark = std::string( "dehsinif" );
|
2014-04-16 16:18:28 +00:00
|
|
|
|
2014-11-22 14:22:04 +00:00
|
|
|
/// Decide whether the Xapian full-text index of @p dict has to be rebuilt.
///
/// The index is opened from dict->ftsIndexName() and its last document is
/// inspected: the index counts as complete only when that document's data
/// equals finish_mark.
///
/// @param indexFile  not used by this function; kept for interface compatibility.
/// @param dict       dictionary whose index is checked.
/// @return true when the index is unfinished or could not be read,
///         false when the last document carries the finish mark.
bool ftsIndexIsOldOrBad( string const & indexFile, BtreeIndexing::BtreeDictionary * dict )
{
  // Pessimistic default: any exception below leaves the verdict at "bad".
  bool needsRebuild = true;

  try {
    Xapian::WritableDatabase db( dict->ftsIndexName() );

    auto const lastDocId = db.get_lastdocid();
    auto lastDocument    = db.get_document( lastDocId );

    qDebug() << lastDocument.get_data().c_str();

    // A special document holding finish_mark marks the end of the index.
    needsRebuild = ( lastDocument.get_data() != finish_mark );
  }
  catch ( Xapian::Error const & xapianError ) {
    qWarning() << xapianError.get_description().c_str();
    // The index is treated as corrupted: remove it so it can be recreated.
    QFile::remove( QString::fromStdString( dict->ftsIndexName() ) );
  }
  catch ( ... ) {
    // Any other failure also reports the index as bad; nothing is removed here.
  }

  return needsRebuild;
}
|
|
|
|
|
2017-07-25 15:28:29 +00:00
|
|
|
/// Build the regular-expression source used to highlight search results.
///
/// The result has the shape "(" word1 [gap word2 [gap word3 ...]] ")".
///
/// @param words                 words to match, inserted into the pattern verbatim.
/// @param searchMode            FTS::WholeWords puts "\b" around each word; any other
///                              mode puts "(?:\w*)" around it (neither is used for CJK).
/// @param distanceBetweenWords  upper bound written into the gap quantifier; a negative
///                              value leaves the quantifier open ("{0,}").
/// @param hasCJK                use a per-character gap and no word edges.
/// @param ignoreWordsOrder      wrap every word after the first in an optional group "(...)?".
/// @return the assembled pattern string.
static QString makeHiliteRegExpString( QStringList const & words,
                                       int searchMode,
                                       int distanceBetweenWords,
                                       bool hasCJK           = false,
                                       bool ignoreWordsOrder = false )
{
  // Pattern fragment allowed between two consecutive search words.
  QString gap = hasCJK ? QString( "(?:[\\W\\w]){0," ) : QString( "(?:\\W+\\w+){0," );
  if ( distanceBetweenWords >= 0 ) {
    gap += QString::number( distanceBetweenWords );
  }
  gap += "}";
  if ( !hasCJK ) {
    gap += "\\W+";
  }

  // Fragment placed on both sides of every word; stays empty for CJK text.
  QString edge;
  if ( !hasCJK ) {
    edge = QString( searchMode == FTS::WholeWords ? "\\b" : "(?:\\w*)" );
  }

  QString pattern( "(" );
  bool isFirstWord = true;
  for ( QString const & word : words ) {
    bool const optionalGroup = !isFirstWord && ignoreWordsOrder;

    if ( !isFirstWord ) {
      pattern += gap;
    }
    if ( optionalGroup ) {
      pattern += "(";
    }

    pattern += edge + word + edge;

    if ( optionalGroup ) {
      pattern += ")?";
    }
    isFirstWord = false;
  }
  pattern += ")";

  return pattern;
}
|
|
|
|
|
2022-06-03 07:19:58 +00:00
|
|
|
/// Split a list of words into index terms, treating every CJK character as its own term.
///
/// Words without any CJK character are kept whole, filtered through @p wordRegExp
/// and de-duplicated. For words that do contain CJK characters, each CJK character
/// (together with its low surrogate when it starts a surrogate pair) becomes a
/// separate term; the non-CJK characters of such a word are dropped.
///
/// @param indexWords  output: filtered plain words followed by the unique CJK terms.
/// @param wordRegExp  filter applied to the plain (non-CJK) words.
/// @param list        input words.
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
{
  QStringList wordList, hieroglyphList;

  // Iterate by const reference and read with at(): the previous by-value copy
  // combined with non-const operator[] forced a detach (deep copy) of every word.
  for ( auto const & word : list ) {
    // Check for CJK symbols in word
    bool parsed = false;

    for ( int x = 0; x < word.size(); x++ ) {
      if ( !isCJKChar( word.at( x ).unicode() ) ) {
        continue;
      }

      parsed = true;
      QString hieroglyph( word.at( x ) );

      // Take the low surrogate along with its high surrogate. The bounds check
      // matters: an unpaired high surrogate at the end of the word must not
      // trigger a read past the end of the string.
      if ( word.at( x ).isHighSurrogate() && x + 1 < word.size() && word.at( x + 1 ).isLowSurrogate() ) {
        hieroglyph.append( word.at( ++x ) );
      }

      hieroglyphList.append( hieroglyph );
    }

    // If the word doesn't contain CJK symbols, put it in the list as is
    if ( !parsed ) {
      wordList.append( word );
    }
  }

  indexWords = wordList.filter( wordRegExp );
  indexWords.removeDuplicates();

  hieroglyphList.removeDuplicates();
  indexWords += hieroglyphList;
}
|
|
|
|
|
|
|
|
/// Report whether @p str contains at least one UTF-16 code unit that
/// isCJKChar() classifies as CJK.
bool containCJK( QString const & str )
{
  for ( QChar const ch : str ) {
    if ( isCJKChar( ch.unicode() ) ) {
      return true;
    }
  }
  return false;
}
|
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
/// Prepare the word lists and the highlighting regexp for a full-text search.
///
/// @param str                   search string as entered.
/// @param indexWords            output: lower-cased words used for the index lookup.
/// @param searchWords           output: words for searching in article text; only
///                              filled in WholeWords/PlainText mode, otherwise left empty.
/// @param searchRegExp          output: regexp built from the words (WholeWords/PlainText)
///                              or directly from @p str (other modes); always set to minimal matching.
/// @param searchMode            one of the FTS search modes.
/// @param matchCase             selects case-sensitive matching for @p searchRegExp.
/// @param distanceBetweenWords  forwarded to makeHiliteRegExpString().
/// @param hasCJK                output: result of containCJK( str ).
/// @param ignoreWordsOrder      forwarded to makeHiliteRegExpString().
/// @return In WholeWords/PlainText mode, whether any index word was found;
///         in all other modes always true.
bool parseSearchString( QString const & str,
                        QStringList & indexWords,
                        QStringList & searchWords,
                        QRegExp & searchRegExp,
                        int searchMode,
                        bool matchCase,
                        int distanceBetweenWords,
                        bool & hasCJK,
                        bool ignoreWordsOrder )
{
  searchWords.clear();
  indexWords.clear();

  // The regular expressions used below (spaces, words, symbol sets, regexp
  // commands) are the shared instances from RX::Ftx.

  hasCJK = containCJK( str );

  Qt::CaseSensitivity const caseMode = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive;

  // Fill indexWords from lower-cased words: CJK text is tokenized per
  // character, anything else is filtered by the word regexp and de-duplicated.
  auto const fillIndexWords = [ &indexWords, &hasCJK ]( QStringList const & loweredWords ) {
    if ( hasCJK ) {
      tokenizeCJK( indexWords, RX::Ftx::wordRegExp, loweredWords );
      return;
    }
    indexWords = loweredWords.filter( RX::Ftx::wordRegExp );
    indexWords.removeDuplicates();
  };

  bool const wordBasedMode = ( searchMode == FTS::WholeWords || searchMode == FTS::PlainText );

  if ( !wordBasedMode ) {
    // Make words list for index search
    QString stripped = str;

    // Remove RegExp commands
    if ( searchMode == FTS::RegExp ) {
      stripped.replace( RX::Ftx::regexRegExp, " " );
    }

    // Remove all symbol sets
    stripped.replace( RX::Ftx::setsRegExp, " " );

    QStringList const loweredWords =
      stripped.normalized( QString::NormalizationForm_C ).toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
    fillIndexWords( loweredWords );

    // The user's own pattern is used for matching, untouched.
    searchRegExp = QRegExp( str, caseMode, searchMode == FTS::Wildcards ? QRegExp::WildcardUnix : QRegExp::RegExp2 );
    searchRegExp.setMinimal( true );
    return true;
  }

  QString const normalized = str.normalized( QString::NormalizationForm_C );

  // Make words list for search in article text
  searchWords = normalized.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

  // Make words list for index search
  QStringList const loweredWords = normalized.toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
  fillIndexWords( loweredWords );

  // Make regexp for results hilite: CJK uses the lower-cased normalized words,
  // otherwise the words are taken from the raw (non-normalized) search string.
  QString hilitePattern;
  if ( hasCJK ) {
    hilitePattern = makeHiliteRegExpString( loweredWords, searchMode, distanceBetweenWords, hasCJK, ignoreWordsOrder );
  }
  else {
    QStringList const rawWords = str.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
    hilitePattern = makeHiliteRegExpString( rawWords, searchMode, distanceBetweenWords, false, ignoreWordsOrder );
  }

  searchRegExp = QRegExp( hilitePattern, caseMode, QRegExp::RegExp2 );
  searchRegExp.setMinimal( true );

  return !indexWords.isEmpty();
}
|
|
|
|
|
|
|
|
/// Build (or continue building) the Xapian full-text index of @p dict.
///
/// Every article offset reported by the dictionary is indexed as one Xapian
/// document whose data is the decimal offset. When the index already contains
/// documents, articles are skipped until an offset greater than the one stored
/// in the last document is reached. After all articles, a document holding
/// finish_mark is appended and the database is committed.
///
/// @param dict         dictionary to index; its FTS mutex is held for the whole call.
/// @param isCancelled  cancellation flag polled between the stages and per article.
/// @throws exUserAbort when cancellation is detected before the indexing loop starts.
///         Cancellation inside the loop returns silently instead.
///         Xapian::Error is caught and logged here.
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
  QMutexLocker _( &dict->getFtsMutex() );

  // Check the index again now that the mutex is held.
  if ( dict->haveFTSIndex() ) {
    return;
  }

  try {
    if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
      throw exUserAbort();
    }

    // Open the database for update, creating a new database if necessary.
    Xapian::WritableDatabase db( dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN );

    Xapian::TermGenerator indexer;
    //  Xapian::Stem stemmer("english");
    //  indexer.set_stemmer(stemmer);
    //  indexer.set_stemming_strategy(indexer.STEM_SOME_FULL_POS);
    indexer.set_flags( Xapian::TermGenerator::FLAG_CJK_NGRAM );

    QSet< uint32_t > setOfOffsets;
    setOfOffsets.reserve( dict->getArticleCount() );

    dict->findArticleLinks( nullptr, &setOfOffsets, nullptr, &isCancelled );

    if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
      throw exUserAbort();
    }

    // Copy the set into a vector. Appending (rather than writing through
    // &offsets.front()) stays valid when the set is empty.
    QVector< uint32_t > offsets;
    offsets.reserve( setOfOffsets.size() );
    for ( uint32_t const offset : setOfOffsets ) {
      offsets.append( offset );
    }

    // Free memory
    setOfOffsets.clear();

    if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
      throw exUserAbort();
    }

    dict->sortArticlesOffsetsForFTS( offsets, isCancelled );

    // Incremental build: find the address stored in the last indexed document.
    bool skip            = true;
    uint32_t lastAddress = static_cast< uint32_t >( -1 );
    try {
      if ( db.get_lastdocid() > 0 ) {
        Xapian::Document lastDoc = db.get_document( db.get_lastdocid() );
        // Addresses are written with std::to_string( uint32_t ); parse them as
        // unsigned so values above INT_MAX are read back correctly.
        lastAddress = static_cast< uint32_t >( std::strtoul( lastDoc.get_data().c_str(), nullptr, 10 ) );
      }
      else {
        skip = false;
      }
    }
    catch ( Xapian::Error & e ) {
      qDebug() << "get last doc failed: " << e.get_description().c_str();
      skip = false;
    }

    long indexedDoc = 0L;

    for ( auto const & address : offsets ) {
      indexedDoc++;

      // Stop skipping at the first address beyond the last indexed one.
      if ( skip && address > lastAddress ) {
        skip = false;
      }
      if ( skip ) {
        continue;
      }

      if ( Utils::AtomicInt::loadAcquire( isCancelled ) ) {
        // NOTE(review): returns without an explicit commit() and without the
        // finish mark. Xapian documents an implicit commit in the
        // WritableDatabase destructor when no transaction is active -- confirm
        // that this is what the incremental resume relies on.
        return;
      }

      QString headword, articleStr;
      dict->getArticleText( address, headword, articleStr );

      Xapian::Document doc;
      indexer.set_document( doc );
      indexer.index_text_without_positions( articleStr.toStdString() );
      doc.set_data( std::to_string( address ) );

      // Add the document to the database.
      db.add_document( doc );
      dict->setIndexedFtsDoc( indexedDoc );
    }

    // Add a special document to mark the end of the index.
    Xapian::Document finishDoc;
    finishDoc.set_data( finish_mark );
    db.add_document( finishDoc );

    // Free memory
    offsets.clear();

    db.commit();
  }
  catch ( Xapian::Error & e ) {
    qWarning() << "create xapian index:" << QString::fromStdString( e.get_description() );
  }
}
|
|
|
|
|
2014-05-08 12:38:00 +00:00
|
|
|
/// Tell whether the UTF-16 code unit @p ch is a CJK character.
/// The classification itself is delegated to Utils::isCJKChar().
bool isCJKChar( ushort ch )
{
  bool const isCjk = Utils::isCJKChar( ch );
  return isCjk;
}
|
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
void FTSResultsRequest::run()
|
|
|
|
{
|
2023-05-30 23:42:31 +00:00
|
|
|
if ( !dict.ensureInitDone().empty() ) {
|
2022-10-01 13:57:55 +00:00
|
|
|
setErrorString( QString::fromUtf8( dict.ensureInitDone().c_str() ) );
|
|
|
|
finish();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
if( dict.haveFTSIndex() )
|
|
|
|
{
|
|
|
|
//no need to parse the search string, use xapian directly.
|
|
|
|
//if the search mode is wildcard, change xapian search query flag?
|
|
|
|
// Open the database for searching.
|
|
|
|
Xapian::Database db(dict.ftsIndexName());
|
|
|
|
|
|
|
|
// Start an enquire session.
|
|
|
|
Xapian::Enquire enquire( db );
|
|
|
|
|
|
|
|
// Combine the rest of the command line arguments with spaces between
|
|
|
|
// them, so that simple queries don't have to be quoted at the shell
|
|
|
|
// level.
|
|
|
|
string query_string( searchString.toStdString() );
|
|
|
|
|
|
|
|
// Parse the query string to produce a Xapian::Query object.
|
|
|
|
Xapian::QueryParser qp;
|
|
|
|
qp.set_database( db );
|
|
|
|
Xapian::QueryParser::feature_flag flag = Xapian::QueryParser::FLAG_DEFAULT;
|
|
|
|
if( searchMode == FTS::Wildcards )
|
|
|
|
flag = Xapian::QueryParser::FLAG_WILDCARD;
|
2022-10-07 13:32:13 +00:00
|
|
|
Xapian::Query query = qp.parse_query( query_string, flag|Xapian::QueryParser::FLAG_CJK_NGRAM );
|
2022-10-01 13:57:55 +00:00
|
|
|
qDebug() << "Parsed query is: " << query.get_description().c_str();
|
|
|
|
|
|
|
|
// Find the top 100 results for the query.
|
|
|
|
enquire.set_query( query );
|
|
|
|
Xapian::MSet matches = enquire.get_mset( 0, 100 );
|
|
|
|
|
2022-10-07 01:59:41 +00:00
|
|
|
emit matchCount(matches.get_matches_estimated());
|
2022-10-01 13:57:55 +00:00
|
|
|
// Display the results.
|
|
|
|
qDebug() << matches.get_matches_estimated() << " results found.\n";
|
|
|
|
qDebug() << "Matches 1-" << matches.size() << ":\n\n";
|
|
|
|
QList< uint32_t > offsetsForHeadwords;
|
|
|
|
for( Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i )
|
|
|
|
{
|
|
|
|
qDebug() << i.get_rank() + 1 << ": " << i.get_weight() << " docid=" << *i << " ["
|
|
|
|
<< i.get_document().get_data().c_str() << "]";
|
2022-10-06 11:32:45 +00:00
|
|
|
if(i.get_document().get_data()==finish_mark)
|
|
|
|
continue;
|
2022-10-01 13:57:55 +00:00
|
|
|
offsetsForHeadwords.append( atoi( i.get_document().get_data().c_str() ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
if( !offsetsForHeadwords.isEmpty() )
|
|
|
|
{
|
|
|
|
QVector< QString > headwords;
|
2023-05-29 13:56:04 +00:00
|
|
|
QMutexLocker _( &dataMutex );
|
2022-10-01 13:57:55 +00:00
|
|
|
QString id = QString::fromUtf8( dict.getId().c_str() );
|
|
|
|
dict.getHeadwordsFromOffsets( offsetsForHeadwords, headwords, &isCancelled );
|
2023-05-28 16:01:21 +00:00
|
|
|
for(const auto & headword : headwords)
|
2022-10-01 13:57:55 +00:00
|
|
|
{
|
2023-05-28 16:01:21 +00:00
|
|
|
foundHeadwords->append( FTS::FtsHeadword( headword, id, QStringList(), matchCase ) );
|
2022-10-01 13:57:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-05-30 23:42:31 +00:00
|
|
|
else {
|
|
|
|
//if no fulltext index,just returned.
|
|
|
|
qWarning() << "There is no fulltext index right now.";
|
|
|
|
finish();
|
|
|
|
return;
|
2022-10-01 13:57:55 +00:00
|
|
|
}
|
|
|
|
|
2023-05-30 23:42:31 +00:00
|
|
|
if ( foundHeadwords && !foundHeadwords->empty() ) {
|
2023-05-29 13:56:04 +00:00
|
|
|
QMutexLocker _( &dataMutex );
|
2022-10-01 13:57:55 +00:00
|
|
|
data.resize( sizeof( foundHeadwords ) );
|
|
|
|
memcpy( &data.front(), &foundHeadwords, sizeof( foundHeadwords ) );
|
2023-05-28 16:01:21 +00:00
|
|
|
foundHeadwords = nullptr;
|
2023-05-30 23:42:31 +00:00
|
|
|
hasAnyData = true;
|
2022-10-01 13:57:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (const Xapian::Error &e) {
|
|
|
|
qWarning() << e.get_description().c_str();
|
|
|
|
}
|
|
|
|
catch( std::exception &ex )
|
|
|
|
{
|
|
|
|
gdWarning( "FTS: Failed full-text search for \"%s\", reason: %s\n",
|
|
|
|
dict.getName().c_str(), ex.what() );
|
|
|
|
// Results not loaded -- we don't set the hasAnyData flag then
|
|
|
|
}
|
|
|
|
|
|
|
|
finish();
|
|
|
|
}
|
|
|
|
|
2014-04-16 16:18:28 +00:00
|
|
|
} // namespace
|
|
|
|
|