fix: fulltext remove query word size check (#845)

* fix: fulltext remove query word size check

* fix: remove unused methods

* 🎨 apply clang-format changes

---------

Co-authored-by: xiaoyifang <xiaoyifang@users.noreply.github.com>
This commit is contained in:
xiaoyifang 2023-06-10 21:02:22 +08:00 committed by GitHub
parent 5bb1949c5c
commit dfd3a86c7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 7 additions and 119 deletions

View file

@ -115,8 +115,7 @@ void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStri
bool parsed = false; bool parsed = false;
QString hieroglyph; QString hieroglyph;
for( int x = 0; x < word.size(); x++ ) for( int x = 0; x < word.size(); x++ )
if( isCJKChar( word.at( x ).unicode() ) ) if ( Utils::isCJKChar( word.at( x ).unicode() ) ) {
{
parsed = true; parsed = true;
hieroglyph.append( word[ x ] ); hieroglyph.append( word[ x ] );
@ -144,94 +143,13 @@ bool containCJK( QString const & str)
{ {
bool hasCJK = false; bool hasCJK = false;
for(auto x : str) for(auto x : str)
if( isCJKChar( x.unicode() ) ) if ( Utils::isCJKChar( x.unicode() ) ) {
{
hasCJK = true; hasCJK = true;
break; break;
} }
return hasCJK; return hasCJK;
} }
// Splits a full-text-search query into the word lists and the regular
// expression used by the search.
//
//   str                  - the query text.
//   indexWords           - [out] cleared, then filled with the lower-cased
//                          words used for the index lookup.
//   searchWords          - [out] cleared; filled only for WholeWords/PlainText
//                          with the NFC-normalized query split on
//                          RX::Ftx::spacesRegExp.
//   searchRegExp         - [out] expression for matching article text
//                          (always set to minimal matching).
//   searchMode           - FTS search mode selector.
//   matchCase            - selects case-sensitive matching for searchRegExp.
//   distanceBetweenWords - forwarded to makeHiliteRegExpString().
//   hasCJK               - [out] result of containCJK( str ).
//   ignoreWordsOrder     - forwarded to makeHiliteRegExpString().
//
// Returns: for WholeWords/PlainText, true only if at least one index word was
// produced; for every other mode, always true.
bool parseSearchString( QString const & str, QStringList & indexWords,
                        QStringList & searchWords,
                        QRegExp & searchRegExp, int searchMode,
                        bool matchCase,
                        int distanceBetweenWords,
                        bool & hasCJK,
                        bool ignoreWordsOrder )
{
  searchWords.clear();
  indexWords.clear();

  hasCJK = containCJK( str );

  auto const caseMode = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive;
  bool const wordBased = ( searchMode == FTS::WholeWords ) || ( searchMode == FTS::PlainText );

  if ( !wordBased ) {
    // Pattern-based modes: strip pattern syntax before extracting index words.
    QString stripped = str;

    // Escape sequences are only meaningful (and only removed) in RegExp mode.
    if ( searchMode == FTS::RegExp ) {
      stripped.replace( RX::Ftx::regexRegExp, " " );
    }

    // Bracketed symbol sets are removed in every pattern-based mode.
    stripped.replace( RX::Ftx::setsRegExp, " " );

    QStringList const loweredWords = stripped.normalized( QString::NormalizationForm_C )
                                       .toLower()
                                       .split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

    if ( hasCJK ) {
      tokenizeCJK( indexWords, RX::Ftx::wordRegExp, loweredWords );
    }
    else {
      indexWords = loweredWords.filter( RX::Ftx::wordRegExp );
      indexWords.removeDuplicates();
    }

    // The user's text itself is the pattern; only the syntax depends on the mode.
    auto const syntax = ( searchMode == FTS::Wildcards ) ? QRegExp::WildcardUnix : QRegExp::RegExp2;
    searchRegExp      = QRegExp( str, caseMode, syntax );
    searchRegExp.setMinimal( true );
    return true;
  }

  // Word-based modes (WholeWords / PlainText).
  QString const normalized = str.normalized( QString::NormalizationForm_C );

  // Words for searching in article text keep their original case.
  searchWords = normalized.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

  // Words for the index lookup are lower-cased.
  QStringList loweredWords = normalized.toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

  QString hilitePattern;
  if ( hasCJK ) {
    tokenizeCJK( indexWords, RX::Ftx::wordRegExp, loweredWords );
    hilitePattern = makeHiliteRegExpString( loweredWords, searchMode, distanceBetweenWords, hasCJK, ignoreWordsOrder );
  }
  else {
    indexWords = loweredWords.filter( RX::Ftx::wordRegExp );
    indexWords.removeDuplicates();

    // The highlight expression is built from the raw (un-normalized) query words.
    QStringList rawWords = str.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
    hilitePattern = makeHiliteRegExpString( rawWords, searchMode, distanceBetweenWords, false, ignoreWordsOrder );
  }

  searchRegExp = QRegExp( hilitePattern, caseMode, QRegExp::RegExp2 );
  searchRegExp.setMinimal( true );

  return !indexWords.isEmpty();
}
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled ) void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{ {
QMutexLocker _( &dict->getFtsMutex() ); QMutexLocker _( &dict->getFtsMutex() );
@ -345,11 +263,6 @@ void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancell
} }
} }
// Thin forwarder: reports whatever Utils::isCJKChar() returns for the given
// UTF-16 code unit.
bool isCJKChar( ushort ch )
{
  bool const cjk = Utils::isCJKChar( ch );
  return cjk;
}
void FTSResultsRequest::run() void FTSResultsRequest::run()
{ {
if ( !dict.ensureInitDone().empty() ) { if ( !dict.ensureInitDone().empty() ) {

View file

@ -16,23 +16,12 @@
#include "folding.hh" #include "folding.hh"
#include "wstring_qt.hh" #include "wstring_qt.hh"
#include <string>
namespace FtsHelpers namespace FtsHelpers
{ {
bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict ); bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict );
// Parses the query text `str` for full-text search.
// Clears and fills IndexWords (lower-cased words for the index lookup) and
// searchWords (filled only in WholeWords/PlainText mode), builds searchRegExp
// for matching article text, and sets hasCJK to whether `str` contains CJK
// characters.
// Returns false only in WholeWords/PlainText mode when no index word could be
// extracted; in every other mode it returns true.
bool parseSearchString( QString const & str, QStringList & IndexWords,
QStringList & searchWords,
QRegExp & searchRegExp, int searchMode,
bool matchCase,
int distanceBetweenWords,
bool & hasCJK,
bool ignoreWordsOrder = false );
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled ); void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
// Returns the result of Utils::isCJKChar() for the code unit `ch`.
bool isCJKChar( ushort ch );
class FTSResultsRequest : public Dictionary::DataRequest class FTSResultsRequest : public Dictionary::DataRequest
{ {

View file

@ -327,27 +327,16 @@ void FullTextSearchDialog::setNewIndexingName( QString name )
void FullTextSearchDialog::accept() void FullTextSearchDialog::accept()
{ {
QStringList list1, list2; const int mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
int mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
model->clear(); model->clear();
matchedCount=0; matchedCount=0;
ui.articlesFoundLabel->setText( tr( "Articles found: " ) + QString::number( results.size() ) ); ui.articlesFoundLabel->setText( tr( "Articles found: " ) + QString::number( results.size() ) );
bool hasCJK; if ( ui.searchLine->text().isEmpty() ) {
if ( !FtsHelpers::parseSearchString( ui.searchLine->text(),
list1,
list2,
searchRegExp,
mode,
false,
0,
hasCJK,
false ) ) {
QMessageBox message( QMessageBox::Warning, QMessageBox message( QMessageBox::Warning,
"GoldenDict", "GoldenDict",
tr( "The search line must contains at least one word containing " ) tr( "The querying word can not be empty." ),
+ QString::number( MinimumWordSize ) + tr( " or more symbols" ),
QMessageBox::Ok, QMessageBox::Ok,
this ); this );
message.exec(); message.exec();
@ -369,11 +358,8 @@ void FullTextSearchDialog::accept()
ui.searchProgressBar->show(); ui.searchProgressBar->show();
// Make search requests // Make search requests
for ( unsigned x = 0; x < activeDicts.size(); ++x ) {
for( unsigned x = 0; x < activeDicts.size(); ++x ) if ( !activeDicts[ x ]->haveFTSIndex() ) {
{
if( !activeDicts[ x ] ->haveFTSIndex())
{
continue; continue;
} }
//max results=100 //max results=100