fix: fulltext remove query word size check (#845)

* fix: fulltext remove query word size check

* fix: remove unused methods

* 🎨 apply clang-format changes

---------

Co-authored-by: xiaoyifang <xiaoyifang@users.noreply.github.com>
This commit is contained in:
xiaoyifang 2023-06-10 21:02:22 +08:00 committed by GitHub
parent 5bb1949c5c
commit dfd3a86c7f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 7 additions and 119 deletions

View file

@ -115,8 +115,7 @@ void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStri
bool parsed = false;
QString hieroglyph;
for( int x = 0; x < word.size(); x++ )
if( isCJKChar( word.at( x ).unicode() ) )
{
if ( Utils::isCJKChar( word.at( x ).unicode() ) ) {
parsed = true;
hieroglyph.append( word[ x ] );
@ -144,94 +143,13 @@ bool containCJK( QString const & str)
{
bool hasCJK = false;
for(auto x : str)
if( isCJKChar( x.unicode() ) )
{
if ( Utils::isCJKChar( x.unicode() ) ) {
hasCJK = true;
break;
}
return hasCJK;
}
/// Breaks the user's query @p str into the pieces full-text search works with.
///
/// @param str                  Query text as typed by the user.
/// @param indexWords           Output. Cleared first, then filled with lower-cased
///                             words used for the index lookup (via tokenizeCJK()
///                             when the query contains CJK characters, otherwise by
///                             filtering with RX::Ftx::wordRegExp and de-duplicating).
/// @param searchWords          Output. Cleared first; only filled in
///                             FTS::WholeWords / FTS::PlainText mode, with the
///                             NFC-normalized, case-preserved words of @p str.
/// @param searchRegExp         Output. Expression used to match/highlight in article
///                             text; always switched to minimal (non-greedy) matching.
/// @param searchMode           One of the FTS search modes.
/// @param matchCase            Selects case-sensitive matching for @p searchRegExp.
/// @param distanceBetweenWords Forwarded unchanged to makeHiliteRegExpString().
/// @param hasCJK               Output. Result of containCJK( str ).
/// @param ignoreWordsOrder     Forwarded unchanged to makeHiliteRegExpString().
/// @return In FTS::WholeWords / FTS::PlainText mode, true only when at least one
///         index word was produced; in every other mode always true.
bool parseSearchString( QString const & str, QStringList & indexWords,
                        QStringList & searchWords,
                        QRegExp & searchRegExp, int searchMode,
                        bool matchCase,
                        int distanceBetweenWords,
                        bool & hasCJK,
                        bool ignoreWordsOrder )
{
  indexWords.clear();
  searchWords.clear();

  // The splitting/filtering expressions are the shared ones from RX::Ftx.
  hasCJK = containCJK( str );

  Qt::CaseSensitivity const caseMode = matchCase ? Qt::CaseSensitive : Qt::CaseInsensitive;
  bool const wordBasedMode           = ( searchMode == FTS::WholeWords ) || ( searchMode == FTS::PlainText );

  if ( !wordBasedMode ) {
    // Pattern modes: the index words come from a copy of the query with the
    // pattern syntax blanked out, while the expression itself is built from
    // the untouched query.
    QString stripped = str;

    // Escape sequences are only blanked in RegExp mode.
    if ( searchMode == FTS::RegExp ) {
      stripped.replace( RX::Ftx::regexRegExp, " " );
    }

    // Bracketed symbol sets are blanked in every pattern mode.
    stripped.replace( RX::Ftx::setsRegExp, " " );

    QStringList const tokens = stripped.normalized( QString::NormalizationForm_C )
                                 .toLower()
                                 .split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

    if ( hasCJK ) {
      tokenizeCJK( indexWords, RX::Ftx::wordRegExp, tokens );
    }
    else {
      indexWords = tokens.filter( RX::Ftx::wordRegExp );
      indexWords.removeDuplicates();
    }

    QRegExp::PatternSyntax const syntax =
      ( searchMode == FTS::Wildcards ) ? QRegExp::WildcardUnix : QRegExp::RegExp2;

    searchRegExp = QRegExp( str, caseMode, syntax );
    searchRegExp.setMinimal( true );
    return true;
  }

  // Word modes: both word lists start from the same NFC-normalized text.
  QString const normalized = str.normalized( QString::NormalizationForm_C );

  // Words searched for in the article text keep their original case.
  searchWords = normalized.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

  // Words looked up in the index are lower-cased.
  QStringList loweredWords = normalized.toLower().split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );

  QString hilitePattern;
  if ( hasCJK ) {
    tokenizeCJK( indexWords, RX::Ftx::wordRegExp, loweredWords );
    hilitePattern =
      makeHiliteRegExpString( loweredWords, searchMode, distanceBetweenWords, hasCJK, ignoreWordsOrder );
  }
  else {
    indexWords = loweredWords.filter( RX::Ftx::wordRegExp );
    indexWords.removeDuplicates();

    // The highlight expression is built from the raw (un-normalized) words.
    QStringList rawWords = str.split( RX::Ftx::spacesRegExp, Qt::SkipEmptyParts );
    hilitePattern = makeHiliteRegExpString( rawWords, searchMode, distanceBetweenWords, false, ignoreWordsOrder );
  }

  searchRegExp = QRegExp( hilitePattern, caseMode, QRegExp::RegExp2 );
  searchRegExp.setMinimal( true );

  return !indexWords.isEmpty();
}
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
QMutexLocker _( &dict->getFtsMutex() );
@ -345,11 +263,6 @@ void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancell
}
}
/// Reports whether the code unit @p ch is a CJK character.
///
/// Thin forwarding shim: the classification itself is done by
/// Utils::isCJKChar(); callers in this file pass QChar::unicode() values.
bool isCJKChar( ushort ch )
{
  bool const cjk = Utils::isCJKChar( ch );
  return cjk;
}
void FTSResultsRequest::run()
{
if ( !dict.ensureInitDone().empty() ) {

View file

@ -16,23 +16,12 @@
#include "folding.hh"
#include "wstring_qt.hh"
#include <string>
namespace FtsHelpers
{
bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict );
/// Parses the query @p str for full-text search.
///
/// Fills @p IndexWords (words for the index lookup), @p searchWords (words to
/// look for in article text), @p searchRegExp (matching/highlighting
/// expression) and @p hasCJK (whether the query contains CJK characters).
/// @p searchMode selects the FTS search mode and @p matchCase the case
/// sensitivity of @p searchRegExp; @p distanceBetweenWords and
/// @p ignoreWordsOrder are used when building the highlighting expression.
/// @return false when the query yields nothing usable for the index lookup
///         in the word-based modes; true otherwise.
bool parseSearchString( QString const & str, QStringList & IndexWords,
QStringList & searchWords,
QRegExp & searchRegExp, int searchMode,
bool matchCase,
int distanceBetweenWords,
bool & hasCJK,
bool ignoreWordsOrder = false );
void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled );
/// Returns whether the code unit @p ch is a CJK character (forwards to Utils::isCJKChar()).
bool isCJKChar( ushort ch );
class FTSResultsRequest : public Dictionary::DataRequest
{

View file

@ -327,27 +327,16 @@ void FullTextSearchDialog::setNewIndexingName( QString name )
void FullTextSearchDialog::accept()
{
QStringList list1, list2;
int mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
const int mode = ui.searchMode->itemData( ui.searchMode->currentIndex() ).toInt();
model->clear();
matchedCount=0;
ui.articlesFoundLabel->setText( tr( "Articles found: " ) + QString::number( results.size() ) );
bool hasCJK;
if ( !FtsHelpers::parseSearchString( ui.searchLine->text(),
list1,
list2,
searchRegExp,
mode,
false,
0,
hasCJK,
false ) ) {
if ( ui.searchLine->text().isEmpty() ) {
QMessageBox message( QMessageBox::Warning,
"GoldenDict",
tr( "The search line must contains at least one word containing " )
+ QString::number( MinimumWordSize ) + tr( " or more symbols" ),
tr( "The querying word can not be empty." ),
QMessageBox::Ok,
this );
message.exec();
@ -369,11 +358,8 @@ void FullTextSearchDialog::accept()
ui.searchProgressBar->show();
// Make search requests
for( unsigned x = 0; x < activeDicts.size(); ++x )
{
if( !activeDicts[ x ] ->haveFTSIndex())
{
for ( unsigned x = 0; x < activeDicts.size(); ++x ) {
if ( !activeDicts[ x ]->haveFTSIndex() ) {
continue;
}
//max results=100