From 5fa5cc123f19f11ce8b9711875103a4be4c82408 Mon Sep 17 00:00:00 2001 From: Abs62 Date: Tue, 10 Apr 2018 17:49:52 +0300 Subject: [PATCH] Full-text search: Allow ignore diacritics while search --- aard.cc | 8 ++-- articleview.cc | 114 +++++++++++++++++++++++++++++++++++++++++----- articleview.hh | 3 +- bgl.cc | 8 ++-- btreeidx.cc | 14 ++++-- btreeidx.hh | 4 +- config.cc | 7 +++ config.hh | 2 + dictdfiles.cc | 8 ++-- dictionary.cc | 2 +- dictionary.hh | 3 +- dsl.cc | 8 ++-- epwing.cc | 8 ++-- folding.cc | 37 ++++++++------- folding.hh | 6 +++ ftshelpers.cc | 22 +++++++-- ftshelpers.hh | 9 +++- fulltextsearch.cc | 16 ++++++- fulltextsearch.hh | 4 +- fulltextsearch.ui | 21 +++++++-- gls.cc | 8 ++-- mainwindow.cc | 10 ++-- mainwindow.hh | 2 +- mdx.cc | 8 ++-- sdict.cc | 8 ++-- slob.cc | 8 ++-- stardict.cc | 8 ++-- xdxf.cc | 8 ++-- zim.cc | 8 ++-- 29 files changed, 280 insertions(+), 92 deletions(-) diff --git a/aard.cc b/aard.cc index 9fc12fdd..fe9881e0 100644 --- a/aard.cc +++ b/aard.cc @@ -272,7 +272,8 @@ class AardDictionary: public BtreeIndexing::BtreeDictionary int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -654,9 +655,10 @@ sptr< Dictionary::DataRequest > AardDictionary::getSearchResults( QString const int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } /// AardDictionary::getArticle() diff --git 
a/articleview.cc b/articleview.cc index b9f63ee9..7984a38e 100644 --- a/articleview.cc +++ b/articleview.cc @@ -52,15 +52,20 @@ using std::list; class AccentMarkHandler { +protected: QString normalizedString; QVector< int > accentMarkPos; public: + AccentMarkHandler() + {} + virtual ~AccentMarkHandler() + {} static QChar accentMark() { return QChar( 0x301 ); } /// Create text without accent marks /// and store mark positions - void setText( QString const & baseString ) + virtual void setText( QString const & baseString ) { accentMarkPos.clear(); normalizedString.clear(); @@ -100,6 +105,72 @@ public: /// End of DslAccentMark class +/// DiacriticsHandler class +/// +/// Remove diacritics from text +/// and mirror position in normalized text to original text + +class DiacriticsHandler : public AccentMarkHandler +{ +public: + DiacriticsHandler() + {} + ~DiacriticsHandler() + {} + + /// Create text without diacritics + /// and store diacritic marks positions + virtual void setText( QString const & baseString ) + { + accentMarkPos.clear(); + normalizedString.clear(); + + gd::wstring baseText = gd::toWString( baseString ); + gd::wstring normText; + + int pos = 0; + normText.reserve( baseText.size() ); + + gd::wchar const * nextChar = baseText.data(); + size_t consumed; + + for( size_t left = baseText.size(); left; ) + { + if( *nextChar >= 0x10000U ) + { + // Will be translated into surrogate pair + normText.push_back( *nextChar ); + pos += 2; + nextChar++; left--; + continue; + } + + gd::wchar ch = Folding::foldedDiacritic( nextChar, left, consumed ); + + if( Folding::isCombiningMark( ch ) ) + { + accentMarkPos.append( pos ); + nextChar++; left--; + continue; + } + + if( consumed > 1 ) + { + for( size_t i = 1; i < consumed; i++ ) + accentMarkPos.append( pos ); + } + + normText.push_back( ch ); + pos += 1; + nextChar += consumed; + left -= consumed; + } + normalizedString = gd::toQString( normText ); + } +}; + +/// End of DiacriticsHandler class + static QVariant 
evaluateJavaScriptVariableSafe( QWebFrame * frame, const QString & variable ) { return frame->evaluateJavaScript( @@ -340,7 +411,8 @@ void ArticleView::showDefinition( QString const & word, unsigned group, } void ArticleView::showDefinition( QString const & word, QStringList const & dictIDs, - QRegExp const & searchRegExp, unsigned group ) + QRegExp const & searchRegExp, unsigned group, + bool ignoreDiacritics ) { if( dictIDs.isEmpty() ) return; @@ -360,6 +432,8 @@ void ArticleView::showDefinition( QString const & word, QStringList const & dict if( searchRegExp.patternSyntax() == QRegExp::WildcardUnix ) Qt4x5::Url::addQueryItem( req, "wildcards", "1" ); Qt4x5::Url::addQueryItem( req, "group", QString::number( group ) ); + if( ignoreDiacritics ) + Qt4x5::Url::addQueryItem( req, "ignore_diacritics", "1" ); // Update both histories (pages history and headwords history) saveHistoryUserData(); @@ -1060,7 +1134,7 @@ void ArticleView::openLink( QUrl const & url, QUrl const & ref, QStringList dictsList = Qt4x5::Url::queryItemValue( ref, "dictionaries" ) .split( ",", QString::SkipEmptyParts ); - showDefinition( url.path(), dictsList, QRegExp(), getGroup( ref ) ); + showDefinition( url.path(), dictsList, QRegExp(), getGroup( ref ), false ); } else showDefinition( url.path(), @@ -1082,7 +1156,7 @@ void ArticleView::openLink( QUrl const & url, QUrl const & ref, QStringList dictsList = Qt4x5::Url::queryItemValue( ref, "dictionaries" ) .split( ",", QString::SkipEmptyParts ); - showDefinition( url.path().mid( 1 ), dictsList, QRegExp(), getGroup( ref ) ); + showDefinition( url.path().mid( 1 ), dictsList, QRegExp(), getGroup( ref ), false ); return; } @@ -2200,7 +2274,7 @@ void ArticleView::doubleClicked( QPoint pos ) { QStringList dictsList = Qt4x5::Url::queryItemValue(ref, "dictionaries" ) .split( ",", QString::SkipEmptyParts ); - showDefinition( selectedText, dictsList, QRegExp(), getGroup( ref ) ); + showDefinition( selectedText, dictsList, QRegExp(), getGroup( ref ), false ); 
} else showDefinition( selectedText, getGroup( ref ), getCurrentArticle() ); @@ -2368,18 +2442,29 @@ void ArticleView::highlightFTSResults() { closeSearch(); - AccentMarkHandler markHandler; - const QUrl & url = ui.definition->url(); - QRegExp regexp( Qt4x5::Url::queryItemValue( url, "regexp" ).remove( AccentMarkHandler::accentMark() ), + + bool ignoreDiacritics = Qt4x5::Url::hasQueryItem( url, "ignore_diacritics" ); + + QString regString = Qt4x5::Url::queryItemValue( url, "regexp" ); + if( ignoreDiacritics ) + regString = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( regString ) ) ); + else + regString = regString.remove( AccentMarkHandler::accentMark() ); + + QRegExp regexp( regString, Qt4x5::Url::hasQueryItem( url, "matchcase" ) ? Qt::CaseSensitive : Qt::CaseInsensitive, Qt4x5::Url::hasQueryItem( url, "wildcards" ) ? QRegExp::WildcardUnix : QRegExp::RegExp2 ); + if( regexp.pattern().isEmpty() ) return; regexp.setMinimal( true ); + sptr< AccentMarkHandler > marksHandler = ignoreDiacritics ? 
+ new DiacriticsHandler : new AccentMarkHandler; + // Clear any current selection if ( ui.definition->selectedText().size() ) { @@ -2388,18 +2473,23 @@ void ArticleView::highlightFTSResults() } QString pageText = ui.definition->page()->currentFrame()->toPlainText(); - markHandler.setText( pageText ); + marksHandler->setText( pageText ); int pos = 0; while( pos >= 0 ) { - pos = regexp.indexIn( markHandler.normalizedText(), pos ); + pos = regexp.indexIn( marksHandler->normalizedText(), pos ); if( pos >= 0 ) { // Mirror pos and matched length to original string - int spos = markHandler.mirrorPosition( pos ); - int matched = markHandler.mirrorPosition( pos + regexp.matchedLength() ) - spos; + int spos = marksHandler->mirrorPosition( pos ); + int matched = marksHandler->mirrorPosition( pos + regexp.matchedLength() ) - spos; + + // Extend the match over trailing non-spacing marks (if present) + while( spos + matched < pageText.length() + && pageText[ spos + matched ].category() == QChar::Mark_NonSpacing ) + matched++; if( matched > FTS::MaxMatchLengthForHighlightResults ) { diff --git a/articleview.hh b/articleview.hh index 36cebbcf..8c650aa5 100644 --- a/articleview.hh +++ b/articleview.hh @@ -101,7 +101,8 @@ public: Contexts const & contexts = Contexts() ); void showDefinition( QString const & word, QStringList const & dictIDs, - QRegExp const & searchRegExp, unsigned group ); + QRegExp const & searchRegExp, unsigned group, + bool ignoreDiacritics ); /// Clears the view and sets the application-global waiting cursor, /// which will be restored when some article loads eventually. 
diff --git a/bgl.cc b/bgl.cc index 534c7e85..c4c30479 100644 --- a/bgl.cc +++ b/bgl.cc @@ -242,7 +242,8 @@ namespace int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual QString const& getDescription(); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); @@ -1199,9 +1200,10 @@ sptr< Dictionary::DataRequest > BglDictionary::getSearchResults( QString const & int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } diff --git a/btreeidx.cc b/btreeidx.cc index b4e1cc6f..aa2cf557 100644 --- a/btreeidx.cc +++ b/btreeidx.cc @@ -83,7 +83,7 @@ void BtreeIndex::openIndex( IndexInfo const & indexInfo, rootNode.clear(); } -vector< WordArticleLink > BtreeIndex::findArticles( wstring const & word ) +vector< WordArticleLink > BtreeIndex::findArticles( wstring const & word, bool ignoreDiacritics ) { vector< WordArticleLink > result; @@ -108,7 +108,7 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & word ) { result = readChain( chainOffset ); - antialias( word, result ); + antialias( word, result, ignoreDiacritics ); } } catch( std::exception & e ) @@ -910,7 +910,8 @@ vector< WordArticleLink > BtreeIndex::readChain( char const * & ptr ) } void BtreeIndex::antialias( wstring const & str, - vector< WordArticleLink > & chain ) + vector< WordArticleLink > & chain, + bool ignoreDiacritics ) { wstring caseFolded = Folding::applySimpleCaseOnly( gd::normalize( str ) ); @@ -918,8 +919,11 @@ void BtreeIndex::antialias( wstring const & 
str, { // If after applying case folding to each word they wouldn't match, we // drop the entry. - if ( Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) ) != - caseFolded ) + wstring entry = Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) ); + if( ignoreDiacritics ) + entry = Folding::applyDiacriticsOnly( entry ); + + if ( entry != caseFolded ) chain.erase( chain.begin() + x ); else if ( chain[ x ].prefix.size() ) // If there's a prefix, merge it with the word, diff --git a/btreeidx.hh b/btreeidx.hh index c92b3ef3..d7879ffe 100644 --- a/btreeidx.hh +++ b/btreeidx.hh @@ -84,7 +84,7 @@ public: /// Finds articles that match the given string. A case-insensitive search /// is performed. - vector< WordArticleLink > findArticles( wstring const & ); + vector< WordArticleLink > findArticles( wstring const &, bool ignoreDiacritics = false ); /// Find all unique article links in the index void findAllArticleLinks( QVector< WordArticleLink > & articleLinks ); @@ -133,7 +133,7 @@ protected: /// Drops any alises which arose due to folding. Only case-folded aliases /// are left. 
- void antialias( wstring const &, vector< WordArticleLink > & ); + void antialias( wstring const &, vector< WordArticleLink > &, bool ignoreDiacritics ); protected: diff --git a/config.cc b/config.cc index 9053478c..7ab0c80a 100644 --- a/config.cc +++ b/config.cc @@ -931,6 +931,9 @@ Class load() throw( exError ) if ( !fts.namedItem( "ignoreWordsOrder" ).isNull() ) c.preferences.fts.ignoreWordsOrder = ( fts.namedItem( "ignoreWordsOrder" ).toElement().text() == "1" ); + if ( !fts.namedItem( "ignoreDiacritics" ).isNull() ) + c.preferences.fts.ignoreDiacritics = ( fts.namedItem( "ignoreDiacritics" ).toElement().text() == "1" ); + if ( !fts.namedItem( "maxDictionarySize" ).isNull() ) c.preferences.fts.maxDictionarySize = fts.namedItem( "maxDictionarySize" ).toElement().text().toUInt(); } @@ -1878,6 +1881,10 @@ void save( Class const & c ) throw( exError ) opt.appendChild( dd.createTextNode( c.preferences.fts.ignoreWordsOrder ? "1" : "0" ) ); hd.appendChild( opt ); + opt = dd.createElement( "ignoreDiacritics" ); + opt.appendChild( dd.createTextNode( c.preferences.fts.ignoreDiacritics ? 
"1" : "0" ) ); + hd.appendChild( opt ); + opt = dd.createElement( "maxDictionarySize" ); opt.appendChild( dd.createTextNode( QString::number( c.preferences.fts.maxDictionarySize ) ) ); hd.appendChild( opt ); diff --git a/config.hh b/config.hh index eff158ce..be1e3249 100644 --- a/config.hh +++ b/config.hh @@ -161,6 +161,7 @@ struct FullTextSearch bool useMaxArticlesPerDictionary; bool enabled; bool ignoreWordsOrder; + bool ignoreDiacritics; quint32 maxDictionarySize; QByteArray dialogGeometry; QString disabledTypes; @@ -173,6 +174,7 @@ struct FullTextSearch useMaxArticlesPerDictionary( false ), enabled( true ), ignoreWordsOrder( false ), + ignoreDiacritics( false ), maxDictionarySize( 0 ) {} }; diff --git a/dictdfiles.cc b/dictdfiles.cc index ce4929ad..9fdb350a 100644 --- a/dictdfiles.cc +++ b/dictdfiles.cc @@ -133,7 +133,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -588,9 +589,10 @@ sptr< Dictionary::DataRequest > DictdDictionary::getSearchResults( QString const int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } } // anonymous namespace diff --git a/dictionary.cc b/dictionary.cc index 676b99d7..bf2a17d9 100644 --- a/dictionary.cc +++ b/dictionary.cc @@ -175,7 +175,7 @@ sptr< DataRequest > Class::getResource( string const & /*name*/ ) return new DataRequestInstant( false ); } -sptr< 
DataRequest > Class::getSearchResults(const QString &, int, bool, int, int, bool ) +sptr< DataRequest > Class::getSearchResults(const QString &, int, bool, int, int, bool, bool ) { return new DataRequestInstant( false ); } diff --git a/dictionary.hh b/dictionary.hh index fec9f33c..9625e190 100644 --- a/dictionary.hh +++ b/dictionary.hh @@ -397,7 +397,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxArticlesPerDictionary, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); // Return dictionary description if presented virtual QString const& getDescription(); diff --git a/dsl.cc b/dsl.cc index 796af57b..69651cca 100644 --- a/dsl.cc +++ b/dsl.cc @@ -227,7 +227,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual QString const& getDescription(); virtual QString getMainFilename(); @@ -1991,9 +1992,10 @@ sptr< Dictionary::DataRequest > DslDictionary::getSearchResults( QString const & int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } } // anonymous namespace diff --git a/epwing.cc b/epwing.cc index 5c5ce8c9..d02115aa 100644 --- a/epwing.cc +++ b/epwing.cc @@ -128,7 +128,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ 
-784,9 +785,10 @@ sptr< Dictionary::DataRequest > EpwingDictionary::getSearchResults( QString cons int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } int EpwingDictionary::japaneseWriting( gd::wchar ch ) diff --git a/folding.cc b/folding.cc index 821ab666..6f8f8c28 100644 --- a/folding.cc +++ b/folding.cc @@ -10,24 +10,21 @@ namespace Folding { -namespace -{ - #include "inc_case_folding.hh" - #include "inc_diacritic_folding.hh" +#include "inc_case_folding.hh" +#include "inc_diacritic_folding.hh" - /// Tests if the given char is one of the Unicode combining marks. Some are - /// caught by the diacritics folding table, but they are only handled there - /// when they come with their main characters, not by themselves. The rest - /// are caught here. - bool isCombiningMark( wchar ch ) - { - return ( - ( ch >= 0x300 && ch <= 0x36F ) || - ( ch >= 0x1DC0 && ch <= 0x1DFF ) || - ( ch >= 0x20D0 && ch <= 0x20FF ) || - ( ch >= 0xFE20 && ch <= 0xFE2F ) - ); - } +/// Tests if the given char is one of the Unicode combining marks. Some are +/// caught by the diacritics folding table, but they are only handled there +/// when they come with their main characters, not by themselves. The rest +/// are caught here. 
+bool isCombiningMark( wchar ch ) +{ + return ( + ( ch >= 0x300 && ch <= 0x36F ) || + ( ch >= 0x1DC0 && ch <= 0x1DFF ) || + ( ch >= 0x20D0 && ch <= 0x20FF ) || + ( ch >= 0xFE20 && ch <= 0xFE2F ) + ); } wstring apply( wstring const & in, bool preserveWildcards ) @@ -691,4 +688,10 @@ QString unescapeWildcardSymbols( const QString & str ) return unescaped; } +wchar foldedDiacritic( wchar const * in, size_t size, size_t & consumed ) +{ + return foldDiacritic( in, size, consumed ); +} + + } diff --git a/folding.hh b/folding.hh index 41734bfa..889858b8 100644 --- a/folding.hh +++ b/folding.hh @@ -85,6 +85,12 @@ QString unescapeWildcardSymbols( QString const & ); /// Escape all wildcard symbols (for place word to input line) QString escapeWildcardSymbols( QString const & ); +/// Return result of foldDiacritic() from "inc_diacritic_folding.hh" +wchar foldedDiacritic( wchar const * in, size_t size, size_t & consumed ); + +/// Tests if the given char is one of the Unicode combining marks. +bool isCombiningMark( wchar ch ); + } #endif diff --git a/ftshelpers.cc b/ftshelpers.cc index fcb212f1..4aeaf2a2 100644 --- a/ftshelpers.cc +++ b/ftshelpers.cc @@ -6,6 +6,7 @@ #include "wstring_qt.hh" #include "file.hh" #include "gddebug.hh" +#include "folding.hh" #include "qt4x5.hh" #include @@ -472,6 +473,9 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, dict.getArticleText( offsets.at( i ), headword, articleText ); articleText = articleText.normalized( QString::NormalizationForm_C ); + if( ignoreDiacritics ) + articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); + #if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 ) if( articleText.contains( searchRegularExpression ) ) #else @@ -530,8 +534,12 @@ void FTSResultsRequest::checkArticles( QVector< uint32_t > const & offsets, dict.getArticleText( offsets.at( i ), headword, articleText ); - QStringList articleWords = articleText.normalized( QString::NormalizationForm_C ) - .split( 
needHandleBrackets ? splitWithBrackets : splitWithoutBrackets, + articleText = articleText.normalized( QString::NormalizationForm_C ); + + if( ignoreDiacritics ) + articleText = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( articleText ) ) ); + + QStringList articleWords = articleText.split( needHandleBrackets ? splitWithBrackets : splitWithoutBrackets, QString::SkipEmptyParts ); int wordsNum = articleWords.length(); @@ -802,7 +810,7 @@ void FTSResultsRequest::indexSearch( BtreeIndexing::BtreeIndex & ftsIndex, tmp.clear(); - links = ftsIndex.findArticles( gd::toWString( indexWords.at( i ) ) ); + links = ftsIndex.findArticles( gd::toWString( indexWords.at( i ) ), ignoreDiacritics ); for( unsigned x = 0; x < links.size(); x++ ) { @@ -949,6 +957,10 @@ void FTSResultsRequest::combinedIndexSearch( BtreeIndexing::BtreeIndex & ftsInde return; QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() ); + + if( ignoreDiacritics ) + word = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( word ) ) ); + for( int i = 0; i < wordsList.size(); i++ ) { if( word.length() >= wordsList.at( i ).length() && word.contains( wordsList.at( i ) ) ) @@ -1035,6 +1047,10 @@ void FTSResultsRequest::fullIndexSearch( BtreeIndexing::BtreeIndex & ftsIndex, return; QString word = QString::fromUtf8( links[ x ].word.data(), links[ x ].word.size() ); + + if( ignoreDiacritics ) + word = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( word ) ) ); + for( int i = 0; i < indexWords.size(); i++ ) { if( word.length() >= indexWords.at( i ).length() && word.contains( indexWords.at( i ) ) ) diff --git a/ftshelpers.hh b/ftshelpers.hh index f695b126..0c3530aa 100644 --- a/ftshelpers.hh +++ b/ftshelpers.hh @@ -11,6 +11,8 @@ #include "btreeidx.hh" #include "fulltextsearch.hh" #include "chunkedstorage.hh" +#include "folding.hh" +#include "wstring_qt.hh" #include @@ -92,6 +94,7 @@ class FTSResultsRequest : public Dictionary::DataRequest int maxResults; bool 
hasCJK; bool ignoreWordsOrder; + bool ignoreDiacritics; int wordsInIndex; QAtomicInt isCancelled; @@ -126,7 +129,7 @@ public: FTSResultsRequest( BtreeIndexing::BtreeDictionary & dict_, QString const & searchString_, int searchMode_, bool matchCase_, int distanceBetweenWords_, int maxResults_, - bool ignoreWordsOrder_ ): + bool ignoreWordsOrder_, bool ignoreDiacritics_ ): dict( dict_ ), searchString( searchString_ ), searchMode( searchMode_ ), @@ -135,8 +138,12 @@ public: maxResults( maxResults_ ), hasCJK( false ), ignoreWordsOrder( ignoreWordsOrder_ ), + ignoreDiacritics( ignoreDiacritics_ ), wordsInIndex( 0 ) { + if( ignoreDiacritics_ ) + searchString = gd::toQString( Folding::applyDiacriticsOnly( gd::toWString( searchString_ ) ) ); + foundHeadwords = new QList< FTS::FtsHeadword >; QThreadPool::globalInstance()->start( new FTSResultsRequestRunnable( *this, hasExited ), -100 ); diff --git a/fulltextsearch.cc b/fulltextsearch.cc index df82eef0..b9e1caa0 100644 --- a/fulltextsearch.cc +++ b/fulltextsearch.cc @@ -137,6 +137,7 @@ FullTextSearchDialog::FullTextSearchDialog( QWidget * parent, groups( groups_ ), group( 0 ), ignoreWordsOrder( cfg_.preferences.fts.ignoreWordsOrder ), + ignoreDiacritics( cfg_.preferences.fts.ignoreDiacritics ), ftsIdx( ftsidx ) , helpAction( this ) { @@ -193,6 +194,8 @@ FullTextSearchDialog::FullTextSearchDialog( QWidget * parent, ui.checkBoxIgnoreWordOrder->setEnabled( false ); } + ui.checkBoxIgnoreDiacritics->setChecked( ignoreDiacritics ); + ui.matchCase->setChecked( cfg.preferences.fts.matchCase ); setLimitsUsing(); @@ -205,6 +208,8 @@ FullTextSearchDialog::FullTextSearchDialog( QWidget * parent, this, SLOT( setLimitsUsing() ) ); connect( ui.checkBoxIgnoreWordOrder, SIGNAL( stateChanged( int ) ), this, SLOT( ignoreWordsOrderClicked() ) ); + connect( ui.checkBoxIgnoreDiacritics, SIGNAL( stateChanged( int ) ), + this, SLOT( ignoreDiacriticsClicked() ) ); model = new HeadwordsListModel( this, results, activeDicts ); 
ui.headwordsView->setModel( model ); @@ -319,6 +324,7 @@ void FullTextSearchDialog::saveData() cfg.preferences.fts.useMaxDistanceBetweenWords = ui.checkBoxDistanceBetweenWords->isChecked(); cfg.preferences.fts.useMaxArticlesPerDictionary = ui.checkBoxArticlesPerDictionary->isChecked(); cfg.preferences.fts.ignoreWordsOrder = ignoreWordsOrder; + cfg.preferences.fts.ignoreDiacritics = ignoreDiacritics; cfg.preferences.fts.dialogGeometry = saveGeometry(); } @@ -355,6 +361,11 @@ void FullTextSearchDialog::ignoreWordsOrderClicked() ignoreWordsOrder = ui.checkBoxIgnoreWordOrder->isChecked(); } +void FullTextSearchDialog::ignoreDiacriticsClicked() +{ + ignoreDiacritics = ui.checkBoxIgnoreDiacritics->isChecked(); +} + void FullTextSearchDialog::accept() { QStringList list1, list2; @@ -421,7 +432,8 @@ void FullTextSearchDialog::accept() ui.matchCase->isChecked(), distanceBetweenWords, maxResultsPerDict, - ignoreWordsOrder + ignoreWordsOrder, + ignoreDiacritics ); connect( req.get(), SIGNAL( finished() ), this, SLOT( searchReqFinished() ), Qt::QueuedConnection ); @@ -512,7 +524,7 @@ void FullTextSearchDialog::itemClicked( const QModelIndex & idx ) } else reg = searchRegExp; - emit showTranslationFor( headword, results[ idx.row() ].dictIDs, reg ); + emit showTranslationFor( headword, results[ idx.row() ].dictIDs, reg, ignoreDiacritics ); } } diff --git a/fulltextsearch.hh b/fulltextsearch.hh index 6be03f3f..da7e2943 100644 --- a/fulltextsearch.hh +++ b/fulltextsearch.hh @@ -175,6 +175,7 @@ class FullTextSearchDialog : public QDialog unsigned group; std::vector< sptr< Dictionary::Class > > activeDicts; bool ignoreWordsOrder; + bool ignoreDiacritics; std::list< sptr< Dictionary::DataRequest > > searchReqs; @@ -217,6 +218,7 @@ private slots: void accept(); void setLimitsUsing(); void ignoreWordsOrderClicked(); + void ignoreDiacriticsClicked(); void searchReqFinished(); void reject(); void itemClicked( QModelIndex const & idx ); @@ -225,7 +227,7 @@ private slots: signals: void 
showTranslationFor( QString const &, QStringList const & dictIDs, - QRegExp const & searchRegExp ); + QRegExp const & searchRegExp, bool ignoreDiacritics ); void closeDialog(); }; diff --git a/fulltextsearch.ui b/fulltextsearch.ui index 529c982f..ffc9908e 100644 --- a/fulltextsearch.ui +++ b/fulltextsearch.ui @@ -75,11 +75,22 @@ - - - Ignore words order - - + + + + + Ignore words order + + + + + + + Ignore diacritics + + + + diff --git a/gls.cc b/gls.cc index 28e278b7..a59a6a79 100644 --- a/gls.cc +++ b/gls.cc @@ -487,7 +487,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); @@ -1546,9 +1547,10 @@ sptr< Dictionary::DataRequest > GlsDictionary::getSearchResults( QString const & int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } } // anonymous namespace diff --git a/mainwindow.cc b/mainwindow.cc index 83b9c71c..1521daed 100644 --- a/mainwindow.cc +++ b/mainwindow.cc @@ -2792,14 +2792,16 @@ void MainWindow::showTranslationFor( QString const & inWord, void MainWindow::showTranslationFor( QString const & inWord, QStringList const & dictIDs, - QRegExp const & searchRegExp ) + QRegExp const & searchRegExp, + bool ignoreDiacritics ) { ArticleView *view = getCurrentArticleView(); navPronounce->setEnabled( false ); view->showDefinition( inWord, dictIDs, searchRegExp, - groupInstances[ groupList->currentIndex() ].id ); + groupInstances[ groupList->currentIndex() ].id, + 
ignoreDiacritics ); updatePronounceAvailability(); updateFoundInDictsList(); @@ -4407,8 +4409,8 @@ void MainWindow::showFullTextSearchDialog() ftsDlg = new FTS::FullTextSearchDialog( this, cfg, dictionaries, groupInstances, ftsIndexing ); addGlobalActionsToDialog( ftsDlg ); - connect( ftsDlg, SIGNAL( showTranslationFor( QString, QStringList, QRegExp ) ), - this, SLOT( showTranslationFor( QString, QStringList, QRegExp ) ) ); + connect( ftsDlg, SIGNAL( showTranslationFor( QString, QStringList, QRegExp, bool ) ), + this, SLOT( showTranslationFor( QString, QStringList, QRegExp, bool ) ) ); connect( ftsDlg, SIGNAL( closeDialog() ), this, SLOT( closeFullTextSearchDialog() ), Qt::QueuedConnection ); connect( &configEvents, SIGNAL( mutedDictionariesChanged() ), diff --git a/mainwindow.hh b/mainwindow.hh index 2cd15bfd..5b5ff27b 100644 --- a/mainwindow.hh +++ b/mainwindow.hh @@ -391,7 +391,7 @@ private slots: QString const & dictID = QString() ); void showTranslationFor( QString const &, QStringList const & dictIDs, - QRegExp const & searchRegExp ); + QRegExp const & searchRegExp, bool ignoreDiacritics ); void showHistoryItem( QString const & ); diff --git a/mdx.cc b/mdx.cc index eaaaa33a..9104507d 100644 --- a/mdx.cc +++ b/mdx.cc @@ -254,7 +254,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -515,9 +516,10 @@ sptr< Dictionary::DataRequest > MdxDictionary::getSearchResults( QString const & int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new 
FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } /// MdxDictionary::getArticle diff --git a/sdict.cc b/sdict.cc index 44484886..54727910 100644 --- a/sdict.cc +++ b/sdict.cc @@ -172,7 +172,8 @@ class SdictDictionary: public BtreeIndexing::BtreeDictionary int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -477,9 +478,10 @@ sptr< Dictionary::DataRequest > SdictDictionary::getSearchResults( QString const int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } /// SdictDictionary::getArticle() diff --git a/slob.cc b/slob.cc index 0d8c272b..5564fe4d 100644 --- a/slob.cc +++ b/slob.cc @@ -556,7 +556,8 @@ class SlobDictionary: public BtreeIndexing::BtreeDictionary int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -1262,9 +1263,10 @@ sptr< Dictionary::DataRequest > SlobDictionary::getSearchResults( QString const int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool 
ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString, searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString, searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } diff --git a/stardict.cc b/stardict.cc index 2eacde0c..4dc35fd3 100644 --- a/stardict.cc +++ b/stardict.cc @@ -190,7 +190,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -1264,9 +1265,10 @@ sptr< Dictionary::DataRequest > StardictDictionary::getSearchResults( QString co int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } /// StardictDictionary::findHeadwordsForSynonym() diff --git a/xdxf.cc b/xdxf.cc index 3a92fb62..7fc68a62 100644 --- a/xdxf.cc +++ b/xdxf.cc @@ -179,7 +179,8 @@ public: int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); @@ -423,9 +424,10 @@ sptr< Dictionary::DataRequest > XdxfDictionary::getSearchResults( QString const int searchMode, bool matchCase, int distanceBetweenWords, int 
maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } /// XdxfDictionary::getArticle() diff --git a/zim.cc b/zim.cc index 22d84f69..bf5cbae5 100644 --- a/zim.cc +++ b/zim.cc @@ -569,7 +569,8 @@ class ZimDictionary: public BtreeIndexing::BtreeDictionary int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ); + bool ignoreWordsOrder, + bool ignoreDiacritics ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); quint32 getArticleText( uint32_t articleAddress, QString & headword, QString & text, @@ -1148,9 +1149,10 @@ sptr< Dictionary::DataRequest > ZimDictionary::getSearchResults( QString const & int searchMode, bool matchCase, int distanceBetweenWords, int maxResults, - bool ignoreWordsOrder ) + bool ignoreWordsOrder, + bool ignoreDiacritics ) { - return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder ); + return new FtsHelpers::FTSResultsRequest( *this, searchString,searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics ); } /// ZimDictionary::getArticle()