diff --git a/article_maker.cc b/article_maker.cc index 29e34b62..b7cf47fa 100644 --- a/article_maker.cc +++ b/article_maker.cc @@ -927,8 +927,8 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded ) // DPRINTF( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) ); - stemmedWordFinder->prefixMatch( currentSplittedWordCompound, activeDicts, 40, // Would one be enough? Leave 40 to be safe. - Dictionary::SuitableForCompoundSearching ); + stemmedWordFinder->expressionMatch( currentSplittedWordCompound, activeDicts, 40, // Would one be enough? Leave 40 to be safe. + Dictionary::SuitableForCompoundSearching ); } QString ArticleRequest::makeSplittedWordCompound() @@ -960,7 +960,6 @@ void ArticleRequest::individualWordFinished() if ( results.size() ) { - // Check if the aliases are acceptable wstring source = Folding::applySimpleCaseOnly( gd::toWString( currentSplittedWordCompound ) ); bool hadSomething = false; @@ -968,7 +967,14 @@ void ArticleRequest::individualWordFinished() for( unsigned x = 0; x < results.size(); ++x ) { if ( results[ x ].second ) - continue; // We're not interested in suggestions + { + // Spelling suggestion match found. No need to continue. + hadSomething = true; + lastGoodCompoundResult = currentSplittedWordCompound; + break; + } + + // Prefix match found. Check if the aliases are acceptable. 
wstring result( Folding::applySimpleCaseOnly( gd::toWString( results[ x ].first ) ) ); diff --git a/hunspell.cc b/hunspell.cc index d376af8e..8dd453e8 100644 --- a/hunspell.cc +++ b/hunspell.cc @@ -83,6 +83,8 @@ public: virtual bool isLocalDictionary() { return true; } + virtual vector< wstring > getAlternateWritings( const wstring & word ) throw(); + protected: virtual void loadIcon() throw(); @@ -110,6 +112,16 @@ string encodeToHunspell( Hunspell &, wstring const & ); /// Iconv::Ex wstring decodeFromHunspell( Hunspell &, char const * ); +/// Generates suggestions via hunspell +QVector< wstring > suggest( wstring & word, Mutex & hunspellMutex, + Hunspell & hunspell ); + +/// Generates suggestions for compound expression +void getSuggestionsForExpression( wstring const & expression, + vector< wstring > & suggestions, + Mutex & hunspellMutex, + Hunspell & hunspell ); + /// Returns true if the string contains whitespace, false otherwise bool containsWhitespace( wstring const & str ) { @@ -142,6 +154,18 @@ void HunspellDictionary::loadIcon() throw() dictionaryIconLoaded = true; } +vector< wstring > HunspellDictionary::getAlternateWritings( wstring const & word ) throw() +{ + vector< wstring > results; + + if( containsWhitespace( word ) ) + { + getSuggestionsForExpression( word, results, getHunspellMutex(), hunspell ); + } + + return results; +} + /// HunspellDictionary::getArticle() class HunspellArticleRequest; @@ -377,11 +401,6 @@ public: isCancelled.ref(); hasExited.acquire(); } - -private: - - /// Generates suggestions via hunspell - QVector< wstring > suggest( wstring & word ); }; void HunspellHeadwordsRequestRunnable::run() @@ -409,58 +428,18 @@ void HunspellHeadwordsRequest::run() if ( containsWhitespace( trimmedWord ) ) { - // Analyze each word separately and use the first suggestion, if any. - // This is useful for compound expressions where one of the words is - // in different form, e.g. "dozing off" -> "doze off". 
- // In this mode, we only provide a single suggestion at most. + vector< wstring > results; - wstring result; + getSuggestionsForExpression( trimmedWord, results, hunspellMutex, hunspell ); - wstring word; + Mutex::Lock _( dataMutex ); + for( unsigned i = 0; i < results.size(); i++ ) + matches.push_back( results[ i ] ); - for( wchar const * c = trimmedWord.c_str(); ; ++c ) - { - if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( * c ) ) - { - if ( word.size() ) - { - QVector< wstring > suggestions = suggest( word ); - - if ( suggestions.size() ) - result += suggestions[ 0 ]; - else - result += word; - - word.clear(); - } - if ( *c ) - result.push_back( *c ); - else - break; - } - else - word.push_back( *c ); - } - - if ( word.size() ) - { - QVector< wstring > suggestions = suggest( trimmedWord ); - - if ( suggestions.size() ) - result += suggestions[ 0 ]; - else - result += word; - } - - if ( result != trimmedWord ) - { - Mutex::Lock _( dataMutex ); - matches.push_back( result ); - } } else { - QVector< wstring > suggestions = suggest( trimmedWord ); + QVector< wstring > suggestions = suggest( trimmedWord, hunspellMutex, hunspell ); if ( !suggestions.empty() ) { @@ -474,7 +453,7 @@ void HunspellHeadwordsRequest::run() finish(); } -QVector< wstring > HunspellHeadwordsRequest::suggest( wstring & word ) +QVector< wstring > suggest( wstring & word, Mutex & hunspellMutex, Hunspell & hunspell ) { QVector< wstring > result; @@ -656,6 +635,102 @@ sptr< WordSearchRequest > HunspellDictionary::prefixMatch( wstring const & word, return new HunspellPrefixMatchRequest( word, getHunspellMutex(), hunspell ); } +void getSuggestionsForExpression( wstring const & expression, + vector< wstring > & suggestions, + Mutex & hunspellMutex, + Hunspell & hunspell ) +{ + // Analyze each word separately and use the first two suggestions, if any. + // This is useful for compound expressions where some words are + // in a different form, e.g. "dozing off" -> "doze off". 
+ + wstring trimmedWord = Folding::trimWhitespaceOrPunct( expression ); + wstring word, punct; + QVector< wstring > words; + + suggestions.clear(); + + // Parse string to separate words + + for( wchar const * c = trimmedWord.c_str(); ; ++c ) + { + if ( !*c || Folding::isPunct( *c ) || Folding::isWhitespace( * c ) ) + { + if ( word.size() ) + { + words.push_back( word ); + word.clear(); + } + if ( *c ) + punct.push_back( *c ); + } + else + { + if( punct.size() ) + { + words.push_back( punct ); + punct.clear(); + } + if( *c ) + word.push_back( *c ); + } + if( !*c ) + break; + } + + if( words.size() > 21 ) + { + // Too many words - no suggestions + return; + } + + // Combine result strings from suggestions + + QVector< wstring > results; + + for( int i = 0; i < words.size(); i++ ) + { + word = words.at( i ); + if( Folding::isPunct( word[ 0 ] ) || Folding::isWhitespace( word[ 0 ] ) ) + { + for( int j = 0; j < results.size(); j++ ) + results[ j ].append( word ); + } + else + { + QVector< wstring > sugg = suggest( word, hunspellMutex, hunspell ); + int suggNum = sugg.size() + 1; + if( suggNum > 3 ) + suggNum = 3; + int resNum = results.size(); + wstring resultStr; + + if( resNum == 0 ) + { + for( int k = 0; k < suggNum; k++ ) + results.push_back( k == 0 ? 
word : sugg.at( k - 1 ) ); + } + else + { + for( int j = 0; j < resNum; j++ ) + { + resultStr = results.at( j ); + for( int k = 0; k < suggNum; k++ ) + { + if( k == 0) + results[ j ].append( word ); + else + results.push_back( resultStr + sugg.at( k - 1 ) ); + } + } + } + } + } + + for( int i = 0; i < results.size(); i++ ) + if( results.at( i ) != trimmedWord ) + suggestions.push_back( results.at( i ) ); +} string encodeToHunspell( Hunspell & hunspell, wstring const & str ) { diff --git a/wordfinder.cc b/wordfinder.cc index 3589fd4f..768de598 100644 --- a/wordfinder.cc +++ b/wordfinder.cc @@ -86,6 +86,31 @@ void WordFinder::stemmedMatch( QString const & str, startSearch(); } +void WordFinder::expressionMatch( QString const & str, + std::vector< sptr< Dictionary::Class > > const & dicts, + unsigned long maxResults, + Dictionary::Features features ) +{ + cancel(); + + searchQueued = true; + searchType = ExpressionMatch; + inputWord = str; + inputDicts = &dicts; + requestedMaxResults = maxResults; + requestedFeatures = features; + + resultsArray.clear(); + resultsIndex.clear(); + searchResults.clear(); + + if ( queuedRequests.empty() ) + { + // No requests are queued, no need to wait for them to finish. + startSearch(); + } +} + void WordFinder::startSearch() { if ( !searchQueued ) @@ -127,7 +152,7 @@ void WordFinder::startSearch() try { sptr< Dictionary::WordSearchRequest > sr = - ( searchType == PrefixMatch ) ? + ( searchType == PrefixMatch || searchType == ExpressionMatch ) ? 
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], requestedMaxResults ) : (*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], stemmedMinLength, stemmedMaxSuffixVariation, requestedMaxResults ); @@ -273,6 +298,8 @@ void WordFinder::updateResults() if ( updateResultsTimer.isActive() ) updateResultsTimer.stop(); // Can happen when we were done before it'd expire + wstring original = Folding::applySimpleCaseOnly( allWordWritings[ 0 ] ); + for( list< sptr< Dictionary::WordSearchRequest > >::iterator i = finishedRequests.begin(); i != finishedRequests.end(); ) { @@ -282,6 +309,30 @@ void WordFinder::updateResults() int weight = (**i)[ x ].weight; wstring lowerCased = Folding::applySimpleCaseOnly( match ); + if( searchType == ExpressionMatch ) + { + unsigned ws; + + for( ws = 0; ws < allWordWritings.size(); ws++ ) + { + if( ws == 0 ) + { + // Check for prefix match with original expression + if( lowerCased.compare( 0, original.size(), original ) == 0 ) + break; + } + else + if( lowerCased == Folding::applySimpleCaseOnly( allWordWritings[ ws ] ) ) + break; + } + + if( ws >= allWordWritings.size() ) + { + // No exact matches found + continue; + } + weight = ws; + } pair< ResultsIndex::iterator, bool > insertResult = resultsIndex.insert( pair< wstring, ResultsArray::iterator >( lowerCased, resultsArray.end() ) ); @@ -400,6 +451,7 @@ void WordFinder::updateResults() resultsArray.sort( SortByRank() ); } else + if( searchType == StemmedMatch ) { // Handling stemmed matches diff --git a/wordfinder.hh b/wordfinder.hh index 504a08ef..581dedb7 100644 --- a/wordfinder.hh +++ b/wordfinder.hh @@ -41,7 +41,8 @@ private: enum SearchType { PrefixMatch, - StemmedMatch + StemmedMatch, + ExpressionMatch } searchType; unsigned long requestedMaxResults; Dictionary::Features requestedFeatures; @@ -94,6 +95,13 @@ public: unsigned long maxResults = 30, Dictionary::Features = Dictionary::NoFeatures ); + /// Do the expression-match search in the given list of dictionaries. 
+ /// Finds exact matches for one of the spelling suggestions. + void expressionMatch( QString const &, + std::vector< sptr< Dictionary::Class > > const &, + unsigned long maxResults = 40, + Dictionary::Features = Dictionary::NoFeatures ); + /// Returns the vector containing search results from the last operation. /// If it didn't finish yet, the result is not final and may be changing /// over time.