mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-30 17:24:08 +00:00
04bdf3aa36
This is just a refinement of how the word search is done in the dictionaries that don't actually have the ability to search for words. Previously they emitted dummy italicized suggestions, which were getting in the way. Now they don't emit anything, but mark the search as uncertain. Uncertain searches don't mark the word input with a different color to indicate that the search has failed.
464 lines
14 KiB
C++
464 lines
14 KiB
C++
/* This file is (c) 2008-2010 Konstantin Isakov <ikm@users.berlios.de>
|
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
|
|
|
#include "wordfinder.hh"
|
|
#include "folding.hh"
|
|
#include "wstring_qt.hh"
|
|
#include <QThreadPool>
|
|
#include <map>
|
|
|
|
using std::vector;
|
|
using std::list;
|
|
using gd::wstring;
|
|
using gd::wchar;
|
|
using std::map;
|
|
using std::pair;
|
|
|
|
/// Creates an idle finder: no search is running and none is queued.
/// The partial-results timer is owned by this object and wired to
/// updateResults().
WordFinder::WordFinder( QObject * parent ):
  QObject( parent ), searchInProgress( false ),
  updateResultsTimer( this ),
  searchQueued( false )
{
  // Every expiry of the timer refreshes the results
  connect( &updateResultsTimer, SIGNAL( timeout() ),
           this, SLOT( updateResults() ) );

  // The timer fires once per start(), one second after it was started
  updateResultsTimer.setSingleShot( true );
  updateResultsTimer.setInterval( 1000 );
}
|
|
|
|
/// Cancels the current search, if any, and releases every request still held.
WordFinder::~WordFinder()
{
  this->clear();
}
|
|
|
|
/// Requests a prefix search for str, replacing whatever search was active.
///
/// Only the address of dicts is stored, so the vector has to stay alive until
/// the search is over. maxResults is handed to each dictionary's prefixMatch()
/// and features restricts the search to dictionaries providing all of them.
/// The search begins immediately unless requests of the previous search are
/// still pending, in which case it begins once the last of them is gone.
void WordFinder::prefixMatch( QString const & str,
                              std::vector< sptr< Dictionary::Class > > const & dicts,
                              unsigned long maxResults,
                              Dictionary::Features features )
{
  cancel();

  // Remember what was asked for
  inputWord = str;
  inputDicts = &dicts;
  requestedMaxResults = maxResults;
  requestedFeatures = features;
  searchType = PrefixMatch;
  searchQueued = true;

  // Throw away everything gathered by the previous search
  resultsArray.clear();
  resultsIndex.clear();
  searchResults.clear();

  if ( !queuedRequests.empty() )
  {
    // Cancelled requests are still around. The last one of them to finish
    // starts the new search from requestFinished(). They were all cancelled,
    // so this shouldn't take long, but it isn't instant either.
    return;
  }

  // Nothing to wait for
  startSearch();
}
|
|
/// Requests a stemmed search for str, replacing whatever search was active.
///
/// Only the address of dicts is stored, so the vector has to stay alive until
/// the search is over. minLength, maxSuffixVariation and maxResults are passed
/// unchanged to each dictionary's stemmedMatch(); features restricts the
/// search to dictionaries providing all of them. The search begins
/// immediately unless requests of the previous search are still pending.
void WordFinder::stemmedMatch( QString const & str,
                               std::vector< sptr< Dictionary::Class > > const & dicts,
                               unsigned minLength,
                               unsigned maxSuffixVariation,
                               unsigned long maxResults,
                               Dictionary::Features features )
{
  cancel();

  // Remember what was asked for
  inputWord = str;
  inputDicts = &dicts;
  stemmedMinLength = minLength;
  stemmedMaxSuffixVariation = maxSuffixVariation;
  requestedMaxResults = maxResults;
  requestedFeatures = features;
  searchType = StemmedMatch;
  searchQueued = true;

  // Throw away everything gathered by the previous search
  resultsArray.clear();
  resultsIndex.clear();
  searchResults.clear();

  if ( !queuedRequests.empty() )
    return; // requestFinished() starts the search once old requests are gone

  startSearch();
}
|
|
|
|
void WordFinder::startSearch()
|
|
{
|
|
if ( !searchQueued )
|
|
return; // Search was probably cancelled
|
|
|
|
// Clear the requests just in case
|
|
queuedRequests.clear();
|
|
finishedRequests.clear();
|
|
|
|
searchErrorString.clear();
|
|
searchResultsUncertain = false;
|
|
|
|
searchQueued = false;
|
|
searchInProgress = true;
|
|
|
|
// Gather all writings of the word
|
|
|
|
if ( allWordWritings.size() != 1 )
|
|
allWordWritings.resize( 1 );
|
|
|
|
allWordWritings[ 0 ] = gd::toWString( inputWord );
|
|
|
|
for( size_t x = 0; x < inputDicts->size(); ++x )
|
|
{
|
|
vector< wstring > writings = (*inputDicts)[ x ]->getAlternateWritings( allWordWritings[ 0 ] );
|
|
|
|
allWordWritings.insert( allWordWritings.end(), writings.begin(), writings.end() );
|
|
}
|
|
|
|
// Query each dictionary for all word writings
|
|
|
|
for( size_t x = 0; x < inputDicts->size(); ++x )
|
|
{
|
|
if ( ( (*inputDicts)[ x ]->getFeatures() & requestedFeatures ) != requestedFeatures )
|
|
continue;
|
|
|
|
for( size_t y = 0; y < allWordWritings.size(); ++y )
|
|
{
|
|
sptr< Dictionary::WordSearchRequest > sr =
|
|
( searchType == PrefixMatch ) ?
|
|
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], requestedMaxResults ) :
|
|
(*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], stemmedMinLength, stemmedMaxSuffixVariation, requestedMaxResults );
|
|
|
|
connect( sr.get(), SIGNAL( finished() ),
|
|
this, SLOT( requestFinished() ), Qt::QueuedConnection );
|
|
|
|
queuedRequests.push_back( sr );
|
|
}
|
|
}
|
|
|
|
// Handle any requests finished already
|
|
|
|
requestFinished();
|
|
}
|
|
|
|
void WordFinder::cancel()
|
|
{
|
|
searchQueued = false;
|
|
searchInProgress = false;
|
|
|
|
cancelSearches();
|
|
}
|
|
|
|
void WordFinder::clear()
|
|
{
|
|
cancel();
|
|
queuedRequests.clear();
|
|
finishedRequests.clear();
|
|
}
|
|
|
|
void WordFinder::requestFinished()
|
|
{
|
|
bool newResults = false;
|
|
|
|
// See how many new requests have finished, and if we have any new results
|
|
for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
|
|
queuedRequests.begin(); i != queuedRequests.end(); )
|
|
{
|
|
if ( (*i)->isFinished() )
|
|
{
|
|
if ( searchInProgress && !(*i)->getErrorString().isEmpty() )
|
|
searchErrorString = tr( "Failed to query some dictionaries." );
|
|
|
|
if ( (*i)->isUncertain() )
|
|
searchResultsUncertain = true;
|
|
|
|
if ( (*i)->matchesCount() )
|
|
{
|
|
newResults = true;
|
|
|
|
// This list is handled by updateResults()
|
|
finishedRequests.splice( finishedRequests.end(), queuedRequests, i++ );
|
|
}
|
|
else // We won't do anything with it anymore, so we erase it
|
|
queuedRequests.erase( i++ );
|
|
}
|
|
else
|
|
++i;
|
|
}
|
|
|
|
if ( !searchInProgress )
|
|
{
|
|
// There is no search in progress, so we just wait until there's
|
|
// no requests left
|
|
|
|
if ( queuedRequests.empty() )
|
|
{
|
|
// We got rid of all queries, queued search can now start
|
|
finishedRequests.clear();
|
|
|
|
if ( searchQueued )
|
|
startSearch();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if ( newResults && queuedRequests.size() && !updateResultsTimer.isActive() )
|
|
{
|
|
// If we have got some new results, but not all of them, we would start a
|
|
// timer to update a user some time in the future
|
|
updateResultsTimer.start();
|
|
}
|
|
|
|
if ( queuedRequests.empty() )
|
|
{
|
|
// Search is finished.
|
|
updateResults();
|
|
}
|
|
}
|
|
|
|
namespace {
|
|
|
|
|
|
/// Clamps x to 255: values below 255 are returned as they are, anything else
/// yields 255. The argument is taken as size_t because callers pass string
/// sizes and offsets; with an unsigned parameter such a value would be
/// truncated before the comparison wherever size_t is wider than unsigned.
unsigned saturated( size_t x )
{
  return x < 255 ? static_cast< unsigned >( x ) : 255u;
}
|
|
|
|
/// Checks whether the first string has the second one inside, surrounded from
|
|
/// both sides by either whitespace, punctuation or begin/end of string.
|
|
/// If true is returned, pos holds the offset in the haystack. If the offset
|
|
/// is larger than 255, it is set to 255.
|
|
bool hasSurroundedWithWs( wstring const & haystack, wstring const & needle,
|
|
wstring::size_type & pos )
|
|
{
|
|
if ( haystack.size() < needle.size() )
|
|
return false; // Needle won't even fit into a haystack
|
|
|
|
for( pos = 0; ; ++pos )
|
|
{
|
|
pos = haystack.find( needle, pos );
|
|
|
|
if ( pos == wstring::npos )
|
|
return false; // Not found
|
|
|
|
if ( ( !pos || Folding::isWhitespace( haystack[ pos - 1 ] ) ||
|
|
Folding::isPunct( haystack[ pos - 1 ] ) ) &&
|
|
( ( pos + needle.size() == haystack.size() ) ||
|
|
Folding::isWhitespace( haystack[ pos + needle.size() ] ) ||
|
|
Folding::isPunct( haystack[ pos + needle.size() ] ) ) )
|
|
{
|
|
pos = saturated( pos );
|
|
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
void WordFinder::updateResults()
|
|
{
|
|
if ( !searchInProgress )
|
|
return; // Old queued signal
|
|
|
|
if ( updateResultsTimer.isActive() )
|
|
updateResultsTimer.stop(); // Can happen when we were done before it'd expire
|
|
|
|
for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
|
|
finishedRequests.begin(); i != finishedRequests.end(); )
|
|
{
|
|
for( size_t count = (*i)->matchesCount(), x = 0; x < count; ++x )
|
|
{
|
|
wstring match = (**i)[ x ].word;
|
|
int weight = (**i)[ x ].weight;
|
|
wstring lowerCased = Folding::applySimpleCaseOnly( match );
|
|
|
|
pair< ResultsIndex::iterator, bool > insertResult =
|
|
resultsIndex.insert( pair< wstring, ResultsArray::iterator >( lowerCased,
|
|
resultsArray.end() ) );
|
|
|
|
if ( !insertResult.second )
|
|
{
|
|
// Wasn't inserted since there was already an item -- check the case
|
|
if ( insertResult.first->second->word != match )
|
|
{
|
|
// The case is different -- agree on a lowercase version
|
|
insertResult.first->second->word = lowerCased;
|
|
}
|
|
if ( !weight && insertResult.first->second->wasSuggested )
|
|
insertResult.first->second->wasSuggested = false;
|
|
}
|
|
else
|
|
{
|
|
resultsArray.push_back( OneResult() );
|
|
|
|
resultsArray.back().word = match;
|
|
resultsArray.back().rank = INT_MAX;
|
|
resultsArray.back().wasSuggested = ( weight != 0 );
|
|
|
|
insertResult.first->second = --resultsArray.end();
|
|
}
|
|
}
|
|
finishedRequests.erase( i++ );
|
|
}
|
|
|
|
size_t maxSearchResults = 500;
|
|
|
|
if ( resultsArray.size() )
|
|
{
|
|
if ( searchType == PrefixMatch )
|
|
{
|
|
/// Assign each result a category, storing it in the rank's field
|
|
|
|
enum Category
|
|
{
|
|
ExactMatch,
|
|
ExactNoFullCaseMatch,
|
|
ExactNoDiaMatch,
|
|
ExactNoPunctMatch,
|
|
ExactNoWsMatch,
|
|
ExactInsideMatch,
|
|
ExactNoDiaInsideMatch,
|
|
ExactNoPunctInsideMatch,
|
|
PrefixMatch,
|
|
PrefixNoDiaMatch,
|
|
PrefixNoPunctMatch,
|
|
PrefixNoWsMatch,
|
|
WorstMatch,
|
|
Multiplier = 256 // Categories should be multiplied by Multiplier
|
|
};
|
|
|
|
for( unsigned wr = 0; wr < allWordWritings.size(); ++wr )
|
|
{
|
|
wstring target = Folding::applySimpleCaseOnly( allWordWritings[ wr ] );
|
|
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
|
|
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
|
|
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
|
|
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
|
|
|
|
wstring::size_type matchPos = 0;
|
|
|
|
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
|
|
i != j; ++i )
|
|
{
|
|
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
|
|
|
|
int rank;
|
|
|
|
if ( i->first == target )
|
|
rank = ExactMatch * Multiplier;
|
|
else
|
|
if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase )
|
|
rank = ExactNoFullCaseMatch * Multiplier;
|
|
else
|
|
if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia )
|
|
rank = ExactNoDiaMatch * Multiplier;
|
|
else
|
|
if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct )
|
|
rank = ExactNoPunctMatch * Multiplier;
|
|
else
|
|
if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs )
|
|
rank = ExactNoWsMatch * Multiplier;
|
|
else
|
|
if ( hasSurroundedWithWs( i->first, target, matchPos ) )
|
|
rank = ExactInsideMatch * Multiplier + matchPos;
|
|
else
|
|
if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) )
|
|
rank = ExactNoDiaInsideMatch * Multiplier + matchPos;
|
|
else
|
|
if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) )
|
|
rank = ExactNoPunctInsideMatch * Multiplier + matchPos;
|
|
else
|
|
if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 )
|
|
rank = PrefixMatch * Multiplier + saturated( i->first.size() );
|
|
else
|
|
if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 )
|
|
rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() );
|
|
else
|
|
if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 )
|
|
rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() );
|
|
else
|
|
if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 )
|
|
rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() );
|
|
else
|
|
rank = WorstMatch * Multiplier;
|
|
|
|
if ( i->second->rank > rank )
|
|
i->second->rank = rank; // We store the best rank of any writing
|
|
}
|
|
}
|
|
|
|
resultsArray.sort( SortByRank() );
|
|
}
|
|
else
|
|
{
|
|
// Handling stemmed matches
|
|
|
|
// We use two factors -- first is the number of characters strings share
|
|
// in their beginnings, and second, the length of the strings. Here we assign
|
|
// only the first one, storing it in rank. Then we sort the results using
|
|
// SortByRankAndLength.
|
|
for( unsigned wr = 0; wr < allWordWritings.size(); ++wr )
|
|
{
|
|
wstring target = Folding::apply( allWordWritings[ wr ] );
|
|
|
|
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
|
|
i != j; ++i )
|
|
{
|
|
wstring resultFolded = Folding::apply( i->first );
|
|
|
|
int charsInCommon = 0;
|
|
|
|
for( wchar const * t = target.c_str(), * r = resultFolded.c_str();
|
|
*t && *t == *r; ++t, ++r, ++charsInCommon ) ;
|
|
|
|
int rank = -charsInCommon; // Negated so the lesser-than
|
|
// comparison would yield right
|
|
// results.
|
|
|
|
if ( i->second->rank > rank )
|
|
i->second->rank = rank; // We store the best rank of any writing
|
|
}
|
|
}
|
|
|
|
resultsArray.sort( SortByRankAndLength() );
|
|
|
|
maxSearchResults = 15;
|
|
}
|
|
}
|
|
|
|
searchResults.clear();
|
|
searchResults.reserve( resultsArray.size() < maxSearchResults ? resultsArray.size() : maxSearchResults );
|
|
|
|
for( ResultsArray::const_iterator i = resultsArray.begin(), j = resultsArray.end();
|
|
i != j; ++i )
|
|
{
|
|
//printf( "%d: %ls\n", i->second, i->first.c_str() );
|
|
|
|
if ( searchResults.size() < maxSearchResults )
|
|
searchResults.push_back( std::pair< QString, bool >( gd::toQString( i->word ), i->wasSuggested ) );
|
|
else
|
|
break;
|
|
}
|
|
|
|
if ( queuedRequests.size() )
|
|
{
|
|
// There are still some unhandled results.
|
|
emit updated();
|
|
}
|
|
else
|
|
{
|
|
// That were all of them.
|
|
searchInProgress = false;
|
|
emit finished();
|
|
}
|
|
}
|
|
|
|
void WordFinder::cancelSearches()
|
|
{
|
|
for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
|
|
queuedRequests.begin(); i != queuedRequests.end(); ++i )
|
|
(*i)->cancel();
|
|
}
|
|
|