2009-02-05 14:21:47 +00:00
|
|
|
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
|
2009-01-29 19:16:25 +00:00
|
|
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
|
|
|
|
|
|
|
#include "wordfinder.hh"
|
|
|
|
#include "folding.hh"
|
2009-03-26 19:00:08 +00:00
|
|
|
#include <QThreadPool>
|
2009-01-29 19:16:25 +00:00
|
|
|
#include <map>
|
|
|
|
|
|
|
|
using std::vector;
|
2009-03-26 19:00:08 +00:00
|
|
|
using std::list;
|
2009-01-29 19:16:25 +00:00
|
|
|
using std::wstring;
|
|
|
|
using std::map;
|
|
|
|
using std::pair;
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
/// Constructs an idle finder (no search running, none queued) and prepares
/// the timer which is used to publish partial results.
WordFinder::WordFinder( QObject * parent ):
  QObject( parent ),
  searchInProgress( false ),
  updateResultsTimer( this ),
  searchQueued( false )
{
  // The timer fires once per start() call, one second (1000 ms) after it
  // was started.
  updateResultsTimer.setSingleShot( true );
  updateResultsTimer.setInterval( 1000 );

  // Every expiry of the timer refreshes the results
  QObject::connect( &updateResultsTimer, SIGNAL( timeout() ),
                    this, SLOT( updateResults() ) );
}
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
/// Cancels whatever is still going on and drops all the requests held.
WordFinder::~WordFinder()
{
  this->clear();
}
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
/// Schedules a new prefix search for str over dicts, cancelling the current
/// one first. Note that only the address of dicts is remembered, no copy of
/// the vector is made here.
void WordFinder::prefixMatch( QString const & str,
                              std::vector< sptr< Dictionary::Class > > const & dicts )
{
  // Whatever was running before is of no interest anymore
  cancel();

  // Forget everything the previous search has produced
  searchResults.clear();
  resultsIndex.clear();
  resultsArray.clear();

  // Remember what to look for and where, and mark the search as pending
  inputWord = str;
  inputDicts = &dicts;
  searchQueued = true;

  if ( !queuedRequests.empty() )
  {
    // Some of the old requests are still around. They were all cancelled
    // above, so they should go away soon; requestFinished() starts the
    // pending search once the last of them is gone.
    return;
  }

  // Nothing to wait for -- the search can begin right away
  startSearch();
}
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
void WordFinder::startSearch()
|
2009-01-29 19:16:25 +00:00
|
|
|
{
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( !searchQueued )
|
|
|
|
return; // Search was probably cancelled
|
|
|
|
|
|
|
|
// Clear the requests just in case
|
|
|
|
queuedRequests.clear();
|
|
|
|
finishedRequests.clear();
|
|
|
|
|
|
|
|
searchErrorString.clear();
|
|
|
|
|
|
|
|
searchQueued = false;
|
|
|
|
searchInProgress = true;
|
|
|
|
|
|
|
|
wstring word = inputWord.toStdWString();
|
|
|
|
|
|
|
|
for( size_t x = 0; x < inputDicts->size(); ++x )
|
|
|
|
{
|
|
|
|
sptr< Dictionary::WordSearchRequest > sr = (*inputDicts)[ x ]->prefixMatch( word, 40 );
|
|
|
|
|
|
|
|
connect( sr.get(), SIGNAL( finished() ),
|
|
|
|
this, SLOT( requestFinished() ), Qt::QueuedConnection );
|
|
|
|
|
|
|
|
queuedRequests.push_back( sr );
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
// Handle any requests finished already
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
requestFinished();
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
void WordFinder::cancel()
|
|
|
|
{
|
|
|
|
searchQueued = false;
|
|
|
|
searchInProgress = false;
|
|
|
|
|
|
|
|
cancelSearches();
|
2009-01-29 19:16:25 +00:00
|
|
|
}
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
void WordFinder::clear()
|
2009-01-29 19:16:25 +00:00
|
|
|
{
|
2009-03-26 19:00:08 +00:00
|
|
|
cancel();
|
|
|
|
queuedRequests.clear();
|
|
|
|
finishedRequests.clear();
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
void WordFinder::requestFinished()
|
|
|
|
{
|
|
|
|
bool newResults = false;
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
// See how many new requests have finished, and if we have any new results
|
|
|
|
for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
|
|
|
|
queuedRequests.begin(); i != queuedRequests.end(); )
|
|
|
|
{
|
|
|
|
if ( (*i)->isFinished() )
|
2009-01-29 19:16:25 +00:00
|
|
|
{
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( searchInProgress && !(*i)->getErrorString().isEmpty() )
|
|
|
|
searchErrorString = tr( "Failed to query some dictionaries." );
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( (*i)->matchesCount() )
|
|
|
|
{
|
|
|
|
newResults = true;
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
// This list is handled by updateResults()
|
|
|
|
finishedRequests.splice( finishedRequests.end(), queuedRequests, i++ );
|
|
|
|
}
|
|
|
|
else // We won't do anything with it anymore, so we erase it
|
|
|
|
queuedRequests.erase( i++ );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
++i;
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( !searchInProgress )
|
|
|
|
{
|
|
|
|
// There is no search in progress, so we just wait until there's
|
|
|
|
// no requests left
|
|
|
|
|
|
|
|
if ( queuedRequests.empty() )
|
|
|
|
{
|
|
|
|
// We got rid of all queries, queued search can now start
|
|
|
|
finishedRequests.clear();
|
|
|
|
|
|
|
|
if ( searchQueued )
|
|
|
|
startSearch();
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
return;
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( newResults && queuedRequests.size() && !updateResultsTimer.isActive() )
|
|
|
|
{
|
|
|
|
// If we have got some new results, but not all of them, we would start a
|
|
|
|
// timer to update a user some time in the future
|
|
|
|
updateResultsTimer.start();
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( queuedRequests.empty() )
|
|
|
|
{
|
|
|
|
// Search is finished.
|
|
|
|
updateResults();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
namespace {
|
|
|
|
|
2009-04-09 22:09:38 +00:00
|
|
|
|
|
|
|
unsigned saturated( unsigned x )
|
|
|
|
{
|
|
|
|
return x < 255 ? x : 255;
|
|
|
|
}
|
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
/// Checks whether the first string has the second one inside, surrounded from
|
|
|
|
/// both sides by either whitespace, punctuation or begin/end of string.
|
|
|
|
/// If true is returned, pos holds the offset in the haystack. If the offset
|
|
|
|
/// is larger than 255, it is set to 255.
|
|
|
|
bool hasSurroundedWithWs( wstring const & haystack, wstring const & needle,
|
|
|
|
wstring::size_type & pos )
|
|
|
|
{
|
|
|
|
if ( haystack.size() < needle.size() )
|
|
|
|
return false; // Needle won't even fit into a haystack
|
|
|
|
|
|
|
|
for( pos = 0; ; ++pos )
|
|
|
|
{
|
|
|
|
pos = haystack.find( needle, pos );
|
|
|
|
|
|
|
|
if ( pos == wstring::npos )
|
|
|
|
return false; // Not found
|
|
|
|
|
|
|
|
if ( ( !pos || Folding::isWhitespace( haystack[ pos - 1 ] ) ||
|
|
|
|
Folding::isPunct( haystack[ pos - 1 ] ) ) &&
|
|
|
|
( ( pos + needle.size() == haystack.size() ) ||
|
|
|
|
Folding::isWhitespace( haystack[ pos + needle.size() ] ) ||
|
|
|
|
Folding::isPunct( haystack[ pos + needle.size() ] ) ) )
|
|
|
|
{
|
2009-04-09 22:09:38 +00:00
|
|
|
pos = saturated( pos );
|
2009-04-08 16:02:12 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
void WordFinder::updateResults()
|
|
|
|
{
|
|
|
|
if ( !searchInProgress )
|
|
|
|
return; // Old queued signal
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( updateResultsTimer.isActive() )
|
|
|
|
updateResultsTimer.stop(); // Can happen when we were done before it'd expire
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
|
|
|
|
finishedRequests.begin(); i != finishedRequests.end(); )
|
|
|
|
{
|
|
|
|
for( size_t count = (*i)->matchesCount(), x = 0; x < count; ++x )
|
2009-01-29 19:16:25 +00:00
|
|
|
{
|
2009-03-26 19:00:08 +00:00
|
|
|
wstring match = (**i)[ x ].word;
|
2009-04-09 15:27:34 +00:00
|
|
|
int weight = (**i)[ x ].weight;
|
2009-03-26 19:00:08 +00:00
|
|
|
wstring lowerCased = Folding::applySimpleCaseOnly( match );
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
pair< ResultsIndex::iterator, bool > insertResult =
|
|
|
|
resultsIndex.insert( pair< wstring, ResultsArray::iterator >( lowerCased,
|
|
|
|
resultsArray.end() ) );
|
2009-03-26 19:00:08 +00:00
|
|
|
|
|
|
|
if ( !insertResult.second )
|
2009-01-29 19:16:25 +00:00
|
|
|
{
|
2009-03-26 19:00:08 +00:00
|
|
|
// Wasn't inserted since there was already an item -- check the case
|
2009-04-09 15:27:34 +00:00
|
|
|
if ( insertResult.first->second->word != match )
|
2009-01-29 19:16:25 +00:00
|
|
|
{
|
2009-03-26 19:00:08 +00:00
|
|
|
// The case is different -- agree on a lowercase version
|
2009-04-09 15:27:34 +00:00
|
|
|
insertResult.first->second->word = lowerCased;
|
2009-01-29 19:16:25 +00:00
|
|
|
}
|
2009-04-09 15:27:34 +00:00
|
|
|
if ( !weight && insertResult.first->second->wasSuggested )
|
|
|
|
insertResult.first->second->wasSuggested = false;
|
2009-01-29 19:16:25 +00:00
|
|
|
}
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
{
|
2009-04-09 15:27:34 +00:00
|
|
|
resultsArray.push_back( OneResult() );
|
|
|
|
|
|
|
|
resultsArray.back().word = match;
|
|
|
|
resultsArray.back().rank = -1;
|
|
|
|
resultsArray.back().wasSuggested = ( weight != 0 );
|
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
insertResult.first->second = --resultsArray.end();
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
}
|
2009-03-26 19:00:08 +00:00
|
|
|
finishedRequests.erase( i++ );
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
if ( resultsArray.size() )
|
|
|
|
{
|
|
|
|
/// Assign each result a category, storing it in the rank's field
|
|
|
|
|
|
|
|
enum Category
|
|
|
|
{
|
|
|
|
ExactMatch,
|
2009-04-08 21:22:50 +00:00
|
|
|
ExactNoFullCaseMatch,
|
2009-04-08 16:02:12 +00:00
|
|
|
ExactNoDiaMatch,
|
|
|
|
ExactNoPunctMatch,
|
|
|
|
ExactNoWsMatch,
|
|
|
|
ExactInsideMatch,
|
|
|
|
ExactNoDiaInsideMatch,
|
|
|
|
ExactNoPunctInsideMatch,
|
|
|
|
PrefixMatch,
|
|
|
|
PrefixNoDiaMatch,
|
|
|
|
PrefixNoPunctMatch,
|
|
|
|
PrefixNoWsMatch,
|
|
|
|
WorstMatch,
|
|
|
|
Multiplier = 256 // Categories should be multiplied by Multiplier
|
|
|
|
};
|
2009-04-09 15:27:34 +00:00
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
wstring target = Folding::applySimpleCaseOnly( inputWord.toStdWString() );
|
2009-04-08 21:22:50 +00:00
|
|
|
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
|
|
|
|
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
|
2009-04-08 16:02:12 +00:00
|
|
|
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
|
|
|
|
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
|
|
|
|
|
|
|
|
wstring::size_type matchPos = 0;
|
|
|
|
|
|
|
|
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
|
|
|
|
i != j; ++i )
|
|
|
|
{
|
2009-04-08 21:22:50 +00:00
|
|
|
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
|
2009-04-08 16:02:12 +00:00
|
|
|
|
|
|
|
if ( i->first == target )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactMatch * Multiplier;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
2009-04-08 21:22:50 +00:00
|
|
|
if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactNoFullCaseMatch * Multiplier;
|
2009-04-08 21:22:50 +00:00
|
|
|
else
|
|
|
|
if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactNoDiaMatch * Multiplier;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactNoPunctMatch * Multiplier;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactNoWsMatch * Multiplier;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( hasSurroundedWithWs( i->first, target, matchPos ) )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactInsideMatch * Multiplier + matchPos;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactNoDiaInsideMatch * Multiplier + matchPos;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) )
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = ExactNoPunctInsideMatch * Multiplier + matchPos;
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 )
|
2009-04-09 22:09:38 +00:00
|
|
|
i->second->rank = PrefixMatch * Multiplier + saturated( i->first.size() );
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 )
|
2009-04-09 22:09:38 +00:00
|
|
|
i->second->rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() );
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 )
|
2009-04-09 22:09:38 +00:00
|
|
|
i->second->rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() );
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
|
|
|
if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 )
|
2009-04-09 22:09:38 +00:00
|
|
|
i->second->rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() );
|
2009-04-08 16:02:12 +00:00
|
|
|
else
|
2009-04-09 15:27:34 +00:00
|
|
|
i->second->rank = WorstMatch * Multiplier;
|
2009-04-08 16:02:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
resultsArray.sort( SortByRank() );
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
searchResults.clear();
|
2009-04-08 16:02:12 +00:00
|
|
|
searchResults.reserve( resultsArray.size() < 500 ? resultsArray.size() : 500 );
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-04-08 16:02:12 +00:00
|
|
|
for( ResultsArray::const_iterator i = resultsArray.begin(), j = resultsArray.end();
|
|
|
|
i != j; ++i )
|
2009-03-26 19:00:08 +00:00
|
|
|
{
|
2009-04-08 16:02:12 +00:00
|
|
|
//printf( "%d: %ls\n", i->second, i->first.c_str() );
|
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( searchResults.size() < 500 )
|
2009-04-09 15:27:34 +00:00
|
|
|
searchResults.push_back( std::pair< QString, bool >( QString::fromStdWString( i->word ), i->wasSuggested ) );
|
2009-03-26 19:00:08 +00:00
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
if ( queuedRequests.size() )
|
|
|
|
{
|
|
|
|
// There are still some unhandled results.
|
|
|
|
emit updated();
|
2009-01-29 19:16:25 +00:00
|
|
|
}
|
2009-03-26 19:00:08 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
// That were all of them.
|
|
|
|
searchInProgress = false;
|
|
|
|
emit finished();
|
|
|
|
}
|
|
|
|
}
|
2009-01-29 19:16:25 +00:00
|
|
|
|
2009-03-26 19:00:08 +00:00
|
|
|
void WordFinder::cancelSearches()
|
|
|
|
{
|
|
|
|
for( list< sptr< Dictionary::WordSearchRequest > >::iterator i =
|
|
|
|
queuedRequests.begin(); i != queuedRequests.end(); ++i )
|
|
|
|
(*i)->cancel();
|
2009-01-29 19:16:25 +00:00
|
|
|
}
|
|
|
|
|