mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
+ Dictionary::stemmedMatch() added, and is now used for word suggestions when
no translations were found.
This commit is contained in:
parent
3d41751636
commit
629a138f7e
|
@ -246,6 +246,26 @@ div.sdct_x
|
|||
margin-top: 1em;
|
||||
}
|
||||
|
||||
/************* Stemmed suggestions *****************/
|
||||
|
||||
/* Outer block wrapping the "close words" suggestions that are appended to the
   page when a stemmed search returns results. */
.gdstemmedsuggestion
|
||||
{
|
||||
/* Add some horizontal and vertical space */
|
||||
margin-top: 1em;
|
||||
margin-left: 1px;
|
||||
}
|
||||
|
||||
|
||||
/* Label span that precedes the suggestion links; indented and italicised. */
.gdstemmedsuggestion_head
|
||||
{
|
||||
margin-left: 11px;
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
|
||||
/* Span holding the suggestion links themselves. No declarations are set here;
   the rule only provides a named place to style that span. */
.gdstemmedsuggestion_body
|
||||
{
|
||||
}
|
||||
|
||||
/************* Dictd articles *****************/
|
||||
.dictd_article
|
||||
{
|
||||
|
|
|
@ -353,9 +353,19 @@ void ArticleRequest::bodyFinished()
|
|||
{
|
||||
// No definitions were ever found, say so to the user.
|
||||
footer += ArticleMaker::makeNotFoundBody( word, group );
|
||||
}
|
||||
|
||||
footer += "</body></html>";
|
||||
// When there were no definitions, we run stemmed search.
|
||||
stemmedWordFinder = new WordFinder( this );
|
||||
|
||||
connect( stemmedWordFinder.get(), SIGNAL( finished() ),
|
||||
this, SLOT( stemmedSearchFinished() ), Qt::QueuedConnection );
|
||||
|
||||
stemmedWordFinder->stemmedMatch( word, activeDicts );
|
||||
}
|
||||
else
|
||||
{
|
||||
footer += "</body></html>";
|
||||
}
|
||||
|
||||
Mutex::Lock _( dataMutex );
|
||||
|
||||
|
@ -366,10 +376,56 @@ void ArticleRequest::bodyFinished()
|
|||
memcpy( &data.front() + offset, footer.data(), footer.size() );
|
||||
}
|
||||
|
||||
finish();
|
||||
if ( stemmedWordFinder.get() )
|
||||
update();
|
||||
else
|
||||
finish();
|
||||
}
|
||||
else
|
||||
if ( wasUpdated )
|
||||
update();
|
||||
}
|
||||
|
||||
void ArticleRequest::stemmedSearchFinished()
|
||||
{
|
||||
// Got stemmed matching results
|
||||
|
||||
WordFinder::SearchResults sr = stemmedWordFinder->getResults();
|
||||
|
||||
string footer;
|
||||
|
||||
if ( sr.size() )
|
||||
{
|
||||
footer += "<div class=\"gdstemmedsuggestion\"><span class=\"gdstemmedsuggestion_head\">" +
|
||||
Html::escape( tr( "Close words: " ).toUtf8().data() ) +
|
||||
"</span><span class=\"gdstemmedsuggestion_body\">";
|
||||
|
||||
for( unsigned x = 0; x < sr.size(); ++x )
|
||||
{
|
||||
string escapedResult = Html::escape( sr[ x ].first.toUtf8().data() );
|
||||
footer += "<a href=\"bword://" + escapedResult + "\">" + escapedResult +"</a>";
|
||||
|
||||
if ( x != sr.size() - 1 )
|
||||
{
|
||||
footer += ", ";
|
||||
}
|
||||
}
|
||||
|
||||
footer += "</span></div>";
|
||||
}
|
||||
|
||||
footer += "</body></html>";
|
||||
|
||||
{
|
||||
Mutex::Lock _( dataMutex );
|
||||
|
||||
size_t offset = data.size();
|
||||
|
||||
data.resize( data.size() + footer.size() );
|
||||
|
||||
memcpy( &data.front() + offset, footer.data(), footer.size() );
|
||||
}
|
||||
|
||||
finish();
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <list>
|
||||
#include "dictionary.hh"
|
||||
#include "instances.hh"
|
||||
#include "wordfinder.hh"
|
||||
|
||||
/// This class generates the article's body for the given lookup request
|
||||
class ArticleMaker: public QObject
|
||||
|
@ -66,6 +67,7 @@ class ArticleRequest: public Dictionary::DataRequest
|
|||
bool foundAnyDefinitions;
|
||||
bool closePrevSpan; // Indicates whether the last opened article span is to
|
||||
// be closed after the article ends.
|
||||
sptr< WordFinder > stemmedWordFinder; // Used when there're no results
|
||||
|
||||
public:
|
||||
|
||||
|
@ -80,6 +82,7 @@ private slots:
|
|||
|
||||
void altSearchFinished();
|
||||
void bodyFinished();
|
||||
void stemmedSearchFinished();
|
||||
};
|
||||
|
||||
|
||||
|
|
175
src/btreeidx.cc
175
src/btreeidx.cc
|
@ -115,6 +115,9 @@ class BtreeWordSearchRequest: public Dictionary::WordSearchRequest
|
|||
BtreeDictionary & dict;
|
||||
wstring str;
|
||||
unsigned long maxResults;
|
||||
unsigned minLength;
|
||||
int maxSuffixVariation;
|
||||
bool allowMiddleMatches;
|
||||
QAtomicInt isCancelled;
|
||||
QSemaphore hasExited;
|
||||
|
||||
|
@ -122,8 +125,15 @@ public:
|
|||
|
||||
BtreeWordSearchRequest( BtreeDictionary & dict_,
|
||||
wstring const & str_,
|
||||
unsigned minLength_,
|
||||
int maxSuffixVariation_,
|
||||
bool allowMiddleMatches_,
|
||||
unsigned long maxResults_ ):
|
||||
dict( dict_ ), str( str_ ), maxResults( maxResults_ )
|
||||
dict( dict_ ), str( str_ ),
|
||||
maxResults( maxResults_ ),
|
||||
minLength( minLength_ ),
|
||||
maxSuffixVariation( maxSuffixVariation_ ),
|
||||
allowMiddleMatches( allowMiddleMatches_ )
|
||||
{
|
||||
QThreadPool::globalInstance()->start(
|
||||
new BtreeWordSearchRunnable( *this, hasExited ) );
|
||||
|
@ -158,80 +168,112 @@ void BtreeWordSearchRequest::run()
|
|||
|
||||
wstring folded = Folding::apply( str );
|
||||
|
||||
bool exactMatch;
|
||||
int initialFoldedSize = folded.size();
|
||||
|
||||
vector< char > leaf;
|
||||
uint32_t nextLeaf;
|
||||
char const * leafEnd;
|
||||
int charsLeftToChop = 0;
|
||||
|
||||
char const * chainOffset = dict.findChainOffsetExactOrPrefix( folded, exactMatch,
|
||||
leaf, nextLeaf,
|
||||
leafEnd );
|
||||
if ( maxSuffixVariation >= 0 )
|
||||
{
|
||||
charsLeftToChop = initialFoldedSize - (int)minLength;
|
||||
|
||||
if ( charsLeftToChop < 0 )
|
||||
charsLeftToChop = 0;
|
||||
else
|
||||
if ( charsLeftToChop > maxSuffixVariation )
|
||||
charsLeftToChop = maxSuffixVariation;
|
||||
}
|
||||
|
||||
if ( chainOffset )
|
||||
for( ; ; )
|
||||
{
|
||||
if ( isCancelled )
|
||||
break;
|
||||
|
||||
//printf( "offset = %u, size = %u\n", chainOffset - &leaf.front(), leaf.size() );
|
||||
|
||||
vector< WordArticleLink > chain = dict.readChain( chainOffset );
|
||||
|
||||
wstring chainHead = Utf8::decode( chain[ 0 ].word );
|
||||
|
||||
wstring resultFolded = Folding::apply( chainHead );
|
||||
|
||||
if ( resultFolded.size() >= folded.size() && !resultFolded.compare( 0, folded.size(), folded ) )
|
||||
bool exactMatch;
|
||||
|
||||
vector< char > leaf;
|
||||
uint32_t nextLeaf;
|
||||
char const * leafEnd;
|
||||
|
||||
char const * chainOffset = dict.findChainOffsetExactOrPrefix( folded, exactMatch,
|
||||
leaf, nextLeaf,
|
||||
leafEnd );
|
||||
|
||||
if ( chainOffset )
|
||||
for( ; ; )
|
||||
{
|
||||
// Exact or prefix match
|
||||
|
||||
Mutex::Lock _( dataMutex );
|
||||
|
||||
for( unsigned x = 0; x < chain.size(); ++x )
|
||||
matches.push_back( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) );
|
||||
|
||||
if ( matches.size() >= maxResults )
|
||||
{
|
||||
// For now we actually allow more than maxResults if the last
|
||||
// chain yield more than one result. That's ok and maybe even more
|
||||
// desirable.
|
||||
if ( isCancelled )
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
// Neither exact nor a prefix match, end this
|
||||
break;
|
||||
|
||||
// Fetch new leaf if we're out of chains here
|
||||
|
||||
if ( chainOffset >= leafEnd )
|
||||
{
|
||||
// We're past the current leaf, fetch the next one
|
||||
|
||||
//printf( "advancing\n" );
|
||||
|
||||
if ( nextLeaf )
|
||||
|
||||
//printf( "offset = %u, size = %u\n", chainOffset - &leaf.front(), leaf.size() );
|
||||
|
||||
vector< WordArticleLink > chain = dict.readChain( chainOffset );
|
||||
|
||||
wstring chainHead = Utf8::decode( chain[ 0 ].word );
|
||||
|
||||
wstring resultFolded = Folding::apply( chainHead );
|
||||
|
||||
if ( resultFolded.size() >= folded.size() && !resultFolded.compare( 0, folded.size(), folded ) )
|
||||
{
|
||||
Mutex::Lock _( *dict.idxFileMutex );
|
||||
|
||||
dict.readNode( nextLeaf, leaf );
|
||||
leafEnd = &leaf.front() + leaf.size();
|
||||
// Exact or prefix match
|
||||
|
||||
nextLeaf = dict.idxFile->read< uint32_t >();
|
||||
chainOffset = &leaf.front() + sizeof( uint32_t );
|
||||
|
||||
uint32_t leafEntries = *(uint32_t *)&leaf.front();
|
||||
|
||||
if ( leafEntries == 0xffffFFFF )
|
||||
Mutex::Lock _( dataMutex );
|
||||
|
||||
for( unsigned x = 0; x < chain.size(); ++x )
|
||||
{
|
||||
//printf( "bah!\n" );
|
||||
exit( 1 );
|
||||
// Skip middle matches, if requested. If suffix variation is specified,
|
||||
// make sure the string isn't larger than requested.
|
||||
if ( ( allowMiddleMatches || Folding::apply( Utf8::decode( chain[ x ].prefix ) ).empty() ) &&
|
||||
( maxSuffixVariation < 0 || (int)resultFolded.size() - initialFoldedSize <= maxSuffixVariation ) )
|
||||
matches.push_back( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) );
|
||||
}
|
||||
|
||||
if ( matches.size() >= maxResults )
|
||||
{
|
||||
// For now we actually allow more than maxResults if the last
|
||||
// chain yield more than one result. That's ok and maybe even more
|
||||
// desirable.
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
break; // That was the last leaf
|
||||
// Neither exact nor a prefix match, end this
|
||||
break;
|
||||
|
||||
// Fetch new leaf if we're out of chains here
|
||||
|
||||
if ( chainOffset >= leafEnd )
|
||||
{
|
||||
// We're past the current leaf, fetch the next one
|
||||
|
||||
//printf( "advancing\n" );
|
||||
|
||||
if ( nextLeaf )
|
||||
{
|
||||
Mutex::Lock _( *dict.idxFileMutex );
|
||||
|
||||
dict.readNode( nextLeaf, leaf );
|
||||
leafEnd = &leaf.front() + leaf.size();
|
||||
|
||||
nextLeaf = dict.idxFile->read< uint32_t >();
|
||||
chainOffset = &leaf.front() + sizeof( uint32_t );
|
||||
|
||||
uint32_t leafEntries = *(uint32_t *)&leaf.front();
|
||||
|
||||
if ( leafEntries == 0xffffFFFF )
|
||||
{
|
||||
//printf( "bah!\n" );
|
||||
exit( 1 );
|
||||
}
|
||||
}
|
||||
else
|
||||
break; // That was the last leaf
|
||||
}
|
||||
}
|
||||
|
||||
if ( charsLeftToChop && !isCancelled )
|
||||
{
|
||||
--charsLeftToChop;
|
||||
folded.resize( folded.size() - 1 );
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
finish();
|
||||
|
@ -241,7 +283,16 @@ sptr< Dictionary::WordSearchRequest > BtreeDictionary::prefixMatch(
|
|||
wstring const & str, unsigned long maxResults )
|
||||
throw( std::exception )
|
||||
{
|
||||
return new BtreeWordSearchRequest( *this, str, maxResults );
|
||||
return new BtreeWordSearchRequest( *this, str, 0, -1, true, maxResults );
|
||||
}
|
||||
|
||||
/// Creates a search request looking for words sharing a stem with str.
/// The request is a BtreeWordSearchRequest configured with the given minimum
/// length and suffix variation, and with middle-of-phrase matches turned off.
sptr< Dictionary::WordSearchRequest > BtreeDictionary::stemmedMatch(
  wstring const & str, unsigned minLength, unsigned maxSuffixVariation,
  unsigned long maxResults )
  throw( std::exception )
{
  // Stemmed searches never accept matches from the middle of a phrase
  bool const acceptMiddleMatches = false;

  // The request stores the variation as a signed value
  int const suffixVariation = static_cast< int >( maxSuffixVariation );

  return new BtreeWordSearchRequest( *this, str, minLength, suffixVariation,
                                     acceptMiddleMatches, maxResults );
}
|
||||
|
||||
void BtreeDictionary::readNode( uint32_t offset, vector< char > & out )
|
||||
|
|
|
@ -75,6 +75,12 @@ public:
|
|||
unsigned long )
|
||||
throw( std::exception );
|
||||
|
||||
virtual sptr< Dictionary::WordSearchRequest > stemmedMatch( wstring const &,
|
||||
unsigned minLength,
|
||||
unsigned maxSuffixVariation,
|
||||
unsigned long maxResults )
|
||||
throw( std::exception );
|
||||
|
||||
protected:
|
||||
|
||||
/// Opens the index. The file reference is saved to be used for
|
||||
|
|
|
@ -69,6 +69,14 @@ WordMatch WordSearchRequest::operator [] ( size_t index ) throw( exIndexOutOfRan
|
|||
return matches[ index ];
|
||||
}
|
||||
|
||||
/// Gives access to the complete list of matches. Throws exRequestUnfinished
/// if the request has not finished yet.
vector< WordMatch > & WordSearchRequest::getAllMatches() throw( exRequestUnfinished )
{
  if ( isFinished() )
    return matches;

  throw exRequestUnfinished();
}
|
||||
|
||||
////////////// DataRequest
|
||||
|
||||
long DataRequest::dataSize()
|
||||
|
@ -102,6 +110,15 @@ Class::Class( string const & id_, vector< string > const & dictionaryFiles_ ):
|
|||
{
|
||||
}
|
||||
|
||||
/// Default stemmed match: ignores all of its arguments and hands back a
/// freshly created WordSearchRequestInstant.
sptr< WordSearchRequest > Class::stemmedMatch( wstring const & /*str*/,
                                               unsigned /*minLength*/,
                                               unsigned /*maxSuffixVariation*/,
                                               unsigned long /*maxResults*/ )
  throw( std::exception )
{
  sptr< WordSearchRequest > instantRequest = new WordSearchRequestInstant();

  return instantRequest;
}
|
||||
|
||||
sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & )
|
||||
throw( std::exception )
|
||||
{
|
||||
|
|
|
@ -136,6 +136,10 @@ public:
|
|||
/// Returns the match with the given zero-based index, which should be less
|
||||
/// than matchesCount().
|
||||
WordMatch operator [] ( size_t index ) throw( exIndexOutOfRange );
|
||||
|
||||
/// Returns all the matches found. Since no further locking can or would be
|
||||
/// done, this can only be called after the request has finished.
|
||||
vector< WordMatch > & getAllMatches() throw( exRequestUnfinished );
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -261,6 +265,19 @@ public:
|
|||
virtual sptr< WordSearchRequest > prefixMatch( wstring const &,
|
||||
unsigned long maxResults ) throw( std::exception )=0;
|
||||
|
||||
/// Looks up a given word in the dictionary, aiming to find different forms
|
||||
/// of the given word by allowing suffix variations. This means allowing words
|
||||
/// which can be as short as the input word size minus maxSuffixVariation, or as
|
||||
/// long as the input word size plus maxSuffixVariation, which share at least
|
||||
/// the input word size minus maxSuffixVariation initial symbols.
|
||||
/// Since the goal is to find forms of the words, no matches where a word
|
||||
/// in the middle of a phrase got matched should be returned.
|
||||
/// The default implementation does nothing, returning an empty result.
|
||||
virtual sptr< WordSearchRequest > stemmedMatch( wstring const &,
|
||||
unsigned minLength,
|
||||
unsigned maxSuffixVariation,
|
||||
unsigned long maxResults ) throw( std::exception );
|
||||
|
||||
/// Finds known headwords for the given word, that is, the words for which
|
||||
/// the given word is a synonym. If a dictionary can't perform this operation,
|
||||
/// it should leave the default implementation which always returns an empty
|
||||
|
|
|
@ -843,7 +843,7 @@ void MainWindow::prefixMatchFinished()
|
|||
|
||||
void MainWindow::updateMatchResults( bool finished )
|
||||
{
|
||||
WordFinder::SearchResults const & results = wordFinder.getPrefixMatchResults();
|
||||
WordFinder::SearchResults const & results = wordFinder.getResults();
|
||||
|
||||
ui.wordList->setUpdatesEnabled( false );
|
||||
|
||||
|
|
|
@ -373,7 +373,7 @@ void ScanPopup::prefixMatchFinished()
|
|||
else
|
||||
ui.queryError->hide();
|
||||
|
||||
ui.wordListButton->setVisible( wordFinder.getPrefixMatchResults().size() );
|
||||
ui.wordListButton->setVisible( wordFinder.getResults().size() );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -382,7 +382,7 @@ void ScanPopup::on_wordListButton_clicked()
|
|||
if ( !isVisible() )
|
||||
return;
|
||||
|
||||
WordFinder::SearchResults const & results = wordFinder.getPrefixMatchResults();
|
||||
WordFinder::SearchResults const & results = wordFinder.getResults();
|
||||
|
||||
if ( results.empty() )
|
||||
return;
|
||||
|
|
|
@ -35,6 +35,7 @@ void WordFinder::prefixMatch( QString const & str,
|
|||
cancel();
|
||||
|
||||
searchQueued = true;
|
||||
searchType = PrefixMatch;
|
||||
inputWord = str;
|
||||
inputDicts = &dicts;
|
||||
|
||||
|
@ -52,6 +53,23 @@ void WordFinder::prefixMatch( QString const & str,
|
|||
// new search. This shouldn't take a lot of time, since they were all
|
||||
// cancelled, but still it could take some time.
|
||||
}
|
||||
/// Queues a stemmed-match search for str over the given dictionaries.
/// Any search currently going on is cancelled first. The new search is started
/// right away only when there are no queued requests left.
void WordFinder::stemmedMatch( QString const & str,
                               std::vector< sptr< Dictionary::Class > > const & dicts )
{
  cancel();

  // Throw away whatever the previous search has produced
  resultsArray.clear();
  resultsIndex.clear();
  searchResults.clear();

  // Remember the parameters of the search to run
  inputDicts = &dicts;
  inputWord = str;
  searchType = StemmedMatch;
  searchQueued = true;

  if ( !queuedRequests.empty() )
    return;

  startSearch();
}
|
||||
|
||||
void WordFinder::startSearch()
|
||||
{
|
||||
|
@ -68,10 +86,13 @@ void WordFinder::startSearch()
|
|||
searchInProgress = true;
|
||||
|
||||
wstring word = inputWord.toStdWString();
|
||||
|
||||
|
||||
for( size_t x = 0; x < inputDicts->size(); ++x )
|
||||
{
|
||||
sptr< Dictionary::WordSearchRequest > sr = (*inputDicts)[ x ]->prefixMatch( word, 40 );
|
||||
sptr< Dictionary::WordSearchRequest > sr =
|
||||
( searchType == PrefixMatch ) ?
|
||||
(*inputDicts)[ x ]->prefixMatch( word, 40 ) :
|
||||
(*inputDicts)[ x ]->stemmedMatch( word, 3, 3, 30 );
|
||||
|
||||
connect( sr.get(), SIGNAL( finished() ),
|
||||
this, SLOT( requestFinished() ), Qt::QueuedConnection );
|
||||
|
@ -243,92 +264,126 @@ void WordFinder::updateResults()
|
|||
finishedRequests.erase( i++ );
|
||||
}
|
||||
|
||||
size_t maxSearchResults = 500;
|
||||
|
||||
if ( resultsArray.size() )
|
||||
{
|
||||
/// Assign each result a category, storing it in the rank's field
|
||||
|
||||
enum Category
|
||||
if ( searchType == PrefixMatch )
|
||||
{
|
||||
ExactMatch,
|
||||
ExactNoFullCaseMatch,
|
||||
ExactNoDiaMatch,
|
||||
ExactNoPunctMatch,
|
||||
ExactNoWsMatch,
|
||||
ExactInsideMatch,
|
||||
ExactNoDiaInsideMatch,
|
||||
ExactNoPunctInsideMatch,
|
||||
PrefixMatch,
|
||||
PrefixNoDiaMatch,
|
||||
PrefixNoPunctMatch,
|
||||
PrefixNoWsMatch,
|
||||
WorstMatch,
|
||||
Multiplier = 256 // Categories should be multiplied by Multiplier
|
||||
};
|
||||
|
||||
wstring target = Folding::applySimpleCaseOnly( inputWord.toStdWString() );
|
||||
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
|
||||
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
|
||||
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
|
||||
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
|
||||
|
||||
wstring::size_type matchPos = 0;
|
||||
|
||||
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
|
||||
i != j; ++i )
|
||||
{
|
||||
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
|
||||
|
||||
if ( i->first == target )
|
||||
i->second->rank = ExactMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase )
|
||||
i->second->rank = ExactNoFullCaseMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia )
|
||||
i->second->rank = ExactNoDiaMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct )
|
||||
i->second->rank = ExactNoPunctMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs )
|
||||
i->second->rank = ExactNoWsMatch * Multiplier;
|
||||
else
|
||||
if ( hasSurroundedWithWs( i->first, target, matchPos ) )
|
||||
i->second->rank = ExactInsideMatch * Multiplier + matchPos;
|
||||
else
|
||||
if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) )
|
||||
i->second->rank = ExactNoDiaInsideMatch * Multiplier + matchPos;
|
||||
else
|
||||
if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) )
|
||||
i->second->rank = ExactNoPunctInsideMatch * Multiplier + matchPos;
|
||||
else
|
||||
if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 )
|
||||
i->second->rank = PrefixMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 )
|
||||
i->second->rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 )
|
||||
i->second->rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 )
|
||||
i->second->rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
i->second->rank = WorstMatch * Multiplier;
|
||||
/// Assign each result a category, storing it in the rank's field
|
||||
|
||||
enum Category
|
||||
{
|
||||
ExactMatch,
|
||||
ExactNoFullCaseMatch,
|
||||
ExactNoDiaMatch,
|
||||
ExactNoPunctMatch,
|
||||
ExactNoWsMatch,
|
||||
ExactInsideMatch,
|
||||
ExactNoDiaInsideMatch,
|
||||
ExactNoPunctInsideMatch,
|
||||
PrefixMatch,
|
||||
PrefixNoDiaMatch,
|
||||
PrefixNoPunctMatch,
|
||||
PrefixNoWsMatch,
|
||||
WorstMatch,
|
||||
Multiplier = 256 // Categories should be multiplied by Multiplier
|
||||
};
|
||||
|
||||
wstring target = Folding::applySimpleCaseOnly( inputWord.toStdWString() );
|
||||
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
|
||||
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
|
||||
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
|
||||
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
|
||||
|
||||
wstring::size_type matchPos = 0;
|
||||
|
||||
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
|
||||
i != j; ++i )
|
||||
{
|
||||
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
|
||||
|
||||
if ( i->first == target )
|
||||
i->second->rank = ExactMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase )
|
||||
i->second->rank = ExactNoFullCaseMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia )
|
||||
i->second->rank = ExactNoDiaMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct )
|
||||
i->second->rank = ExactNoPunctMatch * Multiplier;
|
||||
else
|
||||
if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs )
|
||||
i->second->rank = ExactNoWsMatch * Multiplier;
|
||||
else
|
||||
if ( hasSurroundedWithWs( i->first, target, matchPos ) )
|
||||
i->second->rank = ExactInsideMatch * Multiplier + matchPos;
|
||||
else
|
||||
if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) )
|
||||
i->second->rank = ExactNoDiaInsideMatch * Multiplier + matchPos;
|
||||
else
|
||||
if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) )
|
||||
i->second->rank = ExactNoPunctInsideMatch * Multiplier + matchPos;
|
||||
else
|
||||
if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 )
|
||||
i->second->rank = PrefixMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 )
|
||||
i->second->rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 )
|
||||
i->second->rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 )
|
||||
i->second->rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() );
|
||||
else
|
||||
i->second->rank = WorstMatch * Multiplier;
|
||||
}
|
||||
|
||||
resultsArray.sort( SortByRank() );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Handling stemmed matches
|
||||
|
||||
resultsArray.sort( SortByRank() );
|
||||
// We use two factors -- first is the number of characters strings share
|
||||
// in their beginnings, and second, the length of the strings. Here we assign
|
||||
// only the first one, storing it in rank. Then we sort the results using
|
||||
// SortByRankAndLength.
|
||||
wstring target = Folding::apply( inputWord.toStdWString() );
|
||||
|
||||
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
|
||||
i != j; ++i )
|
||||
{
|
||||
wstring resultFolded = Folding::apply( i->first );
|
||||
|
||||
int charsInCommon = 0;
|
||||
|
||||
for( wchar_t const * t = target.c_str(), * r = resultFolded.c_str();
|
||||
*t && *t == *r; ++t, ++r, ++charsInCommon ) ;
|
||||
|
||||
i->second->rank = -charsInCommon; // Negated so the lesser-than
|
||||
// comparison would yield right
|
||||
// results.
|
||||
}
|
||||
|
||||
resultsArray.sort( SortByRankAndLength() );
|
||||
|
||||
maxSearchResults = 15;
|
||||
}
|
||||
}
|
||||
|
||||
searchResults.clear();
|
||||
searchResults.reserve( resultsArray.size() < 500 ? resultsArray.size() : 500 );
|
||||
searchResults.reserve( resultsArray.size() < maxSearchResults ? resultsArray.size() : maxSearchResults );
|
||||
|
||||
for( ResultsArray::const_iterator i = resultsArray.begin(), j = resultsArray.end();
|
||||
i != j; ++i )
|
||||
{
|
||||
//printf( "%d: %ls\n", i->second, i->first.c_str() );
|
||||
|
||||
if ( searchResults.size() < 500 )
|
||||
if ( searchResults.size() < maxSearchResults )
|
||||
searchResults.push_back( std::pair< QString, bool >( QString::fromStdWString( i->word ), i->wasSuggested ) );
|
||||
else
|
||||
break;
|
||||
|
|
|
@ -37,6 +37,12 @@ private:
|
|||
// Saved search params
|
||||
bool searchQueued;
|
||||
QString inputWord;
|
||||
enum SearchType
|
||||
{
|
||||
PrefixMatch,
|
||||
StemmedMatch
|
||||
} searchType;
|
||||
|
||||
std::vector< sptr< Dictionary::Class > > const * inputDicts;
|
||||
|
||||
struct OneResult
|
||||
|
@ -67,11 +73,17 @@ public:
|
|||
/// and the new one replaces it.
|
||||
void prefixMatch( QString const &,
|
||||
std::vector< sptr< Dictionary::Class > > const & );
|
||||
|
||||
|
||||
/// Do a stemmed-match search in the given list of dictionaries. All comments
|
||||
/// from prefixMatch() generally apply as well.
|
||||
void stemmedMatch( QString const &,
|
||||
std::vector< sptr< Dictionary::Class > > const & );
|
||||
|
||||
/// Returns the vector containing search results from the last prefixMatch()
|
||||
/// operation. If it didn't finish yet, the result is not final and may
|
||||
/// be changing over time.
|
||||
SearchResults const & getPrefixMatchResults() const
|
||||
/// Returns the vector containing search results from the last operation.
|
||||
/// If it didn't finish yet, the result is not final and may be changing
|
||||
/// over time.
|
||||
SearchResults const & getResults() const
|
||||
{ return searchResults; }
|
||||
|
||||
/// Returns a human-readable error string for the last finished request. Empty
|
||||
|
@ -93,7 +105,7 @@ signals:
|
|||
/// searching.
|
||||
void updated();
|
||||
|
||||
/// Idicates that the search has finished.
|
||||
/// Indicates that the search has finished.
|
||||
void finished();
|
||||
|
||||
private slots:
|
||||
|
@ -129,6 +141,29 @@ private:
|
|||
return first.word < second.word;
|
||||
}
|
||||
};
|
||||
|
||||
/// Compares results based on their ranks and lengths
|
||||
struct SortByRankAndLength
|
||||
{
|
||||
bool operator () ( OneResult const & first, OneResult const & second )
|
||||
{
|
||||
if ( first.rank < second.rank )
|
||||
return true;
|
||||
|
||||
if ( first.rank > second.rank )
|
||||
return false;
|
||||
|
||||
if ( first.word.size() < second.word.size() )
|
||||
return true;
|
||||
|
||||
if ( first.word.size() > second.word.size() )
|
||||
return false;
|
||||
|
||||
// Do any sort of collation here in the future. For now we just put the
|
||||
// strings sorted lexicographically.
|
||||
return first.word < second.word;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue