mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 00:14:06 +00:00
Successfully find arbitrarily large compound expressions.
Previously the program could only safely find two-word compounds. Now it always finds all of them, even if they are large sentences with many words. To choose the source for compounds, a notion of dictionary features was added. It may be utilized later for some more interesting things.
This commit is contained in:
parent
9eb20cf0b6
commit
67ca2ee1dd
|
@ -573,12 +573,10 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
|||
|
||||
string footer;
|
||||
|
||||
if ( currentSplittedWordEnd - currentSplittedWordStart > 1 ) // We have something to append
|
||||
if ( lastGoodCompoundResult.size() ) // We have something to append
|
||||
{
|
||||
// printf( "Appending\n" );
|
||||
|
||||
--currentSplittedWordEnd;
|
||||
|
||||
if ( !firstCompoundWasFound )
|
||||
{
|
||||
// Append the beginning
|
||||
|
@ -594,7 +592,9 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
|||
footer += " / ";
|
||||
}
|
||||
|
||||
footer += linkWord( makeSplittedWordCompound() );
|
||||
footer += linkWord( lastGoodCompoundResult );
|
||||
|
||||
lastGoodCompoundResult.clear();
|
||||
}
|
||||
|
||||
// Then, start a new search for the next word, if possible
|
||||
|
@ -672,7 +672,8 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
|||
|
||||
// printf( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) );
|
||||
|
||||
stemmedWordFinder->stemmedMatch( currentSplittedWordCompound, activeDicts, 0, 0, 1 );
|
||||
stemmedWordFinder->prefixMatch( currentSplittedWordCompound, activeDicts, 40, // Would one be enough? Leave 40 to be safe.
|
||||
Dictionary::SuitableForCompoundSearching );
|
||||
}
|
||||
|
||||
QString ArticleRequest::makeSplittedWordCompound()
|
||||
|
@ -707,13 +708,35 @@ void ArticleRequest::individualWordFinished()
|
|||
// Check if the aliases are acceptable
|
||||
wstring source = Folding::applySimpleCaseOnly( gd::toWString( currentSplittedWordCompound ) );
|
||||
|
||||
bool hadSomething = false;
|
||||
|
||||
for( unsigned x = 0; x < results.size(); ++x )
|
||||
if ( source == Folding::applySimpleCaseOnly( gd::toWString( results[ x ].first ) ) )
|
||||
{
|
||||
if ( results[ x ].second )
|
||||
continue; // We're not interested in suggestions
|
||||
|
||||
wstring result( Folding::applySimpleCaseOnly( gd::toWString( results[ x ].first ) ) );
|
||||
|
||||
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 )
|
||||
{
|
||||
// Ok, good enough
|
||||
compoundSearchNextStep( true );
|
||||
return;
|
||||
// The resulting string begins with the source one
|
||||
|
||||
hadSomething = true;
|
||||
|
||||
if ( source.size() == result.size() )
|
||||
{
|
||||
// Got the match. No need to continue.
|
||||
lastGoodCompoundResult = currentSplittedWordCompound;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( hadSomething )
|
||||
{
|
||||
compoundSearchNextStep( true );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
compoundSearchNextStep( false );
|
||||
|
|
|
@ -100,6 +100,7 @@ class ArticleRequest: public Dictionary::DataRequest
|
|||
int currentSplittedWordStart;
|
||||
int currentSplittedWordEnd;
|
||||
QString currentSplittedWordCompound;
|
||||
QString lastGoodCompoundResult;
|
||||
bool firstCompoundWasFound;
|
||||
|
||||
public:
|
||||
|
|
|
@ -139,6 +139,10 @@ public:
|
|||
|
||||
BtreeDictionary( string const & id, vector< string > const & dictionaryFiles );
|
||||
|
||||
/// Btree-indexed dictionaries are usually a good source for compound searches.
|
||||
virtual Dictionary::Features getFeatures() const throw()
|
||||
{ return Dictionary::SuitableForCompoundSearching; }
|
||||
|
||||
/// This function does the search using the btree index. Derivatives
|
||||
/// need not implement this function.
|
||||
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &,
|
||||
|
|
|
@ -222,6 +222,18 @@ public:
|
|||
{ return data; }
|
||||
};
|
||||
|
||||
/// Dictionary features. Different dictionaries can possess different features,
|
||||
/// which hint at some of their aspects.
|
||||
enum Feature
|
||||
{
|
||||
/// No features
|
||||
NoFeatures = 0,
|
||||
/// The dictionary is suitable to query when searching for compound expressions.
|
||||
SuitableForCompoundSearching = 1
|
||||
};
|
||||
|
||||
Q_DECLARE_FLAGS( Features, Feature )
|
||||
Q_DECLARE_OPERATORS_FOR_FLAGS( Features )
|
||||
|
||||
/// A dictionary. Can be used to query words.
|
||||
class Class
|
||||
|
@ -259,6 +271,11 @@ public:
|
|||
/// description etc. All strings are in utf8.
|
||||
virtual map< Property, string > getProperties() throw()=0;
|
||||
|
||||
/// Returns the features the dictionary possesses. See the Feature enum for
|
||||
/// their list.
|
||||
virtual Features getFeatures() const throw()
|
||||
{ return NoFeatures; }
|
||||
|
||||
/// Returns the number of articles in the dictionary.
|
||||
virtual unsigned long getArticleCount() throw()=0;
|
||||
|
||||
|
|
|
@ -32,7 +32,9 @@ WordFinder::~WordFinder()
|
|||
}
|
||||
|
||||
void WordFinder::prefixMatch( QString const & str,
|
||||
std::vector< sptr< Dictionary::Class > > const & dicts )
|
||||
std::vector< sptr< Dictionary::Class > > const & dicts,
|
||||
unsigned long maxResults,
|
||||
Dictionary::Features features )
|
||||
{
|
||||
cancel();
|
||||
|
||||
|
@ -40,6 +42,8 @@ void WordFinder::prefixMatch( QString const & str,
|
|||
searchType = PrefixMatch;
|
||||
inputWord = str;
|
||||
inputDicts = &dicts;
|
||||
requestedMaxResults = maxResults;
|
||||
requestedFeatures = features;
|
||||
|
||||
resultsArray.clear();
|
||||
resultsIndex.clear();
|
||||
|
@ -59,7 +63,8 @@ void WordFinder::stemmedMatch( QString const & str,
|
|||
std::vector< sptr< Dictionary::Class > > const & dicts,
|
||||
unsigned minLength,
|
||||
unsigned maxSuffixVariation,
|
||||
unsigned long maxResults )
|
||||
unsigned long maxResults,
|
||||
Dictionary::Features features )
|
||||
{
|
||||
cancel();
|
||||
|
||||
|
@ -67,9 +72,10 @@ void WordFinder::stemmedMatch( QString const & str,
|
|||
searchType = StemmedMatch;
|
||||
inputWord = str;
|
||||
inputDicts = &dicts;
|
||||
requestedMaxResults = maxResults;
|
||||
requestedFeatures = features;
|
||||
stemmedMinLength = minLength;
|
||||
stemmedMaxSuffixVariation = maxSuffixVariation;
|
||||
stemmedMaxResults = maxResults;
|
||||
|
||||
resultsArray.clear();
|
||||
resultsIndex.clear();
|
||||
|
@ -111,12 +117,15 @@ void WordFinder::startSearch()
|
|||
|
||||
for( size_t x = 0; x < inputDicts->size(); ++x )
|
||||
{
|
||||
if ( ( (*inputDicts)[ x ]->getFeatures() & requestedFeatures ) != requestedFeatures )
|
||||
continue;
|
||||
|
||||
for( size_t y = 0; y < allWordWritings.size(); ++y )
|
||||
{
|
||||
sptr< Dictionary::WordSearchRequest > sr =
|
||||
( searchType == PrefixMatch ) ?
|
||||
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], 40 ) :
|
||||
(*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], stemmedMinLength, stemmedMaxSuffixVariation, stemmedMaxResults );
|
||||
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], requestedMaxResults ) :
|
||||
(*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], stemmedMinLength, stemmedMaxSuffixVariation, requestedMaxResults );
|
||||
|
||||
connect( sr.get(), SIGNAL( finished() ),
|
||||
this, SLOT( requestFinished() ), Qt::QueuedConnection );
|
||||
|
|
|
@ -42,9 +42,10 @@ private:
|
|||
PrefixMatch,
|
||||
StemmedMatch
|
||||
} searchType;
|
||||
unsigned long requestedMaxResults;
|
||||
Dictionary::Features requestedFeatures;
|
||||
unsigned stemmedMinLength;
|
||||
unsigned stemmedMaxSuffixVariation;
|
||||
unsigned long stemmedMaxResults;
|
||||
|
||||
std::vector< sptr< Dictionary::Class > > const * inputDicts;
|
||||
|
||||
|
@ -74,10 +75,14 @@ public:
|
|||
/// the exact matches would be found. All search results are put into a single
|
||||
/// list containing the exact matches first, then the prefix ones. Duplicate
|
||||
/// matches from different dictionaries are merged together.
|
||||
/// If a list of features is specified, the search will only be performed in
|
||||
/// the dictionaries which possess all the features requested.
|
||||
/// If there already was a prefixMatch operation underway, it gets cancelled
|
||||
/// and the new one replaces it.
|
||||
void prefixMatch( QString const &,
|
||||
std::vector< sptr< Dictionary::Class > > const & );
|
||||
std::vector< sptr< Dictionary::Class > > const &,
|
||||
unsigned long maxResults = 40,
|
||||
Dictionary::Features = Dictionary::NoFeatures );
|
||||
|
||||
/// Do a stemmed-match search in the given list of dictionaries. All comments
|
||||
/// from prefixMatch() generally apply as well.
|
||||
|
@ -85,7 +90,8 @@ public:
|
|||
std::vector< sptr< Dictionary::Class > > const &,
|
||||
unsigned minLength = 3,
|
||||
unsigned maxSuffixVariation = 3,
|
||||
unsigned long maxResults = 30 );
|
||||
unsigned long maxResults = 30,
|
||||
Dictionary::Features = Dictionary::NoFeatures );
|
||||
|
||||
/// Returns the vector containing search results from the last operation.
|
||||
/// If it didn't finish yet, the result is not final and may be changing
|
||||
|
|
Loading…
Reference in a new issue