mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
Successfully find arbitrarily large compound expressions.
Previously the program could only reliably find two-word compounds. Now it always finds all of them, even if they are long sentences with many words. To choose which dictionaries serve as the source for compounds, a notion of dictionary features was added. It may be used later for other, more interesting things.
This commit is contained in:
parent
9eb20cf0b6
commit
67ca2ee1dd
|
@ -573,12 +573,10 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
||||||
|
|
||||||
string footer;
|
string footer;
|
||||||
|
|
||||||
if ( currentSplittedWordEnd - currentSplittedWordStart > 1 ) // We have something to append
|
if ( lastGoodCompoundResult.size() ) // We have something to append
|
||||||
{
|
{
|
||||||
// printf( "Appending\n" );
|
// printf( "Appending\n" );
|
||||||
|
|
||||||
--currentSplittedWordEnd;
|
|
||||||
|
|
||||||
if ( !firstCompoundWasFound )
|
if ( !firstCompoundWasFound )
|
||||||
{
|
{
|
||||||
// Append the beginning
|
// Append the beginning
|
||||||
|
@ -594,7 +592,9 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
||||||
footer += " / ";
|
footer += " / ";
|
||||||
}
|
}
|
||||||
|
|
||||||
footer += linkWord( makeSplittedWordCompound() );
|
footer += linkWord( lastGoodCompoundResult );
|
||||||
|
|
||||||
|
lastGoodCompoundResult.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then, start a new search for the next word, if possible
|
// Then, start a new search for the next word, if possible
|
||||||
|
@ -672,7 +672,8 @@ void ArticleRequest::compoundSearchNextStep( bool lastSearchSucceeded )
|
||||||
|
|
||||||
// printf( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) );
|
// printf( "Looking up %s\n", qPrintable( currentSplittedWordCompound ) );
|
||||||
|
|
||||||
stemmedWordFinder->stemmedMatch( currentSplittedWordCompound, activeDicts, 0, 0, 1 );
|
stemmedWordFinder->prefixMatch( currentSplittedWordCompound, activeDicts, 40, // Would one be enough? Leave 40 to be safe.
|
||||||
|
Dictionary::SuitableForCompoundSearching );
|
||||||
}
|
}
|
||||||
|
|
||||||
QString ArticleRequest::makeSplittedWordCompound()
|
QString ArticleRequest::makeSplittedWordCompound()
|
||||||
|
@ -707,13 +708,35 @@ void ArticleRequest::individualWordFinished()
|
||||||
// Check if the aliases are acceptable
|
// Check if the aliases are acceptable
|
||||||
wstring source = Folding::applySimpleCaseOnly( gd::toWString( currentSplittedWordCompound ) );
|
wstring source = Folding::applySimpleCaseOnly( gd::toWString( currentSplittedWordCompound ) );
|
||||||
|
|
||||||
|
bool hadSomething = false;
|
||||||
|
|
||||||
for( unsigned x = 0; x < results.size(); ++x )
|
for( unsigned x = 0; x < results.size(); ++x )
|
||||||
if ( source == Folding::applySimpleCaseOnly( gd::toWString( results[ x ].first ) ) )
|
{
|
||||||
|
if ( results[ x ].second )
|
||||||
|
continue; // We're not interested in suggestions
|
||||||
|
|
||||||
|
wstring result( Folding::applySimpleCaseOnly( gd::toWString( results[ x ].first ) ) );
|
||||||
|
|
||||||
|
if ( source.size() <= result.size() && result.compare( 0, source.size(), source ) == 0 )
|
||||||
{
|
{
|
||||||
// Ok, good enough
|
// The resulting string begins with the source one
|
||||||
compoundSearchNextStep( true );
|
|
||||||
return;
|
hadSomething = true;
|
||||||
|
|
||||||
|
if ( source.size() == result.size() )
|
||||||
|
{
|
||||||
|
// Got the match. No need to continue.
|
||||||
|
lastGoodCompoundResult = currentSplittedWordCompound;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( hadSomething )
|
||||||
|
{
|
||||||
|
compoundSearchNextStep( true );
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
compoundSearchNextStep( false );
|
compoundSearchNextStep( false );
|
||||||
|
|
|
@ -100,6 +100,7 @@ class ArticleRequest: public Dictionary::DataRequest
|
||||||
int currentSplittedWordStart;
|
int currentSplittedWordStart;
|
||||||
int currentSplittedWordEnd;
|
int currentSplittedWordEnd;
|
||||||
QString currentSplittedWordCompound;
|
QString currentSplittedWordCompound;
|
||||||
|
QString lastGoodCompoundResult;
|
||||||
bool firstCompoundWasFound;
|
bool firstCompoundWasFound;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -139,6 +139,10 @@ public:
|
||||||
|
|
||||||
BtreeDictionary( string const & id, vector< string > const & dictionaryFiles );
|
BtreeDictionary( string const & id, vector< string > const & dictionaryFiles );
|
||||||
|
|
||||||
|
/// Btree-indexed dictionaries are usually a good source for compound searches.
|
||||||
|
virtual Dictionary::Features getFeatures() const throw()
|
||||||
|
{ return Dictionary::SuitableForCompoundSearching; }
|
||||||
|
|
||||||
/// This function does the search using the btree index. Derivatives
|
/// This function does the search using the btree index. Derivatives
|
||||||
/// need not implement this function.
|
/// need not implement this function.
|
||||||
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &,
|
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &,
|
||||||
|
|
|
@ -222,6 +222,18 @@ public:
|
||||||
{ return data; }
|
{ return data; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Dictionary features. Different dictionaries can possess different features,
|
||||||
|
/// which hint at some of their aspects.
|
||||||
|
enum Feature
|
||||||
|
{
|
||||||
|
/// No features
|
||||||
|
NoFeatures = 0,
|
||||||
|
/// The dictionary is suitable to query when searching for compound expressions.
|
||||||
|
SuitableForCompoundSearching = 1
|
||||||
|
};
|
||||||
|
|
||||||
|
Q_DECLARE_FLAGS( Features, Feature )
|
||||||
|
Q_DECLARE_OPERATORS_FOR_FLAGS( Features )
|
||||||
|
|
||||||
/// A dictionary. Can be used to query words.
|
/// A dictionary. Can be used to query words.
|
||||||
class Class
|
class Class
|
||||||
|
@ -259,6 +271,11 @@ public:
|
||||||
/// description etc. All strings are in utf8.
|
/// description etc. All strings are in utf8.
|
||||||
virtual map< Property, string > getProperties() throw()=0;
|
virtual map< Property, string > getProperties() throw()=0;
|
||||||
|
|
||||||
|
/// Returns the features the dictionary possesses. See the Feature enum for
|
||||||
|
/// their list.
|
||||||
|
virtual Features getFeatures() const throw()
|
||||||
|
{ return NoFeatures; }
|
||||||
|
|
||||||
/// Returns the number of articles in the dictionary.
|
/// Returns the number of articles in the dictionary.
|
||||||
virtual unsigned long getArticleCount() throw()=0;
|
virtual unsigned long getArticleCount() throw()=0;
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,9 @@ WordFinder::~WordFinder()
|
||||||
}
|
}
|
||||||
|
|
||||||
void WordFinder::prefixMatch( QString const & str,
|
void WordFinder::prefixMatch( QString const & str,
|
||||||
std::vector< sptr< Dictionary::Class > > const & dicts )
|
std::vector< sptr< Dictionary::Class > > const & dicts,
|
||||||
|
unsigned long maxResults,
|
||||||
|
Dictionary::Features features )
|
||||||
{
|
{
|
||||||
cancel();
|
cancel();
|
||||||
|
|
||||||
|
@ -40,6 +42,8 @@ void WordFinder::prefixMatch( QString const & str,
|
||||||
searchType = PrefixMatch;
|
searchType = PrefixMatch;
|
||||||
inputWord = str;
|
inputWord = str;
|
||||||
inputDicts = &dicts;
|
inputDicts = &dicts;
|
||||||
|
requestedMaxResults = maxResults;
|
||||||
|
requestedFeatures = features;
|
||||||
|
|
||||||
resultsArray.clear();
|
resultsArray.clear();
|
||||||
resultsIndex.clear();
|
resultsIndex.clear();
|
||||||
|
@ -59,7 +63,8 @@ void WordFinder::stemmedMatch( QString const & str,
|
||||||
std::vector< sptr< Dictionary::Class > > const & dicts,
|
std::vector< sptr< Dictionary::Class > > const & dicts,
|
||||||
unsigned minLength,
|
unsigned minLength,
|
||||||
unsigned maxSuffixVariation,
|
unsigned maxSuffixVariation,
|
||||||
unsigned long maxResults )
|
unsigned long maxResults,
|
||||||
|
Dictionary::Features features )
|
||||||
{
|
{
|
||||||
cancel();
|
cancel();
|
||||||
|
|
||||||
|
@ -67,9 +72,10 @@ void WordFinder::stemmedMatch( QString const & str,
|
||||||
searchType = StemmedMatch;
|
searchType = StemmedMatch;
|
||||||
inputWord = str;
|
inputWord = str;
|
||||||
inputDicts = &dicts;
|
inputDicts = &dicts;
|
||||||
|
requestedMaxResults = maxResults;
|
||||||
|
requestedFeatures = features;
|
||||||
stemmedMinLength = minLength;
|
stemmedMinLength = minLength;
|
||||||
stemmedMaxSuffixVariation = maxSuffixVariation;
|
stemmedMaxSuffixVariation = maxSuffixVariation;
|
||||||
stemmedMaxResults = maxResults;
|
|
||||||
|
|
||||||
resultsArray.clear();
|
resultsArray.clear();
|
||||||
resultsIndex.clear();
|
resultsIndex.clear();
|
||||||
|
@ -111,12 +117,15 @@ void WordFinder::startSearch()
|
||||||
|
|
||||||
for( size_t x = 0; x < inputDicts->size(); ++x )
|
for( size_t x = 0; x < inputDicts->size(); ++x )
|
||||||
{
|
{
|
||||||
|
if ( ( (*inputDicts)[ x ]->getFeatures() & requestedFeatures ) != requestedFeatures )
|
||||||
|
continue;
|
||||||
|
|
||||||
for( size_t y = 0; y < allWordWritings.size(); ++y )
|
for( size_t y = 0; y < allWordWritings.size(); ++y )
|
||||||
{
|
{
|
||||||
sptr< Dictionary::WordSearchRequest > sr =
|
sptr< Dictionary::WordSearchRequest > sr =
|
||||||
( searchType == PrefixMatch ) ?
|
( searchType == PrefixMatch ) ?
|
||||||
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], 40 ) :
|
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], requestedMaxResults ) :
|
||||||
(*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], stemmedMinLength, stemmedMaxSuffixVariation, stemmedMaxResults );
|
(*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], stemmedMinLength, stemmedMaxSuffixVariation, requestedMaxResults );
|
||||||
|
|
||||||
connect( sr.get(), SIGNAL( finished() ),
|
connect( sr.get(), SIGNAL( finished() ),
|
||||||
this, SLOT( requestFinished() ), Qt::QueuedConnection );
|
this, SLOT( requestFinished() ), Qt::QueuedConnection );
|
||||||
|
|
|
@ -42,9 +42,10 @@ private:
|
||||||
PrefixMatch,
|
PrefixMatch,
|
||||||
StemmedMatch
|
StemmedMatch
|
||||||
} searchType;
|
} searchType;
|
||||||
|
unsigned long requestedMaxResults;
|
||||||
|
Dictionary::Features requestedFeatures;
|
||||||
unsigned stemmedMinLength;
|
unsigned stemmedMinLength;
|
||||||
unsigned stemmedMaxSuffixVariation;
|
unsigned stemmedMaxSuffixVariation;
|
||||||
unsigned long stemmedMaxResults;
|
|
||||||
|
|
||||||
std::vector< sptr< Dictionary::Class > > const * inputDicts;
|
std::vector< sptr< Dictionary::Class > > const * inputDicts;
|
||||||
|
|
||||||
|
@ -74,10 +75,14 @@ public:
|
||||||
/// the exact matches would be found. All search results are put into a single
|
/// the exact matches would be found. All search results are put into a single
|
||||||
/// list containing the exact matches first, then the prefix ones. Duplicate
|
/// list containing the exact matches first, then the prefix ones. Duplicate
|
||||||
/// matches from different dictionaries are merged together.
|
/// matches from different dictionaries are merged together.
|
||||||
|
/// If a list of features is specified, the search will only be performed in
|
||||||
|
/// the dictionaries which possess all the features requested.
|
||||||
/// If there already was a prefixMatch operation underway, it gets cancelled
|
/// If there already was a prefixMatch operation underway, it gets cancelled
|
||||||
/// and the new one replaces it.
|
/// and the new one replaces it.
|
||||||
void prefixMatch( QString const &,
|
void prefixMatch( QString const &,
|
||||||
std::vector< sptr< Dictionary::Class > > const & );
|
std::vector< sptr< Dictionary::Class > > const &,
|
||||||
|
unsigned long maxResults = 40,
|
||||||
|
Dictionary::Features = Dictionary::NoFeatures );
|
||||||
|
|
||||||
/// Do a stemmed-match search in the given list of dictionaries. All comments
|
/// Do a stemmed-match search in the given list of dictionaries. All comments
|
||||||
/// from prefixMatch() generally apply as well.
|
/// from prefixMatch() generally apply as well.
|
||||||
|
@ -85,7 +90,8 @@ public:
|
||||||
std::vector< sptr< Dictionary::Class > > const &,
|
std::vector< sptr< Dictionary::Class > > const &,
|
||||||
unsigned minLength = 3,
|
unsigned minLength = 3,
|
||||||
unsigned maxSuffixVariation = 3,
|
unsigned maxSuffixVariation = 3,
|
||||||
unsigned long maxResults = 30 );
|
unsigned long maxResults = 30,
|
||||||
|
Dictionary::Features = Dictionary::NoFeatures );
|
||||||
|
|
||||||
/// Returns the vector containing search results from the last operation.
|
/// Returns the vector containing search results from the last operation.
|
||||||
/// If it didn't finish yet, the result is not final and may be changing
|
/// If it didn't finish yet, the result is not final and may be changing
|
||||||
|
|
Loading…
Reference in a new issue