Fix search for escaped wildcard symbols in the input line

This commit is contained in:
Abs62 2014-03-17 23:14:16 +04:00
parent d320b1ed70
commit 6902ca5eda
3 changed files with 53 additions and 11 deletions

View file

@ -83,9 +83,12 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & str )
try try
{ {
wstring folded = Folding::apply( str ); // Exact search - unescape all wildcard symbols
wstring word = Folding::unescapeWildcardSymbols( str );
wstring folded = Folding::apply( word );
if( folded.empty() ) if( folded.empty() )
folded = Folding::applyWhitespaceOnly( str ); folded = Folding::applyWhitespaceOnly( word );
bool exactMatch; bool exactMatch;
@ -102,7 +105,7 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & str )
{ {
result = readChain( chainOffset ); result = readChain( chainOffset );
antialias( str, result ); antialias( word, result );
} }
} }
catch( std::exception & e ) catch( std::exception & e )
@ -207,11 +210,17 @@ void BtreeWordSearchRequest::run()
} }
QRegExp regexp; QRegExp regexp;
bool useWildcards = allowMiddleMatches bool useWildcards = false;
&& ( str.find( '*' ) != wstring::npos || if( allowMiddleMatches )
str.find( '?' ) != wstring::npos || useWildcards = ( str.find( '*' ) != wstring::npos ||
str.find( '[' ) != wstring::npos || str.find( '?' ) != wstring::npos ||
str.find( ']' ) != wstring::npos); str.find( '[' ) != wstring::npos ||
str.find( ']' ) != wstring::npos );
else
{
// Exact search - unescape all wildcard symbols
str = Folding::unescapeWildcardSymbols( str );
}
wstring folded = Folding::apply( str ); wstring folded = Folding::apply( str );
@ -253,7 +262,12 @@ void BtreeWordSearchRequest::run()
else else
{ {
if( ch == '\\' || ch == '*' || ch == '?' || ch == '[' || ch == ']' ) if( ch == '\\' || ch == '*' || ch == '?' || ch == '[' || ch == ']' )
break; {
if( folded.empty() )
continue;
else
break;
}
} }
folded.push_back( ch ); folded.push_back( ch );
@ -320,9 +334,10 @@ void BtreeWordSearchRequest::run()
{ {
if( useWildcards ) if( useWildcards )
{ {
wstring result = Folding::applyDiacriticsOnly( Utf8::decode( chain[ x ].word ) ); wstring fullword = Utf8::decode( chain[ x ].prefix + chain[ x ].word );
wstring result = Folding::applyDiacriticsOnly( fullword );
if( regexp.indexIn( gd::toQString( result ) ) == 0 ) if( regexp.indexIn( gd::toQString( result ) ) == 0 )
matches.push_back( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ); matches.push_back( fullword );
} }
else else
{ {

View file

@ -664,4 +664,28 @@ void normalizeWhitespace( wstring & str )
} }
} }
/// Returns a copy of the input in which every wildcard symbol ('*', '?',
/// '[' or ']') that is directly preceded by a backslash has lost that
/// backslash. Backslashes not followed by one of those symbols are copied
/// unchanged. Scanning ends at the first NUL character, if there is one.
wstring unescapeWildcardSymbols( wstring const & in )
{
  wstring unescaped;
  unescaped.reserve( in.size() );

  for( wstring::size_type pos = 0; pos < in.size() && in[ pos ]; ++pos )
  {
    wstring::value_type const ch = in[ pos ];
    bool const isWildcard = ( ch == '*' || ch == '?' || ch == '[' || ch == ']' );

    // A backslash is always copied to the output first, so an escaping
    // backslash is simply the last character written so far.
    if( isWildcard && !unescaped.empty()
        && unescaped[ unescaped.size() - 1 ] == '\\' )
    {
      // Overwrite the escaping backslash with the symbol itself
      unescaped[ unescaped.size() - 1 ] = ch;
    }
    else
      unescaped.push_back( ch );
  }

  return unescaped;
}
} }

View file

@ -78,6 +78,9 @@ void normalizeWhitespace( wstring & );
/// generation would be too slow. /// generation would be too slow.
//ssize_t apply( wchar const * in, wchar * out, size_t outSize ); //ssize_t apply( wchar const * in, wchar * out, size_t outSize );
/// Unescape all wildcard symbols (for exact search)
wstring unescapeWildcardSymbols( wstring const & );
} }
#endif #endif