Fix search for escaped wildcard symbols in the input line

This commit is contained in:
Abs62 2014-03-17 23:14:16 +04:00
parent d320b1ed70
commit 6902ca5eda
3 changed files with 53 additions and 11 deletions

View file

@ -83,9 +83,12 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & str )
try
{
wstring folded = Folding::apply( str );
// Exact search - unescape all wildcard symbols
wstring word = Folding::unescapeWildcardSymbols( str );
wstring folded = Folding::apply( word );
if( folded.empty() )
folded = Folding::applyWhitespaceOnly( str );
folded = Folding::applyWhitespaceOnly( word );
bool exactMatch;
@ -102,7 +105,7 @@ vector< WordArticleLink > BtreeIndex::findArticles( wstring const & str )
{
result = readChain( chainOffset );
antialias( str, result );
antialias( word, result );
}
}
catch( std::exception & e )
@ -207,11 +210,17 @@ void BtreeWordSearchRequest::run()
}
QRegExp regexp;
bool useWildcards = allowMiddleMatches
&& ( str.find( '*' ) != wstring::npos ||
bool useWildcards = false;
if( allowMiddleMatches )
useWildcards = ( str.find( '*' ) != wstring::npos ||
str.find( '?' ) != wstring::npos ||
str.find( '[' ) != wstring::npos ||
str.find( ']' ) != wstring::npos);
str.find( ']' ) != wstring::npos );
else
{
// Exact search - unescape all wildcard symbols
str = Folding::unescapeWildcardSymbols( str );
}
wstring folded = Folding::apply( str );
@ -253,8 +262,13 @@ void BtreeWordSearchRequest::run()
else
{
if( ch == '\\' || ch == '*' || ch == '?' || ch == '[' || ch == ']' )
{
if( folded.empty() )
continue;
else
break;
}
}
folded.push_back( ch );
}
@ -320,9 +334,10 @@ void BtreeWordSearchRequest::run()
{
if( useWildcards )
{
wstring result = Folding::applyDiacriticsOnly( Utf8::decode( chain[ x ].word ) );
wstring fullword = Utf8::decode( chain[ x ].prefix + chain[ x ].word );
wstring result = Folding::applyDiacriticsOnly( fullword );
if( regexp.indexIn( gd::toQString( result ) ) == 0 )
matches.push_back( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) );
matches.push_back( fullword );
}
else
{

View file

@ -664,4 +664,28 @@ void normalizeWhitespace( wstring & str )
}
}
/// Builds a copy of @p in with escaped wildcard symbols turned into plain ones.
///
/// Whenever one of '*', '?', '[' or ']' is met and the last character already
/// written to the output is a backslash, that backslash is overwritten by the
/// wildcard character. Every other character is copied as is. The scan ends at
/// the first zero character of the input (or at its end, whichever is first).
wstring unescapeWildcardSymbols( wstring const & in )
{
  wstring unescaped;
  unescaped.reserve( in.size() );

  for( wstring::size_type pos = 0; pos < in.size() && in[ pos ]; ++pos )
  {
    wchar const ch = in[ pos ];
    bool const isWildcard = ( ch == '*' || ch == '?' || ch == '[' || ch == ']' );

    // The check is made against the output built so far, not against the input
    bool const escaped = isWildcard
                         && !unescaped.empty()
                         && unescaped[ unescaped.size() - 1 ] == '\\';

    if( escaped )
      unescaped[ unescaped.size() - 1 ] = ch; // Drop the backslash, keep the symbol
    else
      unescaped.push_back( ch );
  }

  return unescaped;
}
}

View file

@ -78,6 +78,9 @@ void normalizeWhitespace( wstring & );
/// generation would be too slow.
//ssize_t apply( wchar const * in, wchar * out, size_t outSize );
/// Unescape all wildcard symbols (for exact search)
wstring unescapeWildcardSymbols( wstring const & );
}
#endif