mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
Properly handle non-normalized Unicode headwords
With this change, users should be able to search for headwords in any Unicode form. For example, U+03B5 GREEK SMALL LETTER EPSILON followed by U+0301 COMBINING ACUTE ACCENT is considered equal to U+03AD GREEK SMALL LETTER EPSILON WITH TONOS. No matter which form a headword takes in the dictionary, users will be able to find it, even when searching with a different form.
This commit is contained in:
parent
89755f8c09
commit
27c4bf7d30
|
@ -11,6 +11,7 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "dprintf.hh"
|
||||
#include "wstring_qt.hh"
|
||||
|
||||
//#define __BTREE_USE_LZO
|
||||
// LZO mode is experimental and unsupported. Tests didn't show any substantial
|
||||
|
@ -710,13 +711,13 @@ vector< WordArticleLink > BtreeIndex::readChain( char const * & ptr )
|
|||
void BtreeIndex::antialias( wstring const & str,
|
||||
vector< WordArticleLink > & chain )
|
||||
{
|
||||
wstring caseFolded = Folding::applySimpleCaseOnly( str );
|
||||
wstring caseFolded = Folding::applySimpleCaseOnly( gd::normalize( str ) );
|
||||
|
||||
for( unsigned x = chain.size(); x--; )
|
||||
{
|
||||
// If after applying case folding to each word they wouldn't match, we
|
||||
// drop the entry.
|
||||
if ( Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) !=
|
||||
if ( Folding::applySimpleCaseOnly( gd::normalize( Utf8::decode( chain[ x ].prefix + chain[ x ].word ) ) ) !=
|
||||
caseFolded )
|
||||
chain.erase( chain.begin() + x );
|
||||
else
|
||||
|
|
2
dsl.cc
2
dsl.cc
|
@ -668,7 +668,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
string DslDictionary::dslToHtml( wstring const & str )
|
||||
{
|
||||
// Normalize the string
|
||||
wstring normalizedStr = gd::toWString( gd::toQString( str ).normalized( QString::NormalizationForm_C ) );
|
||||
wstring normalizedStr = gd::normalize( str );
|
||||
|
||||
ArticleDom dom( normalizedStr );
|
||||
|
||||
|
|
|
@ -33,4 +33,10 @@ namespace gd
|
|||
|
||||
return wstring( ( const wchar * ) v.constData(), v.size() );
|
||||
}
|
||||
|
||||
// Returns a copy of `str` normalized to Unicode Normalization Form C.
// The string is converted to a QString, normalized with
// QString::NormalizationForm_C, and converted back to a wstring.
wstring normalize( const wstring & str )
|
||||
{
|
||||
return gd::toWString( gd::toQString( str ).normalized( QString::NormalizationForm_C ) );
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ namespace gd
|
|||
{
|
||||
QString toQString( wstring const & );
|
||||
wstring toWString( QString const & );
|
||||
wstring normalize( wstring const & );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue