goldendict-ng/wstring_qt.cc
Tvangeste 27c4bf7d30 Properly handle non-normalized unicode headwords
With that change users should be able to search headwords in
any form. For example:

U+03B5 GREEK SMALL LETTER EPSILON and U+0301 COMBINING ACUTE ACCENT

is considered equal to

U+03AD GREEK SMALL LETTER EPSILON WITH TONOS

No matter which form a headword takes in the dictionary, users will be able to find it,
even when searching with a different form.
2013-07-06 20:18:43 +02:00

43 lines
919 B
C++

#include "wstring_qt.hh"
#include <QVector>
namespace gd
{
/// Converts a wstring into a QString.
///
/// The conversion routine is chosen at compile time: builds that define
/// __WIN32 pass the character buffer to QString::fromUcs4(), all other
/// builds use QString::fromStdWString().
///
/// @param in  Text to convert.
/// @return    The QString produced by the selected Qt conversion.
QString toQString( wstring const & in )
{
#ifdef __WIN32
  // No length is supplied, so Qt reads the buffer up to its first null element.
  // NOTE(review): presumably wstring holds 32-bit code units on this
  // platform -- confirm against the typedef in the header.
  return QString::fromUcs4( in.c_str() );
#else
  return QString::fromStdWString( in );
#endif
}
/// Converts a QString into a wstring built from the string's UCS-4 form.
///
/// Every trailing null element of QString::toUcs4()'s result is dropped
/// before the wstring is built (see the workaround note in the body).
///
/// @param in  Text to convert.
/// @return    The elements of in.toUcs4(), minus any trailing nulls.
wstring toWString( QString const & in )
{
  QVector< unsigned int > const ucs4 = in.toUcs4();

  // Workaround for QString instances that contain non-BMP characters:
  // Qt may leave unexpected null characters at the end of the toUcs4()
  // result, and those may confuse the btree indexer.
  // Related: https://bugreports.qt-project.org/browse/QTBUG-25536
  int length = ucs4.size();
  while ( length > 0 && ucs4[ length - 1 ] == 0 )
    --length;

  // Build the result element by element from the iterator range instead of
  // casting the unsigned int buffer to a wchar pointer: the cast was type
  // punning and silently required wchar and unsigned int to be the same size.
  unsigned int const * const first = ucs4.constData();
  return wstring( first, first + length );
}
/// Returns str converted to Unicode Normalization Form C.
///
/// The text is round-tripped through QString so that Qt's
/// QString::normalized() performs the normalization.
///
/// @param str  Text to normalize.
/// @return     The normalized text, converted back with gd::toWString().
wstring normalize( const wstring & str )
{
  QString const original = gd::toQString( str );
  QString const composed = original.normalized( QString::NormalizationForm_C );
  return gd::toWString( composed );
}
}