Compare commits

..

1 commit

Author SHA1 Message Date
shenleban tongying 243ce1c7b7
Merge daf2e81ae3 into 20fcea33e1 2024-11-12 20:01:59 -05:00

View file

@@ -20,23 +20,41 @@ bool isCombiningMark( wchar ch )
wstring apply( wstring const & in, bool preserveWildcards ) wstring apply( wstring const & in, bool preserveWildcards )
{ {
// remove diacritics (normalization), white space, punt, //remove space and accent;
auto temp = QString::fromStdU32String( in ) auto withPunc = QString::fromStdU32String( in )
.normalized( QString::NormalizationForm_KD ) .normalized( QString::NormalizationForm_KD )
.remove( RX::markSpace ) .remove( RX::markSpace )
.removeIf( [ preserveWildcards ]( const QChar & ch ) -> bool { .toStdU32String();
return ch.isPunct()
&& !( preserveWildcards && ( ch == '\\' || ch == '?' || ch == '*' || ch == '[' || ch == ']' ) ); //First, strip diacritics and apply ws/punctuation removal
} ) wstring withoutDiacritics;
.toStdU32String();
// case folding withoutDiacritics.reserve( withPunc.size() );
std::u32string caseFolded;
caseFolded.reserve( temp.size() );
wchar buf[ foldCaseMaxOut ]; for ( auto const & ch : withPunc ) {
for ( const char32_t ch : temp ) {
auto n = foldCase( ch, buf ); if ( !isPunct( ch )
caseFolded.append( buf, n ); || ( preserveWildcards && ( ch == '\\' || ch == '?' || ch == '*' || ch == '[' || ch == ']' ) ) ) {
withoutDiacritics.push_back( ch );
}
} }
// Now, fold the case
wstring caseFolded;
caseFolded.reserve( withoutDiacritics.size() * foldCaseMaxOut );
wchar const * nextChar = withoutDiacritics.data();
wchar buf[ foldCaseMaxOut ];
for ( size_t left = withoutDiacritics.size(); left--; ) {
caseFolded.append( buf, foldCase( *nextChar++, buf ) );
}
return caseFolded; return caseFolded;
} }