From fbec70e41a3e5f973c033436120d54d3a20e54b6 Mon Sep 17 00:00:00 2001 From: shenleban tongying Date: Tue, 12 Nov 2024 22:06:27 -0500 Subject: [PATCH] opt: simplify Folding::apply --- src/common/folding.cc | 48 ++++++++++++++----------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/src/common/folding.cc b/src/common/folding.cc index a6073342..f737371a 100644 --- a/src/common/folding.cc +++ b/src/common/folding.cc @@ -20,41 +20,23 @@ bool isCombiningMark( wchar ch ) wstring apply( wstring const & in, bool preserveWildcards ) { - //remove space and accent; - auto withPunc = QString::fromStdU32String( in ) - .normalized( QString::NormalizationForm_KD ) - .remove( RX::markSpace ) - .toStdU32String(); - - //First, strip diacritics and apply ws/punctuation removal - wstring withoutDiacritics; - - withoutDiacritics.reserve( withPunc.size() ); - - - for ( auto const & ch : withPunc ) { - - if ( !isPunct( ch ) - || ( preserveWildcards && ( ch == '\\' || ch == '?' || ch == '*' || ch == '[' || ch == ']' ) ) ) { - withoutDiacritics.push_back( ch ); - } - } - - - // Now, fold the case - - wstring caseFolded; - - caseFolded.reserve( withoutDiacritics.size() * foldCaseMaxOut ); - - wchar const * nextChar = withoutDiacritics.data(); - + // remove diacritics (normalization), white space, and punctuation + auto temp = QString::fromStdU32String( in ) + .normalized( QString::NormalizationForm_KD ) + .remove( RX::markSpace ) + .removeIf( [ preserveWildcards ]( const QChar & ch ) -> bool { + return ch.isPunct() + && !( preserveWildcards && ( ch == '\\' || ch == '?' 
|| ch == '*' || ch == '[' || ch == ']' ) ); + } ) + .toStdU32String(); + // case folding + std::u32string caseFolded; + caseFolded.reserve( temp.size() ); wchar buf[ foldCaseMaxOut ]; - - for ( size_t left = withoutDiacritics.size(); left--; ) { - caseFolded.append( buf, foldCase( *nextChar++, buf ) ); + for ( const char32_t ch : temp ) { + auto n = foldCase( ch, buf ); + caseFolded.append( buf, n ); } - return caseFolded; }