diff --git a/src/common/folding.cc b/src/common/folding.cc index a5bceffb..b28397b4 100644 --- a/src/common/folding.cc +++ b/src/common/folding.cc @@ -22,8 +22,8 @@ std::u32string apply( std::u32string const & in, bool preserveWildcards ) { // remove diacritics (normalization), white space, punt, auto temp = QString::fromStdU32String( in ) - .normalized( QString::NormalizationForm_KD ) .remove( RX::markSpace ) + .normalized( QString::NormalizationForm_KD ) .removeIf( [ preserveWildcards ]( const QChar & ch ) -> bool { return ch.isPunct() && !( preserveWildcards && ( ch == '\\' || ch == '?' || ch == '*' || ch == '[' || ch == ']' ) ); @@ -155,8 +155,7 @@ std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in ) bool isWhitespace( char32_t ch ) { - //invisible character should be treated as whitespace as well. - return QChar::isSpace( ch ) || !QChar::isPrint( ch ); + return QChar::isSpace( ch ); } bool isWhitespaceOrPunct( char32_t ch ) diff --git a/src/common/globalregex.hh b/src/common/globalregex.hh index 85fbc460..c9068960 100644 --- a/src/common/globalregex.hh +++ b/src/common/globalregex.hh @@ -71,8 +71,8 @@ const static QRegularExpression accentMark( R"(\p{M})", QRegularExpression::UseU //contain unicode space mark,invisible, and punctuation const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{C}\p{P}])", QRegularExpression::UseUnicodePropertiesOption ); -//contain unicode space and mark.invisible -const static QRegularExpression markSpace( R"([\p{M}\p{Z}\p{C}])", QRegularExpression::UseUnicodePropertiesOption ); +//contain unicode space and mark. +const static QRegularExpression markSpace( R"([\p{M}\p{Z}])", QRegularExpression::UseUnicodePropertiesOption ); const static QRegularExpression whiteSpace( "\\s+" );