fix: CJK extension characters cannot be searched (#2016)
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run

* fix: CJK extension characters cannot be searched

* Update src/common/folding.cc

* Update src/common/globalregex.hh
This commit is contained in:
xiaoyifang 2024-12-12 15:09:44 +08:00 committed by GitHub
parent a45a3092b0
commit a5337770da
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 4 additions and 5 deletions

View file

@ -22,8 +22,8 @@ std::u32string apply( std::u32string const & in, bool preserveWildcards )
{ {
// remove diacritics (normalization), white space, punctuation, // remove diacritics (normalization), white space, punctuation,
auto temp = QString::fromStdU32String( in ) auto temp = QString::fromStdU32String( in )
.normalized( QString::NormalizationForm_KD )
.remove( RX::markSpace ) .remove( RX::markSpace )
.normalized( QString::NormalizationForm_KD )
.removeIf( [ preserveWildcards ]( const QChar & ch ) -> bool { .removeIf( [ preserveWildcards ]( const QChar & ch ) -> bool {
return ch.isPunct() return ch.isPunct()
&& !( preserveWildcards && ( ch == '\\' || ch == '?' || ch == '*' || ch == '[' || ch == ']' ) ); && !( preserveWildcards && ( ch == '\\' || ch == '?' || ch == '*' || ch == '[' || ch == ']' ) );
@ -155,8 +155,7 @@ std::u32string applyWhitespaceAndPunctOnly( std::u32string const & in )
bool isWhitespace( char32_t ch ) bool isWhitespace( char32_t ch )
{ {
//invisible character should be treated as whitespace as well. return QChar::isSpace( ch );
return QChar::isSpace( ch ) || !QChar::isPrint( ch );
} }
bool isWhitespaceOrPunct( char32_t ch ) bool isWhitespaceOrPunct( char32_t ch )

View file

@ -71,8 +71,8 @@ const static QRegularExpression accentMark( R"(\p{M})", QRegularExpression::UseU
// matches Unicode marks, separators (spaces), invisible (other/control) characters, and punctuation // matches Unicode marks, separators (spaces), invisible (other/control) characters, and punctuation
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{C}\p{P}])", const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{C}\p{P}])",
QRegularExpression::UseUnicodePropertiesOption ); QRegularExpression::UseUnicodePropertiesOption );
// matches Unicode spaces, marks, and invisible characters // matches Unicode spaces and marks
const static QRegularExpression markSpace( R"([\p{M}\p{Z}\p{C}])", QRegularExpression::UseUnicodePropertiesOption ); const static QRegularExpression markSpace( R"([\p{M}\p{Z}])", QRegularExpression::UseUnicodePropertiesOption );
const static QRegularExpression whiteSpace( "\\s+" ); const static QRegularExpression whiteSpace( "\\s+" );