mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-23 20:14:05 +00:00
opt: treat invisible character as whitespace (#1696)
* opt: remove invisible character * opt: remove invisible character * opt: whitespace and punct character * [autofix.ci] apply automated fixes * opt: whitespace and punct character --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
521c359b24
commit
5fb4526158
|
@ -154,7 +154,7 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
|
|||
out.reserve( in.size() );
|
||||
|
||||
for ( size_t left = in.size(); left--; ++nextChar ) {
|
||||
if ( !isWhitespace( *nextChar ) && !isPunct( *nextChar ) )
|
||||
if ( !isWhitespaceOrPunct( *nextChar ) )
|
||||
out.push_back( *nextChar );
|
||||
}
|
||||
|
||||
|
@ -163,12 +163,13 @@ wstring applyWhitespaceAndPunctOnly( wstring const & in )
|
|||
|
||||
bool isWhitespace( wchar ch )
|
||||
{
|
||||
return QChar::isSpace( ch );
|
||||
// Invisible (non-printable) characters are treated as whitespace as well.
|
||||
return QChar::isSpace( ch ) || !QChar::isPrint( ch );
|
||||
}
|
||||
|
||||
bool isWhitespaceOrPunct( wchar ch )
|
||||
{
|
||||
return QChar::isSpace( ch ) || QChar::isPunct( ch );
|
||||
return isWhitespace( ch ) || QChar::isPunct( ch );
|
||||
}
|
||||
|
||||
bool isPunct( wchar ch )
|
||||
|
@ -182,14 +183,13 @@ wstring trimWhitespaceOrPunct( wstring const & in )
|
|||
wstring::size_type wordSize = in.size();
|
||||
|
||||
// Skip any leading whitespace or punctuation
|
||||
while ( *wordBegin && ( Folding::isWhitespace( *wordBegin ) || Folding::isPunct( *wordBegin ) ) ) {
|
||||
while ( *wordBegin && Folding::isWhitespaceOrPunct( *wordBegin ) ) {
|
||||
++wordBegin;
|
||||
--wordSize;
|
||||
}
|
||||
|
||||
// Skip any trailing whitespace or punctuation
|
||||
while ( wordSize
|
||||
&& ( Folding::isWhitespace( wordBegin[ wordSize - 1 ] ) || Folding::isPunct( wordBegin[ wordSize - 1 ] ) ) )
|
||||
while ( wordSize && Folding::isWhitespaceOrPunct( wordBegin[ wordSize - 1 ] ) )
|
||||
--wordSize;
|
||||
|
||||
return wstring( wordBegin, wordSize );
|
||||
|
|
|
@ -69,10 +69,11 @@ bool containHtmlEntity( std::string const & text );
|
|||
} // namespace Html
|
||||
|
||||
const static QRegularExpression accentMark( R"(\p{M})", QRegularExpression::UseUnicodePropertiesOption );
|
||||
//contain unicode space mark and punctuation
|
||||
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{P}])", QRegularExpression::UseUnicodePropertiesOption );
|
||||
//contain unicode space and mark.
|
||||
const static QRegularExpression markSpace( R"([\p{M}\p{Z}])", QRegularExpression::UseUnicodePropertiesOption );
|
||||
// Matches Unicode marks (\p{M}), separators/spaces (\p{Z}), invisible "other" characters (\p{C}), and punctuation (\p{P}).
|
||||
const static QRegularExpression markPuncSpace( R"([\p{M}\p{Z}\p{C}\p{P}])",
|
||||
QRegularExpression::UseUnicodePropertiesOption );
|
||||
// Matches Unicode marks (\p{M}), separators/spaces (\p{Z}), and invisible "other" characters (\p{C}).
|
||||
const static QRegularExpression markSpace( R"([\p{M}\p{Z}\p{C}])", QRegularExpression::UseUnicodePropertiesOption );
|
||||
|
||||
const static QRegularExpression whiteSpace( "\\s+" );
|
||||
|
||||
|
|
Loading…
Reference in a new issue