opt: Folding::trimWhitespace refactor

Add a new trimWhitespace overload that accepts a QString.
Reformat the touched code in modern C++ style (range-based for loops, auto, nullptr).
This commit is contained in:
xiaoyifang 2023-04-28 23:21:51 +08:00
parent 93c3bdc007
commit b8d4d49f54
5 changed files with 87 additions and 105 deletions

View file

@ -234,6 +234,11 @@ wstring trimWhitespace( wstring const & in )
return wstring( wordBegin, wordSize );
}
/// QString overload of trimWhitespace: returns a copy of `in` with leading
/// and trailing whitespace removed, as defined by QString::trimmed().
/// NOTE(review): QString::trimmed() may classify whitespace characters
/// differently from the wstring overload above -- confirm both agree for the
/// inputs callers pass.
QString trimWhitespace( QString const & in )
{
return in.trimmed();
}
void normalizeWhitespace( wstring & str )
{
for( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char

View file

@ -68,8 +68,8 @@ QString trimWhitespaceOrPunct( QString const & in );
/// Removes any whitespace from the beginning and the end of
/// the word.
wstring trimWhitespace( wstring const & );
/// Removes any whitespace from the beginning and the end of the string (QString overload).
QString trimWhitespace( QString const & in );
/// Turns any sequences of consecutive whitespace into a single basic space.
void normalizeWhitespace( wstring & );
/// Same as apply( wstring ), but without any heap operations, therefore

View file

@ -651,16 +651,15 @@ articleData = U"\n\r\t" + gd::toWString( QString( "DICTZIP error: " ) + dict_e
// Does one of the results match the requested word? If so, we'd choose
// it as our headword.
for( list< wstring >::iterator i = lst.begin(); i != lst.end(); ++i )
{
unescapeDsl( *i );
normalizeHeadword( *i );
for ( auto & i : lst ) {
unescapeDsl( i );
normalizeHeadword( i );
bool found;
if( ignoreDiacritics )
found = Folding::applyDiacriticsOnly( Folding::trimWhitespace( *i ) ) == Folding::applyDiacriticsOnly( requestedHeadwordFolded );
found = Folding::applyDiacriticsOnly( Folding::trimWhitespace( i ) ) == Folding::applyDiacriticsOnly( requestedHeadwordFolded );
else
found = Folding::trimWhitespace( *i ) == requestedHeadwordFolded;
found = Folding::trimWhitespace( i ) == requestedHeadwordFolded;
if ( found )
{
@ -761,9 +760,8 @@ string DslDictionary::processNodeChildren( ArticleDom::Node const & node )
{
string result;
for( ArticleDom::Node::const_iterator i = node.begin(); i != node.end();
++i )
result += nodeToHtml( *i );
for ( const auto & i : node )
result += nodeToHtml( i );
return result;
}
@ -1598,11 +1596,10 @@ void DslArticleRequest::run()
vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );
for( unsigned x = 0; x < alts.size(); ++x )
{
for ( auto & alt : alts ) {
/// Make an additional query for each alt
vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
@ -1615,8 +1612,7 @@ void DslArticleRequest::run()
wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
for( unsigned x = 0; x < chain.size(); ++x )
{
for ( auto & x : chain ) {
// Check if we're cancelled occasionally
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
@ -1635,10 +1631,10 @@ void DslArticleRequest::run()
try
{
dict.loadArticle( chain[ x ].articleOffset, wordCaseFolded, ignoreDiacritics, tildeValue,
dict.loadArticle( x.articleOffset, wordCaseFolded, ignoreDiacritics, tildeValue,
displayedHeadword, headwordIndex, articleBody );
if ( !articlesIncluded.insert( std::make_pair( chain[ x ].articleOffset,
if ( !articlesIncluded.insert( std::make_pair( x.articleOffset,
headwordIndex ) ).second )
continue; // We already have this article in the body.
@ -1872,23 +1868,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
{
vector< sptr< Dictionary::Class > > dictionaries;
for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
++i )
{
for ( const auto & fileName : fileNames ) {
// Try .dsl and .dsl.dz suffixes
bool uncompressedDsl = ( i->size() >= 4 &&
strcasecmp( i->c_str() + ( i->size() - 4 ), ".dsl" ) == 0 );
bool uncompressedDsl = ( fileName.size() >= 4 &&
strcasecmp( fileName.c_str() + ( fileName.size() - 4 ), ".dsl" ) == 0 );
if ( !uncompressedDsl &&
( i->size() < 7 ||
strcasecmp( i->c_str() + ( i->size() - 7 ), ".dsl.dz" ) != 0 ) )
( fileName.size() < 7 ||
strcasecmp( fileName.c_str() + ( fileName.size() - 7 ), ".dsl.dz" ) != 0 ) )
continue;
// Make sure it's not an abbreviation file
int extSize = ( uncompressedDsl ? 4 : 7 );
if ( i->size() - extSize >= 5 &&
strncasecmp( i->c_str() + i->size() - extSize - 5, "_abrv", 5 ) == 0 )
if ( fileName.size() - extSize >= 5 &&
strncasecmp( fileName.c_str() + fileName.size() - extSize - 5, "_abrv", 5 ) == 0 )
{
// It is, skip it
continue;
@ -1898,11 +1892,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
try
{
vector< string > dictFiles( 1, *i );
vector< string > dictFiles( 1, fileName );
// Check if there is an 'abrv' file present
string baseName = ( (*i)[ i->size() - 4 ] == '.' ) ?
string( *i, 0, i->size() - 4 ) : string( *i, 0, i->size() - 7 );
string baseName = ( fileName[ fileName.size() - 4 ] == '.' ) ?
string( fileName, 0, fileName.size() - 4 ) : string( fileName, 0, fileName.size() - 7 );
string abrvFileName;
@ -1930,7 +1924,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
indexIsOldOrBad( indexFile, zipFileName.size() ) )
{
DslScanner scanner( *i );
DslScanner scanner( fileName );
try { // Here we intercept any errors during the read to save line at
// which the incident happened. We need alive scanner for that.
@ -2031,13 +2025,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
// If the string has any dsl markup, we strip it
string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() );
for( list< wstring >::iterator i = keys.begin(); i != keys.end();
++i )
{
unescapeDsl( *i );
normalizeHeadword( *i );
for ( auto & key : keys ) {
unescapeDsl( key );
normalizeHeadword( key );
abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value;
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
}
}
@ -2048,17 +2040,15 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
chunks.addToBlock( &sz, sizeof( uint32_t ) );
for( map< string, string >::const_iterator i = abrv.begin();
i != abrv.end(); ++i )
{
for ( const auto & i : abrv ) {
// GD_DPRINTF( "%s:%s\n", i->first.c_str(), i->second.c_str() );
sz = i->first.size();
sz = i.first.size();
chunks.addToBlock( &sz, sizeof( uint32_t ) );
chunks.addToBlock( i->first.data(), sz );
sz = i->second.size();
chunks.addToBlock( i.first.data(), sz );
sz = i.second.size();
chunks.addToBlock( &sz, sizeof( uint32_t ) );
chunks.addToBlock( i->second.data(), sz );
chunks.addToBlock( i.second.data(), sz );
}
}
catch( std::exception & e )
@ -2096,7 +2086,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
{
if ( !isDslWs( curString[ x ] ) )
{
gdWarning( "Garbage string in %s at offset 0x%lX\n", i->c_str(), (unsigned long) curOffset );
gdWarning( "Garbage string in %s at offset 0x%lX\n", fileName.c_str(), (unsigned long) curOffset );
break;
}
}
@ -2120,7 +2110,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
{
if ( ! ( hasString = scanner.readNextLineWithoutComments( curString, curOffset ) ) )
{
gdWarning( "Premature end of file %s\n", i->c_str() );
gdWarning( "Premature end of file %s\n", fileName.c_str() );
break;
}
@ -2149,12 +2139,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
chunks.addToBlock( &articleOffset, sizeof( articleOffset ) );
for( list< wstring >::iterator j = allEntryWords.begin();
j != allEntryWords.end(); ++j )
{
unescapeDsl( *j );
normalizeHeadword( *j );
indexedWords.addWord( *j, descOffset, maxHeadwordSize );
for ( auto & allEntryWord : allEntryWords ) {
unescapeDsl( allEntryWord );
normalizeHeadword( allEntryWord );
indexedWords.addWord( allEntryWord, descOffset, maxHeadwordSize );
}
++articleCount;
@ -2273,23 +2261,20 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
chunks.addToBlock( &articleSize, sizeof( articleSize ) );
for( QVector< InsidedCard >::iterator i = insidedCards.begin(); i != insidedCards.end(); ++i )
{
for ( auto & insidedCard : insidedCards ) {
uint32_t descOffset = chunks.startNewBlock();
chunks.addToBlock( &(*i).offset, sizeof( (*i).offset ) );
chunks.addToBlock( &(*i).size, sizeof( (*i).size ) );
chunks.addToBlock( &insidedCard.offset, sizeof( insidedCard.offset ) );
chunks.addToBlock( &insidedCard.size, sizeof( insidedCard.size ) );
for( int x = 0; x < (*i).headwords.size(); x++ )
for( int x = 0; x < insidedCard.headwords.size(); x++ )
{
allEntryWords.clear();
expandOptionalParts( (*i).headwords[ x ], &allEntryWords );
expandOptionalParts( insidedCard.headwords[ x ], &allEntryWords );
for( list< wstring >::iterator j = allEntryWords.begin();
j != allEntryWords.end(); ++j )
{
unescapeDsl( *j );
normalizeHeadword( *j );
indexedWords.addWord( *j, descOffset, maxHeadwordSize );
for ( auto & allEntryWord : allEntryWords ) {
unescapeDsl( allEntryWord );
normalizeHeadword( allEntryWord );
indexedWords.addWord( allEntryWord, descOffset, maxHeadwordSize );
}
wordCount += allEntryWords.size();
@ -2380,7 +2365,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
catch( std::exception & e )
{
gdWarning( "DSL dictionary reading failed: %s:%u, error: %s\n",
i->c_str(), atLine, e.what() );
fileName.c_str(), atLine, e.what() );
}
}

View file

@ -492,11 +492,10 @@ void XdxfArticleRequest::run()
vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );
for( unsigned x = 0; x < alts.size(); ++x )
{
for ( auto & alt : alts ) {
/// Make an additional query for each alt
vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );
chain.insert( chain.end(), altChain.begin(), altChain.end() );
}
@ -511,26 +510,25 @@ void XdxfArticleRequest::run()
if( ignoreDiacritics )
wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );
for( unsigned x = 0; x < chain.size(); ++x )
{
for ( auto & x : chain ) {
if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
{
finish();
return;
}
if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
if ( articlesIncluded.find( x.articleOffset ) != articlesIncluded.end() )
continue; // We already have this article in the body.
// Now grab that article
string headword, articleText;
headword = chain[ x ].word;
headword = x.word;
try
{
dict.loadArticle( chain[ x ].articleOffset, articleText );
dict.loadArticle( x.articleOffset, articleText );
// Ok. Now, does it go to main articles, or to alternate ones? We list
// main ones first, and alternates after.
@ -550,7 +548,7 @@ void XdxfArticleRequest::run()
Folding::applySimpleCaseOnly( Utf8::decode( headword ) ),
pair< string, string >( headword, articleText ) ) );
articlesIncluded.insert( chain[ x ].articleOffset );
articlesIncluded.insert( x.articleOffset );
}
catch( std::exception &ex )
{
@ -923,9 +921,8 @@ void indexArticle( GzippedFile & gzFile,
// Add words to index
for( list< QString >::const_iterator i = words.begin(); i != words.end();
++i )
indexedWords.addWord( gd::toWString( *i ), offset );
for ( const auto & word : words )
indexedWords.addWord( gd::toWString( word ), offset );
++articleCount;
@ -1075,22 +1072,20 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
{
vector< sptr< Dictionary::Class > > dictionaries;
for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
++i )
{
for ( const auto & fileName : fileNames ) {
// Only allow .xdxf and .xdxf.dz suffixes
if ( ( i->size() < 5 || strcasecmp( i->c_str() + ( i->size() - 5 ), ".xdxf" ) != 0 ) &&
( i->size() < 8 ||
strcasecmp( i->c_str() + ( i->size() - 8 ), ".xdxf.dz" ) != 0 ) )
if ( ( fileName.size() < 5 || strcasecmp( fileName.c_str() + ( fileName.size() - 5 ), ".xdxf" ) != 0 ) &&
( fileName.size() < 8 ||
strcasecmp( fileName.c_str() + ( fileName.size() - 8 ), ".xdxf.dz" ) != 0 ) )
continue;
try
{
vector< string > dictFiles( 1, *i );
vector< string > dictFiles( 1, fileName );
string baseName = ( (*i)[ i->size() - 5 ] == '.' ) ?
string( *i, 0, i->size() - 5 ) : string( *i, 0, i->size() - 8 );
string baseName = ( fileName[ fileName.size() - 5 ] == '.' ) ?
string( fileName, 0, fileName.size() - 5 ) : string( fileName, 0, fileName.size() - 8 );
// See if there's a zip file with resources present. If so, include it.
@ -1111,7 +1106,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
{
// Building the index
gdDebug( "Xdxf: Building the index for dictionary: %s\n", i->c_str() );
gdDebug( "Xdxf: Building the index for dictionary: %s\n", fileName.c_str() );
//initializing.indexingDictionary( nameFromFileName( dictFiles[ 0 ] ) );
@ -1288,10 +1283,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
else if ( stream.isStartElement() && stream.name() == u"abbr_v" )
{
s = readElementText( stream );
value = Utf8::encode( Folding::trimWhitespace( gd::toWString( s ) ) );
for( list< wstring >::iterator i = keys.begin(); i != keys.end(); ++i )
{
abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value;
value = Folding::trimWhitespace( s ).toStdString();
for ( auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
}
keys.clear();
}
@ -1312,10 +1306,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
else if ( stream.isStartElement() && stream.name() == u"v" )
{
s = readElementText( stream );
value = Utf8::encode( Folding::trimWhitespace( gd::toWString( s ) ) );
for( list< wstring >::iterator i = keys.begin(); i != keys.end(); ++i )
{
abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value;
value = Folding::trimWhitespace( s ).toStdString();
for ( auto & key : keys ) {
abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
}
keys.clear();
}
@ -1346,14 +1339,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
chunks.addToBlock( &sz, sizeof( uint32_t ) );
for( map< string, string >::const_iterator i = abrv.begin(); i != abrv.end(); ++i )
{
sz = i->first.size();
for ( const auto & i : abrv ) {
sz = i.first.size();
chunks.addToBlock( &sz, sizeof( uint32_t ) );
chunks.addToBlock( i->first.data(), sz );
sz = i->second.size();
chunks.addToBlock( i.first.data(), sz );
sz = i.second.size();
chunks.addToBlock( &sz, sizeof( uint32_t ) );
chunks.addToBlock( i->second.data(), sz );
chunks.addToBlock( i.second.data(), sz );
}
}
@ -1438,7 +1430,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
catch( std::exception & e )
{
gdWarning( "Xdxf dictionary initializing failed: %s, error: %s\n",
i->c_str(), e.what() );
fileName.c_str(), e.what() );
}
}

View file

@ -425,13 +425,13 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const
el.setTagName( "span" );
el.setAttribute( "class", "xdxf_abbr" );
if( type == XDXF && pAbrv != NULL )
if( type == XDXF && pAbrv != nullptr )
{
string val = Utf8::encode( Folding::trimWhitespace( gd::toWString( el.text() ) ) );
string val = Folding::trimWhitespace( el.text() ).toStdString();
// If we have such a key, display a title
map< string, string >::const_iterator i = pAbrv->find( val );
auto i = pAbrv->find( val );
if ( i != pAbrv->end() )
{