opt: refactor Folding::trimWhitespace

Add a new trimWhitespace overload that accepts a QString. Reformat the touched code in modern C++ style (range-based for loops, auto, nullptr).
2024-11-27 15:24:05 +00:00 · 2023-04-28 23:21:51 +08:00 · 2023-04-28 23:21:51 +08:00 · b8d4d49f54
parent 93c3bdc007
commit b8d4d49f54
5 changed files with 87 additions and 105 deletions
--- a/src/common/folding.cc
+++ b/src/common/folding.cc
@ -234,6 +234,11 @@ wstring trimWhitespace( wstring const & in )
  return wstring( wordBegin, wordSize );
 }

+QString trimWhitespace( QString const & in )
+{
+  return in.trimmed();
+}
+
 void normalizeWhitespace( wstring & str )
 {
  for( size_t x = str.size(); x-- > 1; ) // >1 -- Don't test the first char
--- a/src/common/folding.hh
+++ b/src/common/folding.hh
@ -68,8 +68,8 @@ QString trimWhitespaceOrPunct( QString const & in );
 /// Removes any whitespace from the beginning and the end of
 /// the word.
 wstring trimWhitespace( wstring const & );
-
-/// Turns any sequences of consecutive whitespace into a single basic space.
+QString trimWhitespace( QString const & in );
+  /// Turns any sequences of consecutive whitespace into a single basic space.
 void normalizeWhitespace( wstring & );

 /// Same as apply( wstring ), but without any heap operations, therefore
--- a/src/dict/dsl.cc
+++ b/src/dict/dsl.cc
@ -651,16 +651,15 @@ articleData =  U"\n\r\t"  + gd::toWString( QString( "DICTZIP error: " ) + dict_e
        // Does one of the results match the requested word? If so, we'd choose
        // it as our headword.

-        for( list< wstring >::iterator i = lst.begin(); i != lst.end(); ++i )
-        {
-          unescapeDsl( *i );
-          normalizeHeadword( *i );
+        for ( auto & i : lst ) {
+          unescapeDsl( i );
+          normalizeHeadword( i );

          bool found;
          if( ignoreDiacritics )
-            found = Folding::applyDiacriticsOnly( Folding::trimWhitespace( *i ) ) == Folding::applyDiacriticsOnly( requestedHeadwordFolded );
+            found = Folding::applyDiacriticsOnly( Folding::trimWhitespace( i ) ) == Folding::applyDiacriticsOnly( requestedHeadwordFolded );
          else
-            found = Folding::trimWhitespace( *i ) == requestedHeadwordFolded;
+            found = Folding::trimWhitespace( i ) == requestedHeadwordFolded;

          if ( found )
          {
@ -761,9 +760,8 @@ string DslDictionary::processNodeChildren( ArticleDom::Node const & node )
 {
  string result;

-  for( ArticleDom::Node::const_iterator i = node.begin(); i != node.end();
-       ++i )
-    result += nodeToHtml( *i );
+  for ( const auto & i : node )
+    result += nodeToHtml( i );

  return result;
 }
@ -1598,11 +1596,10 @@ void DslArticleRequest::run()

  vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );

-  for( unsigned x = 0; x < alts.size(); ++x )
-  {
+  for ( auto & alt : alts ) {
    /// Make an additional query for each alt

-    vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
+    vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );

    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }
@ -1615,8 +1612,7 @@ void DslArticleRequest::run()

  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );

-  for( unsigned x = 0; x < chain.size(); ++x )
-  {
+  for ( auto & x : chain ) {
    // Check if we're cancelled occasionally
    if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
    {
@ -1635,10 +1631,10 @@ void DslArticleRequest::run()

    try
    {
-      dict.loadArticle( chain[ x ].articleOffset, wordCaseFolded, ignoreDiacritics, tildeValue,
+      dict.loadArticle( x.articleOffset, wordCaseFolded, ignoreDiacritics, tildeValue,
                        displayedHeadword, headwordIndex, articleBody );

-      if ( !articlesIncluded.insert( std::make_pair( chain[ x ].articleOffset,
+      if ( !articlesIncluded.insert( std::make_pair( x.articleOffset,
                                                     headwordIndex ) ).second )
        continue; // We already have this article in the body.

@ -1872,23 +1868,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 {
  vector< sptr< Dictionary::Class > > dictionaries;

-  for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
-       ++i )
-  {
+  for ( const auto & fileName : fileNames ) {
    // Try .dsl and .dsl.dz suffixes

-    bool uncompressedDsl = ( i->size() >= 4 &&
-                             strcasecmp( i->c_str() + ( i->size() - 4 ), ".dsl" ) == 0 );
+    bool uncompressedDsl = ( fileName.size() >= 4 &&
+                             strcasecmp( fileName.c_str() + ( fileName.size() - 4 ), ".dsl" ) == 0 );
    if ( !uncompressedDsl &&
-         ( i->size() < 7 ||
-           strcasecmp( i->c_str() + ( i->size() - 7 ), ".dsl.dz" ) != 0 ) )
+         ( fileName.size() < 7 ||
+           strcasecmp( fileName.c_str() + ( fileName.size() - 7 ), ".dsl.dz" ) != 0 ) )
      continue;

    // Make sure it's not an abbreviation file

    int extSize = ( uncompressedDsl ? 4 : 7 );
-    if ( i->size() - extSize >= 5 &&
-         strncasecmp( i->c_str() + i->size() - extSize - 5, "_abrv", 5 ) == 0 )
+    if ( fileName.size() - extSize >= 5 &&
+         strncasecmp( fileName.c_str() + fileName.size() - extSize - 5, "_abrv", 5 ) == 0 )
    {
      // It is, skip it
      continue;
@ -1898,11 +1892,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

    try
    {
-      vector< string > dictFiles( 1, *i );
+      vector< string > dictFiles( 1, fileName );

      // Check if there is an 'abrv' file present
-      string baseName = ( (*i)[ i->size() - 4 ] == '.' ) ?
-               string( *i, 0, i->size() - 4 ) : string( *i, 0, i->size() - 7 );
+      string baseName = ( fileName[ fileName.size() - 4 ] == '.' ) ?
+               string( fileName, 0, fileName.size() - 4 ) : string( fileName, 0, fileName.size() - 7 );

      string abrvFileName;

@ -1930,7 +1924,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
      if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
           indexIsOldOrBad( indexFile, zipFileName.size() ) )
      {
-        DslScanner scanner( *i );
+        DslScanner scanner( fileName );

        try { // Here we intercept any errors during the read to save line at
              // which the incident happened. We need alive scanner for that.
@ -2031,13 +2025,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
              // If the string has any dsl markup, we strip it
              string value = Utf8::encode( ArticleDom( curString ).root.renderAsText() );

-              for( list< wstring >::iterator i = keys.begin(); i != keys.end();
-                   ++i )
-              {
-                unescapeDsl( *i );
-                normalizeHeadword( *i );
+              for ( auto & key : keys ) {
+                unescapeDsl( key );
+                normalizeHeadword( key );

-                abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value;
+                abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
              }
            }

@ -2048,17 +2040,15 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

            chunks.addToBlock( &sz, sizeof( uint32_t ) );

-            for( map< string, string >::const_iterator i = abrv.begin();
-                 i != abrv.end(); ++i )
-            {
+            for ( const auto & i : abrv ) {
 //              GD_DPRINTF( "%s:%s\n", i->first.c_str(), i->second.c_str() );

-              sz = i->first.size();
+              sz = i.first.size();
              chunks.addToBlock( &sz, sizeof( uint32_t ) );
-              chunks.addToBlock( i->first.data(), sz );
-              sz = i->second.size();
+              chunks.addToBlock( i.first.data(), sz );
+              sz = i.second.size();
              chunks.addToBlock( &sz, sizeof( uint32_t ) );
-              chunks.addToBlock( i->second.data(), sz );
+              chunks.addToBlock( i.second.data(), sz );
            }
          }
          catch( std::exception & e )
@ -2096,7 +2086,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
            {
              if ( !isDslWs( curString[ x ] ) )
              {
-                gdWarning( "Garbage string in %s at offset 0x%lX\n", i->c_str(), (unsigned long) curOffset );
+                gdWarning( "Garbage string in %s at offset 0x%lX\n", fileName.c_str(), (unsigned long) curOffset );
                break;
              }
            }
@ -2120,7 +2110,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
          {
            if ( ! ( hasString = scanner.readNextLineWithoutComments( curString, curOffset ) ) )
            {
-              gdWarning( "Premature end of file %s\n", i->c_str() );
+              gdWarning( "Premature end of file %s\n", fileName.c_str() );
              break;
            }

@ -2149,12 +2139,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

          chunks.addToBlock( &articleOffset, sizeof( articleOffset ) );

-          for( list< wstring >::iterator j = allEntryWords.begin();
-               j != allEntryWords.end(); ++j )
-          {
-            unescapeDsl( *j );
-            normalizeHeadword( *j );
-            indexedWords.addWord( *j, descOffset, maxHeadwordSize );
+          for ( auto & allEntryWord : allEntryWords ) {
+            unescapeDsl( allEntryWord );
+            normalizeHeadword( allEntryWord );
+            indexedWords.addWord( allEntryWord, descOffset, maxHeadwordSize );
          }

          ++articleCount;
@ -2273,23 +2261,20 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

          chunks.addToBlock( &articleSize, sizeof( articleSize ) );

-          for( QVector< InsidedCard >::iterator i = insidedCards.begin(); i != insidedCards.end(); ++i )
-          {
+          for ( auto & insidedCard : insidedCards ) {
            uint32_t descOffset = chunks.startNewBlock();
-            chunks.addToBlock( &(*i).offset, sizeof( (*i).offset ) );
-            chunks.addToBlock( &(*i).size, sizeof( (*i).size ) );
+            chunks.addToBlock( &insidedCard.offset, sizeof( insidedCard.offset ) );
+            chunks.addToBlock( &insidedCard.size, sizeof( insidedCard.size ) );

-            for( int x = 0; x < (*i).headwords.size(); x++ )
+            for( int x = 0; x < insidedCard.headwords.size(); x++ )
            {
              allEntryWords.clear();
-              expandOptionalParts( (*i).headwords[ x ], &allEntryWords );
+              expandOptionalParts( insidedCard.headwords[ x ], &allEntryWords );

-              for( list< wstring >::iterator j = allEntryWords.begin();
-                   j != allEntryWords.end(); ++j )
-              {
-                unescapeDsl( *j );
-                normalizeHeadword( *j );
-                indexedWords.addWord( *j, descOffset, maxHeadwordSize );
+              for ( auto & allEntryWord : allEntryWords ) {
+                unescapeDsl( allEntryWord );
+                normalizeHeadword( allEntryWord );
+                indexedWords.addWord( allEntryWord, descOffset, maxHeadwordSize );
              }

              wordCount += allEntryWords.size();
@ -2380,7 +2365,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
    catch( std::exception & e )
    {
      gdWarning( "DSL dictionary reading failed: %s:%u, error: %s\n",
-                 i->c_str(), atLine, e.what() );
+                 fileName.c_str(), atLine, e.what() );
    }
  }

--- a/src/dict/xdxf.cc
+++ b/src/dict/xdxf.cc
@ -492,11 +492,10 @@ void XdxfArticleRequest::run()

  vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );

-  for( unsigned x = 0; x < alts.size(); ++x )
-  {
+  for ( auto & alt : alts ) {
    /// Make an additional query for each alt

-    vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );
+    vector< WordArticleLink > altChain = dict.findArticles( alt, ignoreDiacritics );

    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }
@ -511,26 +510,25 @@ void XdxfArticleRequest::run()
  if( ignoreDiacritics )
    wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );

-  for( unsigned x = 0; x < chain.size(); ++x )
-  {
+  for ( auto & x : chain ) {
    if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
    {
      finish();
      return;
    }

-    if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
+    if ( articlesIncluded.find( x.articleOffset ) != articlesIncluded.end() )
      continue; // We already have this article in the body.

    // Now grab that article

    string headword, articleText;

-    headword = chain[ x ].word;
+    headword = x.word;

    try
    {
-      dict.loadArticle( chain[ x ].articleOffset, articleText );
+      dict.loadArticle( x.articleOffset, articleText );

      // Ok. Now, does it go to main articles, or to alternate ones? We list
      // main ones first, and alternates after.
@ -550,7 +548,7 @@ void XdxfArticleRequest::run()
        Folding::applySimpleCaseOnly( Utf8::decode( headword ) ),
        pair< string, string >( headword, articleText ) ) );

-      articlesIncluded.insert( chain[ x ].articleOffset );
+      articlesIncluded.insert( x.articleOffset );
    }
    catch( std::exception &ex )
    {
@ -923,9 +921,8 @@ void indexArticle( GzippedFile & gzFile,

        // Add words to index

-        for( list< QString >::const_iterator i = words.begin(); i != words.end();
-             ++i )
-            indexedWords.addWord( gd::toWString( *i ), offset );
+        for ( const auto & word : words )
+          indexedWords.addWord( gd::toWString( word ), offset );

        ++articleCount;

@ -1075,22 +1072,20 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 {
  vector< sptr< Dictionary::Class > > dictionaries;

-  for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
-       ++i )
-  {
+  for ( const auto & fileName : fileNames ) {
    // Only allow .xdxf and .xdxf.dz suffixes

-    if ( ( i->size() < 5  || strcasecmp( i->c_str() + ( i->size() - 5 ), ".xdxf" ) != 0 ) &&
-         ( i->size() < 8 ||
-           strcasecmp( i->c_str() + ( i->size() - 8 ), ".xdxf.dz" ) != 0 ) )
+    if ( ( fileName.size() < 5  || strcasecmp( fileName.c_str() + ( fileName.size() - 5 ), ".xdxf" ) != 0 ) &&
+         ( fileName.size() < 8 ||
+           strcasecmp( fileName.c_str() + ( fileName.size() - 8 ), ".xdxf.dz" ) != 0 ) )
      continue;

    try
    {
-      vector< string > dictFiles( 1, *i );
+      vector< string > dictFiles( 1, fileName );

-      string baseName = ( (*i)[ i->size() - 5 ] == '.' ) ?
-               string( *i, 0, i->size() - 5 ) : string( *i, 0, i->size() - 8 );
+      string baseName = ( fileName[ fileName.size() - 5 ] == '.' ) ?
+               string( fileName, 0, fileName.size() - 5 ) : string( fileName, 0, fileName.size() - 8 );

      // See if there's a zip file with resources present. If so, include it.

@ -1111,7 +1106,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
      {
        // Building the index

-        gdDebug( "Xdxf: Building the index for dictionary: %s\n", i->c_str() );
+        gdDebug( "Xdxf: Building the index for dictionary: %s\n", fileName.c_str() );

        //initializing.indexingDictionary( nameFromFileName( dictFiles[ 0 ] ) );

@ -1288,10 +1283,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
                          else if ( stream.isStartElement() && stream.name() == u"abbr_v" )
                          {
                            s =  readElementText( stream );
-                              value = Utf8::encode( Folding::trimWhitespace( gd::toWString( s ) ) );
-                              for( list< wstring >::iterator i = keys.begin(); i != keys.end(); ++i )
-                              {
-                                abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value;
+                            value = Folding::trimWhitespace( s ).toStdString();
+                              for ( auto & key : keys ) {
+                                abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
                              }
                              keys.clear();
                          }
@ -1312,10 +1306,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
                          else if ( stream.isStartElement() && stream.name() == u"v" )
                          {
                            s =  readElementText( stream );
-                              value = Utf8::encode( Folding::trimWhitespace( gd::toWString( s ) ) );
-                              for( list< wstring >::iterator i = keys.begin(); i != keys.end(); ++i )
-                              {
-                                abrv[ Utf8::encode( Folding::trimWhitespace( *i ) ) ] = value;
+                            value = Folding::trimWhitespace( s ).toStdString();
+                              for ( auto & key : keys ) {
+                                abrv[ Utf8::encode( Folding::trimWhitespace( key ) ) ] = value;
                              }
                              keys.clear();
                          }
@ -1346,14 +1339,13 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

                chunks.addToBlock( &sz, sizeof( uint32_t ) );

-                for( map< string, string >::const_iterator i = abrv.begin();  i != abrv.end(); ++i )
-                {
-                  sz = i->first.size();
+                for ( const auto & i : abrv ) {
+                  sz = i.first.size();
                  chunks.addToBlock( &sz, sizeof( uint32_t ) );
-                  chunks.addToBlock( i->first.data(), sz );
-                  sz = i->second.size();
+                  chunks.addToBlock( i.first.data(), sz );
+                  sz = i.second.size();
                  chunks.addToBlock( &sz, sizeof( uint32_t ) );
-                  chunks.addToBlock( i->second.data(), sz );
+                  chunks.addToBlock( i.second.data(), sz );
                }
              }

@ -1438,7 +1430,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
    catch( std::exception & e )
    {
      gdWarning( "Xdxf dictionary initializing failed: %s, error: %s\n",
-                 i->c_str(), e.what() );
+                 fileName.c_str(), e.what() );
    }
  }

--- a/src/dict/xdxf2html.cc
+++ b/src/dict/xdxf2html.cc
@ -425,13 +425,13 @@ string convert( string const & in, DICT_TYPE type, map < string, string > const

    el.setTagName( "span" );
    el.setAttribute( "class", "xdxf_abbr" );
-    if( type == XDXF && pAbrv != NULL )
+    if( type == XDXF && pAbrv != nullptr )
    {
-        string val = Utf8::encode( Folding::trimWhitespace( gd::toWString( el.text() ) ) );
+      string val = Folding::trimWhitespace( el.text() ).toStdString();

        // If we have such a key, display a title

-        map< string, string >::const_iterator i = pAbrv->find( val );
+      auto i = pAbrv->find( val );

        if ( i != pAbrv->end() )
        {