opt: use std::move to avoid string copies and reduce the default maxHeadwordSize from 256 to 100

2024-11-23 20:14:05 +00:00 · 2022-08-13 12:41:01 +08:00 · 2022-08-13 12:41:01 +08:00 · e2b6fd51e2
parent 20a4220a01
commit e2b6fd51e2
2 changed files with 4 additions and 5 deletions
--- a/btreeidx.cc
+++ b/btreeidx.cc
@@ -1112,17 +1112,16 @@ void IndexedWords::addWord( wstring const & word, uint32_t articleOffset, unsign
    // Insert this word
    wstring folded = Folding::apply( nextChar );
    auto name      = Utf8::encode( folded );
-    iterator i = insert( { Utf8::encode( folded ), vector< WordArticleLink >() } ).first;
+    iterator i = insert( { std::move(name), vector< WordArticleLink >() } ).first;
    if( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
    {
      string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) );
      string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) );
-      i->second.emplace_back(utfWord, articleOffset, utfPrefix);
+      i->second.emplace_back(std::move(utfWord), articleOffset, std::move(utfPrefix));
      // Reduce the number of vector reallocations.
      if( i->second.size() * 1.0 / i->second.capacity() > 0.75 )
      {
--- a/btreeidx.hh
+++ b/btreeidx.hh
@@ -254,7 +254,7 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
  /// Instead of adding to the map directly, use this function. It does folding
  /// itself, and for phrases/sentences it adds additional entries beginning with
  /// each new word.
-  void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 256U );
+  void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
  /// Differs from addWord() in that it only adds a single entry. We use this
  /// for zip's file names.