opt: use std::move to avoid string copies and reduce the default maximum headword length (256 -> 100)

This commit is contained in:
Xiao YiFang 2022-08-13 12:41:01 +08:00
parent 20a4220a01
commit e2b6fd51e2
2 changed files with 4 additions and 5 deletions

View file

@@ -1112,17 +1112,16 @@ void IndexedWords::addWord( wstring const & word, uint32_t articleOffset, unsign
// Insert this word // Insert this word
wstring folded = Folding::apply( nextChar ); wstring folded = Folding::apply( nextChar );
auto name = Utf8::encode( folded );
iterator i = insert( { Utf8::encode( folded ), vector< WordArticleLink >() } ).first; iterator i = insert( { std::move(name), vector< WordArticleLink >() } ).first;
if( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches if( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
{ {
string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) ); string utfWord = Utf8::encode( wstring( nextChar, wordSize - ( nextChar - wordBegin ) ) );
string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) ); string utfPrefix = Utf8::encode( wstring( wordBegin, nextChar - wordBegin ) );
i->second.emplace_back(utfWord, articleOffset, utfPrefix); i->second.emplace_back(std::move(utfWord), articleOffset, std::move(utfPrefix));
// reduce the vector reallocation. // reduce the vector reallocation.
if( i->second.size() * 1.0 / i->second.capacity() > 0.75 ) if( i->second.size() * 1.0 / i->second.capacity() > 0.75 )
{ {

View file

@@ -254,7 +254,7 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
/// Instead of adding to the map directly, use this function. It does folding /// Instead of adding to the map directly, use this function. It does folding
/// itself, and for phrases/sentences it adds additional entries beginning with /// itself, and for phrases/sentences it adds additional entries beginning with
/// each new word. /// each new word.
void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 256U ); void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 100U );
/// Differs from addWord() in that it only adds a single entry. We use this /// Differs from addWord() in that it only adds a single entry. We use this
/// for zip's file names. /// for zip's file names.