Log a warning for overly long headwords while indexing

This commit is contained in:
Abs62 2014-11-20 18:03:55 +03:00
parent 46298a842c
commit 2472e12421

View file

@@ -1065,7 +1065,22 @@ void IndexedWords::addWord( wstring const & word, uint32_t articleOffset, unsign
// Safeguard us against various bugs here. Don't attempt adding words // Safeguard us against various bugs here. Don't attempt adding words
// which are freakishly huge. // which are freakishly huge.
if ( wordSize > maxHeadwordSize ) if ( wordSize > maxHeadwordSize )
{
#define MAX_LOG_WORD_SIZE 500
string headword;
if( wordSize <= MAX_LOG_WORD_SIZE )
headword = Utf8::encode( word );
else
{
std::vector< char > buffer( MAX_LOG_WORD_SIZE * 4 );
headword = string( &buffer.front(),
Utf8::encode( wordBegin, MAX_LOG_WORD_SIZE, &buffer.front() ) );
headword += "...";
}
gdWarning( "Skipped too long headword: \"%s\"", headword.c_str() );
return; return;
#undef MAX_LOG_WORD_SIZE
}
// Skip any leading whitespace // Skip any leading whitespace
while( *wordBegin && Folding::isWhitespace( *wordBegin ) ) while( *wordBegin && Folding::isWhitespace( *wordBegin ) )