diff --git a/btreeidx.cc b/btreeidx.cc index aea9adf6..5806ba20 100644 --- a/btreeidx.cc +++ b/btreeidx.cc @@ -900,14 +900,14 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex, return offset; } -void IndexedWords::addWord( wstring const & word, uint32_t articleOffset ) +void IndexedWords::addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize ) { wchar const * wordBegin = word.c_str(); string::size_type wordSize = word.size(); // Safeguard us against various bugs here. Don't attempt adding words // which are freakishly huge. - if ( wordSize > 256 ) + if ( wordSize > maxHeadwordSize ) return; // Skip any leading whitespace diff --git a/btreeidx.hh b/btreeidx.hh index 135c70bc..6fe45046 100644 --- a/btreeidx.hh +++ b/btreeidx.hh @@ -178,7 +178,7 @@ struct IndexedWords: public map< string, vector< WordArticleLink > > /// Instead of adding to the map directly, use this function. It does folding /// itself, and for phrases/sentences it adds additional entries beginning with /// each new word. - void addWord( wstring const & word, uint32_t articleOffset ); + void addWord( wstring const & word, uint32_t articleOffset, unsigned int maxHeadwordSize = 256U ); /// Differs from addWord() in that it only adds a single entry. We use this /// for zip's file names. diff --git a/config.cc b/config.cc index 44e41db4..0cf4a48c 100644 --- a/config.cc +++ b/config.cc @@ -757,6 +757,15 @@ Class load() throw( exError ) if ( !root.namedItem( "maxPictureWidth" ).isNull() ) c.maxPictureWidth = root.namedItem( "maxPictureWidth" ).toElement().text().toInt(); + if ( !root.namedItem( "maxHeadwordSize" ).isNull() ) + { + unsigned int value = root.namedItem( "maxHeadwordSize" ).toElement().text().toUInt(); + if ( value != 0 ) // 0 is invalid value for our purposes + { + c.maxHeadwordSize = value; + } + } + return c; } @@ -1405,6 +1414,10 @@ void save( Class const & c ) throw( exError ) opt = dd.createElement( "maxPictureWidth" ); opt.appendChild( dd.createTextNode( QString::number( c.maxPictureWidth ) ) ); root.appendChild( opt ); + + opt = dd.createElement( "maxHeadwordSize" ); + opt.appendChild( dd.createTextNode( QString::number( c.maxHeadwordSize ) ) ); + root.appendChild( opt ); } QByteArray result( dd.toByteArray() ); diff --git a/config.hh b/config.hh index 72253f1e..b7cf8ae8 100644 --- a/config.hh +++ b/config.hh @@ -415,12 +415,16 @@ struct Class int maxPictureWidth; // Maximum picture width + /// Maximum size for the headwords. + /// Bigger headwords won't be indexed. For now, only in DSL. + unsigned int maxHeadwordSize; + QString editDictionaryCommandLine; // Command line to call external editor for dictionary Class(): lastMainGroupId( 0 ), lastPopupGroupId( 0 ), pinPopupWindow( false ), showingDictBarNames( false ), usingSmallIconsInToolbars( false ), maxDictionaryRefsInContextMenu( 20 ), - maxPictureWidth( 0 ) + maxPictureWidth( 0 ), maxHeadwordSize ( 256U ) {} Group * getGroup( unsigned id ); Group const * getGroup( unsigned id ) const; diff --git a/dsl.cc b/dsl.cc index e35a56ae..47b6c5da 100644 --- a/dsl.cc +++ b/dsl.cc @@ -1476,7 +1476,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames, string const & indicesDir, Dictionary::Initializing & initializing, - int maxPictureWidth ) + int maxPictureWidth, unsigned int maxHeadwordSize ) throw( std::exception ) { vector< sptr< Dictionary::Class > > dictionaries; @@ -1765,7 +1765,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( { unescapeDsl( *j ); normalizeHeadword( *j ); - indexedWords.addWord( *j, descOffset ); + indexedWords.addWord( *j, descOffset, maxHeadwordSize ); } ++articleCount; @@ -1829,7 +1829,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( unescapeDsl( (*i).headword ); normalizeHeadword( (*i).headword ); - indexedWords.addWord( (*i).headword, descOffset ); + indexedWords.addWord( (*i).headword, descOffset, maxHeadwordSize ); ++articleCount; ++wordCount; diff --git a/dsl.hh b/dsl.hh index 868f863f..e39b071d 100644 --- a/dsl.hh +++ b/dsl.hh @@ -16,7 +16,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames, string const & indicesDir, Dictionary::Initializing &, - int maxPictureWidth ) + int maxPictureWidth, unsigned int maxHeadwordSize ) throw( std::exception ); } diff --git a/loaddictionaries.cc b/loaddictionaries.cc index f72f58dd..ee0cd94d 100644 --- a/loaddictionaries.cc +++ b/loaddictionaries.cc @@ -39,7 +39,8 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ): paths( cfg.paths ), soundDirs( cfg.soundDirs ), hunspell( cfg.hunspell ), transliteration( cfg.transliteration ), exceptionText( "Load did not finish" ), // Will be cleared upon success - maxPictureWidth( cfg.maxPictureWidth ) + maxPictureWidth( cfg.maxPictureWidth ), + maxHeadwordSize( cfg.maxHeadwordSize ) { // Populate name filters @@ -131,7 +132,8 @@ void LoadDictionaries::handlePath( Config::Path const & path ) { vector< sptr< Dictionary::Class > > dslDictionaries = - Dsl::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxPictureWidth ); + Dsl::makeDictionaries( + allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxPictureWidth, maxHeadwordSize ); dictionaries.insert( dictionaries.end(), dslDictionaries.begin(), dslDictionaries.end() ); diff --git a/loaddictionaries.hh b/loaddictionaries.hh index 168badcd..e540970c 100644 --- a/loaddictionaries.hh +++ b/loaddictionaries.hh @@ -24,6 +24,7 @@ class LoadDictionaries: public QThread, public Dictionary::Initializing std::vector< sptr< Dictionary::Class > > dictionaries; std::string exceptionText; int maxPictureWidth; + unsigned int maxHeadwordSize; public: