mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
Add config file parameter to limit headwords number to expand multi-word headwords while indexing (issue #914)
This commit is contained in:
parent
1824d9ab02
commit
0b6f36479d
9
aard.cc
9
aard.cc
|
@ -823,7 +823,8 @@ sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & initializing )
|
||||
Dictionary::Initializing & initializing,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception )
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > dictionaries;
|
||||
|
@ -995,7 +996,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
articleOffsets.insert( articleOffset );
|
||||
|
||||
// Insert new entry
|
||||
indexedWords.addWord( Utf8::decode( string( data.data(), wordSize ) ), articleOffset);
|
||||
wstring word = Utf8::decode( string( data.data(), wordSize ) );
|
||||
if( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand )
|
||||
indexedWords.addSingleWord( word, articleOffset);
|
||||
else
|
||||
indexedWords.addWord( word, articleOffset);
|
||||
|
||||
pos += has64bitIndex ? sizeof( IndexElement64 ) : sizeof( IndexElement );
|
||||
}
|
||||
|
|
3
aard.hh
3
aard.hh
|
@ -15,7 +15,8 @@ using std::string;
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & )
|
||||
Dictionary::Initializing &,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception );
|
||||
|
||||
}
|
||||
|
|
|
@ -1009,6 +1009,9 @@ Class load() throw( exError )
|
|||
}
|
||||
}
|
||||
|
||||
if ( !root.namedItem( "maxHeadwordsToExpand" ).isNull() )
|
||||
c.maxHeadwordsToExpand = root.namedItem( "maxHeadwordsToExpand" ).toElement().text().toUInt();
|
||||
|
||||
QDomNode headwordsDialog = root.namedItem( "headwordsDialog" );
|
||||
|
||||
if ( !headwordsDialog.isNull() )
|
||||
|
@ -1976,6 +1979,10 @@ void save( Class const & c ) throw( exError )
|
|||
opt = dd.createElement( "maxHeadwordSize" );
|
||||
opt.appendChild( dd.createTextNode( QString::number( c.maxHeadwordSize ) ) );
|
||||
root.appendChild( opt );
|
||||
|
||||
opt = dd.createElement( "maxHeadwordsToExpand" );
|
||||
opt.appendChild( dd.createTextNode( QString::number( c.maxHeadwordsToExpand ) ) );
|
||||
root.appendChild( opt );
|
||||
}
|
||||
|
||||
{
|
||||
|
|
|
@ -598,6 +598,8 @@ struct Class
|
|||
/// Bigger headwords won't be indexed. For now, only in DSL.
|
||||
unsigned int maxHeadwordSize;
|
||||
|
||||
unsigned int maxHeadwordsToExpand;
|
||||
|
||||
HeadwordsDialog headwordsDialog;
|
||||
|
||||
#ifdef Q_OS_WIN
|
||||
|
@ -610,7 +612,8 @@ struct Class
|
|||
Class(): lastMainGroupId( 0 ), lastPopupGroupId( 0 ),
|
||||
pinPopupWindow( false ), showingDictBarNames( false ),
|
||||
usingSmallIconsInToolbars( false ),
|
||||
maxPictureWidth( 0 ), maxHeadwordSize ( 256U )
|
||||
maxPictureWidth( 0 ), maxHeadwordSize ( 256U ),
|
||||
maxHeadwordsToExpand( 0 )
|
||||
{}
|
||||
Group * getGroup( unsigned id );
|
||||
Group const * getGroup( unsigned id ) const;
|
||||
|
|
|
@ -55,7 +55,8 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
|
|||
transliteration( cfg.transliteration ),
|
||||
exceptionText( "Load did not finish" ), // Will be cleared upon success
|
||||
maxPictureWidth( cfg.maxPictureWidth ),
|
||||
maxHeadwordSize( cfg.maxHeadwordSize )
|
||||
maxHeadwordSize( cfg.maxHeadwordSize ),
|
||||
maxHeadwordToExpand( cfg.maxHeadwordsToExpand )
|
||||
{
|
||||
// Populate name filters
|
||||
|
||||
|
@ -140,7 +141,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
|||
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > stardictDictionaries =
|
||||
Stardict::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
||||
Stardict::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||
|
||||
dictionaries.insert( dictionaries.end(), stardictDictionaries.begin(),
|
||||
stardictDictionaries.end() );
|
||||
|
@ -186,7 +187,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
|||
}
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > aardDictionaries =
|
||||
Aard::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
||||
Aard::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||
|
||||
dictionaries.insert( dictionaries.end(), aardDictionaries.begin(),
|
||||
aardDictionaries.end() );
|
||||
|
@ -215,14 +216,14 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
|||
#ifdef MAKE_ZIM_SUPPORT
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > zimDictionaries =
|
||||
Zim::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
||||
Zim::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||
|
||||
dictionaries.insert( dictionaries.end(), zimDictionaries.begin(),
|
||||
zimDictionaries.end() );
|
||||
}
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > slobDictionaries =
|
||||
Slob::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
||||
Slob::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||
|
||||
dictionaries.insert( dictionaries.end(), slobDictionaries.begin(),
|
||||
slobDictionaries.end() );
|
||||
|
|
|
@ -25,6 +25,7 @@ class LoadDictionaries: public QThread, public Dictionary::Initializing
|
|||
std::string exceptionText;
|
||||
int maxPictureWidth;
|
||||
unsigned int maxHeadwordSize;
|
||||
unsigned int maxHeadwordToExpand;
|
||||
|
||||
public:
|
||||
|
||||
|
|
8
slob.cc
8
slob.cc
|
@ -1518,7 +1518,8 @@ sptr< Dictionary::DataRequest > SlobDictionary::getResource( string const & name
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & initializing )
|
||||
Dictionary::Initializing & initializing,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception )
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > dictionaries;
|
||||
|
@ -1583,7 +1584,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
|| contentType.startsWith( "text/plain", Qt::CaseInsensitive ) )
|
||||
{
|
||||
//Article
|
||||
indexedWords.addWord( gd::toWString( refEntry.key ), i );
|
||||
if( maxHeadwordsToExpand && entries > maxHeadwordsToExpand )
|
||||
indexedWords.addSingleWord( gd::toWString( refEntry.key ), i );
|
||||
else
|
||||
indexedWords.addWord( gd::toWString( refEntry.key ), i );
|
||||
|
||||
wordCount += 1;
|
||||
|
||||
|
|
3
slob.hh
3
slob.hh
|
@ -14,7 +14,8 @@ using std::string;
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & )
|
||||
Dictionary::Initializing &,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception );
|
||||
|
||||
}
|
||||
|
|
19
stardict.cc
19
stardict.cc
|
@ -1804,7 +1804,7 @@ static void handleIdxSynFile( string const & fileName,
|
|||
IndexedWords & indexedWords,
|
||||
ChunkedStorage::Writer & chunks,
|
||||
vector< uint32_t > * articleOffsets,
|
||||
bool isSynFile )
|
||||
bool isSynFile, bool parseHeadwords )
|
||||
{
|
||||
gzFile stardictIdx = gd_gzopen( fileName.c_str() );
|
||||
if ( !stardictIdx )
|
||||
|
@ -1927,7 +1927,10 @@ static void handleIdxSynFile( string const & fileName,
|
|||
|
||||
// Insert new entry into an index
|
||||
|
||||
indexedWords.addWord( Utf8::decode( word ), offset );
|
||||
if( parseHeadwords )
|
||||
indexedWords.addWord( Utf8::decode( word ), offset );
|
||||
else
|
||||
indexedWords.addSingleWord( Utf8::decode( word ), offset );
|
||||
}
|
||||
|
||||
GD_DPRINTF( "%u entires made\n", (unsigned) indexedWords.size() );
|
||||
|
@ -1937,7 +1940,8 @@ static void handleIdxSynFile( string const & fileName,
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & initializing )
|
||||
Dictionary::Initializing & initializing,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception )
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > dictionaries;
|
||||
|
@ -2036,7 +2040,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
|
||||
// Load indices
|
||||
if ( !ifo.synwordcount )
|
||||
handleIdxSynFile( idxFileName, indexedWords, chunks, 0, false );
|
||||
handleIdxSynFile( idxFileName, indexedWords, chunks, 0, false,
|
||||
!maxHeadwordsToExpand || ifo.wordcount < maxHeadwordsToExpand );
|
||||
else
|
||||
{
|
||||
vector< uint32_t > articleOffsets;
|
||||
|
@ -2044,10 +2049,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
articleOffsets.reserve( ifo.wordcount );
|
||||
|
||||
handleIdxSynFile( idxFileName, indexedWords, chunks, &articleOffsets,
|
||||
false );
|
||||
false,
|
||||
!maxHeadwordsToExpand || ( ifo.wordcount + ifo.synwordcount ) < maxHeadwordsToExpand );
|
||||
|
||||
handleIdxSynFile( synFileName, indexedWords, chunks, &articleOffsets,
|
||||
true );
|
||||
true,
|
||||
!maxHeadwordsToExpand || ( ifo.wordcount + ifo.synwordcount ) < maxHeadwordsToExpand );
|
||||
}
|
||||
|
||||
// Finish with the chunks
|
||||
|
|
|
@ -15,7 +15,8 @@ using std::string;
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & )
|
||||
Dictionary::Initializing &,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception );
|
||||
|
||||
}
|
||||
|
|
13
zim.cc
13
zim.cc
|
@ -1184,7 +1184,8 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
|
|||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||
vector< string > const & fileNames,
|
||||
string const & indicesDir,
|
||||
Dictionary::Initializing & initializing )
|
||||
Dictionary::Initializing & initializing,
|
||||
unsigned maxHeadwordsToExpand )
|
||||
throw( std::exception )
|
||||
{
|
||||
vector< sptr< Dictionary::Class > > dictionaries;
|
||||
|
@ -1316,10 +1317,16 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
|
||||
if( nameSpace == 'A' )
|
||||
{
|
||||
wstring word;
|
||||
if( !title.empty() )
|
||||
indexedWords.addWord( Utf8::decode( title ), n );
|
||||
word = Utf8::decode( title );
|
||||
else
|
||||
indexedWords.addWord( Utf8::decode( url ), n );
|
||||
word = Utf8::decode( url );
|
||||
|
||||
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
|
||||
indexedWords.addSingleWord( word, n );
|
||||
else
|
||||
indexedWords.addWord( word, n );
|
||||
wordCount++;
|
||||
}
|
||||
else
|
||||
|
|
Loading…
Reference in a new issue