mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
Add config file parameter to limit the number of headwords for expanding multi-word headwords while indexing (issue #914)
This commit is contained in:
parent
1824d9ab02
commit
0b6f36479d
9
aard.cc
9
aard.cc
|
@ -823,7 +823,8 @@ sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & initializing )
|
Dictionary::Initializing & initializing,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception )
|
throw( std::exception )
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > dictionaries;
|
vector< sptr< Dictionary::Class > > dictionaries;
|
||||||
|
@ -995,7 +996,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
articleOffsets.insert( articleOffset );
|
articleOffsets.insert( articleOffset );
|
||||||
|
|
||||||
// Insert new entry
|
// Insert new entry
|
||||||
indexedWords.addWord( Utf8::decode( string( data.data(), wordSize ) ), articleOffset);
|
wstring word = Utf8::decode( string( data.data(), wordSize ) );
|
||||||
|
if( maxHeadwordsToExpand && dictHeader.wordsCount >= maxHeadwordsToExpand )
|
||||||
|
indexedWords.addSingleWord( word, articleOffset);
|
||||||
|
else
|
||||||
|
indexedWords.addWord( word, articleOffset);
|
||||||
|
|
||||||
pos += has64bitIndex ? sizeof( IndexElement64 ) : sizeof( IndexElement );
|
pos += has64bitIndex ? sizeof( IndexElement64 ) : sizeof( IndexElement );
|
||||||
}
|
}
|
||||||
|
|
3
aard.hh
3
aard.hh
|
@ -15,7 +15,8 @@ using std::string;
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & )
|
Dictionary::Initializing &,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception );
|
throw( std::exception );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1009,6 +1009,9 @@ Class load() throw( exError )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( !root.namedItem( "maxHeadwordsToExpand" ).isNull() )
|
||||||
|
c.maxHeadwordsToExpand = root.namedItem( "maxHeadwordsToExpand" ).toElement().text().toUInt();
|
||||||
|
|
||||||
QDomNode headwordsDialog = root.namedItem( "headwordsDialog" );
|
QDomNode headwordsDialog = root.namedItem( "headwordsDialog" );
|
||||||
|
|
||||||
if ( !headwordsDialog.isNull() )
|
if ( !headwordsDialog.isNull() )
|
||||||
|
@ -1976,6 +1979,10 @@ void save( Class const & c ) throw( exError )
|
||||||
opt = dd.createElement( "maxHeadwordSize" );
|
opt = dd.createElement( "maxHeadwordSize" );
|
||||||
opt.appendChild( dd.createTextNode( QString::number( c.maxHeadwordSize ) ) );
|
opt.appendChild( dd.createTextNode( QString::number( c.maxHeadwordSize ) ) );
|
||||||
root.appendChild( opt );
|
root.appendChild( opt );
|
||||||
|
|
||||||
|
opt = dd.createElement( "maxHeadwordsToExpand" );
|
||||||
|
opt.appendChild( dd.createTextNode( QString::number( c.maxHeadwordsToExpand ) ) );
|
||||||
|
root.appendChild( opt );
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|
|
@ -598,6 +598,8 @@ struct Class
|
||||||
/// Bigger headwords won't be indexed. For now, only in DSL.
|
/// Bigger headwords won't be indexed. For now, only in DSL.
|
||||||
unsigned int maxHeadwordSize;
|
unsigned int maxHeadwordSize;
|
||||||
|
|
||||||
|
unsigned int maxHeadwordsToExpand;
|
||||||
|
|
||||||
HeadwordsDialog headwordsDialog;
|
HeadwordsDialog headwordsDialog;
|
||||||
|
|
||||||
#ifdef Q_OS_WIN
|
#ifdef Q_OS_WIN
|
||||||
|
@ -610,7 +612,8 @@ struct Class
|
||||||
Class(): lastMainGroupId( 0 ), lastPopupGroupId( 0 ),
|
Class(): lastMainGroupId( 0 ), lastPopupGroupId( 0 ),
|
||||||
pinPopupWindow( false ), showingDictBarNames( false ),
|
pinPopupWindow( false ), showingDictBarNames( false ),
|
||||||
usingSmallIconsInToolbars( false ),
|
usingSmallIconsInToolbars( false ),
|
||||||
maxPictureWidth( 0 ), maxHeadwordSize ( 256U )
|
maxPictureWidth( 0 ), maxHeadwordSize ( 256U ),
|
||||||
|
maxHeadwordsToExpand( 0 )
|
||||||
{}
|
{}
|
||||||
Group * getGroup( unsigned id );
|
Group * getGroup( unsigned id );
|
||||||
Group const * getGroup( unsigned id ) const;
|
Group const * getGroup( unsigned id ) const;
|
||||||
|
|
|
@ -55,7 +55,8 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
|
||||||
transliteration( cfg.transliteration ),
|
transliteration( cfg.transliteration ),
|
||||||
exceptionText( "Load did not finish" ), // Will be cleared upon success
|
exceptionText( "Load did not finish" ), // Will be cleared upon success
|
||||||
maxPictureWidth( cfg.maxPictureWidth ),
|
maxPictureWidth( cfg.maxPictureWidth ),
|
||||||
maxHeadwordSize( cfg.maxHeadwordSize )
|
maxHeadwordSize( cfg.maxHeadwordSize ),
|
||||||
|
maxHeadwordToExpand( cfg.maxHeadwordsToExpand )
|
||||||
{
|
{
|
||||||
// Populate name filters
|
// Populate name filters
|
||||||
|
|
||||||
|
@ -140,7 +141,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
||||||
|
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > stardictDictionaries =
|
vector< sptr< Dictionary::Class > > stardictDictionaries =
|
||||||
Stardict::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
Stardict::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||||
|
|
||||||
dictionaries.insert( dictionaries.end(), stardictDictionaries.begin(),
|
dictionaries.insert( dictionaries.end(), stardictDictionaries.begin(),
|
||||||
stardictDictionaries.end() );
|
stardictDictionaries.end() );
|
||||||
|
@ -186,7 +187,7 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > aardDictionaries =
|
vector< sptr< Dictionary::Class > > aardDictionaries =
|
||||||
Aard::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
Aard::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||||
|
|
||||||
dictionaries.insert( dictionaries.end(), aardDictionaries.begin(),
|
dictionaries.insert( dictionaries.end(), aardDictionaries.begin(),
|
||||||
aardDictionaries.end() );
|
aardDictionaries.end() );
|
||||||
|
@ -215,14 +216,14 @@ void LoadDictionaries::handlePath( Config::Path const & path )
|
||||||
#ifdef MAKE_ZIM_SUPPORT
|
#ifdef MAKE_ZIM_SUPPORT
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > zimDictionaries =
|
vector< sptr< Dictionary::Class > > zimDictionaries =
|
||||||
Zim::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
Zim::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||||
|
|
||||||
dictionaries.insert( dictionaries.end(), zimDictionaries.begin(),
|
dictionaries.insert( dictionaries.end(), zimDictionaries.begin(),
|
||||||
zimDictionaries.end() );
|
zimDictionaries.end() );
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > slobDictionaries =
|
vector< sptr< Dictionary::Class > > slobDictionaries =
|
||||||
Slob::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
|
Slob::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this, maxHeadwordToExpand );
|
||||||
|
|
||||||
dictionaries.insert( dictionaries.end(), slobDictionaries.begin(),
|
dictionaries.insert( dictionaries.end(), slobDictionaries.begin(),
|
||||||
slobDictionaries.end() );
|
slobDictionaries.end() );
|
||||||
|
|
|
@ -25,6 +25,7 @@ class LoadDictionaries: public QThread, public Dictionary::Initializing
|
||||||
std::string exceptionText;
|
std::string exceptionText;
|
||||||
int maxPictureWidth;
|
int maxPictureWidth;
|
||||||
unsigned int maxHeadwordSize;
|
unsigned int maxHeadwordSize;
|
||||||
|
unsigned int maxHeadwordToExpand;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
8
slob.cc
8
slob.cc
|
@ -1518,7 +1518,8 @@ sptr< Dictionary::DataRequest > SlobDictionary::getResource( string const & name
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & initializing )
|
Dictionary::Initializing & initializing,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception )
|
throw( std::exception )
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > dictionaries;
|
vector< sptr< Dictionary::Class > > dictionaries;
|
||||||
|
@ -1583,7 +1584,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|| contentType.startsWith( "text/plain", Qt::CaseInsensitive ) )
|
|| contentType.startsWith( "text/plain", Qt::CaseInsensitive ) )
|
||||||
{
|
{
|
||||||
//Article
|
//Article
|
||||||
indexedWords.addWord( gd::toWString( refEntry.key ), i );
|
if( maxHeadwordsToExpand && entries > maxHeadwordsToExpand )
|
||||||
|
indexedWords.addSingleWord( gd::toWString( refEntry.key ), i );
|
||||||
|
else
|
||||||
|
indexedWords.addWord( gd::toWString( refEntry.key ), i );
|
||||||
|
|
||||||
wordCount += 1;
|
wordCount += 1;
|
||||||
|
|
||||||
|
|
3
slob.hh
3
slob.hh
|
@ -14,7 +14,8 @@ using std::string;
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & )
|
Dictionary::Initializing &,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception );
|
throw( std::exception );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
19
stardict.cc
19
stardict.cc
|
@ -1804,7 +1804,7 @@ static void handleIdxSynFile( string const & fileName,
|
||||||
IndexedWords & indexedWords,
|
IndexedWords & indexedWords,
|
||||||
ChunkedStorage::Writer & chunks,
|
ChunkedStorage::Writer & chunks,
|
||||||
vector< uint32_t > * articleOffsets,
|
vector< uint32_t > * articleOffsets,
|
||||||
bool isSynFile )
|
bool isSynFile, bool parseHeadwords )
|
||||||
{
|
{
|
||||||
gzFile stardictIdx = gd_gzopen( fileName.c_str() );
|
gzFile stardictIdx = gd_gzopen( fileName.c_str() );
|
||||||
if ( !stardictIdx )
|
if ( !stardictIdx )
|
||||||
|
@ -1927,7 +1927,10 @@ static void handleIdxSynFile( string const & fileName,
|
||||||
|
|
||||||
// Insert new entry into an index
|
// Insert new entry into an index
|
||||||
|
|
||||||
indexedWords.addWord( Utf8::decode( word ), offset );
|
if( parseHeadwords )
|
||||||
|
indexedWords.addWord( Utf8::decode( word ), offset );
|
||||||
|
else
|
||||||
|
indexedWords.addSingleWord( Utf8::decode( word ), offset );
|
||||||
}
|
}
|
||||||
|
|
||||||
GD_DPRINTF( "%u entires made\n", (unsigned) indexedWords.size() );
|
GD_DPRINTF( "%u entires made\n", (unsigned) indexedWords.size() );
|
||||||
|
@ -1937,7 +1940,8 @@ static void handleIdxSynFile( string const & fileName,
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & initializing )
|
Dictionary::Initializing & initializing,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception )
|
throw( std::exception )
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > dictionaries;
|
vector< sptr< Dictionary::Class > > dictionaries;
|
||||||
|
@ -2036,7 +2040,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|
|
||||||
// Load indices
|
// Load indices
|
||||||
if ( !ifo.synwordcount )
|
if ( !ifo.synwordcount )
|
||||||
handleIdxSynFile( idxFileName, indexedWords, chunks, 0, false );
|
handleIdxSynFile( idxFileName, indexedWords, chunks, 0, false,
|
||||||
|
!maxHeadwordsToExpand || ifo.wordcount < maxHeadwordsToExpand );
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vector< uint32_t > articleOffsets;
|
vector< uint32_t > articleOffsets;
|
||||||
|
@ -2044,10 +2049,12 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
articleOffsets.reserve( ifo.wordcount );
|
articleOffsets.reserve( ifo.wordcount );
|
||||||
|
|
||||||
handleIdxSynFile( idxFileName, indexedWords, chunks, &articleOffsets,
|
handleIdxSynFile( idxFileName, indexedWords, chunks, &articleOffsets,
|
||||||
false );
|
false,
|
||||||
|
!maxHeadwordsToExpand || ( ifo.wordcount + ifo.synwordcount ) < maxHeadwordsToExpand );
|
||||||
|
|
||||||
handleIdxSynFile( synFileName, indexedWords, chunks, &articleOffsets,
|
handleIdxSynFile( synFileName, indexedWords, chunks, &articleOffsets,
|
||||||
true );
|
true,
|
||||||
|
!maxHeadwordsToExpand || ( ifo.wordcount + ifo.synwordcount ) < maxHeadwordsToExpand );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finish with the chunks
|
// Finish with the chunks
|
||||||
|
|
|
@ -15,7 +15,8 @@ using std::string;
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & )
|
Dictionary::Initializing &,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception );
|
throw( std::exception );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
13
zim.cc
13
zim.cc
|
@ -1184,7 +1184,8 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & initializing )
|
Dictionary::Initializing & initializing,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception )
|
throw( std::exception )
|
||||||
{
|
{
|
||||||
vector< sptr< Dictionary::Class > > dictionaries;
|
vector< sptr< Dictionary::Class > > dictionaries;
|
||||||
|
@ -1316,10 +1317,16 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|
|
||||||
if( nameSpace == 'A' )
|
if( nameSpace == 'A' )
|
||||||
{
|
{
|
||||||
|
wstring word;
|
||||||
if( !title.empty() )
|
if( !title.empty() )
|
||||||
indexedWords.addWord( Utf8::decode( title ), n );
|
word = Utf8::decode( title );
|
||||||
else
|
else
|
||||||
indexedWords.addWord( Utf8::decode( url ), n );
|
word = Utf8::decode( url );
|
||||||
|
|
||||||
|
if( maxHeadwordsToExpand && zh.articleCount >= maxHeadwordsToExpand )
|
||||||
|
indexedWords.addSingleWord( word, n );
|
||||||
|
else
|
||||||
|
indexedWords.addWord( word, n );
|
||||||
wordCount++;
|
wordCount++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
3
zim.hh
3
zim.hh
|
@ -14,7 +14,8 @@ using std::string;
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
string const & indicesDir,
|
string const & indicesDir,
|
||||||
Dictionary::Initializing & )
|
Dictionary::Initializing &,
|
||||||
|
unsigned maxHeadwordsToExpand )
|
||||||
throw( std::exception );
|
throw( std::exception );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue