mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
opt: use xapian as headword index for mdx dictionary
This commit is contained in:
parent
bf19b960fd
commit
1a75bc3e86
|
@ -1,6 +1,6 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#include "xapian.h"
|
||||
#include "btreeidx.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
|
@ -1020,6 +1020,46 @@ IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )
|
|||
return IndexInfo( btreeMaxElements, rootOffset );
|
||||
}
|
||||
|
||||
/// Builds a Xapian headword index for the given words.
///
/// One Xapian document is added per (headword, article link) pair: the
/// headword is fed through a Xapian::TermGenerator (with CJK n-gram
/// tokenisation enabled, no stemmer configured) and the article offset is
/// stored, as a decimal string, in the document data.
///
/// The database is first written to `file + "_temp"`, then compacted into
/// `file`; the temporary directory is removed afterwards.
///
/// This is a free function so that it matches the declaration in the header
/// and the BtreeIndexing::buildXapianIndex() call site.
///
/// Xapian errors are logged via qWarning() and are not propagated.
///
/// @param indexedWords map of headword -> article links to be indexed
/// @param file         path of the final (compacted) Xapian database
void buildXapianIndex( IndexedWords const & indexedWords, string file )
{
  string const tempFile = file + "_temp";

  try {
    // Always start from an empty scratch database. Re-opening a leftover one
    // from an earlier, interrupted run would append to it and duplicate every
    // document.
    Xapian::WritableDatabase db( tempFile, Xapian::DB_CREATE_OR_OVERWRITE );

    Xapian::TermGenerator indexer;
    indexer.set_flags( Xapian::TermGenerator::FLAG_CJK_NGRAM );

    for ( auto const & [ word, articleLinks ] : indexedWords ) {
      for ( auto const & articleLink : articleLinks ) {
        Xapian::Document doc;

        indexer.set_document( doc );
        indexer.index_text( word );

        // The document payload is the article offset, used to get back from a
        // matched headword to its article.
        doc.set_data( std::to_string( articleLink.articleOffset ) );

        db.add_document( doc );
      }
    }

    db.commit();

    // Write the compacted, final database to its real location.
    db.compact( file );

    db.close();

    Utils::Fs::removeDirectory( tempFile );
  }
  catch ( Xapian::Error const & e ) {
    qWarning() << "create xapian headword index:" << QString::fromStdString( e.get_description() );

    // `db` lives inside the try block, so it has already been destroyed here;
    // do not leave the scratch directory behind on failure.
    Utils::Fs::removeDirectory( tempFile );
  }
}
|
||||
|
||||
void BtreeIndex::getAllHeadwords( QSet< QString > & headwords )
|
||||
{
|
||||
if ( !idxFile )
|
||||
|
|
|
@ -269,6 +269,8 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
|
|||
/// position.
|
||||
IndexInfo buildIndex( IndexedWords const &, File::Index & file );
|
||||
|
||||
/// Builds a Xapian headword index from the given indexed words. The second
/// argument is the path the resulting Xapian database is written to.
void buildXapianIndex( IndexedWords const &, string file );
|
||||
|
||||
} // namespace BtreeIndexing
|
||||
|
||||
#endif
|
||||
|
|
|
@ -57,7 +57,7 @@ using namespace Mdict;
|
|||
|
||||
enum {
|
||||
kSignature = 0x4349444d, // MDIC
|
||||
kCurrentFormatVersion = 11 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
kCurrentFormatVersion = 12 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
};
|
||||
|
||||
DEF_EX( exCorruptDictionary, "dictionary file was tampered or corrupted", std::exception )
|
||||
|
@ -1342,6 +1342,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
}
|
||||
|
||||
File::Index idx( indexFile, "wb" );
|
||||
auto headIndexFile = indexFile+".head";
|
||||
IdxHeader idxHeader;
|
||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||
// We write a dummy header first. At the end of the process the header
|
||||
|
@ -1411,6 +1412,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
|||
|
||||
GD_DPRINTF( "Writing index...\n" );
|
||||
|
||||
BtreeIndexing::buildXapianIndex( indexedWords, headIndexFile );
|
||||
// Good. Now build the index
|
||||
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
|
||||
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
|
||||
|
|
Loading…
Reference in a new issue