mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-30 17:24:08 +00:00
opt: use xapian as headword index for mdx dictionary
This commit is contained in:
parent
bf19b960fd
commit
1a75bc3e86
|
@ -1,6 +1,6 @@
|
||||||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||||
|
#include "xapian.h"
|
||||||
#include "btreeidx.hh"
|
#include "btreeidx.hh"
|
||||||
#include "folding.hh"
|
#include "folding.hh"
|
||||||
#include "utf8.hh"
|
#include "utf8.hh"
|
||||||
|
@ -1020,6 +1020,46 @@ IndexInfo buildIndex( IndexedWords const & indexedWords, File::Index & file )
|
||||||
return IndexInfo( btreeMaxElements, rootOffset );
|
return IndexInfo( btreeMaxElements, rootOffset );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds a Xapian headword index from @p indexedWords and writes it to @p file.
///
/// One Xapian document is added per (headword, article link) pair: the headword
/// is indexed as text and the article offset, converted to a decimal string, is
/// stored as the document data. The database is first populated at
/// `file + "_temp"`, then compacted into @p file, after which the temporary
/// database directory is removed.
///
/// Xapian errors are not propagated to the caller: they are logged through
/// qWarning() and the temporary database directory is removed.
void buildXapianIndex( IndexedWords const & indexedWords, string file )
{
  const string tempFile = file + "_temp";

  try {
    // Always start from an empty temporary database. Re-opening a directory
    // left behind by an earlier, interrupted run would append to its contents
    // and produce duplicate documents.
    Xapian::WritableDatabase db( tempFile, Xapian::DB_CREATE_OR_OVERWRITE );

    Xapian::TermGenerator indexer;
    // Xapian::Stem stemmer("english");
    // indexer.set_stemmer(stemmer);
    // indexer.set_stemming_strategy(indexer.STEM_SOME_FULL_POS);
    indexer.set_flags( Xapian::TermGenerator::FLAG_CJK_NGRAM );

    for ( const auto & [ word, articleLinks ] : indexedWords ) {
      for ( const auto & articleLink : articleLinks ) {
        Xapian::Document doc;

        indexer.set_document( doc );
        indexer.index_text( word );

        // The document payload is the article offset this headword points to.
        doc.set_data( std::to_string( articleLink.articleOffset ) );

        // Add the document to the database.
        db.add_document( doc );
      }
    }

    // All pending changes are committed before the database is compacted.
    db.commit();

    db.compact( file );

    db.close();

    Utils::Fs::removeDirectory( tempFile );
  }
  catch ( const Xapian::Error & e ) {
    qWarning() << "create xapian headword index:" << QString::fromStdString( e.get_description() );

    // `db` has already been destroyed by stack unwinding at this point, so the
    // temporary directory can be removed instead of being leaked on failure.
    Utils::Fs::removeDirectory( tempFile );
  }
}
|
||||||
|
|
||||||
void BtreeIndex::getAllHeadwords( QSet< QString > & headwords )
|
void BtreeIndex::getAllHeadwords( QSet< QString > & headwords )
|
||||||
{
|
{
|
||||||
if ( !idxFile )
|
if ( !idxFile )
|
||||||
|
|
|
@ -269,6 +269,8 @@ struct IndexedWords: public map< string, vector< WordArticleLink > >
|
||||||
/// position.
|
/// position.
|
||||||
IndexInfo buildIndex( IndexedWords const &, File::Index & file );
|
IndexInfo buildIndex( IndexedWords const &, File::Index & file );
|
||||||
|
|
||||||
|
/// Builds a Xapian headword index from the given indexed words. `file` is the
/// path the resulting Xapian database is written to.
void buildXapianIndex( IndexedWords const &, string file );
|
||||||
|
|
||||||
} // namespace BtreeIndexing
|
} // namespace BtreeIndexing
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -57,7 +57,7 @@ using namespace Mdict;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
kSignature = 0x4349444d, // MDIC
|
kSignature = 0x4349444d, // MDIC
|
||||||
kCurrentFormatVersion = 11 + BtreeIndexing::FormatVersion + Folding::Version
|
kCurrentFormatVersion = 12 + BtreeIndexing::FormatVersion + Folding::Version
|
||||||
};
|
};
|
||||||
|
|
||||||
DEF_EX( exCorruptDictionary, "dictionary file was tampered or corrupted", std::exception )
|
DEF_EX( exCorruptDictionary, "dictionary file was tampered or corrupted", std::exception )
|
||||||
|
@ -1342,6 +1342,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
}
|
}
|
||||||
|
|
||||||
File::Index idx( indexFile, "wb" );
|
File::Index idx( indexFile, "wb" );
|
||||||
|
auto headIndexFile = indexFile+".head";
|
||||||
IdxHeader idxHeader;
|
IdxHeader idxHeader;
|
||||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||||
// We write a dummy header first. At the end of the process the header
|
// We write a dummy header first. At the end of the process the header
|
||||||
|
@ -1411,6 +1412,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
|
|
||||||
GD_DPRINTF( "Writing index...\n" );
|
GD_DPRINTF( "Writing index...\n" );
|
||||||
|
|
||||||
|
BtreeIndexing::buildXapianIndex( indexedWords, headIndexFile );
|
||||||
// Good. Now build the index
|
// Good. Now build the index
|
||||||
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
|
IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
|
||||||
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
|
idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
|
||||||
|
|
Loading…
Reference in a new issue