diff --git a/README.md b/README.md index 1c618d94..42b62302 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Alternatively, you might want to load `goldendict.pro` file from within Qt Creat ### Building with Zim dictionaries support -To add Zim format support you need at first install lzma-dev package: +To add Zim and Slob formats support you need at first install lzma-dev package: sudo apt-get liblzma-dev diff --git a/article-style.css b/article-style.css index 63307860..20495931 100644 --- a/article-style.css +++ b/article-style.css @@ -2876,3 +2876,11 @@ in bg url to hide it from iemac */ vertical-align: text-bottom; background-image:url('qrcx://localhost/icons/downarrow.png'); } + +/********** Slob dictionaries ***********/ + +.slobdict img.imgtex +{ + vertical-align: baseline !important; +} + diff --git a/decompress.cc b/decompress.cc index 9ca0fff2..3a66ef1d 100644 --- a/decompress.cc +++ b/decompress.cc @@ -82,7 +82,8 @@ int res; #define BUFSIZE 0xFFFF -string decompressLzma2( const char * bufptr, unsigned length ) +string decompressLzma2( const char * bufptr, unsigned length, + bool raw_decoder ) { string str; lzma_ret res; @@ -92,9 +93,25 @@ char buf[BUFSIZE]; strm.next_in = reinterpret_cast< const uint8_t * >( bufptr ); strm.avail_in = length; + lzma_options_lzma opt; + lzma_filter filters[ 2 ]; + + if( raw_decoder ) + { + lzma_lzma_preset(&opt, LZMA_PRESET_DEFAULT); + + filters[ 0 ].id = LZMA_FILTER_LZMA2; + filters[ 0 ].options = &opt; + filters[ 1 ].id = LZMA_VLI_UNKNOWN; + } + while( 1 ) { - res = lzma_stream_decoder( &strm, UINT64_MAX, 0 ); + if( raw_decoder ) + res = lzma_raw_decoder( &strm, filters ); + else + res = lzma_stream_decoder( &strm, UINT64_MAX, 0 ); + if( res != LZMA_OK ) break; diff --git a/decompress.hh b/decompress.hh index f0d00c79..c5b6254b 100644 --- a/decompress.hh +++ b/decompress.hh @@ -14,7 +14,8 @@ string decompressBzip2( const char * bufptr, unsigned length ); #ifdef MAKE_ZIM_SUPPORT -string decompressLzma2( const char * bufptr, unsigned length ); +string decompressLzma2( const char * bufptr, unsigned length, + bool raw_decoder = false ); #endif diff --git a/goldendict.pro b/goldendict.pro index 8a973e00..5a37c328 100644 --- a/goldendict.pro +++ b/goldendict.pro @@ -329,7 +329,8 @@ HEADERS += folding.hh \ fulltextsearch.hh \ ftshelpers.hh \ dictserver.hh \ - helpwindow.hh + helpwindow.hh \ + slob.hh FORMS += groups.ui \ dictgroupwidget.ui \ @@ -347,6 +348,7 @@ FORMS += groups.ui \ dictheadwords.ui \ authentication.ui \ fulltextsearch.ui + SOURCES += folding.cc \ main.cc \ dictionary.cc \ @@ -450,7 +452,8 @@ SOURCES += folding.cc \ fulltextsearch.cc \ ftshelpers.cc \ dictserver.cc \ - helpwindow.cc + helpwindow.cc \ + slob.cc win32 { FORMS += texttospeechsource.ui diff --git a/help/gdhelp_en.qch b/help/gdhelp_en.qch index cc76900b..ae4bc7dd 100644 Binary files a/help/gdhelp_en.qch and b/help/gdhelp_en.qch differ diff --git a/help/gdhelp_ru.qch b/help/gdhelp_ru.qch index 4ea7ec8f..c43aac20 100644 Binary files a/help/gdhelp_ru.qch and b/help/gdhelp_ru.qch differ diff --git a/icons/icon32_slob.png b/icons/icon32_slob.png new file mode 100644 index 00000000..984205ea Binary files /dev/null and b/icons/icon32_slob.png differ diff --git a/loaddictionaries.cc b/loaddictionaries.cc index 43322540..e0c73cff 100644 --- a/loaddictionaries.cc +++ b/loaddictionaries.cc @@ -29,6 +29,7 @@ #include "mdx.hh" #include "zim.hh" #include "dictserver.hh" +#include "slob.hh" #ifndef NO_EPWING_SUPPORT #include "epwing.hh" @@ -58,7 +59,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ): << "*.xdxf.dz" << "*.dct" << "*.aar" << "*.zips" << "*.mdx" #ifdef MAKE_ZIM_SUPPORT - << "*.zim" << "*.zimaa" + << "*.zim" << "*.zimaa" << "*.slob" #endif #ifndef NO_EPWING_SUPPORT << "*catalogs" @@ -207,6 +208,13 @@ void LoadDictionaries::handlePath( Config::Path const & path ) dictionaries.insert( dictionaries.end(), zimDictionaries.begin(), zimDictionaries.end() ); } + { + vector< sptr< Dictionary::Class > > slobDictionaries = + Slob::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); + + dictionaries.insert( dictionaries.end(), slobDictionaries.begin(), + slobDictionaries.end() ); + } #endif #ifndef NO_EPWING_SUPPORT { diff --git a/preferences.cc b/preferences.cc index fab02032..7e7763ad 100644 --- a/preferences.cc +++ b/preferences.cc @@ -294,12 +294,14 @@ Preferences::Preferences( QWidget * parent, Config::Class & cfg_ ): ui.allowDSL->setChecked( !p.fts.disabledTypes.contains( "DSL", Qt::CaseInsensitive ) ); ui.allowMDict->setChecked( !p.fts.disabledTypes.contains( "MDICT", Qt::CaseInsensitive ) ); ui.allowSDict->setChecked( !p.fts.disabledTypes.contains( "SDICT", Qt::CaseInsensitive ) ); + ui.allowSlob->setChecked( !p.fts.disabledTypes.contains( "SLOB", Qt::CaseInsensitive ) ); ui.allowStardict->setChecked( !p.fts.disabledTypes.contains( "STARDICT", Qt::CaseInsensitive ) ); ui.allowXDXF->setChecked( !p.fts.disabledTypes.contains( "XDXF", Qt::CaseInsensitive ) ); ui.allowZim->setChecked( !p.fts.disabledTypes.contains( "ZIM", Qt::CaseInsensitive ) ); ui.allowEpwing->setChecked( !p.fts.disabledTypes.contains( "EPWING", Qt::CaseInsensitive ) ); #ifndef MAKE_ZIM_SUPPORT ui.allowZim->hide(); + ui.allowSlob->hide(); #endif #ifdef NO_EPWING_SUPPORT ui.allowEpwing->hide(); @@ -441,6 +443,13 @@ Config::Preferences Preferences::getPreferences() p.fts.disabledTypes += "SDICT"; } + if( !ui.allowSlob->isChecked() ) + { + if( !p.fts.disabledTypes.isEmpty() ) + p.fts.disabledTypes += ','; + p.fts.disabledTypes += "SLOB"; + } + if( !ui.allowStardict->isChecked() ) { if( !p.fts.disabledTypes.isEmpty() ) diff --git a/preferences.ui b/preferences.ui index 2e50ff58..e5591b7f 100644 --- a/preferences.ui +++ b/preferences.ui @@ -1240,13 +1240,20 @@ download page. - + SDict + + + + Slob + + + @@ -1268,7 +1275,7 @@ download page. - + diff --git a/resources.qrc b/resources.qrc index cc29474e..97e33451 100644 --- a/resources.qrc +++ b/resources.qrc @@ -78,5 +78,6 @@ icons/playsound_full.png icons/icon32_zim.png icons/icon32_epwing.png + icons/icon32_slob.png diff --git a/slob.cc b/slob.cc new file mode 100644 index 00000000..ae252a00 --- /dev/null +++ b/slob.cc @@ -0,0 +1,1559 @@ +/* This file is (c) 2015 Abs62 + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#ifdef MAKE_ZIM_SUPPORT + +#include "slob.hh" +#include "btreeidx.hh" +#include "fsencoding.hh" +#include "folding.hh" +#include "gddebug.hh" +#include "utf8.hh" +#include "decompress.hh" +#include "langcoder.hh" +#include "wstring.hh" +#include "wstring_qt.hh" +#include "ftshelpers.hh" +#include "htmlescape.hh" +#include "filetype.hh" +#include "tiff.hh" + +#ifdef _MSC_VER +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace Slob { + +using std::string; +using std::map; +using std::vector; +using std::multimap; +using std::pair; +using std::set; +using gd::wstring; + +using BtreeIndexing::WordArticleLink; +using BtreeIndexing::IndexedWords; +using BtreeIndexing::IndexInfo; + +DEF_EX_STR( exNotSlobFile, "Not an Slob file", Dictionary::Ex ) +DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex ) +DEF_EX_STR( exCantDecodeFile, "Can't decode file", Dictionary::Ex ) +DEF_EX_STR( exNoCodecFound, "No text codec found", Dictionary::Ex ) +DEF_EX( exUserAbort, "User abort", Dictionary::Ex ) + +#pragma pack( push, 1 ) + +enum +{ + Signature = 0x58424C53, // SLBX on little-endian, XBLS on big-endian + CurrentFormatVersion = 1 + BtreeIndexing::FormatVersion + Folding::Version +}; + +struct IdxHeader +{ + quint32 signature; // First comes the signature, SLBX + quint32 formatVersion; // File format version (CurrentFormatVersion) + quint32 indexBtreeMaxElements; // Two fields from IndexInfo + quint32 indexRootOffset; + quint32 resourceIndexBtreeMaxElements; // Two fields from IndexInfo + quint32 resourceIndexRootOffset; + quint32 wordCount; + quint32 articleCount; + quint32 langFrom; // Source language + quint32 langTo; // Target language +} +#ifndef _MSC_VER +__attribute__((packed)) +#endif +; + +#pragma pack( pop ) + +const char SLOB_MAGIC[ 8 ] = { 0x21, 0x2d, 0x31, 0x53, 0x4c, 0x4f, 0x42, 0x1f }; + +struct RefEntry +{ + QString key; + quint32 binIndex; + quint16 itemIndex; + QString fragment; +}; + +bool indexIsOldOrBad( string const & indexFile ) +{ + File::Class idx( indexFile, "rb" ); + + IdxHeader header; + + return idx.readRecords( &header, sizeof( header ), 1 ) != 1 || + header.signature != Signature || + header.formatVersion != CurrentFormatVersion; +} + + +class SlobFile +{ + enum Compressions + { UNKNOWN = 0, ZLIB, BZ2, LZMA2 }; + + QFile file; + QString fileName, dictionaryName; + Compressions compression; + QString encoding; + unsigned char uuid[ 16 ]; + QTextCodec *codec; + QMap< QString, QString > tags; + QVector< QString > contentTypes; + quint32 blobCount; + quint64 storeOffset, fileSize, refsOffset; + quint32 refsCount, itemsCount; + quint64 itemsOffset, itemsDataOffset; + quint32 currentItem; + quint32 contentTypesCount; + string currentItemData; + + QString readTinyText(); + QString readText(); + QString readLargeText(); + QString readString( unsigned lenght ); + +public: + SlobFile() : + compression( UNKNOWN ) + , codec( 0 ) + , blobCount( 0 ) + , storeOffset( 0 ) + , fileSize( 0 ) + , refsOffset( 0 ) + , refsCount( 0 ) + , itemsCount( 0 ) + , itemsOffset( 0 ) + , itemsDataOffset( 0 ) + , currentItem( 0xFFFFFFFF ) + , contentTypesCount( 0 ) + {} + + ~SlobFile(); + + Compressions getCompression() const + { return compression; } + + QString const & getEncoding() const + { return encoding; } + + QString const & getDictionaryName() const + { return dictionaryName; } + + quint32 blobsCount() const + { return blobCount; } + + quint64 dataOffset() const + { return storeOffset; } + + quint32 getRefsCount() const + { return refsCount; } + + quint32 getContentTypesCount() const + { return contentTypesCount; } + + QTextCodec * getCodec() const + { return codec; } + + QString getContentType( quint8 content_id ) const + { return content_id < contentTypes.size() ? contentTypes[ content_id ] : QString(); } + + QMap< QString, QString > const & getTags() const + { return tags; } + + void open( const QString & name ); + + void getRefEntry( quint32 ref_nom, RefEntry & entry ); + + quint8 getItem( RefEntry const & entry, string * data ); +}; + +SlobFile::~SlobFile() +{ + file.close(); +} + +QString SlobFile::readString( unsigned length ) +{ + QByteArray data = file.read( length ); + QString str; + + if( codec != 0 && !data.isEmpty() ) + str = codec->toUnicode( data ); + else + str = QString( data ); + + char term = 0; + int n = str.indexOf( term ); + if( n > 0 ) + str.resize( n ); + + return str; +} + +QString SlobFile::readTinyText() +{ + unsigned char len; + if( !file.getChar( ( char * )&len ) ) + { + QString error = fileName + ": " + file.errorString(); + throw exCantReadFile( string( error.toUtf8().data() ) ); + } + return readString( len ); +} + +QString SlobFile::readText() +{ + quint16 len; + if( file.read( ( char * )&len, sizeof( len ) ) != sizeof( len ) ) + { + QString error = fileName + ": " + file.errorString(); + throw exCantReadFile( string( error.toUtf8().data() ) ); + } + return readString( qFromBigEndian( len ) ); +} + +QString SlobFile::readLargeText() +{ + quint32 len; + if( file.read( ( char * )&len, sizeof( len ) ) != sizeof( len ) ) + { + QString error = fileName + ": " + file.errorString(); + throw exCantReadFile( string( error.toUtf8().data() ) ); + } + return readString( qFromBigEndian( len ) ); +} + +void SlobFile::open( const QString & name ) +{ +QString error( name + ": " ); + + if( file.isOpen() ) + file.close(); + + fileName = name; + + file.setFileName( name ); + + { + QFileInfo fi( name ); + dictionaryName = fi.fileName(); + } + + for( ; ; ) + { + + if( !file.open( QFile::ReadOnly ) ) + break; + + char magic[ 8 ]; + if( file.read( magic, sizeof( magic ) ) != sizeof( magic ) ) + break; + + if( memcmp( magic, SLOB_MAGIC, sizeof( magic ) ) != 0 ) + throw exNotSlobFile( string( name.toUtf8().data() ) ); + + if( file.read( ( char * )uuid, sizeof( uuid ) ) != sizeof( uuid ) ) + break; + + // Read encoding + + encoding = readTinyText(); + + codec = QTextCodec::codecForName( encoding.toLatin1() ); + if( codec == 0 ) + { + error = QString( "for encoding \"") + encoding + "\""; + throw exNoCodecFound( string( error.toUtf8().data() ) ); + } + + // Read compression type + + QString compr = readTinyText(); + + if( compr.compare( "zlib", Qt::CaseInsensitive ) == 0 ) + compression = ZLIB; + else + if( compr.compare( "bz2", Qt::CaseInsensitive ) == 0 ) + compression = BZ2; + else + if( compr.compare( "lzma2", Qt::CaseInsensitive ) == 0 ) + compression = LZMA2; + + // Read tags + + unsigned char count; + if( !file.getChar( ( char * )&count ) ) + break; + + for( unsigned i = 0; i < count; i++ ) + { + QString key = readTinyText(); + QString value = readTinyText(); + tags[ key ] = value; + + if( key.compare( "label", Qt::CaseInsensitive ) == 0 + || key.compare( "name", Qt::CaseInsensitive ) == 0) + dictionaryName = value; + } + + // Read content types + + if( !file.getChar( ( char * )&count ) ) + break; + + for( unsigned i = 0; i < count; i++ ) + { + QString type = readText(); + contentTypes.append( type ); + } + contentTypesCount = count; + + // Read data parameters + + quint32 cnt; + if( file.read( ( char * )&cnt, sizeof( cnt ) ) != sizeof( cnt ) ) + break; + blobCount = qFromBigEndian( cnt ); + + quint64 tmp; + if( file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) ) + break; + storeOffset = qFromBigEndian( tmp ); + + if( file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) ) + break; + fileSize = qFromBigEndian( tmp ); + + if( file.read( ( char * )&cnt, sizeof( cnt ) ) != sizeof( cnt ) ) + break; + refsCount = qFromBigEndian( cnt ); + + refsOffset = file.pos(); + + if( !file.seek( storeOffset ) ) + break; + + if( file.read( ( char * )&cnt, sizeof( cnt ) ) != sizeof( cnt ) ) + break; + itemsCount = qFromBigEndian( cnt ); + + itemsOffset = storeOffset + sizeof( itemsCount ); + itemsDataOffset = itemsOffset + itemsCount * sizeof( quint64 ); + + return; + } + error += file.errorString(); + throw exCantReadFile( string( error.toUtf8().data() ) ); +} + +void SlobFile::getRefEntry( quint32 ref_nom, RefEntry & entry ) +{ + quint64 pos = refsOffset + ref_nom * sizeof( quint64 ); + quint64 offset, tmp; + + for( ; ; ) + { + if( !file.seek( pos ) || file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) ) + break; + offset = qFromBigEndian( tmp ) + refsOffset + refsCount * sizeof( quint64 ); + + if( !file.seek( offset ) ) + break; + + entry.key = readText(); + + quint32 index; + if( file.read( ( char * )&index, sizeof( index ) ) != sizeof( index ) ) + break; + entry.itemIndex = qFromBigEndian( index ); + + quint16 binIndex; + if( file.read( ( char * )&binIndex, sizeof( binIndex ) ) != sizeof( binIndex ) ) + break; + entry.binIndex = qFromBigEndian( binIndex ); + + entry.fragment = readTinyText(); + + return; + } + QString error = fileName + ": " + file.errorString(); + throw exCantReadFile( string( error.toUtf8().data() ) ); +} + +quint8 SlobFile::getItem( RefEntry const & entry, string * data ) +{ + quint64 pos = itemsOffset + entry.itemIndex * sizeof( quint64 ); + quint64 offset, tmp; + + for( ; ; ) + { + // Read item data types + + if( !file.seek( pos ) || file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) ) + break; + + offset = qFromBigEndian( tmp ) + itemsDataOffset; + + if( !file.seek( offset ) ) + break; + + quint32 bins, bins_be; + if( file.read( ( char * )&bins_be, sizeof( bins_be ) ) != sizeof( bins_be ) ) + break; + bins = qFromBigEndian( bins_be ); + + if( entry.binIndex >= bins ) + return 0xFF; + + QVector< quint8 > ids; + ids.resize( bins ); + if( file.read( ( char * )ids.data(), bins ) != bins ) + break; + + quint8 id = ids[ entry.binIndex ]; + + if( id >= (unsigned)contentTypes.size() ) + return 0xFF; + + if( data != 0 ) + { + // Read item data + if( currentItem != entry.itemIndex ) + { + currentItemData.clear(); + quint32 length, length_be; + if( file.read( ( char * )&length_be, sizeof( length_be ) ) != sizeof( length_be ) ) + break; + length = qFromBigEndian( length_be ); + + QByteArray compressedData = file.read( length ); + + if( compression == ZLIB ) + currentItemData = decompressZlib( compressedData.data(), length ); + else + if( compression == ZLIB ) + currentItemData = decompressBzip2( compressedData.data(), length ); + else + currentItemData = decompressLzma2( compressedData.data(), length, true ); + + if( currentItemData.empty() ) + { + currentItem = 0xFFFFFFFF; + return 0xFF; + } + currentItem = entry.itemIndex; + } + + // Find bin data inside item + + const char * ptr = currentItemData.c_str(); + quint32 pos = entry.binIndex * sizeof( quint32 ); + + if( pos >= currentItemData.length() - sizeof( quint32 ) ) + return 0xFF; + + quint32 offset, offset_be; + memcpy( &offset_be, ptr + pos, sizeof( offset_be ) ); + offset = qFromBigEndian( offset_be ); + + pos = bins * sizeof( quint32 ) + offset; + + if( pos >= currentItemData.length() - sizeof( quint32 ) ) + return 0xFF; + + quint32 length, len_be; + memcpy( &len_be, ptr + pos, sizeof( len_be ) ); + length = qFromBigEndian( len_be ); + + *data = currentItemData.substr( pos + sizeof( len_be ), length ); + } + + return ids[ entry.binIndex ]; + } + QString error = fileName + ": " + file.errorString(); + throw exCantReadFile( string( error.toUtf8().data() ) ); +} + +// SlobDictionary + +class SlobDictionary: public BtreeIndexing::BtreeDictionary +{ + Mutex idxMutex; + Mutex slobMutex, idxResourceMutex; + File::Class idx; + BtreeIndex resourceIndex; + IdxHeader idxHeader; + string dictionaryName; + SlobFile sf; + QString texCgiPath, texCachePath; + + public: + + SlobDictionary( string const & id, string const & indexFile, + vector< string > const & dictionaryFiles ); + + ~SlobDictionary(); + + virtual string getName() throw() + { return dictionaryName; } + + virtual map< Dictionary::Property, string > getProperties() throw() + { return map< Dictionary::Property, string >(); } + + virtual unsigned long getArticleCount() throw() + { return idxHeader.articleCount; } + + virtual unsigned long getWordCount() throw() + { return idxHeader.wordCount; } + + inline virtual quint32 getLangFrom() const + { return idxHeader.langFrom; } + + inline virtual quint32 getLangTo() const + { return idxHeader.langTo; } + + virtual sptr< Dictionary::DataRequest > getArticle( wstring const &, + vector< wstring > const & alts, + wstring const & ) + throw( std::exception ); + + virtual sptr< Dictionary::DataRequest > getResource( string const & name ) + throw( std::exception ); + + virtual QString const& getDescription(); + + /// Loads the resource. + void loadResource( std::string &resourceName, string & data ); + + virtual sptr< Dictionary::DataRequest > getSearchResults( QString const & searchString, + int searchMode, bool matchCase, + int distanceBetweenWords, + int maxResults ); + virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); + + virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); + + virtual void setFTSParameters( Config::FullTextSearch const & fts ) + { + can_FTS = fts.enabled + && !fts.disabledTypes.contains( "SLOB", Qt::CaseInsensitive ) + && ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize ); + } + + virtual uint32_t getFtsIndexVersion() + { return 1; } + +protected: + + virtual void loadIcon() throw(); + +private: + + /// Loads the article. + void loadArticle( quint32 address, + string & articleText ); + + quint32 readArticle( quint32 address, + string & articleText, + RefEntry & entry ); + + string convert( string const & in_data, RefEntry const & entry ); + + void removeDirectory( QString const & directory ); + + friend class SlobArticleRequest; + friend class SlobResourceRequest; +}; + +SlobDictionary::SlobDictionary( string const & id, + string const & indexFile, + vector< string > const & dictionaryFiles ): + BtreeDictionary( id, dictionaryFiles ), + idx( indexFile, "rb" ), + idxHeader( idx.read< IdxHeader >() ) +{ + // Open data file + + try + { + sf.open( FsEncoding::decode( dictionaryFiles[ 0 ].c_str() ) ); + } + catch( std::exception & e ) + { + gdWarning( "Slob dictionary initializing failed: %s, error: %s\n", + dictionaryFiles[ 0 ].c_str(), e.what() ); + } + + // Initialize the indexes + + openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, + idxHeader.indexRootOffset ), + idx, idxMutex ); + + resourceIndex.openIndex( IndexInfo( idxHeader.resourceIndexBtreeMaxElements, + idxHeader.resourceIndexRootOffset ), + idx, idxResourceMutex ); + + // Read dictionary name + + dictionaryName = string( sf.getDictionaryName().toUtf8().constData() ); + if( dictionaryName.empty() ) + { + QString name = QDir::fromNativeSeparators( FsEncoding::decode( dictionaryFiles[ 0 ].c_str() ) ); + int n = name.lastIndexOf( '/' ); + dictionaryName = string( name.mid( n + 1 ).toUtf8().constData() ); + } + + // Full-text search parameters + + can_FTS = true; + + ftsIdxName = indexFile + "_FTS"; + + if( !Dictionary::needToRebuildIndex( dictionaryFiles, ftsIdxName ) + && !FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this ) ) + FTS_index_completed.ref(); + + texCgiPath = Config::getProgramDataDir() + "/mimetex.cgi"; + if( QFileInfo( texCgiPath ).exists() ) + { + QString dirName = QString::fromStdString( getId() ); + QDir( QDir::tempPath() ).mkdir( dirName ); + texCachePath = QDir::tempPath() + "/" + dirName; + } + else + texCgiPath.clear(); +} + +SlobDictionary::~SlobDictionary() +{ + if( !texCachePath.isEmpty() ) + removeDirectory( texCachePath ); +} + +void SlobDictionary::removeDirectory( QString const & directory ) +{ + QDir dir( directory ); + Q_FOREACH( QFileInfo info, dir.entryInfoList( QDir::NoDotAndDotDot + | QDir::AllDirs + | QDir::Files, + QDir::DirsFirst)) + { + if( info.isDir() ) + removeDirectory( info.absoluteFilePath() ); + else + QFile::remove( info.absoluteFilePath() ); + } + + dir.rmdir( directory ); +} + +void SlobDictionary::loadIcon() throw() +{ + if ( dictionaryIconLoaded ) + return; + + QString fileName = + QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ); + + // Remove the extension + fileName.chop( 4 ); + + if( !loadIconFromFile( fileName ) ) + { + // Load failed -- use default icons + dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/icon32_slob.png"); + } + + dictionaryIconLoaded = true; +} + +QString const& SlobDictionary::getDescription() +{ + if( !dictionaryDescription.isEmpty() ) + return dictionaryDescription; + + QMap< QString, QString > const & tags = sf.getTags(); + + QMap< QString, QString >::const_iterator it; + for( it = tags.begin(); it != tags.end(); ++it ) + { + if( it != tags.begin() ) + dictionaryDescription += "\n\n"; + + dictionaryDescription += it.key() + ": " +it.value(); + } + + return dictionaryDescription; +} + +void SlobDictionary::loadArticle( quint32 address, + string & articleText ) +{ + articleText.clear(); + + RefEntry entry; + + readArticle( address, articleText, entry ); + + if( !articleText.empty() ) + { + articleText = convert( articleText, entry ); + } + else + articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() ); + + // See Issue #271: A mechanism to clean-up invalid HTML cards. + string cleaner = """""""""""" + """""""""""" + "" + "" + ""; + + string prefix( "
"; +} + +string SlobDictionary::convert( const string & in, RefEntry const & entry ) +{ + QString text = QString::fromUtf8( in.c_str() ); + + // pattern of img and script + text.replace( QRegExp( "<\\s*(img|script)\\s*([^>]*)src=\"(|/)([^\"]*)\"" ), + QString( "<\\1 \\2src=\"bres://%1/\\4\"").arg( getId().c_str() ) ); + + // pattern + text.replace( QRegExp( "<\\s*link\\s*([^>]*)href=\"" ), + QString( ", excluding any known protocols such as http://, mailto:, #(comment) + // these links will be translated into local definitions + QRegExp rxLink( "<\\s*a\\s+([^>]*)href=\"(?!(\\w+://|#|mailto:|tel:))(/|)([^\"]*)\"\\s*(title=\"[^\"]*\")?[^>]*>", + Qt::CaseSensitive, + QRegExp::RegExp2 ); + int pos = 0; + while( (pos = rxLink.indexIn( text, pos )) >= 0 ) + { + QStringList list = rxLink.capturedTexts(); + QString tag = list[3]; + if ( !list[4].isEmpty() ) + tag = list[4].split("\"")[1]; + + tag.remove( QRegExp(".*/") ). + remove( QRegExp( "\\.(s|)htm(l|)$", Qt::CaseInsensitive ) ). + replace( "_", "%20" ). + prepend( "" ); + + text.replace( pos, list[0].length(), tag ); + pos += tag.length() + 1; + } + + // Handle TeX formulas via mimetex.cgi + + if( !texCgiPath.isEmpty() ) + { + QRegExp texImage( "<\\s*img\\s*class=\"([^\"]+)\"\\s*alt=\"([^\"]+)\"[^>]*>", + Qt::CaseSensitive, + QRegExp::RegExp2 ); + pos = 0; + unsigned texCount = 0; + QString imgName; + + QRegExp regFrac = QRegExp( "\\\\[dt]frac" ); + + while( (pos = texImage.indexIn( text, pos )) >= 0 ) + { + QStringList list = texImage.capturedTexts(); + + if( list[ 1 ].compare( "tex" ) == 0 + || list[ 1 ].endsWith( " tex" ) ) + { + QString name; + name.sprintf( "%04X%04X%04X.gif", entry.itemIndex, entry.binIndex, texCount ); + imgName = texCachePath + "/" + name; + + if( !QFileInfo( imgName ).exists() ) + { + QString tex = list[ 2 ]; + tex.replace( regFrac, "\\frac" ); + + QString command = texCgiPath + " -e " + imgName + + " \"" + tex + "\""; + QProcess::execute( command ); + } + + QString tag = QString( "\"""; + + text.replace( pos, list[0].length(), tag ); + pos += tag.length() + 1; + + texCount += 1; + } + else + pos += list[ 0 ].length(); + } + } + + // Fix outstanding elements + text += "
"; + + return text.toUtf8().data(); +} + +void SlobDictionary::loadResource( std::string & resourceName, string & data ) +{ + vector< WordArticleLink > link; + string resData; + RefEntry entry; + + link = resourceIndex.findArticles( Utf8::decode( resourceName ) ); + + if( link.empty() ) + return; + + readArticle( link[ 0 ].articleOffset, data, entry ); +} + +quint32 SlobDictionary::readArticle( quint32 articleNumber, std::string & result, + RefEntry & entry ) +{ + string data; + quint8 contentId; + + { + Mutex::Lock _( slobMutex ); + if( entry.key.isEmpty() ) + sf.getRefEntry( articleNumber, entry ); + contentId = sf.getItem( entry, &data ); + } + + if( contentId == 0xFF ) + return 0xFFFFFFFF; + + QString contentType = sf.getContentType( contentId ); + + if( contentType.contains( "text/html", Qt::CaseInsensitive ) + || contentType.contains( "text/plain", Qt::CaseInsensitive ) + || contentType.contains( "/css", Qt::CaseInsensitive ) + || contentType.contains( "/javascript", Qt::CaseInsensitive ) + || contentType.contains( "/json", Qt::CaseInsensitive )) + { + QTextCodec *codec = sf.getCodec(); + QString content = codec->toUnicode( data.c_str(), data.size() ); + result = string( content.toUtf8().data() ); + } + else + result = data; + + return contentId; +} + +void SlobDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration ) +{ + if( !( Dictionary::needToRebuildIndex( getDictionaryFilenames(), ftsIdxName ) + || FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this ) ) ) + FTS_index_completed.ref(); + + if( haveFTSIndex() ) + return; + + if( ensureInitDone().size() ) + return; + + if( firstIteration && getArticleCount() > FTS::MaxDictionarySizeForFastSearch ) + return; + + gdDebug( "Slob: Building the full-text index for dictionary: %s\n", + getName().c_str() ); + + try + { + Mutex::Lock _( getFtsMutex() ); + + File::Class ftsIdx( ftsIndexName(), "wb" ); + + FtsHelpers::FtsIdxHeader ftsIdxHeader; + memset( &ftsIdxHeader, 0, sizeof( ftsIdxHeader ) ); + + // We write a dummy header first. At the end of the process the header + // will be rewritten with the right values. + + ftsIdx.write( ftsIdxHeader ); + + ChunkedStorage::Writer chunks( ftsIdx ); + + BtreeIndexing::IndexedWords indexedWords; + + QSet< uint32_t > setOfOffsets; + + findArticleLinks( 0, &setOfOffsets, 0, &isCancelled ); + + if( isCancelled ) + throw exUserAbort(); + + QVector< uint32_t > offsets; + offsets.resize( setOfOffsets.size() ); + uint32_t * ptr = &offsets.front(); + + for( QSet< uint32_t >::ConstIterator it = setOfOffsets.constBegin(); + it != setOfOffsets.constEnd(); ++it ) + { + *ptr = *it; + ptr++; + } + + // Free memory + setOfOffsets.clear(); + + if( isCancelled ) + throw exUserAbort(); + + qSort( offsets ); + + if( isCancelled ) + throw exUserAbort(); + + QMap< QString, QVector< uint32_t > > ftsWords; + + set< quint64 > indexedArticles; + RefEntry entry; + string articleText; + quint32 htmlType = 0xFFFFFFFF; + + for( unsigned i = 0; i < sf.getContentTypesCount(); i++ ) + { + if( sf.getContentType( i ).startsWith( "text/html", Qt::CaseInsensitive ) ) + { + htmlType = i; + break; + } + } + + // index articles for full-text search + for( int i = 0; i < offsets.size(); i++ ) + { + if( isCancelled ) + throw exUserAbort(); + + QString articleStr; + quint32 articleNom = offsets.at( i ); + + sf.getRefEntry( articleNom, entry ); + + quint64 articleID = ( ( (quint64)entry.itemIndex ) << 32 ) | entry.binIndex; + + set< quint64 >::iterator it = indexedArticles.find( articleID ); + if( it != indexedArticles.end() ) + continue; + + indexedArticles.insert( articleID ); + + quint32 type = readArticle( 0, articleText, entry ); + + articleStr = QString::fromUtf8( articleText.c_str(), articleText.length() ); + + if( type == htmlType ) + articleStr = Html::unescape( articleStr ); + + FtsHelpers::parseArticleForFts( articleNom, articleStr, ftsWords ); + } + + // Free memory + offsets.clear(); + + QMap< QString, QVector< uint32_t > >::iterator it = ftsWords.begin(); + while( it != ftsWords.end() ) + { + if( isCancelled ) + throw exUserAbort(); + + uint32_t offset = chunks.startNewBlock(); + uint32_t size = it.value().size(); + + chunks.addToBlock( &size, sizeof(uint32_t) ); + chunks.addToBlock( it.value().data(), size * sizeof(uint32_t) ); + + indexedWords.addSingleWord( gd::toWString( it.key() ), offset ); + + it = ftsWords.erase( it ); + } + + // Free memory + ftsWords.clear(); + + if( isCancelled ) + throw exUserAbort(); + + ftsIdxHeader.chunksOffset = chunks.finish(); + ftsIdxHeader.wordCount = indexedWords.size(); + + if( isCancelled ) + throw exUserAbort(); + + BtreeIndexing::IndexInfo ftsIdxInfo = BtreeIndexing::buildIndex( indexedWords, ftsIdx ); + + // Free memory + indexedWords.clear(); + + ftsIdxHeader.indexBtreeMaxElements = ftsIdxInfo.btreeMaxElements; + ftsIdxHeader.indexRootOffset = ftsIdxInfo.rootOffset; + + ftsIdxHeader.signature = FtsHelpers::FtsSignature; + ftsIdxHeader.formatVersion = FtsHelpers::CurrentFtsFormatVersion + getFtsIndexVersion(); + + ftsIdx.rewind(); + ftsIdx.writeRecords( &ftsIdxHeader, sizeof(ftsIdxHeader), 1 ); + + FTS_index_completed.ref(); + } + catch( std::exception &ex ) + { + gdWarning( "Slob: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() ); + QFile::remove( FsEncoding::decode( ftsIdxName.c_str() ) ); + } +} + +void SlobDictionary::getArticleText( uint32_t articleAddress, QString & headword, QString & text ) +{ + try + { + RefEntry entry; + string articleText; + + quint32 htmlType = 0xFFFFFFFF; + + for( unsigned i = 0; i < sf.getContentTypesCount(); i++ ) + { + if( sf.getContentType( i ).startsWith( "text/html", Qt::CaseInsensitive ) ) + { + htmlType = i; + break; + } + } + + quint32 type = readArticle( articleAddress, articleText, entry ); + headword = entry.key; + + text = QString::fromUtf8( articleText.data(), articleText.size() ); + + if( type == htmlType ) + text = Html::unescape( text ); + } + catch( std::exception &ex ) + { + gdWarning( "Slob: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() ); + } +} + + +sptr< Dictionary::DataRequest > SlobDictionary::getSearchResults( QString const & searchString, + int searchMode, bool matchCase, + int distanceBetweenWords, + int maxResults ) +{ + return new FtsHelpers::FTSResultsRequest( *this, searchString, searchMode, matchCase, distanceBetweenWords, maxResults ); +} + + +/// SlobDictionary::getArticle() + +class SlobArticleRequest; + +class SlobArticleRequestRunnable: public QRunnable +{ + SlobArticleRequest & r; + QSemaphore & hasExited; + +public: + + SlobArticleRequestRunnable( SlobArticleRequest & r_, + QSemaphore & hasExited_ ): r( r_ ), + hasExited( hasExited_ ) + {} + + ~SlobArticleRequestRunnable() + { + hasExited.release(); + } + + virtual void run(); +}; + +class SlobArticleRequest: public Dictionary::DataRequest +{ + friend class SlobArticleRequestRunnable; + + wstring word; + vector< wstring > alts; + SlobDictionary & dict; + + QAtomicInt isCancelled; + QSemaphore hasExited; + +public: + + SlobArticleRequest( wstring const & word_, + vector< wstring > const & alts_, + SlobDictionary & dict_ ): + word( word_ ), alts( alts_ ), dict( dict_ ) + { + QThreadPool::globalInstance()->start( + new SlobArticleRequestRunnable( *this, hasExited ) ); + } + + void run(); // Run from another thread by DslArticleRequestRunnable + + virtual void cancel() + { + isCancelled.ref(); + } + + ~SlobArticleRequest() + { + isCancelled.ref(); + hasExited.acquire(); + } +}; + +void SlobArticleRequestRunnable::run() +{ + r.run(); +} + +void SlobArticleRequest::run() +{ + if ( isCancelled ) + { + finish(); + return; + } + + vector< WordArticleLink > chain = dict.findArticles( word ); + + for( unsigned x = 0; x < alts.size(); ++x ) + { + /// Make an additional query for each alt + + vector< WordArticleLink > altChain = dict.findArticles( alts[ x ] ); + + chain.insert( chain.end(), altChain.begin(), altChain.end() ); + } + + multimap< wstring, pair< string, string > > mainArticles, alternateArticles; + + set< quint32 > articlesIncluded; // Some synonims make it that the articles + // appear several times. We combat this + // by only allowing them to appear once. + + wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); + + for( unsigned x = 0; x < chain.size(); ++x ) + { + if ( isCancelled ) + { + finish(); + return; + } + + if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() ) + continue; // We already have this article in the body. + + // Now grab that article + + string headword, articleText; + + headword = chain[ x ].word; + try + { + dict.loadArticle( chain[ x ].articleOffset, articleText ); + } + catch(...) + { + } + + // Ok. Now, does it go to main articles, or to alternate ones? We list + // main ones first, and alternates after. + + // We do the case-folded comparison here. + + wstring headwordStripped = + Folding::applySimpleCaseOnly( Utf8::decode( headword ) ); + + multimap< wstring, pair< string, string > > & mapToUse = + ( wordCaseFolded == headwordStripped ) ? + mainArticles : alternateArticles; + + mapToUse.insert( pair< wstring, pair< string, string > >( + Folding::applySimpleCaseOnly( Utf8::decode( headword ) ), + pair< string, string >( headword, articleText ) ) ); + + articlesIncluded.insert( chain[ x ].articleOffset ); + } + + if ( mainArticles.empty() && alternateArticles.empty() ) + { + // No such word + finish(); + return; + } + + string result; + + multimap< wstring, pair< string, string > >::const_iterator i; + + for( i = mainArticles.begin(); i != mainArticles.end(); ++i ) + { + result += "

"; + result += i->second.first; + result += "

"; + result += i->second.second; + } + + for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i ) + { + result += "

"; + result += i->second.first; + result += "

"; + result += i->second.second; + } + + Mutex::Lock _( dataMutex ); + + data.resize( result.size() ); + + memcpy( &data.front(), result.data(), result.size() ); + + hasAnyData = true; + + finish(); +} + +sptr< Dictionary::DataRequest > SlobDictionary::getArticle( wstring const & word, + vector< wstring > const & alts, + wstring const & ) + throw( std::exception ) +{ + return new SlobArticleRequest( word, alts, *this ); +} + +//// SlobDictionary::getResource() + +class SlobResourceRequest; + +class SlobResourceRequestRunnable: public QRunnable +{ + SlobResourceRequest & r; + QSemaphore & hasExited; + +public: + + SlobResourceRequestRunnable( SlobResourceRequest & r_, + QSemaphore & hasExited_ ): r( r_ ), + hasExited( hasExited_ ) + {} + + ~SlobResourceRequestRunnable() + { + hasExited.release(); + } + + virtual void run(); +}; + +class SlobResourceRequest: public Dictionary::DataRequest +{ + friend class SlobResourceRequestRunnable; + + SlobDictionary & dict; + + string resourceName; + + QAtomicInt isCancelled; + QSemaphore hasExited; + +public: + + SlobResourceRequest( SlobDictionary & dict_, + string const & resourceName_ ): + dict( dict_ ), + resourceName( resourceName_ ) + { + QThreadPool::globalInstance()->start( + new SlobResourceRequestRunnable( *this, hasExited ) ); + } + + void run(); // Run from another thread by ZimResourceRequestRunnable + + virtual void cancel() + { + isCancelled.ref(); + } + + ~SlobResourceRequest() + { + isCancelled.ref(); + hasExited.acquire(); + } +}; + +void SlobResourceRequestRunnable::run() +{ + r.run(); +} + +void SlobResourceRequest::run() +{ + // Some runnables linger enough that they are cancelled before they start + if ( isCancelled ) + { + finish(); + return; + } + + try + { + string resource; + dict.loadResource( resourceName, resource ); + if( resource.empty() ) + throw File::Ex(); + + if( Filetype::isNameOfCSS( resourceName ) ) + { + QString css = QString::fromUtf8( resource.data(), resource.size() ); + dict.isolateCSS( css, ".slobdict" ); + QByteArray bytes = css.toUtf8(); + data.resize( bytes.size() ); + memcpy( &data.front(), bytes.constData(), bytes.size() ); + } + else + if ( Filetype::isNameOfTiff( resourceName ) ) + { + // Convert it + + dataMutex.lock(); + + QImage img = QImage::fromData( reinterpret_cast< const uchar * >( resource.data() ), resource.size() ); + +#ifdef MAKE_EXTRA_TIFF_HANDLER + if( img.isNull() ) + GdTiff::tiffToQImage( &data.front(), data.size(), img ); +#endif + + dataMutex.unlock(); + + if ( !img.isNull() ) + { + // Managed to load -- now store it back as BMP + + QByteArray ba; + QBuffer buffer( &ba ); + buffer.open( QIODevice::WriteOnly ); + img.save( &buffer, "BMP" ); + + Mutex::Lock _( dataMutex ); + + data.resize( buffer.size() ); + + memcpy( &data.front(), buffer.data(), data.size() ); + } + } + else + { + Mutex::Lock _( dataMutex ); + data.resize( resource.size() ); + memcpy( &data.front(), resource.data(), data.size() ); + } + + hasAnyData = true; + } + catch( std::exception &ex ) + { + gdWarning( "SLOB: Failed loading resource \"%s\" from \"%s\", reason: %s\n", + resourceName.c_str(), dict.getName().c_str(), ex.what() ); + // Resource not loaded -- we don't set the hasAnyData flag then + } + + finish(); +} + +sptr< Dictionary::DataRequest > SlobDictionary::getResource( string const & name ) + throw( std::exception ) +{ + return new SlobResourceRequest( *this, name ); +} + + +vector< sptr< Dictionary::Class > > makeDictionaries( + vector< string > const & fileNames, + string const & indicesDir, + Dictionary::Initializing & initializing ) + throw( std::exception ) +{ + vector< sptr< Dictionary::Class > > dictionaries; + + for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end(); + ++i ) + { + // Skip files with the extensions different to .slob to speed up the + // scanning + + QString firstName = QDir::fromNativeSeparators( FsEncoding::decode( i->c_str() ) ); + if( !firstName.endsWith( ".slob") ) + continue; + + // Got the file -- check if we need to rebuid the index + + vector< string > dictFiles( 1, *i ); + + string dictId = Dictionary::makeDictionaryId( dictFiles ); + + string indexFile = indicesDir + dictId; + + try + { + if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || + indexIsOldOrBad( indexFile ) ) + { + SlobFile sf; + + gdDebug( "Slob: Building the index for dictionary: %s\n", i->c_str() ); + + sf.open( firstName ); + + initializing.indexingDictionary( sf.getDictionaryName().toUtf8().constData() ); + + File::Class idx( indexFile, "wb" ); + IdxHeader idxHeader; + memset( &idxHeader, 0, sizeof( idxHeader ) ); + + // We write a dummy header first. At the end of the process the header + // will be rewritten with the right values. + + idx.write( idxHeader ); + + RefEntry refEntry; + quint32 entries = sf.getRefsCount(); + + IndexedWords indexedWords, indexedResources; + + set< quint64 > articlesPos; + quint32 articleCount = 0, wordCount = 0; + + for( quint32 i = 0; i < entries; i++ ) + { + sf.getRefEntry( i, refEntry ); + + quint8 type = sf.getItem( refEntry, 0 ); + + QString contentType = sf.getContentType( type ); + + if( contentType.startsWith( "text/html", Qt::CaseInsensitive ) + || contentType.startsWith( "text/plain", Qt::CaseInsensitive ) ) + { + //Article + indexedWords.addWord( gd::toWString( refEntry.key ), i ); + + wordCount += 1; + + quint64 pos = ( ( (quint64)refEntry.itemIndex ) << 32 ) + refEntry.binIndex; + if( articlesPos.find( pos ) == articlesPos.end() ) + { + articleCount += 1; + articlesPos.insert( pos ); + } + } + else + { + indexedResources.addSingleWord( gd::toWString( refEntry.key ), i ); + } + } + + // Build index + + { + IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx ); + + idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements; + idxHeader.indexRootOffset = idxInfo.rootOffset; + + indexedWords.clear(); // Release memory -- no need for this data + } + + { + IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedResources, idx ); + + idxHeader.resourceIndexBtreeMaxElements = idxInfo.btreeMaxElements; + idxHeader.resourceIndexRootOffset = idxInfo.rootOffset; + + indexedResources.clear(); // Release memory -- no need for this data + } + + idxHeader.signature = Signature; + idxHeader.formatVersion = CurrentFormatVersion; + + idxHeader.articleCount = articleCount; + idxHeader.wordCount = wordCount; + + QPair langs = + LangCoder::findIdsForFilename( QString::fromStdString( dictFiles[ 0 ] ) ); + + idxHeader.langFrom = langs.first; + idxHeader.langTo = langs.second; + + idx.rewind(); + + idx.write( &idxHeader, sizeof( idxHeader ) ); + + } + dictionaries.push_back( new SlobDictionary( dictId, + indexFile, + dictFiles ) ); + } + catch( std::exception & e ) + { + gdWarning( "Slob dictionary initializing failed: %s, error: %s\n", + i->c_str(), e.what() ); + continue; + } + catch( ... ) + { + qWarning( "Slob dictionary initializing failed\n" ); + continue; + } + } + return dictionaries; +} + +} // namespace Slob + +#endif diff --git a/slob.hh b/slob.hh new file mode 100644 index 00000000..9fdbde94 --- /dev/null +++ b/slob.hh @@ -0,0 +1,24 @@ +#ifndef __SLOB_HH_INCLUDED__ +#define __SLOB_HH_INCLUDED__ + +#ifdef MAKE_ZIM_SUPPORT + +#include "dictionary.hh" + +/// Support for the Slob dictionaries. +namespace Slob { + +using std::vector; +using std::string; + +vector< sptr< Dictionary::Class > > makeDictionaries( + vector< string > const & fileNames, + string const & indicesDir, + Dictionary::Initializing & ) + throw( std::exception ); + +} + +#endif + +#endif // __SLOB_HH_INCLUDED__ diff --git a/zim.cc b/zim.cc index e085a053..8f878a11 100644 --- a/zim.cc +++ b/zim.cc @@ -840,7 +840,7 @@ void ZimDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration ftsIdxHeader.indexRootOffset = ftsIdxInfo.rootOffset; ftsIdxHeader.signature = FtsHelpers::FtsSignature; - ftsIdxHeader.formatVersion = FtsHelpers::CurrentFtsFormatVersion; + ftsIdxHeader.formatVersion = FtsHelpers::CurrentFtsFormatVersion + getFtsIndexVersion(); ftsIdx.rewind(); ftsIdx.writeRecords( &ftsIdxHeader, sizeof(ftsIdxHeader), 1 );