/* This file is (c) 2008-2011 Konstantin Isakov
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include "stardict.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "chunkedstorage.hh"
#include "dictzip.h"
#include "xdxf2html.hh"
#include "htmlescape.hh"
#include "langcoder.hh"

// NOTE(review): the directives below carry no header name in this copy of the
// file (the <...> parts appear to have been lost). They are kept in place and
// in order; the header names must be restored before this file can compile.
#include
#include
#include
#include
#ifndef __WIN32
#include
#else
#include
#endif
#include

#ifdef _MSC_VER
#include
#endif

#include
#include
#include
#include

namespace Stardict {

using std::map;
using std::multimap;
using std::pair;
using std::set;
using std::string;
using gd::wstring;

using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;

namespace {

// Exceptions raised by this module. All of them derive from Dictionary::Ex.
DEF_EX( exNotAnIfoFile, "Not an .ifo file", Dictionary::Ex )
DEF_EX_STR( exBadFieldInIfo, "Bad field in .ifo file encountered:", Dictionary::Ex )
DEF_EX_STR( exNoIdxFile, "No corresponding .idx file was found for", Dictionary::Ex )
DEF_EX_STR( exNoDictFile, "No corresponding .dict file was found for", Dictionary::Ex )
DEF_EX_STR( exNoSynFile, "No corresponding .syn file was found for", Dictionary::Ex )
DEF_EX( ex64BitsNotSupported, "64-bit indices are not presently supported, sorry", Dictionary::Ex )
DEF_EX( exDicttypeNotSupported, "Dictionaries with dicttypes are not supported, sorry", Dictionary::Ex )
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
DEF_EX_STR( exWordIsTooLarge, "Enountered a word that is too large:", Dictionary::Ex )
DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex )
DEF_EX_STR( exIncorrectOffset, "Incorrect offset encountered in file", Dictionary::Ex )

/// Contents of an ifo file
struct Ifo
{
  string version;
  string bookname;
  uint32_t wordcount, synwordcount, idxfilesize, idxoffsetbits;
  string sametypesequence, dicttype;

  /// Fills the fields from the given file. The constructor is defined
  /// elsewhere in this file.
  Ifo( File::Class & );
};

enum
{
  Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
  // Changes whenever this module's own version (7), the btree index format
  // or the folding version changes; compared against the stored value by
  // indexIsOldOrBad().
  CurrentFormatVersion = 7 + BtreeIndexing::FormatVersion + Folding::Version
};

/// Header stored at the very beginning of our own index file. It is read and
/// compared as a raw block of bytes, hence the packed layout.
struct IdxHeader
{
  uint32_t signature; // First comes the signature, SIDX
  uint32_t formatVersion; // File format version (CurrentFormatVersion)
  uint32_t chunksOffset; // The offset to chunks' storage
  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
  uint32_t indexRootOffset;
  uint32_t wordCount; // Saved from Ifo::wordcount
  uint32_t synWordCount; // Saved from Ifo::synwordcount
  uint32_t bookNameSize; // Book name's length. Used to read it then.
  uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
  uint32_t langFrom;  // Source language
  uint32_t langTo;    // Target language
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;

/// Returns true when the index file's header could not be read in full, or
/// when its signature or format version differ from the ones this build
/// expects.
bool indexIsOldOrBad( string const & indexFile )
{
  File::Class idx( indexFile, "rb" );

  IdxHeader header;

  return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
         header.signature != Signature ||
         header.formatVersion != CurrentFormatVersion;
}

/// A dictionary backed by our own index file (header, strings, chunked
/// storage and btree index) plus a .dict file opened through dictzip.
class StardictDictionary: public BtreeIndexing::BtreeDictionary
{
  // NOTE: the constructor's initializer list depends on the declaration
  // order of idx, idxHeader, bookName, sameTypeSequence and chunks, because
  // each of them is read sequentially from the index file.
  Mutex idxMutex;
  File::Class idx;
  IdxHeader idxHeader;
  string bookName;
  string sameTypeSequence;
  ChunkedStorage::Reader chunks;
  Mutex dzMutex;
  dictData * dz;

public:

  StardictDictionary( string const & id, string const & indexFile,
                      vector< string > const & dictionaryFiles );

  ~StardictDictionary();

  virtual string getName() throw()
  { return bookName; }

  virtual map< Dictionary::Property, string > getProperties() throw()
  { return map< Dictionary::Property, string >(); }

  virtual unsigned long getArticleCount() throw()
  { return idxHeader.wordCount; }

  /// Headwords plus synonyms, as stored in the index header.
  virtual unsigned long getWordCount() throw()
  { return idxHeader.wordCount + idxHeader.synWordCount; }

  virtual QIcon getIcon() throw()
  { return QIcon(":/icons/icon32_stardict.png"); }

  inline virtual quint32 getLangFrom() const
  { return idxHeader.langFrom; }

  inline virtual quint32 getLangTo() const
  { return idxHeader.langTo; }

  virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
    throw( std::exception );

  virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
                                                      vector< wstring > const & alts,
                                                      wstring const & )
    throw( std::exception );

private:

  /// Retrieves the article's offset/size in .dict file, and its headword.
  void getArticleProps( uint32_t articleAddress,
                        string & headword,
                        uint32_t & offset, uint32_t & size );

  /// Loads the article, storing its headword and formatting the data it has
  /// into an html.
  void loadArticle( uint32_t address,
                    string & headword,
                    string & articleText );

  /// Reads 'size' bytes from the index file and returns them as a string.
  string loadString( size_t size );

  friend class StardictArticleRequest;
  friend class StardictHeadwordsRequest;
};

/// Opens the index file, reads its header and the two strings that follow
/// it, then opens the .dict file (dictionaryFiles[ 2 ]) and the btree index.
/// Throws exCantReadFile if the .dict file cannot be opened.
StardictDictionary::StardictDictionary( string const & id,
                                        string const & indexFile,
                                        vector< string > const & dictionaryFiles ):
  BtreeDictionary( id, dictionaryFiles ),
  idx( indexFile, "rb" ),
  idxHeader( idx.read< IdxHeader >() ),
  bookName( loadString( idxHeader.bookNameSize ) ),
  sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
  chunks( idx, idxHeader.chunksOffset )
{
  // Open the .dict file

  dz = dict_data_open( dictionaryFiles[ 2 ].c_str(), 0 );

  if ( !dz )
    throw exCantReadFile( dictionaryFiles[ 2 ] );

  // Initialize the index

  try
  {
    openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
                          idxHeader.indexRootOffset ),
               idx, idxMutex );
  }
  catch( ... )
  {
    // The destructor is not run for an object whose constructor throws, so
    // the .dict handle has to be released here to avoid leaking it.
    dict_data_close( dz );
    throw;
  }
}

StardictDictionary::~StardictDictionary()
{
  if ( dz )
    dict_data_close( dz );
}

/// Reads 'size' bytes from the index file via idx.read() and returns them as
/// a string. A zero size yields an empty string without touching the file.
string StardictDictionary::loadString( size_t size )
{
  // An empty vector has no front() element; taking its address would be
  // undefined behaviour, so the empty case is handled up front.
  if ( size == 0 )
    return string();

  vector< char > data( size );

  idx.read( &data.front(), data.size() );

  return string( &data.front(), data.size() );
}

/// Fetches the record stored at 'articleAddress' in the chunked storage and
/// decodes it: a uint32_t offset, a uint32_t size, then the headword as a
/// zero-terminated string.
void StardictDictionary::getArticleProps( uint32_t articleAddress,
                                          string & headword,
                                          uint32_t & offset, uint32_t & size )
{
  vector< char > chunk;

  // The chunk reader works on the shared index file, hence the lock.
  Mutex::Lock _( idxMutex );

  char * articleData = chunks.getBlock( articleAddress, chunk );

  // memcpy is used for the two integers as the record data is a plain byte
  // buffer.
  memcpy( &offset, articleData, sizeof( uint32_t ) );
  articleData += sizeof( uint32_t );
  memcpy( &size, articleData, sizeof( uint32_t ) );
  articleData += sizeof( uint32_t );

  headword = articleData;
}

/// This function tries to make an html of the Stardict's
resource typed /// 'type', contained in a block pointed to by 'resource', 'size' bytes long. static string handleResource( char type, char const * resource, size_t size ) { switch( type ) { case 'x': // Xdxf content return Xdxf2Html::convert( string( resource, size ) ); case 'h': // Html content return "

" + string( resource, size ) + "

"; case 'm': // Pure meaning, usually means preformatted text return "

" + Html::preformat( string( resource, size ) ) + "

"; case 'l': // Same as 'm', but not in utf8, instead in current locale's // encoding. // We just use Qt here, it should know better about system's // locale. return "

" + Html::preformat( QString::fromLocal8Bit( resource, size ).toUtf8().data() ) + "

"; case 'g': // Pango markup. return "

" + string( resource, size ) + "

"; case 't': // Transcription return "