/* This file is (c) 2008-2009 Konstantin Isakov
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include "stardict.hh"
#include "btreeidx.hh"
#include "folding.hh"
#include "utf8.hh"
#include "chunkedstorage.hh"
#include "dictzip.h"
#include "xdxf2html.hh"
#include "htmlescape.hh"
#include <zlib.h>
#include <map>
#include <set>
#include <string>
#ifndef __WIN32
#include <arpa/inet.h>
#else
#include <winsock.h>
#endif
#include <stdlib.h>
#include <QString>

namespace Stardict {

using std::map;
using std::multimap;
using std::pair;
using std::set;
using std::string;
using std::wstring;

using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;

namespace {

DEF_EX( exNotAnIfoFile, "Not an .ifo file", Dictionary::Ex )
DEF_EX_STR( exBadFieldInIfo, "Bad field in .ifo file encountered:", Dictionary::Ex )
DEF_EX_STR( exNoIdxFile, "No corresponding .idx file was found for", Dictionary::Ex )
DEF_EX_STR( exNoDictFile, "No corresponding .dict file was found for", Dictionary::Ex )
DEF_EX_STR( exNoSynFile, "No corresponding .syn file was found for", Dictionary::Ex )
DEF_EX( ex64BitsNotSupported, "64-bit indices are not presently supported, sorry", Dictionary::Ex )
DEF_EX( exDicttypeNotSupported, "Dictionaries with dicttypes are not supported, sorry", Dictionary::Ex )
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
DEF_EX_STR( exWordIsTooLarge, "Encountered a word that is too large:", Dictionary::Ex )
DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex )
DEF_EX_STR( exIncorrectOffset, "Incorrect offset encountered in file", Dictionary::Ex )

/// Contents of an .ifo file
struct Ifo
{
  string version;
  string bookname;
  uint32_t wordcount, synwordcount, idxfilesize, idxoffsetbits;
  string sametypesequence, dicttype;

  Ifo( File::Class & );
};

enum
{
  Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
  CurrentFormatVersion = 4 + BtreeIndexing::FormatVersion + Folding::Version
};

struct IdxHeader
{
  uint32_t signature; // First comes the signature, SIDX
  uint32_t formatVersion; // File format version (CurrentFormatVersion)
  uint32_t chunksOffset; // The offset to chunks' storage
  uint32_t indexOffset; // The offset of the index in the file
} __attribute__((packed));

bool indexIsOldOrBad( string const & indexFile )
{
  File::Class idx( indexFile, "rb" );

  IdxHeader header;

  return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
         header.signature != Signature ||
         header.formatVersion != CurrentFormatVersion;
}
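// For reference, a typical .ifo file accepted by the Ifo parser (defined
// above, implemented in Ifo::Ifo further below) looks roughly like this.
// This is an illustrative sketch with made-up values; only the keys shown
// here are recognized:
//
//   StarDict's dict ifo file
//   version=2.4.2
//   bookname=Some English Dictionary
//   wordcount=95000
//   synwordcount=12000
//   idxfilesize=1748210
//   idxoffsetbits=32
//   sametypesequence=m
//
// The first line and the version= line are mandatory. Dictionaries with
// idxoffsetbits=64 or with a dicttype= field are rejected later, in
// makeDictionaries().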
class StardictDictionary: public BtreeIndexing::BtreeDictionary
{
  Ifo ifo;
  File::Class idx;
  IdxHeader idxHeader;
  ChunkedStorage::Reader chunks;
  dictData * dz;

public:

  StardictDictionary( string const & id, string const & indexFile,
                      vector< string > const & dictionaryFiles,
                      Ifo const & );

  ~StardictDictionary();

  virtual string getName() throw()
  { return ifo.bookname; }

  virtual map< Dictionary::Property, string > getProperties() throw()
  { return map< Dictionary::Property, string >(); }

  virtual unsigned long getArticleCount() throw()
  { return ifo.wordcount; }

  virtual unsigned long getWordCount() throw()
  { return ifo.wordcount + ifo.synwordcount; }

  virtual vector< wstring > findHeadwordsForSynonym( wstring const & )
    throw( std::exception );

  virtual string getArticle( wstring const &, vector< wstring > const & alts )
    throw( Dictionary::exNoSuchWord, std::exception );

private:

  /// Retrieves the article's offset/size in the .dict file, and its headword.
  void getArticleProps( uint32_t articleAddress, string & headword,
                        uint32_t & offset, uint32_t & size );

  /// Loads the article, storing its headword and formatting the data it has
  /// into an html.
  void loadArticle( uint32_t address, string & headword, string & articleText );
};

StardictDictionary::StardictDictionary( string const & id,
                                        string const & indexFile,
                                        vector< string > const & dictionaryFiles,
                                        Ifo const & ifo_ ):
  BtreeDictionary( id, dictionaryFiles ),
  ifo( ifo_ ),
  idx( indexFile, "rb" ),
  idxHeader( idx.read< IdxHeader >() ),
  chunks( idx, idxHeader.chunksOffset )
{
  // Open the .dict file
  dz = dict_data_open( dictionaryFiles[ 2 ].c_str(), 0 );

  if ( !dz )
    throw exCantReadFile( dictionaryFiles[ 2 ] );

  // Initialize the index
  idx.seek( idxHeader.indexOffset );

  openIndex( idx );
}

StardictDictionary::~StardictDictionary()
{
  if ( dz )
    dict_data_close( dz );
}

void StardictDictionary::getArticleProps( uint32_t articleAddress,
                                          string & headword,
                                          uint32_t & offset, uint32_t & size )
{
  vector< char > chunk;

  char * articleData = chunks.getBlock( articleAddress, chunk );

  memcpy( &offset, articleData, sizeof( uint32_t ) );
  articleData += sizeof( uint32_t );
  memcpy( &size, articleData, sizeof( uint32_t ) );
  articleData += sizeof( uint32_t );

  headword = articleData;
}

/// This function tries to make an html of the Stardict's resource typed
/// 'type', contained in a block pointed to by 'resource', 'size' bytes long.
static string handleResource( char type, char const * resource, size_t size )
{
  switch( type )
  {
    case 'x': // Xdxf content
      return Xdxf2Html::convert( string( resource, size ) );
    case 'h': // Html content
      return "<div class=\"sdct_h\">" + string( resource, size ) + "</div>";
    case 'm': // Pure meaning, usually means preformatted text
      return "<div class=\"sdct_m\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'l': // Same as 'm', but not in utf8, instead in the current locale's
              // encoding. We just use Qt here, it should know better about
              // the system's locale.
      return "<div class=\"sdct_l\">" + Html::escape( QString::fromLocal8Bit( resource, size ).toUtf8().data() ) + "</div>";
    case 'g': // Pango markup.
      return "<div class=\"sdct_g\">" + string( resource, size ) + "</div>";
    case 't': // Transcription
      return "<div class=\"sdct_t\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'y': // Chinese YinBiao or Japanese KANA. Examples are needed. For now,
              // just output as pure escaped utf8.
      return "<div class=\"sdct_y\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'k': // KingSoft PowerWord data. We don't know how to handle that.
      return "<div class=\"sdct_k\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'w': // MediaWiki markup. We don't handle this right now.
      return "<div class=\"sdct_w\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'n': // WordNet data. We don't know anything about it.
      return "<div class=\"sdct_n\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'r': // Resource file list. For now, resources aren't handled.
      return "<div class=\"sdct_r\">" + Html::escape( string( resource, size ) ) + "</div>";
    case 'W': // An embedded Wav file. Unhandled yet.
      return "<div class=\"sdct_W\">(an embedded .wav file)</div>";
    case 'P': // An embedded picture file. Unhandled yet.
      return "<div class=\"sdct_P\">(an embedded picture file)</div>";
  }

  if ( islower( type ) )
  {
    return string( "<b>Unknown textual entry type " ) + string( 1, type ) + ":</b> " +
           Html::escape( string( resource, size ) ) + "<br>";
  }
  else
    return string( "<b>Unknown blob entry type " ) + string( 1, type ) + "</b><br>";
}

void StardictDictionary::loadArticle( uint32_t address,
                                      string & headword,
                                      string & articleText )
{
  uint32_t offset, size;

  getArticleProps( address, headword, offset, size );

  // Note that the function always zero-pads the result.
  char * articleBody = dict_data_read_( dz, offset, size, 0, 0 );

  if ( !articleBody )
    throw exCantReadFile( getDictionaryFilenames()[ 2 ] );

  articleText.clear();

  char * ptr = articleBody;

  if ( ifo.sametypesequence.size() )
  {
    /// The sequence is known, it's not stored in the article itself
    for( unsigned seq = 0; seq < ifo.sametypesequence.size(); ++seq )
    {
      // Last entry doesn't have size info -- it is inferred from
      // the bytes left
      bool entrySizeKnown = ( seq == ifo.sametypesequence.size() - 1 );

      uint32_t entrySize;

      if ( entrySizeKnown )
        entrySize = size;
      else
      if ( !size )
      {
        fprintf( stderr, "Warning: short entry for the word %s encountered.\n",
                 headword.c_str() );
        break;
      }

      char type = ifo.sametypesequence[ seq ];

      if ( islower( type ) )
      {
        // Zero-terminated entry, unless it's the last one
        if ( !entrySizeKnown )
          entrySize = strlen( ptr );

        if ( size < entrySize )
        {
          fprintf( stderr, "Warning: malformed entry for the word %s encountered.\n",
                   headword.c_str() );
          break;
        }

        articleText += handleResource( type, ptr, entrySize );

        if ( !entrySizeKnown )
          ++entrySize; // Need to skip the zero byte

        ptr += entrySize;
        size -= entrySize;
      }
      else
      if ( isupper( type ) )
      {
        // An entry which has its size before contents, unless it's the last one

        if ( !entrySizeKnown )
        {
          if ( size < sizeof( uint32_t ) )
          {
            fprintf( stderr, "Warning: malformed entry for the word %s encountered.\n",
                     headword.c_str() );
            break;
          }

          memcpy( &entrySize, ptr, sizeof( uint32_t ) );

          entrySize = ntohl( entrySize );

          ptr += sizeof( uint32_t );
          size -= sizeof( uint32_t );
        }

        if ( size < entrySize )
        {
          fprintf( stderr, "Warning: malformed entry for the word %s encountered.\n",
                   headword.c_str() );
          break;
        }

        articleText += handleResource( type, ptr, entrySize );

        ptr += entrySize;
        size -= entrySize;
      }
      else
      {
        fprintf( stderr, "Warning: non-alpha entry type 0x%x for the word %s encountered.\n",
                 type, headword.c_str() );
        break;
      }
    }
  }
  else
  {
    // The sequence is stored in each article separately
    while( size )
    {
      if ( islower( *ptr ) )
      {
        // Zero-terminated entry

        size_t len = strlen( ptr + 1 );

        if ( size < len + 2 )
        {
          fprintf( stderr, "Warning: malformed entry for the word %s encountered.\n",
                   headword.c_str() );
          break;
        }

        articleText += handleResource( *ptr, ptr + 1, len );

        ptr += len + 2;
        size -= len + 2;
      }
      else
      if ( isupper( *ptr ) )
      {
        // An entry which has its size before contents

        if ( size < sizeof( uint32_t ) + 1 )
        {
          fprintf( stderr, "Warning: malformed entry for the word %s encountered.\n",
                   headword.c_str() );
          break;
        }

        uint32_t entrySize;

        memcpy( &entrySize, ptr + 1, sizeof( uint32_t ) );

        entrySize = ntohl( entrySize );

        if ( size < sizeof( uint32_t ) + 1 + entrySize )
        {
          fprintf( stderr, "Warning: malformed entry for the word %s encountered.\n",
                   headword.c_str() );
          break;
        }

        articleText += handleResource( *ptr, ptr + 1 + sizeof( uint32_t ), entrySize );

        ptr += sizeof( uint32_t ) + 1 + entrySize;
        size -= sizeof( uint32_t ) + 1 + entrySize;
      }
      else
      {
        fprintf( stderr, "Warning: non-alpha entry type 0x%x for the word %s encountered.\n",
                 (unsigned)*ptr, headword.c_str() );
        break;
      }
    }
  }

  free( articleBody );
}
vector< wstring > StardictDictionary::findHeadwordsForSynonym( wstring const & str )
  throw( std::exception )
{
  vector< wstring > result;

  vector< WordArticleLink > chain = findArticles( str );

  wstring caseFolded = Folding::applySimpleCaseOnly( str );

  for( unsigned x = 0; x < chain.size(); ++x )
  {
    string headword, articleText;

    loadArticle( chain[ x ].articleOffset, headword, articleText );

    wstring headwordDecoded = Utf8::decode( headword );

    if ( caseFolded != Folding::applySimpleCaseOnly( headwordDecoded ) )
    {
      // The headword seems to differ from the input word, which makes the
      // input word its synonym.
      result.push_back( headwordDecoded );
    }
  }

  return result;
}

string StardictDictionary::getArticle( wstring const & word,
                                       vector< wstring > const & alts )
  throw( Dictionary::exNoSuchWord, std::exception )
{
  vector< WordArticleLink > chain = findArticles( word );

  for( unsigned x = 0; x < alts.size(); ++x )
  {
    /// Make an additional query for each alt
    vector< WordArticleLink > altChain = findArticles( alts[ x ] );

    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }

  multimap< wstring, pair< string, string > > mainArticles, alternateArticles;

  set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
                                    // appear several times. We combat this
                                    // by only allowing them to appear once.

  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );

  for( unsigned x = 0; x < chain.size(); ++x )
  {
    if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
      continue; // We already have this article in the body.

    // Now grab that article

    string headword, articleText;

    loadArticle( chain[ x ].articleOffset, headword, articleText );

    // Ok. Now, does it go to main articles, or to alternate ones? We list
    // main ones first, and alternates after.

    // We do the case-folded comparison here.

    wstring headwordStripped = Folding::applySimpleCaseOnly( Utf8::decode( headword ) );

    multimap< wstring, pair< string, string > > & mapToUse =
      ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;

    mapToUse.insert( pair< wstring, pair< string, string > >(
      Folding::applySimpleCaseOnly( Utf8::decode( headword ) ),
      pair< string, string >( headword, articleText ) ) );

    articlesIncluded.insert( chain[ x ].articleOffset );
  }

  if ( mainArticles.empty() && alternateArticles.empty() )
    throw Dictionary::exNoSuchWord();

  string result;

  multimap< wstring, pair< string, string > >::const_iterator i;

  // The "cleaner" closes tags which dictionary data sometimes leaves open,
  // so that one article's formatting doesn't leak into the next one.
  string cleaner = "</font>""</font>""</font>""</font>""</font>""</font>"
                   "</font>""</font>""</font>""</font>""</font>""</font>"
                   "</b></b></b></b></b></b></b></b>"
                   "</i></i></i></i></i></i></i></i>";

  for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
  {
    result += "<h3>";
    result += i->second.first;
    result += "</h3>";
    result += i->second.second;
    result += cleaner;
  }

  for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
  {
    result += "<h3>";
    result += i->second.first;
    result += "</h3>";
    result += i->second.second;
    result += cleaner;
  }

  return result;
}

static char const * beginsWith( char const * substr, char const * str )
{
  size_t len = strlen( substr );

  return strncmp( str, substr, len ) == 0 ? str + len : 0;
}

Ifo::Ifo( File::Class & f ):
  wordcount( 0 ), synwordcount( 0 ), idxfilesize( 0 ), idxoffsetbits( 32 )
{
  static string const versionEq( "version=" );

  //printf( "%s<\n", f.gets().c_str() );
  //printf( "%s<\n", f.gets().c_str() );

  if ( f.gets() != "StarDict's dict ifo file" ||
       f.gets().compare( 0, versionEq.size(), versionEq ) )
    throw exNotAnIfoFile();

  /// Now go through the file and parse options
  try
  {
    char option[ 16384 ];

    for( ; ; )
    {
      if ( !f.gets( option, sizeof( option ), true ) )
        break;

      if ( char const * val = beginsWith( "bookname=", option ) )
        bookname = val;
      else
      if ( char const * val = beginsWith( "wordcount=", option ) )
      {
        if ( sscanf( val, "%u", & wordcount ) != 1 )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "synwordcount=", option ) )
      {
        if ( sscanf( val, "%u", & synwordcount ) != 1 )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "idxfilesize=", option ) )
      {
        if ( sscanf( val, "%u", & idxfilesize ) != 1 )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "idxoffsetbits=", option ) )
      {
        if ( sscanf( val, "%u", & idxoffsetbits ) != 1 ||
             ( idxoffsetbits != 32 && idxoffsetbits != 64 ) )
          throw exBadFieldInIfo( option );
      }
      else
      if ( char const * val = beginsWith( "sametypesequence=", option ) )
        sametypesequence = val;
      else
      if ( char const * val = beginsWith( "dicttype=", option ) )
        dicttype = val;
    }
  }
  catch( File::exReadError & )
  {
  }
}

} // anonymous namespace

static bool tryPossibleName( string const & name, string & copyTo )
{
  try
  {
    File::Class f( name, "rb" );

    copyTo = name;

    return true;
  }
  catch( ... )
  {
    return false;
  }
}
static void findCorrespondingFiles( string const & ifo,
                                    string & idx, string & dict, string & syn,
                                    bool needSyn )
{
  string base( ifo, 0, ifo.size() - 3 );

  if ( !(
          tryPossibleName( base + "idx", idx ) ||
          tryPossibleName( base + "idx.gz", idx ) ||
          tryPossibleName( base + "idx.dz", idx ) ||
          tryPossibleName( base + "IDX", idx ) ||
          tryPossibleName( base + "IDX.GZ", idx ) ||
          tryPossibleName( base + "IDX.DZ", idx )
      ) )
    throw exNoIdxFile( ifo );

  if ( !(
          tryPossibleName( base + "dict", dict ) ||
          tryPossibleName( base + "dict.dz", dict ) ||
          tryPossibleName( base + "DICT", dict ) ||
          tryPossibleName( base + "dict.DZ", dict )
      ) )
    throw exNoDictFile( ifo );

  if ( needSyn && !(
                     tryPossibleName( base + "syn", syn ) ||
                     tryPossibleName( base + "syn.gz", syn ) ||
                     tryPossibleName( base + "syn.dz", syn ) ||
                     tryPossibleName( base + "SYN", syn ) ||
                     tryPossibleName( base + "SYN.GZ", syn ) ||
                     tryPossibleName( base + "SYN.DZ", syn )
     ) )
    throw exNoSynFile( ifo );
}

static void handleIdxSynFile( string const & fileName,
                              IndexedWords & indexedWords,
                              ChunkedStorage::Writer & chunks,
                              vector< uint32_t > * articleOffsets,
                              bool isSynFile )
{
  gzFile stardictIdx = gzopen( fileName.c_str(), "rb" );

  if ( !stardictIdx )
    throw exCantReadFile( fileName );

  vector< char > image;

  for( ; ; )
  {
    size_t oldSize = image.size();

    image.resize( oldSize + 65536 );

    int rd = gzread( stardictIdx, &image.front() + oldSize, 65536 );

    if ( rd < 0 )
    {
      gzclose( stardictIdx );
      throw exCantReadFile( fileName );
    }

    if ( rd != 65536 )
    {
      image.resize( oldSize + rd + 1 );
      break;
    }
  }

  // Done reading -- release the file handle
  gzclose( stardictIdx );

  // We append one zero byte to catch runaway string at the end, if any
  image.back() = 0;

  // Now parse it

  for( char const * ptr = &image.front(); ptr != &image.back(); )
  {
    size_t wordLen = strlen( ptr );

    if ( ptr + wordLen + 1 + ( isSynFile ? sizeof( uint32_t ) :
                                           sizeof( uint32_t ) * 2 ) > &image.back() )
      throw exSuddenEndOfFile( fileName );

    char const * word = ptr;

    ptr += wordLen + 1;

    uint32_t offset;

    if ( !isSynFile )
    {
      // We're processing the .idx file

      uint32_t articleOffset, articleSize;

      memcpy( &articleOffset, ptr, sizeof( uint32_t ) );
      ptr += sizeof( uint32_t );
      memcpy( &articleSize, ptr, sizeof( uint32_t ) );
      ptr += sizeof( uint32_t );

      articleOffset = ntohl( articleOffset );
      articleSize = ntohl( articleSize );

      // Create an entry for the article in the chunked storage

      offset = chunks.startNewBlock();

      if ( articleOffsets )
        articleOffsets->push_back( offset );

      chunks.addToBlock( &articleOffset, sizeof( uint32_t ) );
      chunks.addToBlock( &articleSize, sizeof( uint32_t ) );
      chunks.addToBlock( word, wordLen + 1 );
    }
    else
    {
      // We're processing the .syn file

      uint32_t offsetInIndex;

      memcpy( &offsetInIndex, ptr, sizeof( uint32_t ) );
      ptr += sizeof( uint32_t );

      offsetInIndex = ntohl( offsetInIndex );

      if ( offsetInIndex >= articleOffsets->size() )
        throw exIncorrectOffset( fileName );

      offset = (*articleOffsets)[ offsetInIndex ];
    }

    wstring folded = Folding::apply( Utf8::decode( word ) );

    // Insert new entry into an index

    IndexedWords::iterator i = indexedWords.insert(
      IndexedWords::value_type( folded, vector< WordArticleLink >() ) ).first;

    // Try to conserve memory somewhat -- slow insertions are ok
    i->second.reserve( i->second.size() + 1 );

    i->second.push_back( WordArticleLink( word, offset ) );
  }

  printf( "%u entries made\n", (unsigned) indexedWords.size() );
}
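// The records handled above are laid out as follows (a sketch derived from
// the parsing code, not a normative description of the StarDict format):
//
//   .idx entry:  "headword\0" <article offset, uint32 BE> <article size, uint32 BE>
//   .syn entry:  "synonym\0"  <index of the .idx entry it refers to, uint32 BE>
//
// A .syn entry can only be resolved once the referenced .idx entry has been
// indexed, which is why makeDictionaries() below always processes the .idx
// file before the .syn file.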
vector< sptr< Dictionary::Class > > Format::makeDictionaries(
                                      vector< string > const & fileNames,
                                      string const & indicesDir,
                                      Dictionary::Initializing & initializing )
  throw( std::exception )
{
  vector< sptr< Dictionary::Class > > dictionaries;

  for( vector< string >::const_iterator i = fileNames.begin();
       i != fileNames.end(); ++i )
  {
    if ( i->size() < 4 ||
         strcasecmp( i->c_str() + ( i->size() - 4 ), ".ifo" ) != 0 )
      continue;

    try
    {
      File::Class ifoFile( *i, "r" );

      Ifo ifo( ifoFile );

      if ( ifo.idxoffsetbits == 64 )
        throw ex64BitsNotSupported();

      if ( ifo.dicttype.size() )
        throw exDicttypeNotSupported();

      printf( "bookname = %s\n", ifo.bookname.c_str() );
      printf( "wordcount = %u\n", ifo.wordcount );

      vector< string > dictFiles( 1, *i );

      string idxFileName, dictFileName, synFileName;

      findCorrespondingFiles( *i, idxFileName, dictFileName, synFileName,
                              ifo.synwordcount );

      dictFiles.push_back( idxFileName );
      dictFiles.push_back( dictFileName );

      if ( ifo.synwordcount )
        dictFiles.push_back( synFileName );

      string dictId = makeDictionaryId( dictFiles );

      string indexFile = indicesDir + dictId;

      if ( needToRebuildIndex( dictFiles, indexFile ) ||
           indexIsOldOrBad( indexFile ) )
      {
        // Building the index

        initializing.indexingDictionary( ifo.bookname );

        File::Class idx( indexFile, "wb" );

        IdxHeader idxHeader;

        memset( &idxHeader, 0, sizeof( idxHeader ) );

        // We write a dummy header first. At the end of the process the header
        // will be rewritten with the right values.
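        // The resulting index file is laid out roughly as
        //   [ IdxHeader | chunked article properties | btree index ]
        // (a sketch inferred from the writing order below); chunksOffset and
        // indexOffset in the rewritten header locate the latter two parts.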
        idx.write( idxHeader );

        IndexedWords indexedWords;

        ChunkedStorage::Writer chunks( idx );

        // Load indices
        if ( !ifo.synwordcount )
          handleIdxSynFile( idxFileName, indexedWords, chunks, 0, false );
        else
        {
          vector< uint32_t > articleOffsets;

          articleOffsets.reserve( ifo.wordcount );

          handleIdxSynFile( idxFileName, indexedWords, chunks,
                            &articleOffsets, false );
          handleIdxSynFile( synFileName, indexedWords, chunks,
                            &articleOffsets, true );
        }

        // Finish with the chunks
        idxHeader.chunksOffset = chunks.finish();

        // Build index
        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );

        // That concludes it. Update the header.
        idxHeader.signature = Signature;
        idxHeader.formatVersion = CurrentFormatVersion;

        idx.rewind();

        idx.write( &idxHeader, sizeof( idxHeader ) );
      }

      dictionaries.push_back( new StardictDictionary( dictId, indexFile,
                                                      dictFiles, ifo ) );
    }
    catch( std::exception & e )
    {
      fprintf( stderr, "Stardict's dictionary reading failed: %s, error: %s\n",
               i->c_str(), e.what() );
    }
  }

  return dictionaries;
}

}
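// Illustrative use of this module (a hypothetical driver sketch, not part of
// GoldenDict itself). It assumes Format, declared in stardict.hh, can be
// default-constructed, and MyInitializing is an assumed stand-in for whatever
// implements Dictionary::Initializing in the application:
//
//   std::vector< std::string > files;
//   files.push_back( "/usr/share/stardict/dic/some-dictionary.ifo" );
//
//   MyInitializing init;
//   Stardict::Format format;
//
//   std::vector< sptr< Dictionary::Class > > dicts =
//     format.makeDictionaries( files, "index-cache/", init );
//
//   // Each returned dictionary can now serve articles, e.g.
//   // dicts[ 0 ]->getArticle( word, alts ).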