/* This file is (c) 2008-2012 Konstantin Isakov * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ #include "stardict.hh" #include "btreeidx.hh" #include "folding.hh" #include "utf8.hh" #include "chunkedstorage.hh" #include "dictzip.h" #include "xdxf2html.hh" #include "htmlescape.hh" #include "langcoder.hh" #include "gddebug.hh" #include "fsencoding.hh" #include "filetype.hh" #include "indexedzip.hh" #include "tiff.hh" #include "ftshelpers.hh" #include "wstring_qt.hh" #include #include #include #include #ifndef __WIN32 #include #else #include #endif #include #ifdef _MSC_VER #include #endif #include #include #include #include #include #include #include #include "ufile.hh" namespace Stardict { using std::map; using std::multimap; using std::pair; using std::set; using std::string; using gd::wstring; using BtreeIndexing::WordArticleLink; using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexInfo; namespace { DEF_EX( exNotAnIfoFile, "Not an .ifo file", Dictionary::Ex ) DEF_EX_STR( exBadFieldInIfo, "Bad field in .ifo file encountered:", Dictionary::Ex ) DEF_EX_STR( exNoIdxFile, "No corresponding .idx file was found for", Dictionary::Ex ) DEF_EX_STR( exNoDictFile, "No corresponding .dict file was found for", Dictionary::Ex ) DEF_EX_STR( exNoSynFile, "No corresponding .syn file was found for", Dictionary::Ex ) DEF_EX( ex64BitsNotSupported, "64-bit indices are not presently supported, sorry", Dictionary::Ex ) DEF_EX( exDicttypeNotSupported, "Dictionaries with dicttypes are not supported, sorry", Dictionary::Ex ) DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex ) DEF_EX_STR( exWordIsTooLarge, "Enountered a word that is too large:", Dictionary::Ex ) DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex ) DEF_EX_STR( exDictzipError, "DICTZIP error", Dictionary::Ex ) DEF_EX_STR( exIncorrectOffset, "Incorrect offset encountered in file", Dictionary::Ex ) /// Contents of an ifo file struct Ifo { string version; string bookname; uint32_t wordcount, synwordcount, idxfilesize, idxoffsetbits; string sametypesequence, dicttype, description; string copyright, author, email; Ifo( File::Class & ); }; enum { Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian CurrentFormatVersion = 9 + BtreeIndexing::FormatVersion + Folding::Version }; struct IdxHeader { uint32_t signature; // First comes the signature, SIDX uint32_t formatVersion; // File format version (CurrentFormatVersion) uint32_t chunksOffset; // The offset to chunks' storage uint32_t indexBtreeMaxElements; // Two fields from IndexInfo uint32_t indexRootOffset; uint32_t wordCount; // Saved from Ifo::wordcount uint32_t synWordCount; // Saved from Ifo::synwordcount uint32_t bookNameSize; // Book name's length. Used to read it then. uint32_t sameTypeSequenceSize; // That string's size. Used to read it then. uint32_t langFrom; // Source language uint32_t langTo; // Target language uint32_t hasZipFile; // Non-zero means there's a zip file with resources present uint32_t zipIndexBtreeMaxElements; // Two fields from IndexInfo of the zip // resource index. uint32_t zipIndexRootOffset; } #ifndef _MSC_VER __attribute__((packed)) #endif ; bool indexIsOldOrBad( string const & indexFile ) { File::Class idx( indexFile, "rb" ); IdxHeader header; return idx.readRecords( &header, sizeof( header ), 1 ) != 1 || header.signature != Signature || header.formatVersion != CurrentFormatVersion; } class StardictDictionary: public BtreeIndexing::BtreeDictionary { Mutex idxMutex; File::Class idx; IdxHeader idxHeader; string bookName; string sameTypeSequence; ChunkedStorage::Reader chunks; Mutex dzMutex; dictData * dz; Mutex resourceZipMutex; IndexedZip resourceZip; public: StardictDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ); ~StardictDictionary(); virtual string getName() throw() { return bookName; } virtual map< Dictionary::Property, string > getProperties() throw() { return map< Dictionary::Property, string >(); } virtual unsigned long getArticleCount() throw() { return idxHeader.wordCount; } virtual unsigned long getWordCount() throw() { return idxHeader.wordCount + idxHeader.synWordCount; } inline virtual quint32 getLangFrom() const { return idxHeader.langFrom; } inline virtual quint32 getLangTo() const { return idxHeader.langTo; } virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) throw( std::exception ); virtual sptr< Dictionary::DataRequest > getArticle( wstring const &, vector< wstring > const & alts, wstring const & ) throw( std::exception ); virtual sptr< Dictionary::DataRequest > getResource( string const & name ) throw( std::exception ); virtual QString const& getDescription(); virtual QString getMainFilename(); virtual sptr< Dictionary::DataRequest > getSearchResults( QString const & searchString, int searchMode, bool matchCase, int distanceBetweenWords, int maxResults ); virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text ); virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration ); virtual void setFTSParameters( Config::FullTextSearch const & fts ) { can_FTS = fts.enabled && !fts.disabledTypes.contains( "STARDICT", Qt::CaseInsensitive ) && ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize ); } protected: void loadIcon() throw(); private: /// Retrives the article's offset/size in .dict file, and its headword. void getArticleProps( uint32_t articleAddress, string & headword, uint32_t & offset, uint32_t & size ); /// Loads the article, storing its headword and formatting the data it has /// into an html. void loadArticle( uint32_t address, string & headword, string & articleText ); string loadString( size_t size ); string handleResource( char type, char const * resource, size_t size ); void pangoToHtml( QString & text ); friend class StardictResourceRequest; friend class StardictArticleRequest; friend class StardictHeadwordsRequest; }; StardictDictionary::StardictDictionary( string const & id, string const & indexFile, vector< string > const & dictionaryFiles ): BtreeDictionary( id, dictionaryFiles ), idx( indexFile, "rb" ), idxHeader( idx.read< IdxHeader >() ), bookName( loadString( idxHeader.bookNameSize ) ), sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ), chunks( idx, idxHeader.chunksOffset ) { // Open the .dict file DZ_ERRORS error; dz = dict_data_open( dictionaryFiles[ 2 ].c_str(), &error, 0 ); if ( !dz ) throw exDictzipError( string( dz_error_str( error ) ) + "(" + dictionaryFiles[ 2 ] + ")" ); // Initialize the index openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, idxHeader.indexRootOffset ), idx, idxMutex ); // Open a resource zip file, if there's one if ( idxHeader.hasZipFile && ( idxHeader.zipIndexBtreeMaxElements || idxHeader.zipIndexRootOffset ) ) { resourceZip.openIndex( IndexInfo( idxHeader.zipIndexBtreeMaxElements, idxHeader.zipIndexRootOffset ), idx, idxMutex ); QString zipName = QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames().back().c_str() ) ); if ( zipName.endsWith( ".zip", Qt::CaseInsensitive ) ) // Sanity check resourceZip.openZipFile( zipName ); } // Full-text search parameters can_FTS = true; ftsIdxName = indexFile + "_FTS"; if( !Dictionary::needToRebuildIndex( dictionaryFiles, ftsIdxName ) && !FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this ) ) FTS_index_completed.ref(); } StardictDictionary::~StardictDictionary() { if ( dz ) dict_data_close( dz ); } void StardictDictionary::loadIcon() throw() { if ( dictionaryIconLoaded ) return; QString fileName = QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ); // Remove the extension fileName.chop( 3 ); if( !loadIconFromFile( fileName ) ) { // Load failed -- use default icons dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/icon32_stardict.png"); } dictionaryIconLoaded = true; } string StardictDictionary::loadString( size_t size ) { vector< char > data( size ); idx.read( &data.front(), data.size() ); return string( &data.front(), data.size() ); } void StardictDictionary::getArticleProps( uint32_t articleAddress, string & headword, uint32_t & offset, uint32_t & size ) { vector< char > chunk; Mutex::Lock _( idxMutex ); char * articleData = chunks.getBlock( articleAddress, chunk ); memcpy( &offset, articleData, sizeof( uint32_t ) ); articleData += sizeof( uint32_t ); memcpy( &size, articleData, sizeof( uint32_t ) ); articleData += sizeof( uint32_t ); headword = articleData; } /// This function tries to make an html of the Stardict's resource typed /// 'type', contained in a block pointed to by 'resource', 'size' bytes long. string StardictDictionary::handleResource( char type, char const * resource, size_t size ) { QString text; switch( type ) { case 'x': // Xdxf content return Xdxf2Html::convert( string( resource, size ), Xdxf2Html::STARDICT, NULL, this, &resourceZip ); case 'h': // Html content { string articleText = "

" + string( resource, size ) + "

"; return ( QString::fromUtf8( articleText.c_str() ) .replace( QRegExp( "(<\\s*img\\s+[^>]*src\\s*=\\s*[\"']+)((?!data:)[^\"']*)", Qt::CaseInsensitive ), "\\1bres://" + QString::fromStdString( getId() ) + "/\\2" ) .replace( QRegExp( "(<\\s*link\\s+[^>]*href\\s*=\\s*[\"']+)((?!data:)[^\"']*)", Qt::CaseInsensitive ), "\\1bres://" + QString::fromStdString( getId() ) + "/\\2" ) .toUtf8().data() ); } case 'm': // Pure meaning, usually means preformatted text return "

" + Html::preformat( string( resource, size ), isToLanguageRTL() ) + "

"; case 'l': // Same as 'm', but not in utf8, instead in current locale's // encoding. // We just use Qt here, it should know better about system's // locale. return "

" + Html::preformat( QString::fromLocal8Bit( resource, size ).toUtf8().data(), isToLanguageRTL() ) + "

"; case 'g': // Pango markup. text = QString::fromUtf8( resource, size ); pangoToHtml( text ); return "

" + string( text.toUtf8().data() ) + "

"; case 't': // Transcription return "