/* This file is (c) 2008-2012 Konstantin Isakov
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#include "zipsounds.hh"
#include "file.hh"
#include "folding.hh"
#include "utf8.hh"
#include "btreeidx.hh"
#include "fsencoding.hh"
#include "audiolink.hh"
#include "dprintf.hh"
#include "indexedzip.hh"

// NOTE(review): the names of the angle-bracket includes below were missing
// from the text under review and are restored from the identifiers this file
// uses -- confirm against the repository before merging.
#include <set>
#include <string>
#include <QFile>
#include <QDir>

#ifdef _MSC_VER
#include <stub_msvc.h>
#endif

#include <QUrl>

namespace ZipSounds {

using std::string;
using gd::wstring;
using std::map;
using std::multimap;
using std::set;
using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;

namespace {

DEF_EX( exInvalidData, "Invalid data encountered", Dictionary::Ex )

enum
{
  Signature = 0x5350495a, // ZIPS on little-endian, SPIZ on big-endian
  CurrentFormatVersion = 1
};

/// Header stored at the very beginning of the index file.
struct IdxHeader
{
  uint32_t signature; // First comes the signature, ZIPS
  uint32_t formatVersion; // File format version, currently 1.
  uint32_t soundsCount; // Total number of sounds, for informative purposes only
  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
  uint32_t indexRootOffset;
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;

/// Returns true when the header of the given index file cannot be read in
/// full, or when its signature or format version differ from the ones this
/// file writes.
bool indexIsOldOrBad( string const & indexFile )
{
  File::Class idx( indexFile, "rb" );

  IdxHeader header;

  return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
         header.signature != Signature ||
         header.formatVersion != CurrentFormatVersion;
}

/// Returns str without a trailing ".wav" (compared case-insensitively).
/// Any other string is returned unchanged.
string stripExtension( string const & str )
{
  size_t const extLength = 4; // strlen( ".wav" )

  if ( str.size() >= extLength &&
       strcasecmp( str.c_str() + ( str.size() - extLength ), ".wav" ) == 0 )
    return string( str, 0, str.size() - extLength );

  return str;
}

/// Appends one table row for the given sound to result: the audio link
/// script produced by addAudioLink(), a cell with the play icon and a cell
/// with the sound name (".wav" stripped), both linking to the gdau:// url of
/// the sound inside the dictionary identified by dictId.
///
/// NOTE(review): the markup literals in this function and the table tags in
/// getArticle() were stripped from the text under review; they are
/// reconstructed here and must be confirmed against the repository.
/// NOTE(review): soundName is inserted into the markup unescaped -- consider
/// HTML-escaping it, since it comes from the zip's file list.
void appendSoundRow( string & result, string const & dictId,
                     string const & soundName )
{
  result += "<tr>";

  QUrl url;
  url.setScheme( "gdau" );
  url.setHost( QString::fromUtf8( dictId.c_str() ) );
  url.setPath( QString::fromUtf8( soundName.c_str() ) );

  string ref = string( "\"" ) + url.toEncoded().data() + "\"";

  result += addAudioLink( ref, dictId );

  result += "<td><a href=" + ref + "><img src=\"qrcx://localhost/icons/playsound.png\" border=\"0\" alt=\"Play\"/></a></td>";
  result += "<td><a href=" + ref + ">" + stripExtension( soundName ) + "</a></td>";
  result += "</tr>";
}

/// Appends a row for every entry of articles, in the map's iteration order.
void appendSoundRows( string & result, string const & dictId,
                      multimap< wstring, string > const & articles )
{
  for( multimap< wstring, string >::const_iterator i = articles.begin();
       i != articles.end(); ++i )
    appendSoundRow( result, dictId, i->second );
}

/// A dictionary backed by a .zips archive of sound files. Headwords are the
/// archive's file names; articles are tables of links which play the sounds.
class ZipSoundsDictionary: public BtreeIndexing::BtreeDictionary
{
  Mutex idxMutex;
  File::Class idx;
  IdxHeader idxHeader;
  IndexedZip zipsFile;

public:

  ZipSoundsDictionary( string const & id, string const & indexFile,
                       vector< string > const & dictionaryFiles );

  virtual string getName() throw();

  /// No properties are reported for this dictionary type.
  virtual map< Dictionary::Property, string > getProperties() throw()
  { return map< Dictionary::Property, string >(); }

  /// Number of sounds as recorded in the index header.
  virtual unsigned long getArticleCount() throw()
  { return idxHeader.soundsCount; }

  /// Same as the article count.
  virtual unsigned long getWordCount() throw()
  { return getArticleCount(); }

  virtual QIcon getIcon() throw()
  { return QIcon(":/icons/playsound.png"); }

  virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
                                                      vector< wstring > const & alts,
                                                      wstring const & )
    throw( std::exception );

  virtual sptr< Dictionary::DataRequest > getResource( string const & name )
    throw( std::exception );
};

/// Opens the index file, reads its header and opens the zip archive given
/// as the first dictionary file.
ZipSoundsDictionary::ZipSoundsDictionary( string const & id,
                                          string const & indexFile,
                                          vector< string > const & dictionaryFiles ):
  BtreeDictionary( id, dictionaryFiles ),
  idx( indexFile, "rb" ),
  idxHeader( idx.read< IdxHeader >() )
{
  // Initialize the index

  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
                        idxHeader.indexRootOffset ),
             idx, idxMutex );

  QString zipName = QDir::fromNativeSeparators(
      FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) );

  zipsFile.openZipFile( zipName );

  // The zip reader is given the very same index info, file and mutex
  zipsFile.openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
                                 idxHeader.indexRootOffset ),
                      idx, idxMutex );
}

/// Returns the base name of the archive file without its last extension.
string ZipSoundsDictionary::getName() throw()
{
  string result = FsEncoding::basename( getDictionaryFilenames()[ 0 ] );

  // Strip the extension. The position must be checked: erase( npos ) throws
  // std::out_of_range, which this function is declared never to do.
  string::size_type dot = result.rfind( '.' );

  if ( dot != string::npos )
    result.erase( dot );

  return result;
}

/// Builds an article listing every sound whose name matches word or any of
/// alts. Sounds whose case-folded name equals the case-folded word are
/// listed first, the remaining ones after them. When nothing matches, a
/// request constructed with 'false' is returned.
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getArticle( wstring const & word,
                                                                 vector< wstring > const & alts,
                                                                 wstring const & )
  throw( std::exception )
{
  vector< WordArticleLink > chain = findArticles( word );

  for( unsigned x = 0; x < alts.size(); ++x )
  {
    /// Make an additional query for each alt

    vector< WordArticleLink > altChain = findArticles( alts[ x ] );

    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }

  multimap< wstring, string > mainArticles, alternateArticles;

  set< uint32_t > articlesIncluded; // Some synonyms make it that the articles
                                    // appear several times. We combat this
                                    // by only allowing them to appear once.

  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );

  for( unsigned x = 0; x < chain.size(); ++x )
  {
    // insert() reports whether the offset was new, so a single lookup both
    // tests for and records the article.
    if ( !articlesIncluded.insert( chain[ x ].articleOffset ).second )
      continue; // We already have this article in the body.

    // Ok. Now, does it go to main articles, or to alternate ones? We list
    // main ones first, and alternates after.

    // We do the case-folded comparison here.

    wstring headwordStripped =
      Folding::applySimpleCaseOnly( Utf8::decode( chain[ x ].word ) );

    multimap< wstring, string > & mapToUse =
      ( wordCaseFolded == headwordStripped ) ? mainArticles : alternateArticles;

    mapToUse.insert( std::pair< wstring, string >( headwordStripped,
                                                   chain[ x ].word ) );
  }

  if ( mainArticles.empty() && alternateArticles.empty() )
    return new Dictionary::DataRequestInstant( false ); // No such word

  string result;

  result += "<table class=\"lsa_play\">";

  appendSoundRows( result, getId(), mainArticles );
  appendSoundRows( result, getId(), alternateArticles );

  result += "</table>";

  // Held in sptr right away so that it is released should resize() throw
  sptr< Dictionary::DataRequestInstant > ret =
    new Dictionary::DataRequestInstant( true );

  ret->getData().resize( result.size() );

  memcpy( &(ret->getData().front()), result.data(), result.size() );

  return ret;
}

/// Loads the contents of the sound named 'name' from the archive. The first
/// index match is used. A request constructed with 'false' is returned when
/// the name is not in the index or the file fails to load.
sptr< Dictionary::DataRequest > ZipSoundsDictionary::getResource( string const & name )
  throw( std::exception )
{
  vector< WordArticleLink > chain = findArticles( Utf8::decode( name ) );

  if ( chain.empty() )
    return new Dictionary::DataRequestInstant( false ); // No such resource

  sptr< Dictionary::DataRequestInstant > dr =
    new Dictionary::DataRequestInstant( true );

  if ( zipsFile.loadFile( chain[ 0 ].articleOffset, dr->getData() ) )
    return dr;

  return new Dictionary::DataRequestInstant( false );
}

}

/// Creates a dictionary for every file name ending in ".zips" (compared
/// case-insensitively), building or rebuilding its index in indicesDir when
/// needed. A file which fails with a std::exception is reported on stderr
/// and skipped; the remaining files are still processed.
vector< sptr< Dictionary::Class > > makeDictionaries(
                                      vector< string > const & fileNames,
                                      string const & indicesDir,
                                      Dictionary::Initializing & initializing )
  throw( std::exception )
{
  vector< sptr< Dictionary::Class > > dictionaries;

  for( vector< string >::const_iterator i = fileNames.begin();
       i != fileNames.end(); ++i )
  {
    /// Only allow .zips extension
    if ( i->size() < 5 ||
         strcasecmp( i->c_str() + ( i->size() - 5 ), ".zips" ) != 0 )
      continue;

    try
    {
      vector< string > dictFiles( 1, *i );
      string dictId = Dictionary::makeDictionaryId( dictFiles );

      string indexFile = indicesDir + dictId;

      if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
           indexIsOldOrBad( indexFile ) )
      {
        File::Class idx( indexFile, "wb" );
        IdxHeader idxHeader;

        memset( &idxHeader, 0, sizeof( idxHeader ) );

        // We write a dummy header first. At the end of the process the header
        // will be rewritten with the right values.

        idx.write( idxHeader );

        IndexedWords names, zipFileNames;
        IndexedZip zipFile;

        if( zipFile.openZipFile( QDir::fromNativeSeparators(
                                   FsEncoding::decode( i->c_str() ) ) ) )
          zipFile.indexFile( zipFileNames );

        if( !zipFileNames.empty() )
        {
          // See if the name ends in .wav. Remove that extension then.
          // The iterator has its own name so that it does not shadow the
          // file name iterator of the enclosing loop.
          for( IndexedWords::iterator entry = zipFileNames.begin();
               entry != zipFileNames.end(); ++entry )
          {
            // Bound by reference -- nothing here modifies the links
            vector< WordArticleLink > const & links = entry->second;

            for( unsigned x = 0; x < links.size(); x++ )
            {
              string word = stripExtension( links[ x ].word );

              names.addSingleWord( Utf8::decode( word ),
                                   links[ x ].articleOffset );
            }
          }

          // Build the resulting zip file index

          IndexInfo idxInfo = BtreeIndexing::buildIndex( names, idx );

          // That concludes it. Update the header.

          idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
          idxHeader.indexRootOffset = idxInfo.rootOffset;

          idxHeader.signature = Signature;
          idxHeader.formatVersion = CurrentFormatVersion;

          idxHeader.soundsCount = names.size();

          idx.rewind();

          idx.write( &idxHeader, sizeof( idxHeader ) );
        }
        else
        {
          // Nothing was indexed: drop the index file holding only the dummy
          // header and report the archive as invalid.
          idx.close();
          QFile::remove( QDir::fromNativeSeparators(
                           FsEncoding::decode( indexFile.c_str() ) ) );
          throw exInvalidData();
        }
      }

      dictionaries.push_back( new ZipSoundsDictionary( dictId,
                                                       indexFile,
                                                       dictFiles ) );
    }
    catch( std::exception const & e )
    {
      FDPRINTF( stderr, "Zipped sounds pack reading failed: %s, error: %s\n",
                i->c_str(), e.what() );
    }
  }

  return dictionaries;
}

}