diff --git a/dictionary.cc b/dictionary.cc index cd5c5c61..141fa8a3 100644 --- a/dictionary.cc +++ b/dictionary.cc @@ -25,6 +25,7 @@ #include #include "qt4x5.hh" +#include "zipfile.hh" namespace Dictionary { @@ -443,15 +444,26 @@ bool needToRebuildIndex( vector< string > const & dictionaryFiles, for( std::vector< string >::const_iterator i = dictionaryFiles.begin(); i != dictionaryFiles.end(); ++i ) { - QFileInfo fileInfo( FsEncoding::decode( i->c_str() ) ); + QString name = FsEncoding::decode( i->c_str() ); + QFileInfo fileInfo( name ); + unsigned long ts; if( fileInfo.isDir() ) continue; - if ( !fileInfo.exists() ) - return true; - - unsigned long ts = fileInfo.lastModified().toTime_t(); + if( name.toLower().endsWith( ".zip" ) ) + { + ZipFile::SplitZipFile zf( name ); + if( !zf.exists() ) + return true; + ts = zf.lastModified().toTime_t(); + } + else + { + if ( !fileInfo.exists() ) + return true; + ts = fileInfo.lastModified().toTime_t(); + } if ( ts > lastModified ) lastModified = ts; diff --git a/dsl.cc b/dsl.cc index 9f55ff98..dde84cbc 100644 --- a/dsl.cc +++ b/dsl.cc @@ -2016,10 +2016,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( string zipFileName; - if ( File::tryPossibleName( baseName + ".dsl.files.zip", zipFileName ) || - File::tryPossibleName( baseName + ".dsl.dz.files.zip", zipFileName ) || - File::tryPossibleName( baseName + ".DSL.FILES.ZIP", zipFileName ) || - File::tryPossibleName( baseName + ".DSL.DZ.FILES.ZIP", zipFileName ) ) + if ( File::tryPossibleZipName( baseName + ".dsl.files.zip", zipFileName ) || + File::tryPossibleZipName( baseName + ".dsl.dz.files.zip", zipFileName ) || + File::tryPossibleZipName( baseName + ".DSL.FILES.ZIP", zipFileName ) || + File::tryPossibleZipName( baseName + ".DSL.DZ.FILES.ZIP", zipFileName ) ) dictFiles.push_back( zipFileName ); string indexFile = indicesDir + dictId; diff --git a/file.cc b/file.cc index faa9fdba..0e451a6e 100644 --- a/file.cc +++ b/file.cc @@ -19,6 +19,7 @@ #include "ufile.hh" #include "fsencoding.hh" +#include "zipfile.hh" namespace File { @@ -40,6 +41,17 @@ bool tryPossibleName( std::string const & name, std::string & copyTo ) return false; } +bool tryPossibleZipName( std::string const & name, std::string & copyTo ) +{ + if ( ZipFile::SplitZipFile( FsEncoding::decode( name.c_str() ) ).exists() ) + { + copyTo = name; + return true; + } + else + return false; +} + void loadFromFile( std::string const & n, std::vector< char > & data ) { File::Class f( n, "rb" ); diff --git a/file.hh b/file.hh index f2c28ac5..aeff01ad 100644 --- a/file.hh +++ b/file.hh @@ -26,6 +26,8 @@ DEF_EX( exAllocation, "Memory allocation error", Ex ) bool tryPossibleName( std::string const & name, std::string & copyTo ); +bool tryPossibleZipName( std::string const & name, std::string & copyTo ); + void loadFromFile( std::string const & n, std::vector< char > & data ); bool exists( char const * filename ) throw(); diff --git a/gls.cc b/gls.cc index d318658c..626dfa2b 100644 --- a/gls.cc +++ b/gls.cc @@ -1423,10 +1423,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( string zipFileName; - if ( File::tryPossibleName( baseName + ".gls.files.zip", zipFileName ) || - File::tryPossibleName( baseName + ".gls.dz.files.zip", zipFileName ) || - File::tryPossibleName( baseName + ".GLS.FILES.ZIP", zipFileName ) || - File::tryPossibleName( baseName + ".GLS.DZ.FILES.ZIP", zipFileName ) ) + if ( File::tryPossibleZipName( baseName + ".gls.files.zip", zipFileName ) || + File::tryPossibleZipName( baseName + ".gls.dz.files.zip", zipFileName ) || + File::tryPossibleZipName( baseName + ".GLS.FILES.ZIP", zipFileName ) || + File::tryPossibleZipName( baseName + ".GLS.DZ.FILES.ZIP", zipFileName ) ) dictFiles.push_back( zipFileName ); string indexFile = indicesDir + dictId; diff --git a/goldendict.pro b/goldendict.pro index 824c0f32..cde83454 100644 --- a/goldendict.pro +++ b/goldendict.pro @@ -355,7 +355,8 @@ HEADERS += folding.hh \ helpwindow.hh \ slob.hh \ ripemd.hh \ - gls.hh + gls.hh \ + splitfile.hh FORMS += groups.ui \ dictgroupwidget.ui \ @@ -480,7 +481,8 @@ SOURCES += folding.cc \ helpwindow.cc \ slob.cc \ ripemd.cc \ - gls.cc + gls.cc \ + splitfile.cc win32 { FORMS += texttospeechsource.ui diff --git a/indexedzip.hh b/indexedzip.hh index 837f1e85..30448d54 100644 --- a/indexedzip.hh +++ b/indexedzip.hh @@ -6,12 +6,13 @@ #include "btreeidx.hh" #include +#include "zipfile.hh" /// Allows using a btree index to read zip files. Basically built on top of /// the base dictionary infrastructure adapted for zips. class IndexedZip: public BtreeIndexing::BtreeIndex { - QFile zip; + ZipFile::SplitZipFile zip; bool zipIsOpen; public: diff --git a/splitfile.cc b/splitfile.cc new file mode 100644 index 00000000..67daa3d5 --- /dev/null +++ b/splitfile.cc @@ -0,0 +1,126 @@ +/* This file is (c) 2017 Abs62 + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#include "splitfile.hh" +#include "fsencoding.hh" + +namespace SplitFile +{ + +SplitFile::SplitFile() : + currentFile( 0 ) +{ +} + +SplitFile::~SplitFile() +{ + close(); +} + +void SplitFile::appendFile( const QString & name ) +{ + if( offsets.isEmpty() ) + offsets.append( 0 ); + else + offsets.append( offsets.last() + files.last()->size() ); + files.append( new QFile( name ) ); +} + +void SplitFile::close() +{ + for( QVector< QFile * >::const_iterator i = files.begin(); i != files.end(); ++i ) + { + (*i)->close(); + delete (*i); + } + + files.clear(); + offsets.clear(); + + currentFile = 0; +} + +void SplitFile::getFilenames( vector< string > &names ) const +{ + for( QVector< QFile const * >::const_iterator i = files.begin(); i != files.end(); ++i ) + names.push_back( FsEncoding::encode( (*i)->fileName() ) ); +} + +bool SplitFile::open( QFile::OpenMode mode ) +{ + for( QVector< QFile * >::iterator i = files.begin(); i != files.end(); ++i ) + if( !(*i)->open( mode ) ) + { + close(); + return false; + } + + return true; +} + +bool SplitFile::seek( quint64 pos ) +{ + int fileNom; + + for( fileNom = 0; fileNom < offsets.size() - 1; fileNom++ ) + if( pos < offsets.at( fileNom + 1 ) ) + break; + + pos -= offsets.at( fileNom ); + + currentFile = fileNom; + return files.at( fileNom )->seek( pos ); +} + +qint64 SplitFile::read( char *data, qint64 maxSize ) +{ + quint64 bytesReaded = 0; + for( int i = currentFile; i < files.size(); i++ ) + { + if( i != currentFile ) + { + files.at( i )->seek( 0 ); + currentFile = i; + } + + qint64 ret = files.at( i )->read( data + bytesReaded, maxSize ); + if( ret < 0 ) + break; + + bytesReaded += ret; + maxSize -= ret; + + if( maxSize <= 0 ) + break; + } + return bytesReaded; +} + +QByteArray SplitFile::read( qint64 maxSize ) +{ + QByteArray data; + data.resize( maxSize ); + + qint64 ret = read( data.data(), maxSize ); + + if( ret != maxSize ) + data.resize( ret ); + + return data; +} + +bool SplitFile::getChar( char *c ) +{ + char ch; + return read( c ? c : &ch, 1 ) == 1; +} + +qint64 SplitFile::pos() const +{ + if( files.isEmpty() ) + return 0; + + return offsets.at( currentFile ) + files.at( currentFile )->pos(); +} + +} // namespace SplitFile diff --git a/splitfile.hh b/splitfile.hh new file mode 100644 index 00000000..0a8edc2f --- /dev/null +++ b/splitfile.hh @@ -0,0 +1,51 @@ +#ifndef __SPLITFILE_HH_INCLUDED__ +#define __SPLITFILE_HH_INCLUDED__ + +#include +#include +#include + +#include +#include + +namespace SplitFile +{ + +using std::vector; +using std::string; + +// Class for work with split files + +class SplitFile +{ +protected: + + QVector< QFile * > files; + QVector< quint64 > offsets; + int currentFile; + + void appendFile( const QString & name ); + +public: + + SplitFile(); + ~SplitFile(); + + virtual void setFileName( const QString & name ) = 0; + void getFilenames( vector< string > & names ) const; + bool open( QFile::OpenMode mode ); + void close(); + bool seek( quint64 pos ); + qint64 read( char * data, qint64 maxSize ); + QByteArray read( qint64 maxSize ); + bool getChar( char * c ); + qint64 size() const + { return files.isEmpty() ? 0 : offsets.last() + files.last()->size(); } + bool exists() const + { return !files.isEmpty(); } + qint64 pos() const; +}; + +} // namespace SplitFile + +#endif // __SPLITFILE_HH_INCLUDED__ diff --git a/stardict.cc b/stardict.cc index 452cb80a..4feb4b81 100644 --- a/stardict.cc +++ b/stardict.cc @@ -1839,9 +1839,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries( string zipFileName; string baseName = FsEncoding::dirname( idxFileName ) + FsEncoding::separator(); - if ( File::tryPossibleName( baseName + "res.zip", zipFileName ) || - File::tryPossibleName( baseName + "RES.ZIP", zipFileName ) || - File::tryPossibleName( baseName + "res" + FsEncoding::separator() + "res.zip", zipFileName ) ) + if ( File::tryPossibleZipName( baseName + "res.zip", zipFileName ) || + File::tryPossibleZipName( baseName + "RES.ZIP", zipFileName ) || + File::tryPossibleZipName( baseName + "res" + FsEncoding::separator() + "res.zip", zipFileName ) ) dictFiles.push_back( zipFileName ); string dictId = Dictionary::makeDictionaryId( dictFiles ); diff --git a/xdxf.cc b/xdxf.cc index 2e61e987..7a39557b 100644 --- a/xdxf.cc +++ b/xdxf.cc @@ -1164,10 +1164,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( string zipFileName; - if ( File::tryPossibleName( baseName + ".xdxf.files.zip", zipFileName ) || - File::tryPossibleName( baseName + ".xdxf.dz.files.zip", zipFileName ) || - File::tryPossibleName( baseName + ".XDXF.FILES.ZIP", zipFileName ) || - File::tryPossibleName( baseName + ".XDXF.DZ.FILES.ZIP", zipFileName ) ) + if ( File::tryPossibleZipName( baseName + ".xdxf.files.zip", zipFileName ) || + File::tryPossibleZipName( baseName + ".xdxf.dz.files.zip", zipFileName ) || + File::tryPossibleZipName( baseName + ".XDXF.FILES.ZIP", zipFileName ) || + File::tryPossibleZipName( baseName + ".XDXF.DZ.FILES.ZIP", zipFileName ) ) dictFiles.push_back( zipFileName ); string dictId = Dictionary::makeDictionaryId( dictFiles ); diff --git a/zim.cc b/zim.cc index 5175f63a..94b7303f 100644 --- a/zim.cc +++ b/zim.cc @@ -18,6 +18,7 @@ #include "tiff.hh" #include "ftshelpers.hh" #include "htmlescape.hh" +#include "splitfile.hh" #ifdef _MSC_VER #include @@ -146,52 +147,35 @@ __attribute__((packed)) // Class for support of split zim files -class ZimFile +class ZimFile : public SplitFile::SplitFile { - QVector< QFile * > files; - QVector< quint64 > offsets; - int currentFile; - public: ZimFile(); ZimFile( const QString & name ); ~ZimFile(); - void setFileName( const QString & name ); - void getFilenames( vector< string > & names ); - bool open( QFile::OpenMode mode ); - void close(); - bool seek( quint64 pos ); - qint64 read( char * data, qint64 maxSize ); - QByteArray read( qint64 maxSize ); - bool getChar( char * c ); - qint64 size() - { return files.isEmpty() ? 0 : offsets.last() + files.last()->size(); } + virtual void setFileName( const QString & name ); }; -ZimFile::ZimFile() : - currentFile( 0 ) +ZimFile::ZimFile() { } -ZimFile::ZimFile( const QString & name ) : - currentFile( 0 ) +ZimFile::ZimFile( const QString & name ) { setFileName( name ); } ZimFile::~ZimFile() { - close(); } void ZimFile::setFileName( const QString & name ) { close(); - files.append( new QFile( name ) ); - offsets.append( 0 ); + appendFile( name ); if( name.endsWith( ".zimaa", Qt::CaseInsensitive ) ) { @@ -208,10 +192,7 @@ void ZimFile::setFileName( const QString & name ) if( !QFileInfo( fname ).isFile() ) break; - quint64 offset = offsets.last() + files.last()->size(); - - files.append( new QFile( fname ) ); - offsets.append( offset ); + appendFile( fname ); } if( j < 26 ) @@ -220,92 +201,6 @@ void ZimFile::setFileName( const QString & name ) } } -void ZimFile::close() -{ - for( QVector< QFile * >::const_iterator i = files.begin(); i != files.end(); ++i ) - { - (*i)->close(); - delete (*i); - } - - files.clear(); - offsets.clear(); - - currentFile = 0; -} - -void ZimFile::getFilenames( vector< string > &names ) -{ - for( QVector< QFile const * >::const_iterator i = files.begin(); i != files.end(); ++i ) - names.push_back( FsEncoding::encode( (*i)->fileName() ) ); -} - -bool ZimFile::open( QFile::OpenMode mode ) -{ - for( QVector< QFile * >::iterator i = files.begin(); i != files.end(); ++i ) - if( !(*i)->open( mode ) ) - { - close(); - return false; - } - - return true; -} - -bool ZimFile::seek( quint64 pos ) -{ - int fileNom; - - for( fileNom = 0; fileNom < offsets.size() - 1; fileNom++ ) - if( pos < offsets.at( fileNom + 1 ) ) - break; - - pos -= offsets.at( fileNom ); - - currentFile = fileNom; - return files.at( fileNom )->seek( pos ); -} - -qint64 ZimFile::read( char *data, qint64 maxSize ) -{ - quint64 bytesReaded = 0; - for( int i = currentFile; i < files.size(); i++ ) - { - if( i != currentFile ) - files.at( i )->seek( 0 ); - - qint64 ret = files.at( i )->read( data + bytesReaded, maxSize ); - if( ret < 0 ) - break; - - bytesReaded += ret; - maxSize -= ret; - - if( maxSize <= 0 ) - break; - } - return bytesReaded; -} - -QByteArray ZimFile::read( qint64 maxSize ) -{ - QByteArray data; - data.resize( maxSize ); - - qint64 ret = read( data.data(), maxSize ); - - if( ret != maxSize ) - data.resize( ret ); - - return data; -} - -bool ZimFile::getChar( char *c ) -{ - char ch; - return read( c ? c : &ch, 1 ) == 1; -} - // Some supporting functions bool indexIsOldOrBad( string const & indexFile ) diff --git a/zipfile.cc b/zipfile.cc index 3b3ede5f..e2345a0b 100644 --- a/zipfile.cc +++ b/zipfile.cc @@ -4,6 +4,7 @@ #include "zipfile.hh" #include #include +#include namespace ZipFile { @@ -67,7 +68,7 @@ static CompressionMethod getCompressionMethod( quint16 compressionMethod ) } } -bool positionAtCentralDir( QFile & zip ) +bool positionAtCentralDir( SplitZipFile & zip ) { // Find the end-of-central-directory record @@ -93,6 +94,8 @@ bool positionAtCentralDir( QFile & zip ) EndOfCdirRecord endOfCdirRecord; + quint32 cdir_offset; + for( ; ; --lastIndex ) { lastIndex = eocBuffer.lastIndexOf( endOfCdirRecordSignature, lastIndex ); @@ -106,7 +109,10 @@ bool positionAtCentralDir( QFile & zip ) /// Sanitize the record by checking the offset - if ( !zip.seek( qFromLittleEndian( endOfCdirRecord.offset ) ) ) + cdir_offset = zip.calcAbsoluteOffset( qFromLittleEndian( endOfCdirRecord.offset ), + qFromLittleEndian( endOfCdirRecord.numDiskCd ) ); + + if ( !zip.seek( cdir_offset ) ) continue; quint32 signature; @@ -120,10 +126,10 @@ bool positionAtCentralDir( QFile & zip ) // Found cdir -- position the file on the first header - return zip.seek( qFromLittleEndian( endOfCdirRecord.offset ) ); + return zip.seek( cdir_offset ); } -bool readNextEntry( QFile & zip, CentralDirEntry & entry ) +bool readNextEntry( SplitZipFile & zip, CentralDirEntry & entry ) { CentralFileHeaderRecord record; @@ -147,7 +153,8 @@ bool readNextEntry( QFile & zip, CentralDirEntry & entry ) qFromLittleEndian( record.fileCommentLength ) ) ) return false; - entry.localHeaderOffset = qFromLittleEndian( record.offsetOfLocalHeader ); + entry.localHeaderOffset = zip.calcAbsoluteOffset( qFromLittleEndian( record.offsetOfLocalHeader ), + qFromLittleEndian( record.diskNumberStart ) ); entry.compressedSize = qFromLittleEndian( record.compressedSize ); entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize ); entry.compressionMethod = getCompressionMethod( record.compressionMethod ); @@ -156,7 +163,7 @@ bool readNextEntry( QFile & zip, CentralDirEntry & entry ) return true; } -bool readLocalHeader( QFile & zip, LocalFileHeader & entry ) +bool readLocalHeader( SplitZipFile & zip, LocalFileHeader & entry ) { LocalFileHeaderRecord record; @@ -186,4 +193,59 @@ bool readLocalHeader( QFile & zip, LocalFileHeader & entry ) return true; } +SplitZipFile::SplitZipFile( const QString & name ) +{ + setFileName( name ); +} + +void SplitZipFile::setFileName( const QString & name ) +{ + if( !name.toLower().endsWith( ".zip" ) ) + return; + + if( QFileInfo( name ).isFile() ) + { + for( int i = 1; i < 100; i++ ) + { + QString name2 = name.left( name.size() - 2 ) + QString( "%1" ).arg( i, 2, 10, QChar( '0' ) ); + if( QFileInfo( name2 ).isFile() ) + appendFile( name2 ); + else + break; + } + appendFile( name ); + } + else + { + for( int i = 1; i < 1000; i++ ) + { + QString name2 = name + QString( ".%1" ).arg( i, 3, 10, QChar( '0' ) ); + if( QFileInfo( name2 ).isFile() ) + appendFile( name2 ); + else + break; + } + } +} + +QDateTime SplitZipFile::lastModified() const +{ + unsigned long ts = 0; + for( QVector< QFile * >::const_iterator i = files.begin(); i != files.end(); ++i ) + { + unsigned long t = QFileInfo( (*i)->fileName() ).lastModified().toTime_t(); + if( t > ts ) + ts = t; + } + return QDateTime::fromTime_t( ts ); +} + +qint64 SplitZipFile::calcAbsoluteOffset( qint64 offset, quint16 partNo ) +{ + if( partNo >= offsets.size() ) + return 0; + + return offsets.at( partNo ) + offset; +} + } diff --git a/zipfile.hh b/zipfile.hh index 627bb819..7e63746b 100644 --- a/zipfile.hh +++ b/zipfile.hh @@ -5,11 +5,30 @@ #define __ZIPFILE_HH_INCLUDED__ #include +#include +#include "splitfile.hh" /// Support for zip files in GoldenDict. Note that the implementation is /// strictly tailored to GoldenDict needs only. namespace ZipFile { +// Support for split zip files +class SplitZipFile : public SplitFile::SplitFile +{ +public: + SplitZipFile() + {} + SplitZipFile( const QString & name ); + + virtual void setFileName( const QString & name ); + + // Latest modified time for all parts + QDateTime lastModified() const; + + // Calc absolute offset by relative offset in part and part nom + qint64 calcAbsoluteOffset( qint64 offset, quint16 partNo ); +}; + enum CompressionMethod { Uncompressed, @@ -42,17 +61,17 @@ struct LocalFileHeader /// zip file or other error). /// Once the file is positioned, entries may be read by constructing Entry /// objects. -bool positionAtCentralDir( QFile & ); +bool positionAtCentralDir( SplitZipFile & ); /// Reads entry from the zip at its current offset. The file gets advanced /// by the size of entry, so it points to the next entry. /// Returns true on success, false otherwise. -bool readNextEntry( QFile &, CentralDirEntry & ); +bool readNextEntry( SplitZipFile &, CentralDirEntry & ); /// Reads loca file header from the zip at its current offset. The file gets /// advanced by the size of entry and starts pointing to file data. /// Returns true on success, false otherwise. -bool readLocalHeader( QFile &, LocalFileHeader & ); +bool readLocalHeader( SplitZipFile &, LocalFileHeader & ); }