Add support for split zip files

This commit is contained in:
Abs62 2017-04-24 17:42:01 +03:00
parent bef8e2f8ac
commit eb78238f25
14 changed files with 326 additions and 144 deletions

View file

@ -25,6 +25,7 @@
#include <QRegExp>
#include "qt4x5.hh"
#include "zipfile.hh"
namespace Dictionary {
@ -443,15 +444,26 @@ bool needToRebuildIndex( vector< string > const & dictionaryFiles,
for( std::vector< string >::const_iterator i = dictionaryFiles.begin();
i != dictionaryFiles.end(); ++i )
{
QFileInfo fileInfo( FsEncoding::decode( i->c_str() ) );
QString name = FsEncoding::decode( i->c_str() );
QFileInfo fileInfo( name );
unsigned long ts;
if( fileInfo.isDir() )
continue;
if ( !fileInfo.exists() )
return true;
unsigned long ts = fileInfo.lastModified().toTime_t();
if( name.toLower().endsWith( ".zip" ) )
{
ZipFile::SplitZipFile zf( name );
if( !zf.exists() )
return true;
ts = zf.lastModified().toTime_t();
}
else
{
if ( !fileInfo.exists() )
return true;
ts = fileInfo.lastModified().toTime_t();
}
if ( ts > lastModified )
lastModified = ts;

8
dsl.cc
View file

@ -2016,10 +2016,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
string zipFileName;
if ( File::tryPossibleName( baseName + ".dsl.files.zip", zipFileName ) ||
File::tryPossibleName( baseName + ".dsl.dz.files.zip", zipFileName ) ||
File::tryPossibleName( baseName + ".DSL.FILES.ZIP", zipFileName ) ||
File::tryPossibleName( baseName + ".DSL.DZ.FILES.ZIP", zipFileName ) )
if ( File::tryPossibleZipName( baseName + ".dsl.files.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + ".dsl.dz.files.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + ".DSL.FILES.ZIP", zipFileName ) ||
File::tryPossibleZipName( baseName + ".DSL.DZ.FILES.ZIP", zipFileName ) )
dictFiles.push_back( zipFileName );
string indexFile = indicesDir + dictId;

12
file.cc
View file

@ -19,6 +19,7 @@
#include "ufile.hh"
#include "fsencoding.hh"
#include "zipfile.hh"
namespace File {
@ -40,6 +41,17 @@ bool tryPossibleName( std::string const & name, std::string & copyTo )
return false;
}
bool tryPossibleZipName( std::string const & name, std::string & copyTo )
{
if ( ZipFile::SplitZipFile( FsEncoding::decode( name.c_str() ) ).exists() )
{
copyTo = name;
return true;
}
else
return false;
}
void loadFromFile( std::string const & n, std::vector< char > & data )
{
File::Class f( n, "rb" );

View file

@ -26,6 +26,8 @@ DEF_EX( exAllocation, "Memory allocation error", Ex )
bool tryPossibleName( std::string const & name, std::string & copyTo );
bool tryPossibleZipName( std::string const & name, std::string & copyTo );
void loadFromFile( std::string const & n, std::vector< char > & data );
bool exists( char const * filename ) throw();

8
gls.cc
View file

@ -1423,10 +1423,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
string zipFileName;
if ( File::tryPossibleName( baseName + ".gls.files.zip", zipFileName ) ||
File::tryPossibleName( baseName + ".gls.dz.files.zip", zipFileName ) ||
File::tryPossibleName( baseName + ".GLS.FILES.ZIP", zipFileName ) ||
File::tryPossibleName( baseName + ".GLS.DZ.FILES.ZIP", zipFileName ) )
if ( File::tryPossibleZipName( baseName + ".gls.files.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + ".gls.dz.files.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + ".GLS.FILES.ZIP", zipFileName ) ||
File::tryPossibleZipName( baseName + ".GLS.DZ.FILES.ZIP", zipFileName ) )
dictFiles.push_back( zipFileName );
string indexFile = indicesDir + dictId;

View file

@ -355,7 +355,8 @@ HEADERS += folding.hh \
helpwindow.hh \
slob.hh \
ripemd.hh \
gls.hh
gls.hh \
splitfile.hh
FORMS += groups.ui \
dictgroupwidget.ui \
@ -480,7 +481,8 @@ SOURCES += folding.cc \
helpwindow.cc \
slob.cc \
ripemd.cc \
gls.cc
gls.cc \
splitfile.cc
win32 {
FORMS += texttospeechsource.ui

View file

@ -6,12 +6,13 @@
#include "btreeidx.hh"
#include <QFile>
#include "zipfile.hh"
/// Allows using a btree index to read zip files. Basically built on top of
/// the base dictionary infrastructure adapted for zips.
class IndexedZip: public BtreeIndexing::BtreeIndex
{
QFile zip;
ZipFile::SplitZipFile zip;
bool zipIsOpen;
public:

126
splitfile.cc Normal file
View file

@ -0,0 +1,126 @@
/* This file is (c) 2017 Abs62
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "splitfile.hh"
#include "fsencoding.hh"
namespace SplitFile
{
/// Creates an empty split file: no parts are attached yet and the current
/// part index starts at 0.
SplitFile::SplitFile():
  currentFile( 0 )
{}
/// Closes and releases every attached part (see close()).
SplitFile::~SplitFile()
{
  this->close();
}
void SplitFile::appendFile( const QString & name )
{
if( offsets.isEmpty() )
offsets.append( 0 );
else
offsets.append( offsets.last() + files.last()->size() );
files.append( new QFile( name ) );
}
void SplitFile::close()
{
for( QVector< QFile * >::const_iterator i = files.begin(); i != files.end(); ++i )
{
(*i)->close();
delete (*i);
}
files.clear();
offsets.clear();
currentFile = 0;
}
/// Appends the file name of every attached part, in part order, to 'names'.
/// Names are converted with FsEncoding::encode(). Existing contents of
/// 'names' are kept.
void SplitFile::getFilenames( vector< string > &names ) const
{
  // Iterate by index: the previous loop declared an iterator of
  // QVector< QFile const * > over a QVector< QFile * >, which only compiled
  // because QVector iterators happen to be raw pointers.
  for( int n = 0; n < files.size(); ++n )
    names.push_back( FsEncoding::encode( files.at( n )->fileName() ) );
}
/// Opens every attached part with the given mode.
/// Returns true only if there is at least one part and all parts were opened.
/// If any part fails to open, close() is called (which also drops the whole
/// part list) and false is returned.
bool SplitFile::open( QFile::OpenMode mode )
{
  // Nothing attached (e.g. the file was not found): there is nothing to
  // open, so don't report success -- later seek()/read() calls would have
  // no part to work on.
  if( files.isEmpty() )
    return false;

  for( QVector< QFile * >::iterator i = files.begin(); i != files.end(); ++i )
    if( !(*i)->open( mode ) )
    {
      // close() invalidates the iterator, so leave immediately afterwards
      close();
      return false;
    }

  return true;
}
/// Positions the logical file at absolute offset 'pos'.
/// Selects the part whose offset range contains 'pos' (the last part if 'pos'
/// lies beyond all recorded offsets), makes it the current part and seeks
/// inside it to the part-relative position.
/// Returns false if no parts are attached or if the part's seek fails.
bool SplitFile::seek( quint64 pos )
{
  // Without parts there is no offsets.at( 0 ) / files.at( 0 ) to use
  if( files.isEmpty() )
    return false;

  int fileNom = 0;
  while( fileNom < offsets.size() - 1 && pos >= offsets.at( fileNom + 1 ) )
    ++fileNom;

  currentFile = fileNom;

  return files.at( fileNom )->seek( pos - offsets.at( fileNom ) );
}
/// Reads up to maxSize bytes into 'data', starting at the current position
/// of the current part and continuing into the following parts when a part
/// yields fewer bytes than requested.
/// Returns the number of bytes actually stored; a read error on a part stops
/// the operation and the count gathered so far is returned.
qint64 SplitFile::read( char *data, qint64 maxSize )
{
  quint64 total = 0;
  int part = currentFile;

  while( part < files.size() )
  {
    if( part != currentFile )
    {
      // Moving on to the next part: start from its beginning
      files.at( part )->seek( 0 );
      currentFile = part;
    }

    qint64 const got = files.at( part )->read( data + total, maxSize );
    if( got < 0 )
      break;

    total += got;
    maxSize -= got;

    if( maxSize <= 0 )
      break;

    ++part;
  }

  return total;
}
/// Convenience overload: reads up to maxSize bytes and returns them as a
/// QByteArray whose size equals the number of bytes actually read.
QByteArray SplitFile::read( qint64 maxSize )
{
  QByteArray buffer;
  buffer.resize( maxSize );

  qint64 const got = read( buffer.data(), maxSize );

  // Shrink the buffer when fewer bytes than requested were available
  if( got != maxSize )
    buffer.resize( got );

  return buffer;
}
/// Reads a single byte. The byte is stored into *c when c is non-null,
/// otherwise it is read and discarded. Returns true if one byte was read.
bool SplitFile::getChar( char *c )
{
  char discarded;
  char * target = c;

  if( !target )
    target = &discarded;

  return read( target, 1 ) == 1;
}
/// Returns the current absolute position in the logical file: the current
/// part's starting offset plus the position inside that part.
/// Returns 0 when no parts are attached.
qint64 SplitFile::pos() const
{
  if( !files.isEmpty() )
    return offsets.at( currentFile ) + files.at( currentFile )->pos();

  return 0;
}
} // namespace SplitFile

51
splitfile.hh Normal file
View file

@ -0,0 +1,51 @@
#ifndef __SPLITFILE_HH_INCLUDED__
#define __SPLITFILE_HH_INCLUDED__
#include <QFile>
#include <QVector>
#include <QString>
#include <vector>
#include <string>
namespace SplitFile
{
using std::vector;
using std::string;
/// Base class for working with a file that is stored on disk as several
/// consecutive parts. The parts are presented as one contiguous readable
/// file: every part has a starting offset in the logical file, and
/// seek()/read()/pos() operate on absolute offsets.
///
/// Derived classes implement setFileName() to discover the parts and attach
/// them in order with appendFile().
///
/// NOTE: the object owns the QFile instances stored in 'files' (close()
/// deletes them), so instances must not be copied.
class SplitFile
{
protected:

  /// Attached parts, in order. Owned by this object.
  QVector< QFile * > files;
  /// Starting offset of each part within the logical file (same indexing
  /// as 'files').
  QVector< quint64 > offsets;
  /// Index of the part the read position currently is in.
  int currentFile;

  /// Attaches one more part to the end of the logical file.
  void appendFile( const QString & name );

public:

  SplitFile();
  /// Virtual because the class is a polymorphic base (setFileName() is
  /// pure virtual) and may be destroyed through a base pointer.
  virtual ~SplitFile();

  /// Locates the parts belonging to 'name' and attaches them.
  virtual void setFileName( const QString & name ) = 0;

  /// Appends the file names of all attached parts to 'names'.
  void getFilenames( vector< string > & names ) const;

  /// Opens all parts with the given mode.
  bool open( QFile::OpenMode mode );
  /// Closes and releases all parts; the object becomes empty.
  void close();

  /// Positions the logical file at the absolute offset 'pos'.
  bool seek( quint64 pos );
  /// Reads up to maxSize bytes into 'data', crossing part boundaries as
  /// needed. Returns the number of bytes read.
  qint64 read( char * data, qint64 maxSize );
  /// Same as above, returning the bytes read as a QByteArray.
  QByteArray read( qint64 maxSize );
  /// Reads one byte (stored into *c if c is non-null).
  bool getChar( char * c );

  /// Total size of the logical file: offset of the last part plus its size,
  /// or 0 when no parts are attached.
  qint64 size() const
  { return files.isEmpty() ? 0 : offsets.last() + files.last()->size(); }

  /// True if at least one part is attached.
  bool exists() const
  { return !files.isEmpty(); }

  /// Current absolute position in the logical file.
  qint64 pos() const;
};
} // namespace SplitFile
#endif // __SPLITFILE_HH_INCLUDED__

View file

@ -1839,9 +1839,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
string zipFileName;
string baseName = FsEncoding::dirname( idxFileName ) + FsEncoding::separator();
if ( File::tryPossibleName( baseName + "res.zip", zipFileName ) ||
File::tryPossibleName( baseName + "RES.ZIP", zipFileName ) ||
File::tryPossibleName( baseName + "res" + FsEncoding::separator() + "res.zip", zipFileName ) )
if ( File::tryPossibleZipName( baseName + "res.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + "RES.ZIP", zipFileName ) ||
File::tryPossibleZipName( baseName + "res" + FsEncoding::separator() + "res.zip", zipFileName ) )
dictFiles.push_back( zipFileName );
string dictId = Dictionary::makeDictionaryId( dictFiles );

View file

@ -1164,10 +1164,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
string zipFileName;
if ( File::tryPossibleName( baseName + ".xdxf.files.zip", zipFileName ) ||
File::tryPossibleName( baseName + ".xdxf.dz.files.zip", zipFileName ) ||
File::tryPossibleName( baseName + ".XDXF.FILES.ZIP", zipFileName ) ||
File::tryPossibleName( baseName + ".XDXF.DZ.FILES.ZIP", zipFileName ) )
if ( File::tryPossibleZipName( baseName + ".xdxf.files.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + ".xdxf.dz.files.zip", zipFileName ) ||
File::tryPossibleZipName( baseName + ".XDXF.FILES.ZIP", zipFileName ) ||
File::tryPossibleZipName( baseName + ".XDXF.DZ.FILES.ZIP", zipFileName ) )
dictFiles.push_back( zipFileName );
string dictId = Dictionary::makeDictionaryId( dictFiles );

119
zim.cc
View file

@ -18,6 +18,7 @@
#include "tiff.hh"
#include "ftshelpers.hh"
#include "htmlescape.hh"
#include "splitfile.hh"
#ifdef _MSC_VER
#include <stub_msvc.h>
@ -146,52 +147,35 @@ __attribute__((packed))
// Class for support of split zim files
class ZimFile
class ZimFile : public SplitFile::SplitFile
{
QVector< QFile * > files;
QVector< quint64 > offsets;
int currentFile;
public:
ZimFile();
ZimFile( const QString & name );
~ZimFile();
void setFileName( const QString & name );
void getFilenames( vector< string > & names );
bool open( QFile::OpenMode mode );
void close();
bool seek( quint64 pos );
qint64 read( char * data, qint64 maxSize );
QByteArray read( qint64 maxSize );
bool getChar( char * c );
qint64 size()
{ return files.isEmpty() ? 0 : offsets.last() + files.last()->size(); }
virtual void setFileName( const QString & name );
};
ZimFile::ZimFile() :
currentFile( 0 )
ZimFile::ZimFile()
{
}
ZimFile::ZimFile( const QString & name ) :
currentFile( 0 )
ZimFile::ZimFile( const QString & name )
{
setFileName( name );
}
ZimFile::~ZimFile()
{
close();
}
void ZimFile::setFileName( const QString & name )
{
close();
files.append( new QFile( name ) );
offsets.append( 0 );
appendFile( name );
if( name.endsWith( ".zimaa", Qt::CaseInsensitive ) )
{
@ -208,10 +192,7 @@ void ZimFile::setFileName( const QString & name )
if( !QFileInfo( fname ).isFile() )
break;
quint64 offset = offsets.last() + files.last()->size();
files.append( new QFile( fname ) );
offsets.append( offset );
appendFile( fname );
}
if( j < 26 )
@ -220,92 +201,6 @@ void ZimFile::setFileName( const QString & name )
}
}
void ZimFile::close()
{
for( QVector< QFile * >::const_iterator i = files.begin(); i != files.end(); ++i )
{
(*i)->close();
delete (*i);
}
files.clear();
offsets.clear();
currentFile = 0;
}
void ZimFile::getFilenames( vector< string > &names )
{
for( QVector< QFile const * >::const_iterator i = files.begin(); i != files.end(); ++i )
names.push_back( FsEncoding::encode( (*i)->fileName() ) );
}
bool ZimFile::open( QFile::OpenMode mode )
{
for( QVector< QFile * >::iterator i = files.begin(); i != files.end(); ++i )
if( !(*i)->open( mode ) )
{
close();
return false;
}
return true;
}
bool ZimFile::seek( quint64 pos )
{
int fileNom;
for( fileNom = 0; fileNom < offsets.size() - 1; fileNom++ )
if( pos < offsets.at( fileNom + 1 ) )
break;
pos -= offsets.at( fileNom );
currentFile = fileNom;
return files.at( fileNom )->seek( pos );
}
qint64 ZimFile::read( char *data, qint64 maxSize )
{
quint64 bytesReaded = 0;
for( int i = currentFile; i < files.size(); i++ )
{
if( i != currentFile )
files.at( i )->seek( 0 );
qint64 ret = files.at( i )->read( data + bytesReaded, maxSize );
if( ret < 0 )
break;
bytesReaded += ret;
maxSize -= ret;
if( maxSize <= 0 )
break;
}
return bytesReaded;
}
QByteArray ZimFile::read( qint64 maxSize )
{
QByteArray data;
data.resize( maxSize );
qint64 ret = read( data.data(), maxSize );
if( ret != maxSize )
data.resize( ret );
return data;
}
bool ZimFile::getChar( char *c )
{
char ch;
return read( c ? c : &ch, 1 ) == 1;
}
// Some supporting functions
bool indexIsOldOrBad( string const & indexFile )

View file

@ -4,6 +4,7 @@
#include "zipfile.hh"
#include <QtEndian>
#include <QByteArray>
#include <QFileInfo>
namespace ZipFile {
@ -67,7 +68,7 @@ static CompressionMethod getCompressionMethod( quint16 compressionMethod )
}
}
bool positionAtCentralDir( QFile & zip )
bool positionAtCentralDir( SplitZipFile & zip )
{
// Find the end-of-central-directory record
@ -93,6 +94,8 @@ bool positionAtCentralDir( QFile & zip )
EndOfCdirRecord endOfCdirRecord;
quint32 cdir_offset;
for( ; ; --lastIndex )
{
lastIndex = eocBuffer.lastIndexOf( endOfCdirRecordSignature, lastIndex );
@ -106,7 +109,10 @@ bool positionAtCentralDir( QFile & zip )
/// Sanitize the record by checking the offset
if ( !zip.seek( qFromLittleEndian( endOfCdirRecord.offset ) ) )
cdir_offset = zip.calcAbsoluteOffset( qFromLittleEndian( endOfCdirRecord.offset ),
qFromLittleEndian( endOfCdirRecord.numDiskCd ) );
if ( !zip.seek( cdir_offset ) )
continue;
quint32 signature;
@ -120,10 +126,10 @@ bool positionAtCentralDir( QFile & zip )
// Found cdir -- position the file on the first header
return zip.seek( qFromLittleEndian( endOfCdirRecord.offset ) );
return zip.seek( cdir_offset );
}
bool readNextEntry( QFile & zip, CentralDirEntry & entry )
bool readNextEntry( SplitZipFile & zip, CentralDirEntry & entry )
{
CentralFileHeaderRecord record;
@ -147,7 +153,8 @@ bool readNextEntry( QFile & zip, CentralDirEntry & entry )
qFromLittleEndian( record.fileCommentLength ) ) )
return false;
entry.localHeaderOffset = qFromLittleEndian( record.offsetOfLocalHeader );
entry.localHeaderOffset = zip.calcAbsoluteOffset( qFromLittleEndian( record.offsetOfLocalHeader ),
qFromLittleEndian( record.diskNumberStart ) );
entry.compressedSize = qFromLittleEndian( record.compressedSize );
entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize );
entry.compressionMethod = getCompressionMethod( record.compressionMethod );
@ -156,7 +163,7 @@ bool readNextEntry( QFile & zip, CentralDirEntry & entry )
return true;
}
bool readLocalHeader( QFile & zip, LocalFileHeader & entry )
bool readLocalHeader( SplitZipFile & zip, LocalFileHeader & entry )
{
LocalFileHeaderRecord record;
@ -186,4 +193,59 @@ bool readLocalHeader( QFile & zip, LocalFileHeader & entry )
return true;
}
/// Creates the object and immediately attaches the parts found for 'name'.
SplitZipFile::SplitZipFile( const QString & name )
{
  SplitZipFile::setFileName( name );
}
void SplitZipFile::setFileName( const QString & name )
{
if( !name.toLower().endsWith( ".zip" ) )
return;
if( QFileInfo( name ).isFile() )
{
for( int i = 1; i < 100; i++ )
{
QString name2 = name.left( name.size() - 2 ) + QString( "%1" ).arg( i, 2, 10, QChar( '0' ) );
if( QFileInfo( name2 ).isFile() )
appendFile( name2 );
else
break;
}
appendFile( name );
}
else
{
for( int i = 1; i < 1000; i++ )
{
QString name2 = name + QString( ".%1" ).arg( i, 3, 10, QChar( '0' ) );
if( QFileInfo( name2 ).isFile() )
appendFile( name2 );
else
break;
}
}
}
/// Returns the most recent modification time among all attached parts.
/// With no parts attached the result corresponds to time_t value 0.
QDateTime SplitZipFile::lastModified() const
{
  unsigned long newest = 0;

  for( int n = 0; n < files.size(); ++n )
  {
    unsigned long const stamp =
      QFileInfo( files.at( n )->fileName() ).lastModified().toTime_t();

    newest = stamp > newest ? stamp : newest;
  }

  return QDateTime::fromTime_t( newest );
}
/// Converts an offset relative to the start of part 'partNo' into an
/// absolute offset in the logical file.
/// Returns 0 when 'partNo' does not refer to an attached part.
qint64 SplitZipFile::calcAbsoluteOffset( qint64 offset, quint16 partNo )
{
  return partNo < offsets.size() ? offsets.at( partNo ) + offset : 0;
}
}

View file

@ -5,11 +5,30 @@
#define __ZIPFILE_HH_INCLUDED__
#include <QFile>
#include <QDateTime>
#include "splitfile.hh"
/// Support for zip files in GoldenDict. Note that the implementation is
/// strictly tailored to GoldenDict needs only.
namespace ZipFile {
/// Zip archive that may be split into several part files. The parts are
/// accessed as one contiguous file through the SplitFile interface.
class SplitZipFile : public SplitFile::SplitFile
{
public:

  /// Creates an empty object with no parts attached.
  SplitZipFile() {}

  /// Creates the object and attaches the parts found for 'name'.
  SplitZipFile( const QString & name );

  /// Locates and attaches the parts belonging to the archive 'name'.
  virtual void setFileName( const QString & name );

  /// Calculates the absolute offset from an offset relative to the start
  /// of a part and that part's number.
  qint64 calcAbsoluteOffset( qint64 offset, quint16 partNo );

  /// Latest modification time among all parts.
  QDateTime lastModified() const;
};
enum CompressionMethod
{
Uncompressed,
@ -42,17 +61,17 @@ struct LocalFileHeader
/// zip file or other error).
/// Once the file is positioned, entries may be read by constructing Entry
/// objects.
bool positionAtCentralDir( QFile & );
bool positionAtCentralDir( SplitZipFile & );
/// Reads entry from the zip at its current offset. The file gets advanced
/// by the size of entry, so it points to the next entry.
/// Returns true on success, false otherwise.
bool readNextEntry( QFile &, CentralDirEntry & );
bool readNextEntry( SplitZipFile &, CentralDirEntry & );
/// Reads local file header from the zip at its current offset. The file gets
/// advanced by the size of entry and starts pointing to file data.
/// Returns true on success, false otherwise.
bool readLocalHeader( QFile &, LocalFileHeader & );
bool readLocalHeader( SplitZipFile &, LocalFileHeader & );
}