mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 04:24:09 +00:00
Zim: Split files support
This commit is contained in:
parent
a56329ce24
commit
2e31b303ea
|
@ -53,7 +53,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
|
||||||
<< "*.xdxf.dz" << "*.dct" << "*.aar" << "*.zips"
|
<< "*.xdxf.dz" << "*.dct" << "*.aar" << "*.zips"
|
||||||
<< "*.mdx"
|
<< "*.mdx"
|
||||||
#ifdef MAKE_ZIM_SUPPORT
|
#ifdef MAKE_ZIM_SUPPORT
|
||||||
<< "*.zim"
|
<< "*.zim" << "*.zimaa"
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
206
zim.cc
206
zim.cc
|
@ -21,11 +21,13 @@
|
||||||
|
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
#include <QFile>
|
#include <QFile>
|
||||||
|
#include <QFileInfo>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QRunnable>
|
#include <QRunnable>
|
||||||
#include <QSemaphore>
|
#include <QSemaphore>
|
||||||
#include <QAtomicInt>
|
#include <QAtomicInt>
|
||||||
#include <QImage>
|
#include <QImage>
|
||||||
|
#include <QDir>
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -48,7 +50,9 @@ using BtreeIndexing::IndexInfo;
|
||||||
DEF_EX_STR( exNotZimFile, "Not an Zim file", Dictionary::Ex )
|
DEF_EX_STR( exNotZimFile, "Not an Zim file", Dictionary::Ex )
|
||||||
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
||||||
|
|
||||||
namespace {
|
//namespace {
|
||||||
|
|
||||||
|
class ZimFile;
|
||||||
|
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#pragma pack( push, 1 )
|
#pragma pack( push, 1 )
|
||||||
|
@ -137,6 +141,170 @@ __attribute__((packed))
|
||||||
#pragma pack( pop, 1 )
|
#pragma pack( pop, 1 )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Class for support of split zim files
|
||||||
|
|
||||||
|
class ZimFile
|
||||||
|
{
|
||||||
|
QVector< QFile * > files;
|
||||||
|
QVector< quint64 > offsets;
|
||||||
|
int currentFile;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
ZimFile();
|
||||||
|
ZimFile( const QString & name );
|
||||||
|
~ZimFile();
|
||||||
|
|
||||||
|
void setFileName( const QString & name );
|
||||||
|
void getFilenames( vector< string > & names );
|
||||||
|
bool open( QFile::OpenMode mode );
|
||||||
|
void close();
|
||||||
|
bool seek( quint64 pos );
|
||||||
|
qint64 read( char * data, qint64 maxSize );
|
||||||
|
QByteArray read( qint64 maxSize );
|
||||||
|
bool getChar( char * c );
|
||||||
|
qint64 size()
|
||||||
|
{ return files.isEmpty() ? 0 : offsets.last() + files.last()->size(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Default constructor: no files attached yet, reading starts at part 0.
ZimFile::ZimFile() : currentFile( 0 ) {}
|
||||||
|
|
||||||
|
// Constructs the object and immediately attaches the file(s) designated by
// name via setFileName().
ZimFile::ZimFile( const QString & name ) : currentFile( 0 )
{
  setFileName( name );
}
|
||||||
|
|
||||||
|
// Destructor: delegates to close(), which closes and deletes every part.
ZimFile::~ZimFile() { close(); }
|
||||||
|
|
||||||
|
void ZimFile::setFileName( const QString & name )
|
||||||
|
{
|
||||||
|
close();
|
||||||
|
|
||||||
|
files.append( new QFile( name ) );
|
||||||
|
offsets.append( 0 );
|
||||||
|
|
||||||
|
if( name.endsWith( ".zimaa", Qt::CaseInsensitive ) )
|
||||||
|
{
|
||||||
|
QString fname = name;
|
||||||
|
|
||||||
|
for( int i = 0; i < 26; i++ )
|
||||||
|
{
|
||||||
|
fname[ fname.size() - 2 ] = (char)( 'a' + i );
|
||||||
|
|
||||||
|
int j;
|
||||||
|
for( j = 1; j < 26; j++ )
|
||||||
|
{
|
||||||
|
fname[ fname.size() - 1 ] = (char)( 'a' + j );
|
||||||
|
if( !QFileInfo( fname ).isFile() )
|
||||||
|
break;
|
||||||
|
|
||||||
|
quint64 offset = offsets.last() + files.last()->size();
|
||||||
|
|
||||||
|
files.append( new QFile( fname ) );
|
||||||
|
offsets.append( offset );
|
||||||
|
}
|
||||||
|
|
||||||
|
if( j < 26 )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ZimFile::close()
|
||||||
|
{
|
||||||
|
for( QVector< QFile * >::const_iterator i = files.begin(); i != files.end(); ++i )
|
||||||
|
{
|
||||||
|
(*i)->close();
|
||||||
|
delete (*i);
|
||||||
|
}
|
||||||
|
|
||||||
|
files.clear();
|
||||||
|
offsets.clear();
|
||||||
|
|
||||||
|
currentFile = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends the name of every attached part, passed through
// FsEncoding::encode(), to names in part order. Existing entries of names
// are left untouched.
void ZimFile::getFilenames( vector< string > &names )
{
  for( int k = 0; k < files.size(); k++ )
  {
    QFile const * part = files.at( k );
    names.push_back( FsEncoding::encode( part->fileName() ) );
  }
}
|
||||||
|
|
||||||
|
// Opens every attached part with the given mode.
//
// Returns true only if there is at least one part and all parts were opened.
// If any part fails to open, close() is called - which also discards the
// attached file list - and false is returned.
bool ZimFile::open( QFile::OpenMode mode )
{
  // Nothing to open (no name was set, or an earlier failed open() cleared
  // the list): do not report success.
  if( files.isEmpty() )
    return false;

  for( QVector< QFile * >::iterator i = files.begin(); i != files.end(); ++i )
    if( !(*i)->open( mode ) )
    {
      close();
      return false;
    }

  return true;
}
|
||||||
|
|
||||||
|
// Positions the combined stream at pos, counted from the start of part 0.
//
// Selects the part whose range contains pos (the last part if pos lies
// beyond all recorded offsets), remembers it as the current part and seeks
// inside it to the part-relative position. Returns the result of that
// seek, or false when no files are attached.
bool ZimFile::seek( quint64 pos )
{
  // offsets is empty when no files are attached; indexing it would be
  // out of range.
  if( files.isEmpty() )
    return false;

  int fileNom = 0;

  // Advance while pos is at or past the start of the next part
  while( fileNom + 1 < offsets.size() && pos >= offsets.at( fileNom + 1 ) )
    fileNom++;

  currentFile = fileNom;
  return files.at( fileNom )->seek( pos - offsets.at( fileNom ) );
}
|
||||||
|
|
||||||
|
// Reads up to maxSize bytes from the combined stream into data.
//
// Reading starts in the current part at its current position and continues
// from the beginning of each following part until maxSize bytes were stored,
// a part reports a read error, or the last part is exhausted. Returns the
// number of bytes stored in data (0 if nothing could be read).
qint64 ZimFile::read( char *data, qint64 maxSize )
{
  qint64 bytesRead = 0;

  for( int i = currentFile; i < files.size(); i++ )
  {
    if( i != currentFile )
    {
      // Crossing into the next part: start at its beginning and remember it
      // as current, so that the next sequential read continues from here
      // instead of rewinding this part again.
      files.at( i )->seek( 0 );
      currentFile = i;
    }

    qint64 ret = files.at( i )->read( data + bytesRead, maxSize );
    if( ret < 0 )
      break;

    bytesRead += ret;
    maxSize -= ret;

    if( maxSize <= 0 )
      break;
  }

  return bytesRead;
}
|
||||||
|
|
||||||
|
// Reads up to maxSize bytes from the combined stream and returns them as a
// byte array. The array is shrunk to the number of bytes actually read when
// fewer than maxSize were available.
QByteArray ZimFile::read( qint64 maxSize )
{
  QByteArray buffer;
  buffer.resize( maxSize );

  const qint64 got = read( buffer.data(), maxSize );

  if( got != maxSize )
    buffer.resize( got );

  return buffer;
}
|
||||||
|
|
||||||
|
bool ZimFile::getChar( char *c )
|
||||||
|
{
|
||||||
|
char ch;
|
||||||
|
return read( c ? c : &ch, 1 ) == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some supporting functions
|
||||||
|
|
||||||
bool indexIsOldOrBad( string const & indexFile )
|
bool indexIsOldOrBad( string const & indexFile )
|
||||||
{
|
{
|
||||||
File::Class idx( indexFile, "rb" );
|
File::Class idx( indexFile, "rb" );
|
||||||
|
@ -148,7 +316,7 @@ bool indexIsOldOrBad( string const & indexFile )
|
||||||
header.formatVersion != CurrentFormatVersion;
|
header.formatVersion != CurrentFormatVersion;
|
||||||
}
|
}
|
||||||
|
|
||||||
quint32 readArticle( QFile & file, ZIM_header & header, uint32_t articleNumber, string & result,
|
quint32 readArticle( ZimFile & file, ZIM_header & header, uint32_t articleNumber, string & result,
|
||||||
set< quint32 > * loadedArticles = NULL )
|
set< quint32 > * loadedArticles = NULL )
|
||||||
{
|
{
|
||||||
while( 1 )
|
while( 1 )
|
||||||
|
@ -247,6 +415,8 @@ quint32 readArticle( QFile & file, ZIM_header & header, uint32_t articleNumber,
|
||||||
return 0xFFFFFFFF;
|
return 0xFFFFFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ZimDictionary
|
||||||
|
|
||||||
class ZimDictionary: public BtreeIndexing::BtreeDictionary
|
class ZimDictionary: public BtreeIndexing::BtreeDictionary
|
||||||
{
|
{
|
||||||
Mutex idxMutex;
|
Mutex idxMutex;
|
||||||
|
@ -255,7 +425,7 @@ class ZimDictionary: public BtreeIndexing::BtreeDictionary
|
||||||
BtreeIndex resourceIndex;
|
BtreeIndex resourceIndex;
|
||||||
IdxHeader idxHeader;
|
IdxHeader idxHeader;
|
||||||
string dictionaryName;
|
string dictionaryName;
|
||||||
QFile df;
|
ZimFile df;
|
||||||
ZIM_header zimHeader;
|
ZIM_header zimHeader;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -340,8 +510,9 @@ ZimDictionary::ZimDictionary( string const & id,
|
||||||
|
|
||||||
if( idxHeader.namePtr == 0xFFFFFFFF )
|
if( idxHeader.namePtr == 0xFFFFFFFF )
|
||||||
{
|
{
|
||||||
int n = df.fileName().lastIndexOf( '/' );
|
QString name = QDir::fromNativeSeparators( FsEncoding::decode( dictionaryFiles[ 0 ].c_str() ) );
|
||||||
dictionaryName = string( df.fileName().mid( n + 1 ).toUtf8().constData() );
|
int n = name.lastIndexOf( '/' );
|
||||||
|
dictionaryName = string( name.mid( n + 1 ).toUtf8().constData() );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -551,7 +722,7 @@ void ZimArticleRequest::run()
|
||||||
|
|
||||||
headword = chain[ x ].word;
|
headword = chain[ x ].word;
|
||||||
|
|
||||||
quint32 articleNumber;
|
quint32 articleNumber = 0xFFFFFFFF;
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
articleNumber = dict.loadArticle( chain[ x ].articleOffset, articleText, &articlesIncluded );
|
articleNumber = dict.loadArticle( chain[ x ].articleOffset, articleText, &articlesIncluded );
|
||||||
|
@ -786,7 +957,7 @@ sptr< Dictionary::DataRequest > ZimDictionary::getResource( string const & name
|
||||||
return new ZimResourceRequest( *this, name );
|
return new ZimResourceRequest( *this, name );
|
||||||
}
|
}
|
||||||
|
|
||||||
} // anonymous namespace
|
//} // anonymous namespace
|
||||||
|
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries(
|
vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
vector< string > const & fileNames,
|
vector< string > const & fileNames,
|
||||||
|
@ -801,29 +972,32 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
{
|
{
|
||||||
// Skip files with the extensions different to .zim to speed up the
|
// Skip files with the extensions different to .zim to speed up the
|
||||||
// scanning
|
// scanning
|
||||||
if ( i->size() < 4 ||
|
|
||||||
strcasecmp( i->c_str() + ( i->size() - 4 ), ".zim" ) != 0 )
|
QString firstName = QDir::fromNativeSeparators( FsEncoding::decode( i->c_str() ) );
|
||||||
|
if( !firstName.endsWith( ".zim") && !firstName.endsWith( ".zimaa" ) )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Got the file -- check if we need to rebuid the index
|
// Got the file -- check if we need to rebuid the index
|
||||||
|
|
||||||
vector< string > dictFiles( 1, *i );
|
ZimFile df( firstName );
|
||||||
|
|
||||||
|
vector< string > dictFiles;
|
||||||
|
df.getFilenames( dictFiles );
|
||||||
|
|
||||||
string dictId = Dictionary::makeDictionaryId( dictFiles );
|
string dictId = Dictionary::makeDictionaryId( dictFiles );
|
||||||
|
|
||||||
string indexFile = indicesDir + dictId;
|
string indexFile = indicesDir + dictId;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
|
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
|
||||||
indexIsOldOrBad( indexFile ) )
|
indexIsOldOrBad( indexFile ) )
|
||||||
{
|
|
||||||
try
|
|
||||||
{
|
{
|
||||||
ZIM_header zh;
|
ZIM_header zh;
|
||||||
|
|
||||||
unsigned articleCount = 0;
|
unsigned articleCount = 0;
|
||||||
unsigned wordCount = 0;
|
unsigned wordCount = 0;
|
||||||
|
|
||||||
QFile df( FsEncoding::decode( i->c_str() ) );
|
|
||||||
df.open( QFile::ReadOnly );
|
df.open( QFile::ReadOnly );
|
||||||
|
|
||||||
qint64 ret = df.read( reinterpret_cast< char * >( &zh ), sizeof( zh ) );
|
qint64 ret = df.read( reinterpret_cast< char * >( &zh ), sizeof( zh ) );
|
||||||
|
@ -834,8 +1008,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
throw exNotZimFile( i->c_str() );
|
throw exNotZimFile( i->c_str() );
|
||||||
|
|
||||||
{
|
{
|
||||||
int n = df.fileName().lastIndexOf( '/' );
|
int n = firstName.lastIndexOf( '/' );
|
||||||
initializing.indexingDictionary( df.fileName().mid( n + 1 ).toUtf8().constData() );
|
initializing.indexingDictionary( firstName.mid( n + 1 ).toUtf8().constData() );
|
||||||
}
|
}
|
||||||
|
|
||||||
File::Class idx( indexFile, "wb" );
|
File::Class idx( indexFile, "wb" );
|
||||||
|
@ -988,6 +1162,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
|
|
||||||
idx.write( &idxHeader, sizeof( idxHeader ) );
|
idx.write( &idxHeader, sizeof( idxHeader ) );
|
||||||
}
|
}
|
||||||
|
}
|
||||||
catch( std::exception & e )
|
catch( std::exception & e )
|
||||||
{
|
{
|
||||||
FDPRINTF( stderr, "Zim dictionary indexing failed: %s, error: %s\n",
|
FDPRINTF( stderr, "Zim dictionary indexing failed: %s, error: %s\n",
|
||||||
|
@ -999,7 +1174,6 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
FDPRINTF( stderr, "Zim dictionary indexing failed\n" );
|
FDPRINTF( stderr, "Zim dictionary indexing failed\n" );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
dictionaries.push_back( new ZimDictionary( dictId,
|
dictionaries.push_back( new ZimDictionary( dictId,
|
||||||
indexFile,
|
indexFile,
|
||||||
dictFiles ) );
|
dictFiles ) );
|
||||||
|
|
Loading…
Reference in a new issue