mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
MDict: Add volumes support for mdd files
This commit is contained in:
parent
3583ac5b4a
commit
76b5b55ff0
231
mdx.cc
231
mdx.cc
|
@ -1,4 +1,4 @@
|
||||||
/* This file is (c) 2013 Timon Wong <timon86.wang.gmail.com>
|
/* This file is (c) 2013 Timon Wong <timon86.wang AT gmail DOT com>
|
||||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||||
|
|
||||||
#include "mdx.hh"
|
#include "mdx.hh"
|
||||||
|
@ -52,18 +52,10 @@ using BtreeIndexing::IndexInfo;
|
||||||
|
|
||||||
using namespace Mdict;
|
using namespace Mdict;
|
||||||
|
|
||||||
|
|
||||||
/// Checks if the given string ends with the given substring
|
|
||||||
static bool endsWith( string const & str, string const & tail )
|
|
||||||
{
|
|
||||||
return str.size() >= tail.size() &&
|
|
||||||
str.compare( str.size() - tail.size(), tail.size(), tail ) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
kSignature = 0x4349444d, // MDIC
|
kSignature = 0x4349444d, // MDIC
|
||||||
kCurrentFormatVersion = 7 + BtreeIndexing::FormatVersion
|
kCurrentFormatVersion = 8 + BtreeIndexing::FormatVersion
|
||||||
};
|
};
|
||||||
|
|
||||||
struct IdxHeader
|
struct IdxHeader
|
||||||
|
@ -94,9 +86,8 @@ struct IdxHeader
|
||||||
uint32_t langFrom; // Source language
|
uint32_t langFrom; // Source language
|
||||||
uint32_t langTo; // Target language
|
uint32_t langTo; // Target language
|
||||||
|
|
||||||
uint32_t hasMddFile;
|
uint32_t mddIndexInfosOffset; // address of IndexInfos for resource files (.mdd)
|
||||||
uint32_t mddIndexBtreeMaxElements;
|
uint32_t mddIndexInfosCount; // count of IndexInfos for resource files
|
||||||
uint32_t mddIndexRootOffset;
|
|
||||||
}
|
}
|
||||||
#ifndef _MSC_VER
|
#ifndef _MSC_VER
|
||||||
__attribute__( ( packed ) )
|
__attribute__( ( packed ) )
|
||||||
|
@ -192,7 +183,7 @@ class MdxDictionary: public BtreeIndexing::BtreeDictionary
|
||||||
string encoding;
|
string encoding;
|
||||||
ChunkedStorage::Reader chunks;
|
ChunkedStorage::Reader chunks;
|
||||||
QFile dictFile;
|
QFile dictFile;
|
||||||
IndexedMdd mddResource;
|
vector< sptr< IndexedMdd > > mddResources;
|
||||||
MdictParser::StyleSheets styleSheets;
|
MdictParser::StyleSheets styleSheets;
|
||||||
|
|
||||||
QAtomicInt deferredInitDone;
|
QAtomicInt deferredInitDone;
|
||||||
|
@ -273,7 +264,6 @@ MdxDictionary::MdxDictionary( string const & id, string const & indexFile,
|
||||||
idx( indexFile, "rb" ),
|
idx( indexFile, "rb" ),
|
||||||
idxHeader( idx.read< IdxHeader >() ),
|
idxHeader( idx.read< IdxHeader >() ),
|
||||||
chunks( idx, idxHeader.chunksOffset ),
|
chunks( idx, idxHeader.chunksOffset ),
|
||||||
mddResource( idxMutex, chunks ),
|
|
||||||
deferredInitRunnableStarted( false )
|
deferredInitRunnableStarted( false )
|
||||||
{
|
{
|
||||||
// Read the dictionary's name
|
// Read the dictionary's name
|
||||||
|
@ -392,20 +382,30 @@ void MdxDictionary::doDeferredInit()
|
||||||
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
|
openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
|
||||||
idxHeader.indexRootOffset ), idx, idxMutex );
|
idxHeader.indexRootOffset ), idx, idxMutex );
|
||||||
|
|
||||||
for ( vector<string>::const_iterator i = getDictionaryFilenames().begin();
|
vector< string > mddFileNames;
|
||||||
i != getDictionaryFilenames().end(); i++ )
|
vector< IndexInfo > mddIndexInfos;
|
||||||
|
idx.seek( idxHeader.mddIndexInfosOffset );
|
||||||
|
for ( uint32_t i = 0; i < idxHeader.mddIndexInfosCount; i++ )
|
||||||
{
|
{
|
||||||
if ( endsWith( *i, ".mdd" ) && File::exists( *i ) )
|
string::size_type sz = idx.read<string::size_type>();
|
||||||
{
|
vector< char > buf( sz );
|
||||||
if ( idxHeader.hasMddFile && ( idxHeader.mddIndexBtreeMaxElements ||
|
idx.read( &buf.front(), sz );
|
||||||
idxHeader.mddIndexRootOffset ) )
|
uint32_t btreeMaxElements = idx.read<uint32_t>();
|
||||||
{
|
uint32_t rootOffset = idx.read<uint32_t>();
|
||||||
mddResource.openIndex( IndexInfo( idxHeader.mddIndexBtreeMaxElements,
|
mddFileNames.push_back( string( &buf.front() ) );
|
||||||
idxHeader.mddIndexRootOffset ),
|
mddIndexInfos.push_back( IndexInfo( btreeMaxElements, rootOffset ) );
|
||||||
idx, idxMutex );
|
}
|
||||||
mddResource.open( i->c_str() );
|
|
||||||
}
|
vector< string > const dictFiles = getDictionaryFilenames();
|
||||||
}
|
for ( uint32_t i = 1; i < dictFiles.size() && i < mddFileNames.size() + 1; i++ )
|
||||||
|
{
|
||||||
|
if ( dictFiles[ i ] != mddFileNames[ i - 1 ] || !File::exists( dictFiles[ i ] ) )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
IndexedMdd * mdd = new IndexedMdd( idxMutex, chunks );
|
||||||
|
mdd->openIndex( mddIndexInfos[ i - 1 ], idx, idxMutex );
|
||||||
|
mdd->open( dictFiles[ i ].c_str() );
|
||||||
|
mddResources.push_back( mdd );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch ( std::exception & e )
|
catch ( std::exception & e )
|
||||||
|
@ -669,6 +669,12 @@ void MddResourceRequest::run()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string u8ResourceName = Utf8::encode( resourceName );
|
||||||
|
QCryptographicHash hash( QCryptographicHash::Md5 );
|
||||||
|
hash.addData( u8ResourceName.data(), u8ResourceName.size() );
|
||||||
|
if ( !resourceIncluded.insert( hash.result() ).second )
|
||||||
|
continue;
|
||||||
|
|
||||||
// Convert to the Windows separator
|
// Convert to the Windows separator
|
||||||
std::replace( resourceName.begin(), resourceName.end(), '/', '\\' );
|
std::replace( resourceName.begin(), resourceName.end(), '/', '\\' );
|
||||||
if ( resourceName[ 0 ] != '\\' )
|
if ( resourceName[ 0 ] != '\\' )
|
||||||
|
@ -676,41 +682,51 @@ void MddResourceRequest::run()
|
||||||
resourceName.insert( 0, 1, '\\' );
|
resourceName.insert( 0, 1, '\\' );
|
||||||
}
|
}
|
||||||
|
|
||||||
string u8ResourceName = Utf8::encode( resourceName );
|
|
||||||
QCryptographicHash hash( QCryptographicHash::Md5 );
|
|
||||||
hash.addData( u8ResourceName.data(), u8ResourceName.size() );
|
|
||||||
if ( !resourceIncluded.insert( hash.result() ).second )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Get actual resource
|
|
||||||
Mutex::Lock _( dataMutex );
|
Mutex::Lock _( dataMutex );
|
||||||
data.clear();
|
data.clear();
|
||||||
if ( dict.mddResource.loadFile( resourceName, data ) )
|
|
||||||
|
try
|
||||||
{
|
{
|
||||||
// Check if this file has a redirection
|
// local file takes precedence
|
||||||
// Always encoded in UTF16-LE
|
string fn = FsEncoding::dirname( dict.getDictionaryFilenames()[ 0 ] ) +
|
||||||
// L"@@@LINK="
|
FsEncoding::separator() + u8ResourceName;
|
||||||
static const char pattern[16] =
|
File::loadFromFile( fn, data );
|
||||||
|
}
|
||||||
|
catch ( File::exCantOpen & )
|
||||||
|
{
|
||||||
|
for ( vector< sptr< IndexedMdd > >::const_iterator i = dict.mddResources.begin();
|
||||||
|
i != dict.mddResources.end(); i++ )
|
||||||
{
|
{
|
||||||
'@', '\0', '@', '\0', '@', '\0', 'L', '\0', 'I', '\0', 'N', '\0', 'K', '\0', '=', '\0'
|
sptr< IndexedMdd > mddResource = *i;
|
||||||
};
|
|
||||||
|
|
||||||
if ( data.size() > sizeof( pattern ) )
|
if ( mddResource->loadFile( resourceName, data ) )
|
||||||
{
|
break;
|
||||||
if ( memcmp( &data.front(), pattern, sizeof( pattern ) ) == 0 )
|
|
||||||
{
|
|
||||||
data.push_back( '\0' );
|
|
||||||
data.push_back( '\0' );
|
|
||||||
QString target = MdictParser::toUtf16( "UTF-16LE", &data.front() + sizeof( pattern ),
|
|
||||||
data.size() - sizeof( pattern ) );
|
|
||||||
resourceName = gd::toWString( target.trimmed() );
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
hasAnyData = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if this file has a redirection
|
||||||
|
// Always encoded in UTF16-LE
|
||||||
|
// L"@@@LINK="
|
||||||
|
static const char pattern[16] =
|
||||||
|
{
|
||||||
|
'@', '\0', '@', '\0', '@', '\0', 'L', '\0', 'I', '\0', 'N', '\0', 'K', '\0', '=', '\0'
|
||||||
|
};
|
||||||
|
|
||||||
|
if ( data.size() > sizeof( pattern ) )
|
||||||
|
{
|
||||||
|
if ( memcmp( &data.front(), pattern, sizeof( pattern ) ) == 0 )
|
||||||
|
{
|
||||||
|
data.push_back( '\0' );
|
||||||
|
data.push_back( '\0' );
|
||||||
|
QString target = MdictParser::toUtf16( "UTF-16LE", &data.front() + sizeof( pattern ),
|
||||||
|
data.size() - sizeof( pattern ) );
|
||||||
|
resourceName = gd::toWString( target.trimmed() );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( data.size() > 0 )
|
||||||
|
hasAnyData = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -907,7 +923,7 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static bool indexIsOldOrBad( string const & indexFile, bool hasMddFile )
|
static bool indexIsOldOrBad( vector< string > const & dictFiles, string const & indexFile )
|
||||||
{
|
{
|
||||||
File::Class idx( indexFile, "rb" );
|
File::Class idx( indexFile, "rb" );
|
||||||
IdxHeader header;
|
IdxHeader header;
|
||||||
|
@ -917,7 +933,32 @@ static bool indexIsOldOrBad( string const & indexFile, bool hasMddFile )
|
||||||
header.formatVersion != kCurrentFormatVersion ||
|
header.formatVersion != kCurrentFormatVersion ||
|
||||||
header.parserVersion != MdictParser::kParserVersion ||
|
header.parserVersion != MdictParser::kParserVersion ||
|
||||||
header.foldingVersion != Folding::Version ||
|
header.foldingVersion != Folding::Version ||
|
||||||
header.hasMddFile != hasMddFile;
|
header.mddIndexInfosCount != dictFiles.size() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void findResourceFiles( string const & mdx, vector< string > & dictFiles )
|
||||||
|
{
|
||||||
|
string base( mdx, 0, mdx.size() - 4 );
|
||||||
|
// Check if there' is any file end with .mdd, which is the resource file for the dictionary
|
||||||
|
string resFile;
|
||||||
|
if ( File::tryPossibleName( base + ".mdd", resFile ) )
|
||||||
|
{
|
||||||
|
dictFiles.push_back( resFile );
|
||||||
|
// Find complementary .mdd file (volumes), like follows:
|
||||||
|
// demo.mdx <- main dictionary file
|
||||||
|
// demo.mdd <- main resource file ( 1st volume )
|
||||||
|
// demo.1.mdd <- 2nd volume
|
||||||
|
// ...
|
||||||
|
// demo.n.mdd <- nth volume
|
||||||
|
QString baseU8 = QString::fromUtf8( base.c_str() );
|
||||||
|
int vol = 1;
|
||||||
|
while ( File::tryPossibleName( string( QString( "%1.%2.mdd" ).arg( baseU8 ).arg( vol )
|
||||||
|
.toUtf8().constBegin() ), resFile ) )
|
||||||
|
{
|
||||||
|
dictFiles.push_back( resFile );
|
||||||
|
vol++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames,
|
vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & fileNames,
|
||||||
|
@ -934,42 +975,39 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
vector< string > dictFiles( 1, *i );
|
vector< string > dictFiles( 1, *i );
|
||||||
|
findResourceFiles( *i, dictFiles );
|
||||||
string baseName = ( ( *i )[ i->size() - 4 ] == '.' ) ?
|
|
||||||
string( *i, 0, i->size() - 4 ) : string( *i, 0, i->size() - 7 );
|
|
||||||
|
|
||||||
// Check if there' is any file end with .mdd, which is the resource file for the dictionary
|
|
||||||
string mddFileName;
|
|
||||||
if ( File::tryPossibleName( baseName + ".mdd", mddFileName ) )
|
|
||||||
dictFiles.push_back( mddFileName );
|
|
||||||
|
|
||||||
string dictId = Dictionary::makeDictionaryId( dictFiles );
|
string dictId = Dictionary::makeDictionaryId( dictFiles );
|
||||||
|
|
||||||
string indexFile = indicesDir + dictId;
|
string indexFile = indicesDir + dictId;
|
||||||
|
|
||||||
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
|
if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
|
||||||
indexIsOldOrBad( indexFile, !mddFileName.empty() ) )
|
indexIsOldOrBad( dictFiles, indexFile ) )
|
||||||
{
|
{
|
||||||
// Building the index
|
// Building the index
|
||||||
MdictParser parser( i->c_str() );
|
MdictParser parser( i->c_str() );
|
||||||
sptr<MdictParser> mddParser = NULL;
|
list< sptr< MdictParser > > mddParsers;
|
||||||
|
|
||||||
if ( !parser.open() )
|
if ( !parser.open() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if ( File::exists( mddFileName ) )
|
|
||||||
{
|
|
||||||
mddParser = new MdictParser( mddFileName.c_str() );
|
|
||||||
if ( !mddParser->open() )
|
|
||||||
{
|
|
||||||
FDPRINTF( stderr, "Warning: Invalid mdd (resource) file: %s\n", mddFileName.c_str() );
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
string title = string( parser.title().toUtf8().constData() );
|
string title = string( parser.title().toUtf8().constData() );
|
||||||
initializing.indexingDictionary( title );
|
initializing.indexingDictionary( title );
|
||||||
|
|
||||||
|
for ( vector< string >::const_iterator mddIter = dictFiles.begin() + 1;
|
||||||
|
mddIter != dictFiles.end(); mddIter++ )
|
||||||
|
{
|
||||||
|
if ( File::exists( *mddIter ) )
|
||||||
|
{
|
||||||
|
MdictParser * mddParser = new MdictParser( mddIter->c_str() );
|
||||||
|
if ( !mddParser->open() )
|
||||||
|
{
|
||||||
|
FDPRINTF( stderr, "Warning: Broken mdd (resource) file: %s\n", mddIter->c_str() );
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
mddParsers.push_back( mddParser );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
File::Class idx( indexFile, "wb" );
|
File::Class idx( indexFile, "wb" );
|
||||||
IdxHeader idxHeader;
|
IdxHeader idxHeader;
|
||||||
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
memset( &idxHeader, 0, sizeof( idxHeader ) );
|
||||||
|
@ -1016,16 +1054,23 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
}
|
}
|
||||||
|
|
||||||
// enumerating resources if there's any
|
// enumerating resources if there's any
|
||||||
sptr<IndexedWords> mddIndexedWords;
|
vector< sptr< IndexedWords > > mddIndices;
|
||||||
if ( mddParser )
|
vector< string > mddFileNames;
|
||||||
|
while ( !mddParsers.empty() )
|
||||||
{
|
{
|
||||||
mddIndexedWords = new IndexedWords();
|
sptr< MdictParser > mddParser = mddParsers.front();
|
||||||
|
|
||||||
|
IndexedWords * mddIndexedWords = new IndexedWords();
|
||||||
ResourceHandler resourceHandler( chunks, *mddIndexedWords );
|
ResourceHandler resourceHandler( chunks, *mddIndexedWords );
|
||||||
|
|
||||||
while ( mddParser->readNextHeadWordIndex( headWordIndex ) )
|
while ( mddParser->readNextHeadWordIndex( headWordIndex ) )
|
||||||
{
|
{
|
||||||
mddParser->readRecordBlock( headWordIndex, resourceHandler );
|
mddParser->readRecordBlock( headWordIndex, resourceHandler );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mddIndices.push_back( mddIndexedWords );
|
||||||
|
mddFileNames.push_back( string( mddParser->filename().toUtf8().constData() ) );
|
||||||
|
mddParsers.pop_front();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finish with the chunks
|
// Finish with the chunks
|
||||||
|
@ -1073,12 +1118,26 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
idxHeader.langFrom = langs.first;
|
idxHeader.langFrom = langs.first;
|
||||||
idxHeader.langTo = langs.second;
|
idxHeader.langTo = langs.second;
|
||||||
|
|
||||||
if ( mddParser )
|
// Build index info for each mdd file
|
||||||
|
vector< IndexInfo > mddIndexInfos;
|
||||||
|
for ( vector< sptr< IndexedWords > >::const_iterator mddIndexIter = mddIndices.begin();
|
||||||
|
mddIndexIter != mddIndices.end(); mddIndexIter++ )
|
||||||
{
|
{
|
||||||
IndexInfo resourceIdxInfo = BtreeIndexing::buildIndex( *mddIndexedWords, idx );
|
IndexInfo resourceIdxInfo = BtreeIndexing::buildIndex( *( *mddIndexIter ), idx );
|
||||||
idxHeader.hasMddFile = true;
|
mddIndexInfos.push_back( resourceIdxInfo );
|
||||||
idxHeader.mddIndexBtreeMaxElements = resourceIdxInfo.btreeMaxElements;
|
}
|
||||||
idxHeader.mddIndexRootOffset = resourceIdxInfo.rootOffset;
|
|
||||||
|
// Save address of IndexInfos for resource files
|
||||||
|
idxHeader.mddIndexInfosOffset = idx.tell();
|
||||||
|
idxHeader.mddIndexInfosCount = mddIndexInfos.size();
|
||||||
|
for ( uint32_t mi = 0; mi < mddIndexInfos.size(); mi++ )
|
||||||
|
{
|
||||||
|
const string & mddfile = mddFileNames[ mi ];
|
||||||
|
|
||||||
|
idx.write<string::size_type>( mddfile.size() + 1 );
|
||||||
|
idx.write( mddfile.c_str(), mddfile.size() + 1 );
|
||||||
|
idx.write<uint32_t>( mddIndexInfos[ mi ].btreeMaxElements );
|
||||||
|
idx.write<uint32_t>( mddIndexInfos[ mi ].rootOffset );
|
||||||
}
|
}
|
||||||
|
|
||||||
// That concludes it. Update the header.
|
// That concludes it. Update the header.
|
||||||
|
|
2
mdx.hh
2
mdx.hh
|
@ -1,4 +1,4 @@
|
||||||
/* This file is (c) 2013 Timon Wong <timon86.wang.gmail.com>
|
/* This file is (c) 2013 Timon Wong <timon86.wang AT gmail DOT com>
|
||||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||||
|
|
||||||
#ifndef __MDX_HH_INCLUDED__
|
#ifndef __MDX_HH_INCLUDED__
|
||||||
|
|
Loading…
Reference in a new issue