mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-28 03:44:07 +00:00
commit
52a91991c4
|
@ -294,7 +294,8 @@ HEADERS += folding.hh \
|
||||||
ftshelpers.hh \
|
ftshelpers.hh \
|
||||||
dictserver.hh \
|
dictserver.hh \
|
||||||
helpwindow.hh \
|
helpwindow.hh \
|
||||||
slob.hh
|
slob.hh \
|
||||||
|
ripemd.hh
|
||||||
|
|
||||||
FORMS += groups.ui \
|
FORMS += groups.ui \
|
||||||
dictgroupwidget.ui \
|
dictgroupwidget.ui \
|
||||||
|
@ -417,7 +418,8 @@ SOURCES += folding.cc \
|
||||||
ftshelpers.cc \
|
ftshelpers.cc \
|
||||||
dictserver.cc \
|
dictserver.cc \
|
||||||
helpwindow.cc \
|
helpwindow.cc \
|
||||||
slob.cc
|
slob.cc \
|
||||||
|
ripemd.cc
|
||||||
|
|
||||||
win32 {
|
win32 {
|
||||||
FORMS += texttospeechsource.ui
|
FORMS += texttospeechsource.ui
|
||||||
|
|
262
mdictparser.cc
262
mdictparser.cc
|
@ -1,8 +1,10 @@
|
||||||
// https://bitbucket.org/xwang/mdict-analysis
|
// https://bitbucket.org/xwang/mdict-analysis
|
||||||
|
// https://github.com/zhansliu/writemdict/blob/master/fileformat.md
|
||||||
// Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser
|
// Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser
|
||||||
//
|
//
|
||||||
// Copyright (C) 2012, 2013 Xiaoqiang Wang <xiaoqiangwang AT gmail DOT com>
|
// Copyright (C) 2012, 2013 Xiaoqiang Wang <xiaoqiangwang AT gmail DOT com>
|
||||||
// Copyright (C) 2013 Timon Wong <timon86.wang AT gmail DOT com>
|
// Copyright (C) 2013 Timon Wong <timon86.wang AT gmail DOT com>
|
||||||
|
// Copyright (C) 2015 Zhe Wang <0x1998 AT gmail DOT com>
|
||||||
//
|
//
|
||||||
// This program is a free software; you can redistribute it and/or modify
|
// This program is a free software; you can redistribute it and/or modify
|
||||||
// it under the terms of the GNU General Public License as published by
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
@ -23,9 +25,6 @@
|
||||||
#include <iconv.h>
|
#include <iconv.h>
|
||||||
#include <lzo/lzo1x.h>
|
#include <lzo/lzo1x.h>
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iterator>
|
|
||||||
|
|
||||||
#include <QtEndian>
|
#include <QtEndian>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
|
@ -34,13 +33,19 @@
|
||||||
#include <QDomDocument>
|
#include <QDomDocument>
|
||||||
#include <QTextDocumentFragment>
|
#include <QTextDocumentFragment>
|
||||||
|
|
||||||
#include <QDebug>
|
|
||||||
|
|
||||||
#include "decompress.hh"
|
#include "decompress.hh"
|
||||||
|
#include "gddebug.hh"
|
||||||
|
#include "ripemd.hh"
|
||||||
|
|
||||||
namespace Mdict
|
namespace Mdict
|
||||||
{
|
{
|
||||||
|
|
||||||
|
// Bit flags describing which parts of the file are encrypted. They are tested
// against encrypted_, which readHeader() fills from the "Encrypted" attribute
// of the file header.
// EcryptedHeadWordIndex (sic) selects decryption of the headword block info in
// readHeadWordBlockInfos(); EcryptedHeadWordHeader is not referenced in the
// code visible here.
enum EncryptedSection
|
||||||
|
{
|
||||||
|
EcryptedHeadWordHeader = 1,
|
||||||
|
EcryptedHeadWordIndex = 2
|
||||||
|
};
|
||||||
|
|
||||||
static inline int u16StrSize( const ushort * unicode )
|
static inline int u16StrSize( const ushort * unicode )
|
||||||
{
|
{
|
||||||
int size = 0;
|
int size = 0;
|
||||||
|
@ -103,9 +108,8 @@ MdictParser::MdictParser() :
|
||||||
recordPos_( 0 ),
|
recordPos_( 0 ),
|
||||||
wordCount_( 0 ),
|
wordCount_( 0 ),
|
||||||
numberTypeSize_( 0 ),
|
numberTypeSize_( 0 ),
|
||||||
rtl_( false ),
|
encrypted_( 0 ),
|
||||||
bruteForce_( false ),
|
rtl_( false )
|
||||||
bruteForceEnd_( true )
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,7 +118,7 @@ bool MdictParser::open( const char * filename )
|
||||||
filename_ = QString::fromUtf8( filename );
|
filename_ = QString::fromUtf8( filename );
|
||||||
file_ = new QFile( filename_ );
|
file_ = new QFile( filename_ );
|
||||||
|
|
||||||
qDebug() << "MdictParser: open " << filename_;
|
GD_DPRINTF( "MdictParser: open %s\n", filename );
|
||||||
|
|
||||||
if ( file_.isNull() || !file_->exists() )
|
if ( file_.isNull() || !file_->exists() )
|
||||||
return false;
|
return false;
|
||||||
|
@ -138,39 +142,6 @@ bool MdictParser::open( const char * filename )
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MdictParser::readNextHeadWordIndex( MdictParser::HeadWordIndex & headWordIndex )
|
bool MdictParser::readNextHeadWordIndex( MdictParser::HeadWordIndex & headWordIndex )
|
||||||
{
|
|
||||||
if ( bruteForce_ )
|
|
||||||
{
|
|
||||||
if ( bruteForceEnd_ )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
headWordIndex.clear();
|
|
||||||
|
|
||||||
ScopedMemMap mapping( *file_, headWordPos_, headWordBlockSize_ );
|
|
||||||
if ( !mapping.startAddress() )
|
|
||||||
return false;
|
|
||||||
|
|
||||||
const char * pDataStart = ( const char * )mapping.startAddress();
|
|
||||||
const char * pDataEnd = pDataStart + headWordBlockSize_;
|
|
||||||
const char pattern[] = {0x02, 0x00, 0x00, 0x00};
|
|
||||||
const char * patternBegin = pattern;
|
|
||||||
const char * patternEnd = pattern + 4;
|
|
||||||
const char * p;
|
|
||||||
|
|
||||||
do
|
|
||||||
{
|
|
||||||
p = std::search( pDataStart + 4, pDataEnd, patternBegin, patternEnd );
|
|
||||||
QByteArray decompressed = zlibDecompress( pDataStart + 8, p - ( pDataStart + 8 ) );
|
|
||||||
HeadWordIndex currentIndex = splitHeadWordBlock( decompressed );
|
|
||||||
headWordIndex.insert( headWordIndex.end(), currentIndex.begin(), currentIndex.end() );
|
|
||||||
pDataStart = p;
|
|
||||||
}
|
|
||||||
while ( p != pDataEnd );
|
|
||||||
|
|
||||||
bruteForceEnd_ = true;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
if ( headWordBlockInfosIter_ == headWordBlockInfos_.end() )
|
if ( headWordBlockInfosIter_ == headWordBlockInfos_.end() )
|
||||||
return false;
|
return false;
|
||||||
|
@ -195,6 +166,12 @@ bool MdictParser::readNextHeadWordIndex( MdictParser::HeadWordIndex & headWordIn
|
||||||
headWordBlockInfosIter_++;
|
headWordBlockInfosIter_++;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MdictParser::checkAdler32(const char * buffer, unsigned int len, quint32 checksum)
|
||||||
|
{
|
||||||
|
uLong adler = adler32( 0L, Z_NULL, 0 );
|
||||||
|
adler = adler32( adler, ( const Bytef * ) buffer, len );
|
||||||
|
return (adler & 0xFFFFFFFF) == checksum;
|
||||||
}
|
}
|
||||||
|
|
||||||
QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t fromSize )
|
QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t fromSize )
|
||||||
|
@ -236,52 +213,69 @@ QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t f
|
||||||
return QString::fromUtf16( ( const ushort * )&result.front() );
|
return QString::fromUtf16( ( const ushort * )&result.front() );
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize, const char * compressedBlockPtr,
|
bool MdictParser::decryptHeadWordIndex(char * buffer, qint64 len)
|
||||||
qint64 decompressedBlockSize, QByteArray & decompressedBlock )
|
{
|
||||||
|
RIPEMD128 ripemd;
|
||||||
|
ripemd.update( ( const uchar * ) buffer + 4, 4 );
|
||||||
|
ripemd.update( ( const uchar * ) "\x95\x36\x00\x00", 4 );
|
||||||
|
|
||||||
|
uint8_t key[16];
|
||||||
|
ripemd.digest( key );
|
||||||
|
|
||||||
|
buffer += 8;
|
||||||
|
len -= 8;
|
||||||
|
uint8_t prev = 0x36;
|
||||||
|
for (qint64 i = 0; i < len; ++i)
|
||||||
|
{
|
||||||
|
uint8_t byte = buffer[i];
|
||||||
|
byte = (byte >> 4) | (byte << 4);
|
||||||
|
byte = byte ^ prev ^ (i & 0xFF) ^ key[i % 16];
|
||||||
|
prev = buffer[i];
|
||||||
|
buffer[i] = byte;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize,
|
||||||
|
const char * compressedBlockPtr,
|
||||||
|
qint64 decompressedBlockSize,
|
||||||
|
QByteArray & decompressedBlock )
|
||||||
{
|
{
|
||||||
if ( compressedBlockSize <= 8 )
|
if ( compressedBlockSize <= 8 )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
qint64 dataSize = compressedBlockSize - 8;
|
// compression type
|
||||||
const char * dataPtr = compressedBlockPtr + 8;
|
quint32 type = qFromBigEndian<quint32>( ( const uchar * ) compressedBlockPtr );
|
||||||
// 4bytes - type
|
quint32 checksum = qFromBigEndian<quint32>( ( const uchar * )compressedBlockPtr + 4 );
|
||||||
// 4bytes - checksum
|
const char * buf = compressedBlockPtr + 8;
|
||||||
quint32 type;
|
qint64 size = compressedBlockSize - 8;
|
||||||
quint32 checksum;
|
|
||||||
type = qFromBigEndian<quint32>( ( const uchar * ) compressedBlockPtr );
|
|
||||||
checksum = qFromBigEndian<quint32>( ( const uchar * )compressedBlockPtr + sizeof( quint32 ) );
|
|
||||||
|
|
||||||
if ( type == 0x00000000 )
|
switch ( type )
|
||||||
{
|
{
|
||||||
|
case 0x00000000:
|
||||||
// No compression
|
// No compression
|
||||||
checksum &= 0xffff;
|
if ( !checkAdler32( buf, size, checksum ) )
|
||||||
quint16 sum = 0;
|
|
||||||
for ( qint64 i = 0; i < dataSize; i++ )
|
|
||||||
{
|
{
|
||||||
sum += dataPtr[i];
|
gdWarning( "MDict: parseCompressedBlock: plain: checksum not match" );
|
||||||
}
|
|
||||||
sum += 1;
|
|
||||||
|
|
||||||
if ( checksum != sum )
|
|
||||||
{
|
|
||||||
qWarning() << "MDict: parseCompressedBlock: plain: checksum not match";
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
decompressedBlock = QByteArray( dataPtr, dataSize );
|
decompressedBlock = QByteArray( buf, size );
|
||||||
}
|
return true;
|
||||||
else if ( type == 0x01000000 )
|
|
||||||
|
case 0x01000000:
|
||||||
{
|
{
|
||||||
// LZO compression
|
// LZO compression
|
||||||
int result;
|
int result;
|
||||||
lzo_uint blockSize = ( lzo_uint )decompressedBlockSize;
|
lzo_uint blockSize = ( lzo_uint )decompressedBlockSize;
|
||||||
decompressedBlock.resize( blockSize );
|
decompressedBlock.resize( blockSize );
|
||||||
result = lzo1x_decompress_safe( ( const uchar * )dataPtr, dataSize,
|
result = lzo1x_decompress_safe( ( const uchar * ) buf, size,
|
||||||
( uchar * )decompressedBlock.data(), &blockSize, NULL );
|
( uchar * )decompressedBlock.data(),
|
||||||
|
&blockSize, NULL );
|
||||||
|
|
||||||
if ( result != LZO_E_OK || blockSize != ( lzo_uint )decompressedBlockSize )
|
if ( result != LZO_E_OK || blockSize != ( lzo_uint )decompressedBlockSize )
|
||||||
{
|
{
|
||||||
qWarning() << "MDict: parseCompressedBlock: decompression failed";
|
gdWarning( "MDict: parseCompressedBlock: decompression failed" );
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -289,24 +283,26 @@ bool MdictParser::parseCompressedBlock( qint64 compressedBlockSize, const char *
|
||||||
( const uchar * )decompressedBlock.constData(),
|
( const uchar * )decompressedBlock.constData(),
|
||||||
blockSize ) )
|
blockSize ) )
|
||||||
{
|
{
|
||||||
qWarning() << "MDict: parseCompressedBlock: lzo: checksum not match";
|
gdWarning( "MDict: parseCompressedBlock: lzo: checksum does not match" );
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ( type == 0x02000000 )
|
break;
|
||||||
{
|
|
||||||
// zlib compression
|
|
||||||
if ( checksum != qFromBigEndian<quint32>( ( const uchar * )dataPtr + dataSize - 4 ) )
|
|
||||||
{
|
|
||||||
qWarning() << "MDict: parseCompressedBlock: zlib: checksum not match";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
decompressedBlock = zlibDecompress( dataPtr, dataSize );
|
case 0x02000000:
|
||||||
}
|
// zlib compression
|
||||||
else
|
decompressedBlock = zlibDecompress( buf, size );
|
||||||
|
|
||||||
|
if ( !checkAdler32( decompressedBlock.constData(), decompressedBlock.size(),
|
||||||
|
checksum ) )
|
||||||
{
|
{
|
||||||
qWarning() << "MDict: parseCompressedBlock: unknown type";
|
gdWarning( "MDict: parseCompressedBlock: zlib: checksum does not match" );
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
gdWarning( "MDict: parseCompressedBlock: unknown type" );
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -355,7 +351,18 @@ bool MdictParser::readHeader( QDataStream & in )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
QString headerText = toUtf16( "UTF-16LE", headerTextUtf16.constData(), headerTextUtf16.size() );
|
QString headerText = toUtf16( "UTF-16LE", headerTextUtf16.constData(), headerTextUtf16.size() );
|
||||||
|
|
||||||
|
// Adler-32 checksum of the header text (little-endian)
|
||||||
|
quint32 checksum;
|
||||||
|
in.setByteOrder( QDataStream::LittleEndian );
|
||||||
|
in >> checksum;
|
||||||
|
if ( !checkAdler32( headerTextUtf16.constData(), headerTextUtf16.size(), checksum ) )
|
||||||
|
{
|
||||||
|
gdWarning( "MDict: readHeader: checksum does not match" );
|
||||||
|
return false;
|
||||||
|
}
|
||||||
headerTextUtf16.clear();
|
headerTextUtf16.clear();
|
||||||
|
in.setByteOrder( QDataStream::BigEndian );
|
||||||
|
|
||||||
QDomNamedNodeMap headerAttributes = parseHeaderAttributes( headerText );
|
QDomNamedNodeMap headerAttributes = parseHeaderAttributes( headerText );
|
||||||
|
|
||||||
|
@ -391,9 +398,8 @@ bool MdictParser::readHeader( QDataStream & in )
|
||||||
else
|
else
|
||||||
numberTypeSize_ = 8;
|
numberTypeSize_ = 8;
|
||||||
|
|
||||||
// 4 bytes unknown
|
// Encrypted ?
|
||||||
if ( in.skipRawData( 4 ) != 4 )
|
encrypted_ = headerAttributes.namedItem("Encrypted").toAttr().value().toInt();
|
||||||
return false;
|
|
||||||
|
|
||||||
// Read metadata
|
// Read metadata
|
||||||
rtl_ = headerAttributes.namedItem( "Left2Right" ).toAttr().value() != "Yes";
|
rtl_ = headerAttributes.namedItem( "Left2Right" ).toAttr().value() != "Yes";
|
||||||
|
@ -418,87 +424,77 @@ bool MdictParser::readHeader( QDataStream & in )
|
||||||
|
|
||||||
// Reads the headword section header and the headword block info table that
// follows it. On success headWordBlockInfos_ is populated,
// headWordBlockInfosIter_ points at its first entry, headWordBlockInfoPos_
// holds the file offset of the block info table and headWordPos_ the offset
// just past it.
//
// in - data stream used to read the header checksum. It is read alongside
//      file_ here, so it presumably wraps the same device.
//      NOTE(review): confirm against the caller.
//
// Returns false when the file is truncated, a checksum does not match, or the
// block info cannot be decrypted or decompressed.
bool MdictParser::readHeadWordBlockInfos( QDataStream & in )
{
  const bool isV2 = version_ >= 2.0;

  // The header is a run of numbers read with readNumber(): five for format
  // 2.0 and later (which adds the decompressed block info size), four for
  // older files. Size it from numberTypeSize_ so the file position stays
  // right when numbers are not 8 bytes wide.
  // NOTE(review): assumes readNumber() consumes numberTypeSize_ bytes per
  // field - confirm.
  const int headerSize = numberTypeSize_ * ( isV2 ? 5 : 4 );

  QByteArray header = file_->read( headerSize );
  if ( header.size() != headerSize )
    return false; // truncated file

  QDataStream stream( header );
  stream.setByteOrder( QDataStream::BigEndian );

  // number of headword blocks
  numHeadWordBlocks_ = readNumber( stream );
  // number of entries
  wordCount_ = readNumber( stream );

  // number of bytes of a headword block info after decompression
  qint64 decompressedSize = 0;
  if ( isV2 )
    decompressedSize = readNumber( stream );

  // number of bytes of a headword block info before decompression
  headWordBlockInfoSize_ = readNumber( stream );
  // number of bytes of a headword block
  headWordBlockSize_ = readNumber( stream );

  // Adler-32 checksum of the header. If those are encrypted, it is
  // the checksum of the decrypted version
  if ( isV2 )
  {
    quint32 checksum;
    in >> checksum;
    if ( !checkAdler32( header.constData(), header.size(), checksum ) )
    {
      gdWarning( "MDict: readHeadWordBlockInfos: checksum does not match" );
      return false;
    }
  }

  headWordBlockInfoPos_ = file_->pos();

  // read headword block info
  QByteArray headWordBlockInfo = file_->read( headWordBlockInfoSize_ );
  if ( headWordBlockInfo.size() != headWordBlockInfoSize_ )
    return false;

  if ( isV2 )
  {
    // decrypt
    if ( encrypted_ & EcryptedHeadWordIndex )
    {
      if ( !decryptHeadWordIndex( headWordBlockInfo.data(),
                                  headWordBlockInfo.size() ) )
        return false;
    }

    // Format 2.0+ stores the block info as a compressed block.
    QByteArray decompressed;
    if ( !parseCompressedBlock( headWordBlockInfo.size(), headWordBlockInfo.data(),
                                decompressedSize, decompressed ) )
      return false;

    headWordBlockInfos_ = decodeHeadWordBlockInfo( decompressed );
  }
  else
  {
    // Older files store the block info uncompressed.
    headWordBlockInfos_ = decodeHeadWordBlockInfo( headWordBlockInfo );
  }

  headWordPos_ = file_->pos();
  headWordBlockInfosIter_ = headWordBlockInfos_.begin();
  return true;
}
|
||||||
|
|
||||||
bool MdictParser::readRecordBlockInfos()
|
bool MdictParser::readRecordBlockInfos()
|
||||||
{
|
{
|
||||||
file_->seek( headWordBlockInfoPos_ + headWordBlockInfoSize_ + headWordBlockSize_ );
|
file_->seek( headWordBlockInfoPos_ + headWordBlockInfoSize_ +
|
||||||
|
headWordBlockSize_ );
|
||||||
|
|
||||||
QDataStream in( file_ );
|
QDataStream in( file_ );
|
||||||
in.setByteOrder( QDataStream::BigEndian );
|
in.setByteOrder( QDataStream::BigEndian );
|
||||||
qint64 numRecordBlocks = readNumber( in );
|
qint64 numRecordBlocks = readNumber( in );
|
||||||
readNumber( in ); // entry count, skip
|
readNumber( in ); // total number of records, skip
|
||||||
qint64 recordInfoSize = readNumber( in );
|
qint64 recordInfoSize = readNumber( in );
|
||||||
totalRecordsSize_ = readNumber( in );
|
totalRecordsSize_ = readNumber( in );
|
||||||
recordPos_ = file_->pos() + recordInfoSize;
|
recordPos_ = file_->pos() + recordInfoSize;
|
||||||
|
@ -544,18 +540,18 @@ MdictParser::BlockInfoVector MdictParser::decodeHeadWordBlockInfo( QByteArray co
|
||||||
|
|
||||||
while ( !s.atEnd() )
|
while ( !s.atEnd() )
|
||||||
{
|
{
|
||||||
// unknown
|
// Number of keywords in the block
|
||||||
s.skipRawData( numberTypeSize_ );
|
s.skipRawData( numberTypeSize_ );
|
||||||
// Text head size
|
// Size of the first headword in the block
|
||||||
quint32 textHeadSize = readU8OrU16( s, isU16 );
|
quint32 textHeadSize = readU8OrU16( s, isU16 );
|
||||||
// Text head
|
// The first headword
|
||||||
if ( encoding_ != "UTF-16LE" )
|
if ( encoding_ != "UTF-16LE" )
|
||||||
s.skipRawData( textHeadSize + textTermSize );
|
s.skipRawData( textHeadSize + textTermSize );
|
||||||
else
|
else
|
||||||
s.skipRawData( ( textHeadSize + textTermSize ) * 2 );
|
s.skipRawData( ( textHeadSize + textTermSize ) * 2 );
|
||||||
// Text tail Size
|
// Size of the last headword in the block
|
||||||
quint32 textTailSize = readU8OrU16( s, isU16 );
|
quint32 textTailSize = readU8OrU16( s, isU16 );
|
||||||
// Text tail
|
// The last headword
|
||||||
if ( encoding_ != "UTF-16LE" )
|
if ( encoding_ != "UTF-16LE" )
|
||||||
s.skipRawData( textTailSize + textTermSize );
|
s.skipRawData( textTailSize + textTermSize );
|
||||||
else
|
else
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
// https://bitbucket.org/xwang/mdict-analysis
|
// https://bitbucket.org/xwang/mdict-analysis
|
||||||
|
// https://github.com/zhansliu/writemdict/blob/master/fileformat.md
|
||||||
// Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser
|
// Octopus MDict Dictionary File (.mdx) and Resource File (.mdd) Analyser
|
||||||
//
|
//
|
||||||
// Copyright (C) 2012, 2013 Xiaoqiang Wang <xiaoqiangwang AT gmail DOT com>
|
// Copyright (C) 2012, 2013 Xiaoqiang Wang <xiaoqiangwang AT gmail DOT com>
|
||||||
// Copyright (C) 2013 Timon Wong <timon86.wang AT gmail DOT com>
|
// Copyright (C) 2013 Timon Wong <timon86.wang AT gmail DOT com>
|
||||||
|
// Copyright (C) 2015 Zhe Wang <0x1998 AT gmail DOT com>
|
||||||
//
|
//
|
||||||
// This program is a free software; you can redistribute it and/or modify
|
// This program is a free software; you can redistribute it and/or modify
|
||||||
// it under the terms of the GNU General Public License as published by
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
@ -166,7 +168,6 @@ public:
|
||||||
}
|
}
|
||||||
static bool parseCompressedBlock( qint64 compressedBlockSize, const char * compressedBlockPtr,
|
static bool parseCompressedBlock( qint64 compressedBlockSize, const char * compressedBlockPtr,
|
||||||
qint64 decompressedBlockSize, QByteArray & decompressedBlock);
|
qint64 decompressedBlockSize, QByteArray & decompressedBlock);
|
||||||
|
|
||||||
static QString & substituteStylesheet( QString & article, StyleSheets const & styleSheets );
|
static QString & substituteStylesheet( QString & article, StyleSheets const & styleSheets );
|
||||||
static inline string substituteStylesheet( string const & article, StyleSheets const & styleSheets )
|
static inline string substituteStylesheet( string const & article, StyleSheets const & styleSheets )
|
||||||
{
|
{
|
||||||
|
@ -178,6 +179,8 @@ public:
|
||||||
protected:
|
protected:
|
||||||
qint64 readNumber( QDataStream & in );
|
qint64 readNumber( QDataStream & in );
|
||||||
static quint32 readU8OrU16( QDataStream & in, bool isU16 );
|
static quint32 readU8OrU16( QDataStream & in, bool isU16 );
|
||||||
|
static bool checkAdler32(const char * buffer, unsigned int len, quint32 checksum);
|
||||||
|
static bool decryptHeadWordIndex(char * buffer, qint64 len);
|
||||||
bool readHeader( QDataStream & in );
|
bool readHeader( QDataStream & in );
|
||||||
bool readHeadWordBlockInfos( QDataStream & in );
|
bool readHeadWordBlockInfos( QDataStream & in );
|
||||||
bool readRecordBlockInfos();
|
bool readRecordBlockInfos();
|
||||||
|
@ -207,9 +210,8 @@ protected:
|
||||||
|
|
||||||
quint32 wordCount_;
|
quint32 wordCount_;
|
||||||
int numberTypeSize_;
|
int numberTypeSize_;
|
||||||
|
int encrypted_;
|
||||||
bool rtl_;
|
bool rtl_;
|
||||||
bool bruteForce_;
|
|
||||||
bool bruteForceEnd_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
183
ripemd.cc
Normal file
183
ripemd.cc
Normal file
|
@ -0,0 +1,183 @@
|
||||||
|
// Copyright (C) 2007 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
// Copyright (C) 2013 James Almer <jamrial@gmail.com>
|
||||||
|
// Copyright (C) 2015 Zhe Wang <0x1998@gmail.com>
|
||||||
|
//
|
||||||
|
// Based on the RIPEMD-128 implementation from libavutil
|
||||||
|
//
|
||||||
|
// This program is a free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, version 3 of the License.
|
||||||
|
//
|
||||||
|
// You can get a copy of GNU General Public License along this program
|
||||||
|
// But you can always get it from http://www.gnu.org/licenses/gpl.txt
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
|
||||||
|
#include "ripemd.hh"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <QtEndian>
|
||||||
|
|
||||||
|
|
||||||
|
// Additive round constants for the two parallel lines of the compression
// function. The ROUND128_* macros add KA[0..2] on the a/b/c/d line (steps
// 16-63) and KB[0..2] on the e/f/g/h line (steps 0-47); index 3 of either
// table is not referenced by those macros.
static const uint32_t KA[4] = {
|
||||||
|
0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xa953fd4e
|
||||||
|
};
|
||||||
|
|
||||||
|
static const uint32_t KB[4] = {
|
||||||
|
0x50a28be6, 0x5c4dd124, 0x6d703ef3, 0x7a6d76e9
|
||||||
|
};
|
||||||
|
|
||||||
|
// Left-rotation amounts, indexed by the step counter n in the ROUND128_*
// macros: ROTA for the a/b/c/d line, ROTB for the e/f/g/h line.
// RIPEMD128::transform() runs 64 steps, so only entries 0-63 are consumed
// here; the final row is presumably carried over from the libavutil tables
// this file is based on.
static const int ROTA[80] = {
|
||||||
|
11, 14, 15, 12, 5, 8, 7 , 9, 11, 13, 14, 15, 6, 7, 9, 8,
|
||||||
|
7 , 6, 8, 13, 11, 9, 7, 15, 7, 12, 15, 9, 11, 7, 13, 12,
|
||||||
|
11, 13, 6, 7, 14, 9, 13, 15, 14, 8, 13, 6, 5, 12, 7, 5,
|
||||||
|
11, 12, 14, 15, 14, 15, 9, 8, 9, 14, 5, 6, 8, 6, 5, 12,
|
||||||
|
9, 15, 5, 11, 6, 8, 13, 12, 5, 12, 13, 14, 11, 8, 5, 6
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int ROTB[80] = {
|
||||||
|
8, 9, 9, 11, 13, 15, 15, 5, 7, 7, 8, 11, 14, 14, 12, 6,
|
||||||
|
9, 13, 15, 7, 12, 8, 9, 11, 7, 7, 12, 7, 6, 15, 13, 11,
|
||||||
|
9, 7, 15, 11, 8, 6, 6, 14, 12, 13, 5, 14, 13, 13, 7, 5,
|
||||||
|
15, 5, 8, 11, 14, 14, 6, 14, 6, 9, 12, 9, 12, 5, 15, 8,
|
||||||
|
8, 5, 12, 9, 12, 5, 14, 6, 8, 13, 6, 5, 15, 13, 11, 11
|
||||||
|
};
|
||||||
|
|
||||||
|
// Message word selection, indexed by the step counter n in the ROUND128_*
// macros: at step n the a/b/c/d line mixes in block[WA[n]] and the e/f/g/h
// line mixes in block[WB[n]]. Every value is a valid index into the 16-word
// block. As with the rotation tables, RIPEMD128::transform() only reaches
// entries 0-63.
static const int WA[80] = {
|
||||||
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||||
|
7, 4, 13, 1, 10, 6, 15, 3, 12, 0, 9, 5, 2, 14, 11, 8,
|
||||||
|
3, 10, 14, 4, 9, 15, 8, 1, 2, 7, 0, 6, 13, 11, 5, 12,
|
||||||
|
1, 9, 11, 10, 0, 8, 12, 4, 13, 3, 7, 15, 14, 5, 6, 2,
|
||||||
|
4, 0, 5, 9, 7, 12, 2, 10, 14, 1, 3, 8, 11, 6, 15, 13
|
||||||
|
};
|
||||||
|
|
||||||
|
static const int WB[80] = {
|
||||||
|
5, 14, 7, 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12,
|
||||||
|
6, 11, 3, 7, 0, 13, 5, 10, 14, 15, 8, 12, 4, 9, 1, 2,
|
||||||
|
15, 5, 1, 3, 7, 14, 6, 9, 11, 8, 12, 2, 10, 0, 4, 13,
|
||||||
|
8, 6, 4, 1, 3, 11, 15, 0, 5, 12, 2, 13, 9, 7, 10, 14,
|
||||||
|
12, 15, 10, 4, 1, 5, 8, 7, 6, 2, 13, 14, 0, 3, 9, 11
|
||||||
|
};
|
||||||
|
|
||||||
|
// Helper macros for RIPEMD128::transform(). They are textual: every ROUND128_*
// macro expects the caller to have a 16-word array named `block` and an int
// step counter named `n` in scope, performs one step on each of the two
// parallel lines (the first four arguments form one line, the last four the
// other) and then increments n. Each R128_* macro issues four such steps,
// rotating the roles of the working variables between them.
//
// rol() rotates a 32-bit value left; bits must be in 1..31 (the tables above
// only supply values from 5 to 15).
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
|
||||||
|
|
||||||
|
#define ROUND128_0_TO_15(a,b,c,d,e,f,g,h) \
|
||||||
|
a = rol(a + (( b ^ c ^ d) + block[WA[n]]), ROTA[n]); \
|
||||||
|
e = rol(e + ((((f ^ g) & h) ^ g) + block[WB[n]] + KB[0]), ROTB[n]); \
|
||||||
|
n++
|
||||||
|
|
||||||
|
#define ROUND128_16_TO_31(a,b,c,d,e,f,g,h) \
|
||||||
|
a = rol(a + ((((c ^ d) & b) ^ d) + block[WA[n]] + KA[0]), ROTA[n]); \
|
||||||
|
e = rol(e + (((~g | f) ^ h) + block[WB[n]] + KB[1]), ROTB[n]); \
|
||||||
|
n++
|
||||||
|
|
||||||
|
#define ROUND128_32_TO_47(a,b,c,d,e,f,g,h) \
|
||||||
|
a = rol(a + (((~c | b) ^ d) + block[WA[n]] + KA[1]), ROTA[n]); \
|
||||||
|
e = rol(e + ((((g ^ h) & f) ^ h) + block[WB[n]] + KB[2]), ROTB[n]); \
|
||||||
|
n++
|
||||||
|
|
||||||
|
#define ROUND128_48_TO_63(a,b,c,d,e,f,g,h) \
|
||||||
|
a = rol(a + ((((b ^ c) & d) ^ c) + block[WA[n]] + KA[2]), ROTA[n]); \
|
||||||
|
e = rol(e + (( f ^ g ^ h) + block[WB[n]]), ROTB[n]); \
|
||||||
|
n++
|
||||||
|
|
||||||
|
// Four consecutive steps of one round; transform() invokes each R128_* macro
// four times to cover the round's 16 steps.
#define R128_0 \
|
||||||
|
ROUND128_0_TO_15(a,b,c,d,e,f,g,h); \
|
||||||
|
ROUND128_0_TO_15(d,a,b,c,h,e,f,g); \
|
||||||
|
ROUND128_0_TO_15(c,d,a,b,g,h,e,f); \
|
||||||
|
ROUND128_0_TO_15(b,c,d,a,f,g,h,e)
|
||||||
|
|
||||||
|
#define R128_16 \
|
||||||
|
ROUND128_16_TO_31(a,b,c,d,e,f,g,h); \
|
||||||
|
ROUND128_16_TO_31(d,a,b,c,h,e,f,g); \
|
||||||
|
ROUND128_16_TO_31(c,d,a,b,g,h,e,f); \
|
||||||
|
ROUND128_16_TO_31(b,c,d,a,f,g,h,e)
|
||||||
|
|
||||||
|
#define R128_32 \
|
||||||
|
ROUND128_32_TO_47(a,b,c,d,e,f,g,h); \
|
||||||
|
ROUND128_32_TO_47(d,a,b,c,h,e,f,g); \
|
||||||
|
ROUND128_32_TO_47(c,d,a,b,g,h,e,f); \
|
||||||
|
ROUND128_32_TO_47(b,c,d,a,f,g,h,e)
|
||||||
|
|
||||||
|
#define R128_48 \
|
||||||
|
ROUND128_48_TO_63(a,b,c,d,e,f,g,h); \
|
||||||
|
ROUND128_48_TO_63(d,a,b,c,h,e,f,g); \
|
||||||
|
ROUND128_48_TO_63(c,d,a,b,g,h,e,f); \
|
||||||
|
ROUND128_48_TO_63(b,c,d,a,f,g,h,e)
|
||||||
|
|
||||||
|
|
||||||
|
// Starts an empty hash: no bytes counted, a zeroed input buffer, and the four
// chaining words loaded with their fixed starting values.
RIPEMD128::RIPEMD128()
  : count( 0 ), buffer(), state()
{
  static const uint32_t initialWords[4] = {
    0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476
  };

  for ( int word = 0; word < 4; ++word )
    state[word] = initialWords[word];
}
|
||||||
|
|
||||||
|
void RIPEMD128::transform( const uint8_t buffer[64] )
|
||||||
|
{
|
||||||
|
uint32_t a, b, c, d, e, f, g, h;
|
||||||
|
uint32_t block[16];
|
||||||
|
int n;
|
||||||
|
|
||||||
|
a = e = state[0];
|
||||||
|
b = f = state[1];
|
||||||
|
c = g = state[2];
|
||||||
|
d = h = state[3];
|
||||||
|
|
||||||
|
for (n = 0; n < 16; n++)
|
||||||
|
block[n] = qFromLittleEndian<uint32_t>( buffer + 4 * n );
|
||||||
|
n = 0;
|
||||||
|
|
||||||
|
R128_0; R128_0; R128_0; R128_0;
|
||||||
|
|
||||||
|
R128_16; R128_16; R128_16; R128_16;
|
||||||
|
|
||||||
|
R128_32; R128_32; R128_32; R128_32;
|
||||||
|
|
||||||
|
R128_48; R128_48; R128_48; R128_48;
|
||||||
|
|
||||||
|
h += c + state[1];
|
||||||
|
state[1] = state[2] + d + e;
|
||||||
|
state[2] = state[3] + a + f;
|
||||||
|
state[3] = state[0] + b + g;
|
||||||
|
state[0] = h;
|
||||||
|
}
|
||||||
|
|
||||||
|
void RIPEMD128::update( const uint8_t * data, size_t len )
|
||||||
|
{
|
||||||
|
size_t i, j;
|
||||||
|
|
||||||
|
j = count & 63;
|
||||||
|
count += len;
|
||||||
|
if ( ( j + len ) > 63 )
|
||||||
|
{
|
||||||
|
memcpy( &buffer[j], data, ( i = 64 - j ) );
|
||||||
|
transform( buffer );
|
||||||
|
for ( ; i + 63 < len; i += 64 )
|
||||||
|
transform( &data[i] );
|
||||||
|
j = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
i = 0;
|
||||||
|
}
|
||||||
|
memcpy( &buffer[j], &data[i], len - i );
|
||||||
|
}
|
||||||
|
|
||||||
|
void RIPEMD128::digest( uint8_t * digest )
|
||||||
|
{
|
||||||
|
uint64_t finalcount = qFromLittleEndian( count << 3 );
|
||||||
|
update( (const uint8_t *) "\200", 1 );
|
||||||
|
while ( ( count & 63 ) != 56 )
|
||||||
|
update( ( const uint8_t * ) "", 1 );
|
||||||
|
update( ( uint8_t * ) &finalcount, 8 ); /* Should cause a transform() */
|
||||||
|
for ( int i = 0; i < 4; i++ )
|
||||||
|
qToLittleEndian( state[i], digest + i*4 );
|
||||||
|
}
|
45
ripemd.hh
Normal file
45
ripemd.hh
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
// Copyright (C) 2007 Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
// Copyright (C) 2013 James Almer <jamrial@gmail.com>
|
||||||
|
// Copyright (C) 2015 Zhe Wang <0x1998@gmail.com>
|
||||||
|
//
|
||||||
|
// Based on the RIPEMD-128 implementation from libavutil
|
||||||
|
//
|
||||||
|
// This program is a free software; you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU General Public License as published by
|
||||||
|
// the Free Software Foundation, version 3 of the License.
|
||||||
|
//
|
||||||
|
// You can get a copy of GNU General Public License along this program
|
||||||
|
// But you can always get it from http://www.gnu.org/licenses/gpl.txt
|
||||||
|
//
|
||||||
|
// This program is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
|
||||||
|
#ifndef __RIPEMD_HH_INCLUDED__
|
||||||
|
#define __RIPEMD_HH_INCLUDED__
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
|
||||||
|
// Incremental RIPEMD-128 hash.
//
// Usage: construct, call update() any number of times with consecutive pieces
// of the message, then call digest() once to obtain the 16-byte result.
class RIPEMD128
{
public:
  RIPEMD128();

  // Update hash value with len more bytes of message data.
  void update( const uint8_t * data, size_t len );

  // Finish hashing and output digest value.
  // digest must point to at least 16 writable bytes. Finishing pads the
  // message internally, so the object should not be fed more data afterwards.
  void digest( uint8_t * digest );

private:
  uint64_t count;     // total number of message bytes passed to update() so far
  uint8_t buffer[64]; // input bytes that do not yet fill a complete 64-byte block
  uint32_t state[4];  // current hash value: four 32-bit chaining words

  // Folds one complete 64-byte block into state.
  void transform( const uint8_t buffer[64] );
};
|
||||||
|
|
||||||
|
#endif // __RIPEMD_HH_INCLUDED__
|
Loading…
Reference in a new issue