fix dictionary parse error:

1. MDX dictionary load error on Windows.
2. DSL dictionary load error on Windows.
This commit is contained in:
xiaoyifang 2021-10-19 00:19:25 +08:00
parent c1eef3a228
commit ded545ecf3
5 changed files with 34 additions and 107 deletions

View file

@ -891,6 +891,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
}
iconv.reinit( encoding );
codec=QTextCodec::codecForName(iconv.getEncodingNameFor(encoding));
// We now can use our own readNextLine() function
@ -1009,7 +1010,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
for( ; ; )
{
// Check that we have bytes to read
if ( readBufferLeft < 4 ) // To convert one char, we need at most 4 bytes
if ( readBufferLeft < 1000 ) // To convert one char, we need at most 4 bytes
{
if ( !gzeof( f ) )
{
@ -1026,76 +1027,25 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
readBufferPtr = readBuffer;
readBufferLeft += (size_t) result;
frag = QByteArray::fromRawData(readBuffer, readBufferLeft);
}
}
if ( readBufferLeft < readMultiple )
{
// No more data. Return what we've got so far, forget the last byte if
// it was a 16-bit Unicode and a file had an odd number of bytes.
readBufferLeft = 0;
if ( outPtr != &wcharBuffer.front() )
{
// If there was a stray \r, remove it
if ( outPtr[ -1 ] == L'\r' )
--outPtr;
out = wstring( &wcharBuffer.front(), outPtr - &wcharBuffer.front() );
++linesRead;
return true;
}
else
//QByteArray frag=QByteArray::fromRawData(readBuffer,readBufferLeft);
QTextStream in(frag);
if(in.atEnd())
return false;
}
// Check that we have chars to write
if ( leftInOut < 2 ) // With 16-bit wchars, 2 is needed for a surrogate pair
{
wcharBuffer.resize( wcharBuffer.size() + 64 );
outPtr = &wcharBuffer.front() + wcharBuffer.size() - 64 - leftInOut;
leftInOut += 64;
}
// Ok, now convert one char
size_t outBytesLeft = sizeof( wchar );
Iconv::Result r =
iconv.convert( (void const *&)readBufferPtr, readBufferLeft,
(void *&)outPtr, outBytesLeft );
if ( r == Iconv::NeedMoreOut && outBytesLeft == sizeof( wchar ) )
{
// Seems to be a surrogate pair with a 16-bit target wchar
outBytesLeft *= 2;
r = iconv.convert( (void const *&)readBufferPtr, readBufferLeft,
(void *&)outPtr, outBytesLeft );
--leftInOut; // Complements the next decremention
}
if ( outBytesLeft )
throw exEncodingError();
--leftInOut;
// Have we got \n?
if ( outPtr[ -1 ] == L'\n' )
{
--outPtr;
// Now kill a \r if there is one, and return the result.
if ( outPtr != &wcharBuffer.front() && outPtr[ -1 ] == L'\r' )
--outPtr;
out = wstring( &wcharBuffer.front(), outPtr - &wcharBuffer.front() );
++linesRead;
in.setCodec(codec);
QString line=in.readLine();
qint64 pos=in.pos();
readBufferPtr+=pos;
readBufferLeft-=pos;
linesRead++;
out=line.toStdU32String();
frag.remove(0, pos);
return true;
}
}
}
@ -1330,8 +1280,12 @@ void expandOptionalParts( wstring & str, list< wstring > * result,
// Limit the amount of results to avoid excessive resource consumption
if ( headwords->size() < 32 )
headwords->push_back( str );
if( !inside_recurse )
result->merge( expanded );
if (!inside_recurse)
{
result->sort();
expanded.sort();
result->merge(expanded);
}
}
static const wstring openBraces( GD_NATIVE_TO_WS( L"{{" ) );

View file

@ -10,6 +10,8 @@
#include <zlib.h>
#include "dictionary.hh"
#include "iconv.hh"
#include <QTextCodec>
#include <QByteArray>
// Implementation details for Dsl, not part of its interface
namespace Dsl {
@ -119,11 +121,13 @@ class DslScanner
{
gzFile f;
DslEncoding encoding;
QTextCodec* codec;
DslIconv iconv;
wstring dictionaryName;
wstring langFrom, langTo;
wstring soundDictionary;
char readBuffer[ 65536 ];
char readBuffer[ 3000 ];
QByteArray frag;
char * readBufferPtr;
size_t readBufferLeft;
vector< wchar > wcharBuffer;

View file

@ -25,7 +25,7 @@ QT += core \
greaterThan(QT_MAJOR_VERSION, 4) {
QT += widgets \
webenginewidgets\
webenginewidgets\
printsupport \
help

View file

@ -29,14 +29,11 @@
#include <QStringList>
#include <QByteArray>
#include <QFileInfo>
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
#include <QRegularExpression>
#else
#include <QRegExp>
#endif
#include <QDomDocument>
#include <QTextDocumentFragment>
#include <QDataStream>
#include <QTextCodec>
#include "decompress.hh"
#include "gddebug.hh"
@ -184,38 +181,10 @@ QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t f
if ( !fromCode || !from )
return QString();
iconv_t conv = iconv_open( "UTF-16//IGNORE", fromCode );
if ( conv == ( iconv_t ) - 1 )
return QString();
vector<char> result;
const static int CHUNK_SIZE = 512;
char buf[CHUNK_SIZE];
char ** inBuf = ( char ** )&from;
while ( fromSize )
{
char * outBuf = buf;
size_t outBytesLeft = CHUNK_SIZE;
size_t ret = iconv( conv, inBuf, &fromSize, &outBuf, &outBytesLeft );
if ( ret == ( size_t ) - 1 )
{
if ( errno != E2BIG )
{
// Real problem
result.clear();
break;
}
}
result.insert( result.end(), buf, buf + CHUNK_SIZE - outBytesLeft );
}
iconv_close( conv );
if ( result.size() <= 2 )
return QString();
return QString::fromUtf16( ( const ushort * )&result.front() );
QTextCodec *codec =QTextCodec::codecForName(fromCode);
return codec->toUnicode(from,fromSize);
}
bool MdictParser::decryptHeadWordIndex(char * buffer, qint64 len)

View file

@ -45,8 +45,8 @@ namespace gd
{
#ifdef __WIN32
typedef unsigned int wchar;
typedef std::basic_string< wchar > wstring;
typedef char32_t wchar;
typedef std::u32string wstring;
// GD_NATIVE_TO_WS is used to convert L"" strings to a const pointer to
// wchar.
@ -55,8 +55,8 @@ namespace gd
#else
typedef wchar_t wchar;
using std::wstring;
typedef char32_t wchar;
typedef std::u32string wstring;
#define GD_NATIVE_TO_WS( str ) ( str )
#endif
}