mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 00:14:06 +00:00
Fix dictionary parse errors:
1. MDX dictionary load error on Windows. 2. DSL dictionary load error on Windows.
This commit is contained in:
parent
c1eef3a228
commit
ded545ecf3
|
@ -891,6 +891,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
}
|
||||
|
||||
iconv.reinit( encoding );
|
||||
codec=QTextCodec::codecForName(iconv.getEncodingNameFor(encoding));
|
||||
|
||||
// We now can use our own readNextLine() function
|
||||
|
||||
|
@ -1009,7 +1010,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
|||
for( ; ; )
|
||||
{
|
||||
// Check that we have bytes to read
|
||||
if ( readBufferLeft < 4 ) // To convert one char, we need at most 4 bytes
|
||||
if ( readBufferLeft < 1000 ) // To convert one char, we need at most 4 bytes
|
||||
{
|
||||
if ( !gzeof( f ) )
|
||||
{
|
||||
|
@ -1026,76 +1027,25 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
|||
|
||||
readBufferPtr = readBuffer;
|
||||
readBufferLeft += (size_t) result;
|
||||
frag = QByteArray::fromRawData(readBuffer, readBufferLeft);
|
||||
}
|
||||
}
|
||||
|
||||
if ( readBufferLeft < readMultiple )
|
||||
{
|
||||
// No more data. Return what we've got so far; drop the last byte if the
|
||||
// encoding is 16-bit Unicode and the file has an odd number of bytes.
|
||||
readBufferLeft = 0;
|
||||
|
||||
if ( outPtr != &wcharBuffer.front() )
|
||||
{
|
||||
// If there was a stray \r, remove it
|
||||
if ( outPtr[ -1 ] == L'\r' )
|
||||
--outPtr;
|
||||
|
||||
out = wstring( &wcharBuffer.front(), outPtr - &wcharBuffer.front() );
|
||||
|
||||
++linesRead;
|
||||
|
||||
return true;
|
||||
}
|
||||
else
|
||||
//QByteArray frag=QByteArray::fromRawData(readBuffer,readBufferLeft);
|
||||
QTextStream in(frag);
|
||||
if(in.atEnd())
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check that we have chars to write
|
||||
if ( leftInOut < 2 ) // With 16-bit wchars, 2 is needed for a surrogate pair
|
||||
{
|
||||
wcharBuffer.resize( wcharBuffer.size() + 64 );
|
||||
outPtr = &wcharBuffer.front() + wcharBuffer.size() - 64 - leftInOut;
|
||||
leftInOut += 64;
|
||||
}
|
||||
|
||||
// Ok, now convert one char
|
||||
size_t outBytesLeft = sizeof( wchar );
|
||||
|
||||
Iconv::Result r =
|
||||
iconv.convert( (void const *&)readBufferPtr, readBufferLeft,
|
||||
(void *&)outPtr, outBytesLeft );
|
||||
|
||||
if ( r == Iconv::NeedMoreOut && outBytesLeft == sizeof( wchar ) )
|
||||
{
|
||||
// Seems to be a surrogate pair with a 16-bit target wchar
|
||||
|
||||
outBytesLeft *= 2;
|
||||
r = iconv.convert( (void const *&)readBufferPtr, readBufferLeft,
|
||||
(void *&)outPtr, outBytesLeft );
|
||||
--leftInOut; // Complements the next decremention
|
||||
}
|
||||
|
||||
if ( outBytesLeft )
|
||||
throw exEncodingError();
|
||||
|
||||
--leftInOut;
|
||||
|
||||
// Have we got \n?
|
||||
if ( outPtr[ -1 ] == L'\n' )
|
||||
{
|
||||
--outPtr;
|
||||
|
||||
// Now kill a \r if there is one, and return the result.
|
||||
if ( outPtr != &wcharBuffer.front() && outPtr[ -1 ] == L'\r' )
|
||||
--outPtr;
|
||||
|
||||
out = wstring( &wcharBuffer.front(), outPtr - &wcharBuffer.front() );
|
||||
|
||||
++linesRead;
|
||||
|
||||
in.setCodec(codec);
|
||||
QString line=in.readLine();
|
||||
qint64 pos=in.pos();
|
||||
readBufferPtr+=pos;
|
||||
readBufferLeft-=pos;
|
||||
linesRead++;
|
||||
out=line.toStdU32String();
|
||||
frag.remove(0, pos);
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1330,8 +1280,12 @@ void expandOptionalParts( wstring & str, list< wstring > * result,
|
|||
// Limit the amount of results to avoid excessive resource consumption
|
||||
if ( headwords->size() < 32 )
|
||||
headwords->push_back( str );
|
||||
if( !inside_recurse )
|
||||
result->merge( expanded );
|
||||
if (!inside_recurse)
|
||||
{
|
||||
result->sort();
|
||||
expanded.sort();
|
||||
result->merge(expanded);
|
||||
}
|
||||
}
|
||||
|
||||
static const wstring openBraces( GD_NATIVE_TO_WS( L"{{" ) );
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#include <zlib.h>
|
||||
#include "dictionary.hh"
|
||||
#include "iconv.hh"
|
||||
#include <QTextCodec>
|
||||
#include <QByteArray>
|
||||
|
||||
// Implementation details for Dsl, not part of its interface
|
||||
namespace Dsl {
|
||||
|
@ -119,11 +121,13 @@ class DslScanner
|
|||
{
|
||||
gzFile f;
|
||||
DslEncoding encoding;
|
||||
QTextCodec* codec;
|
||||
DslIconv iconv;
|
||||
wstring dictionaryName;
|
||||
wstring langFrom, langTo;
|
||||
wstring soundDictionary;
|
||||
char readBuffer[ 65536 ];
|
||||
char readBuffer[ 3000 ];
|
||||
QByteArray frag;
|
||||
char * readBufferPtr;
|
||||
size_t readBufferLeft;
|
||||
vector< wchar > wcharBuffer;
|
||||
|
|
|
@ -25,7 +25,7 @@ QT += core \
|
|||
|
||||
greaterThan(QT_MAJOR_VERSION, 4) {
|
||||
QT += widgets \
|
||||
webenginewidgets\
|
||||
webenginewidgets\
|
||||
printsupport \
|
||||
help
|
||||
|
||||
|
|
|
@ -29,14 +29,11 @@
|
|||
#include <QStringList>
|
||||
#include <QByteArray>
|
||||
#include <QFileInfo>
|
||||
#if QT_VERSION >= QT_VERSION_CHECK( 5, 0, 0 )
|
||||
#include <QRegularExpression>
|
||||
#else
|
||||
#include <QRegExp>
|
||||
#endif
|
||||
#include <QDomDocument>
|
||||
#include <QTextDocumentFragment>
|
||||
#include <QDataStream>
|
||||
#include <QTextCodec>
|
||||
|
||||
#include "decompress.hh"
|
||||
#include "gddebug.hh"
|
||||
|
@ -184,38 +181,10 @@ QString MdictParser::toUtf16( const char * fromCode, const char * from, size_t f
|
|||
if ( !fromCode || !from )
|
||||
return QString();
|
||||
|
||||
iconv_t conv = iconv_open( "UTF-16//IGNORE", fromCode );
|
||||
if ( conv == ( iconv_t ) - 1 )
|
||||
return QString();
|
||||
|
||||
vector<char> result;
|
||||
const static int CHUNK_SIZE = 512;
|
||||
char buf[CHUNK_SIZE];
|
||||
char ** inBuf = ( char ** )&from;
|
||||
|
||||
while ( fromSize )
|
||||
{
|
||||
char * outBuf = buf;
|
||||
size_t outBytesLeft = CHUNK_SIZE;
|
||||
size_t ret = iconv( conv, inBuf, &fromSize, &outBuf, &outBytesLeft );
|
||||
|
||||
if ( ret == ( size_t ) - 1 )
|
||||
{
|
||||
if ( errno != E2BIG )
|
||||
{
|
||||
// Real problem
|
||||
result.clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result.insert( result.end(), buf, buf + CHUNK_SIZE - outBytesLeft );
|
||||
}
|
||||
|
||||
iconv_close( conv );
|
||||
if ( result.size() <= 2 )
|
||||
return QString();
|
||||
return QString::fromUtf16( ( const ushort * )&result.front() );
|
||||
QTextCodec *codec =QTextCodec::codecForName(fromCode);
|
||||
return codec->toUnicode(from,fromSize);
|
||||
}
|
||||
|
||||
bool MdictParser::decryptHeadWordIndex(char * buffer, qint64 len)
|
||||
|
|
|
@ -45,8 +45,8 @@ namespace gd
|
|||
{
|
||||
#ifdef __WIN32
|
||||
|
||||
typedef unsigned int wchar;
|
||||
typedef std::basic_string< wchar > wstring;
|
||||
typedef char32_t wchar;
|
||||
typedef std::u32string wstring;
|
||||
|
||||
// GD_NATIVE_TO_WS is used to convert L"" strings to a const pointer to
|
||||
// wchar.
|
||||
|
@ -55,8 +55,8 @@ namespace gd
|
|||
|
||||
#else
|
||||
|
||||
typedef wchar_t wchar;
|
||||
using std::wstring;
|
||||
typedef char32_t wchar;
|
||||
typedef std::u32string wstring;
|
||||
#define GD_NATIVE_TO_WS( str ) ( str )
|
||||
#endif
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue