mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
refactor encoding method
This commit is contained in:
parent
f0a3df3d6f
commit
8405035d2f
|
@ -895,7 +895,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
||||||
|
|
||||||
//iconv.reinit( encoding );
|
//iconv.reinit( encoding );
|
||||||
codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
|
codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
|
||||||
initLineFeed(encoding);
|
lineFeed=Utf8::initLineFeed(encoding);
|
||||||
// We now can use our own readNextLine() function
|
// We now can use our own readNextLine() function
|
||||||
|
|
||||||
wstring str;
|
wstring str;
|
||||||
|
@ -1025,7 +1025,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
|
||||||
if(readBufferLeft<=0)
|
if(readBufferLeft<=0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length);
|
||||||
if(pos==-1)
|
if(pos==-1)
|
||||||
return false;
|
return false;
|
||||||
QString line = codec->toUnicode(readBufferPtr, pos);
|
QString line = codec->toUnicode(readBufferPtr, pos);
|
||||||
|
@ -1083,31 +1083,6 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b
|
||||||
|
|
||||||
/////////////// DslScanner
|
/////////////// DslScanner
|
||||||
|
|
||||||
/// Selects the byte sequence that encodes a line feed (0x0A) in encoding e
/// and stores it, with its length in bytes, in lineFeed / lineFeedLength.
///
/// UTF-16LE and UTF-16BE use a two-byte sequence; every other encoding,
/// including values not listed below, uses the single byte 0x0A.
///
/// The sequences live in static constant storage, so repeated calls do not
/// allocate and the stored pointer must never be freed.
void DslScanner::initLineFeed(Utf8::Encoding e)
{
  static const char utf16LeFeed[ 2 ] = { 0x0A, 0 };
  static const char utf16BeFeed[ 2 ] = { 0, 0x0A };
  static const char singleByteFeed[ 1 ] = { 0x0A };

  switch ( e )
  {
    case Utf8::Utf16LE:
      lineFeed = utf16LeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Utf16BE:
      lineFeed = utf16BeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Windows1252:
    case Utf8::Windows1251:
    case Utf8::Utf8:
    case Utf8::Windows1250:
    default:
      // All single-byte and UTF-8 encodings share the plain 0x0A byte.
      lineFeed = singleByteFeed;
      lineFeedLength = 1;
      break;
  }
}
|
|
||||||
|
|
||||||
void processUnsortedParts( wstring & str, bool strip )
|
void processUnsortedParts( wstring & str, bool strip )
|
||||||
{
|
{
|
||||||
int refCount = 0;
|
int refCount = 0;
|
||||||
|
|
|
@ -24,6 +24,7 @@ using gd::wchar;
|
||||||
using std::list;
|
using std::list;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using Utf8::Encoding;
|
using Utf8::Encoding;
|
||||||
|
using Utf8::LineFeed;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -110,8 +111,7 @@ class DslScanner
|
||||||
char readBuffer[ 65536 ];
|
char readBuffer[ 65536 ];
|
||||||
QTextStream* fragStream;
|
QTextStream* fragStream;
|
||||||
char * readBufferPtr;
|
char * readBufferPtr;
|
||||||
const char* lineFeed;
|
LineFeed lineFeed;
|
||||||
int lineFeedLength;
|
|
||||||
size_t readBufferLeft;
|
size_t readBufferLeft;
|
||||||
//qint64 pos;
|
//qint64 pos;
|
||||||
unsigned linesRead;
|
unsigned linesRead;
|
||||||
|
@ -131,7 +131,6 @@ public:
|
||||||
/// Returns the detected encoding of this file.
|
/// Returns the detected encoding of this file.
|
||||||
Encoding getEncoding() const
|
Encoding getEncoding() const
|
||||||
{ return encoding; }
|
{ return encoding; }
|
||||||
void initLineFeed(Encoding e);
|
|
||||||
|
|
||||||
/// Returns the dictionary's name, as was read from file's headers.
|
/// Returns the dictionary's name, as was read from file's headers.
|
||||||
wstring const & getDictionaryName() const
|
wstring const & getDictionaryName() const
|
||||||
|
|
33
gls.cc
33
gls.cc
|
@ -59,6 +59,7 @@ using BtreeIndexing::WordArticleLink;
|
||||||
using BtreeIndexing::IndexedWords;
|
using BtreeIndexing::IndexedWords;
|
||||||
using BtreeIndexing::IndexInfo;
|
using BtreeIndexing::IndexInfo;
|
||||||
using Utf8::Encoding;
|
using Utf8::Encoding;
|
||||||
|
using Utf8::LineFeed;
|
||||||
|
|
||||||
/////////////// GlsScanner
|
/////////////// GlsScanner
|
||||||
|
|
||||||
|
@ -73,8 +74,7 @@ class GlsScanner
|
||||||
char readBuffer[ 10000 ];
|
char readBuffer[ 10000 ];
|
||||||
char * readBufferPtr;
|
char * readBufferPtr;
|
||||||
size_t readBufferLeft;
|
size_t readBufferLeft;
|
||||||
const char* lineFeed;
|
LineFeed lineFeed;
|
||||||
int lineFeedLength;
|
|
||||||
unsigned linesRead;
|
unsigned linesRead;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -119,7 +119,6 @@ public:
|
||||||
/// Reading begins from the first line after the headers (ones which end
|
/// Reading begins from the first line after the headers (ones which end
|
||||||
/// by the "### Glossary section:" line).
|
/// by the "### Glossary section:" line).
|
||||||
bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex );
|
bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex );
|
||||||
void initLineFeed(Utf8::Encoding e);
|
|
||||||
/// Returns the number of lines read so far from the file.
|
/// Returns the number of lines read so far from the file.
|
||||||
unsigned getLinesRead() const
|
unsigned getLinesRead() const
|
||||||
{ return linesRead; }
|
{ return linesRead; }
|
||||||
|
@ -178,6 +177,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
||||||
|
|
||||||
codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding));
|
codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding));
|
||||||
// We now can use our own readNextLine() function
|
// We now can use our own readNextLine() function
|
||||||
|
lineFeed = Utf8::initLineFeed(encoding);
|
||||||
|
|
||||||
wstring str;
|
wstring str;
|
||||||
wstring *currentField = 0;
|
wstring *currentField = 0;
|
||||||
|
@ -243,30 +243,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Selects the byte sequence that encodes a line feed (0x0A) in encoding e
/// and stores it, with its length in bytes, in lineFeed / lineFeedLength.
///
/// UTF-16LE and UTF-16BE use a two-byte sequence; every other encoding,
/// including values not listed below, uses the single byte 0x0A.
///
/// The sequences live in static constant storage, so repeated calls do not
/// allocate and the stored pointer must never be freed.
void GlsScanner::initLineFeed(Utf8::Encoding e)
{
  static const char utf16LeFeed[ 2 ] = { 0x0A, 0 };
  static const char utf16BeFeed[ 2 ] = { 0, 0x0A };
  static const char singleByteFeed[ 1 ] = { 0x0A };

  switch ( e )
  {
    case Utf8::Utf16LE:
      lineFeed = utf16LeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Utf16BE:
      lineFeed = utf16BeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Windows1252:
    case Utf8::Windows1251:
    case Utf8::Utf8:
    case Utf8::Windows1250:
    default:
      // All single-byte and UTF-8 encodings share the plain 0x0A byte.
      lineFeed = singleByteFeed;
      lineFeedLength = 1;
      break;
  }
}
|
|
||||||
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
||||||
Iconv::Ex )
|
Iconv::Ex )
|
||||||
{
|
{
|
||||||
|
@ -286,7 +263,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
||||||
int result = gzread( f, readBuffer + readBufferLeft,
|
int result = gzread( f, readBuffer + readBufferLeft,
|
||||||
sizeof( readBuffer ) - readBufferLeft );
|
sizeof( readBuffer ) - readBufferLeft );
|
||||||
|
|
||||||
if (result == -1)
|
if (result == -1)
|
||||||
throw exCantReadGlsFile();
|
throw exCantReadGlsFile();
|
||||||
|
|
||||||
readBufferPtr = readBuffer;
|
readBufferPtr = readBuffer;
|
||||||
|
@ -296,7 +273,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
||||||
if(readBufferLeft<=0)
|
if(readBufferLeft<=0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length);
|
||||||
if(pos==-1)
|
if(pos==-1)
|
||||||
return false;
|
return false;
|
||||||
QString line = codec->toUnicode(readBufferPtr, pos);
|
QString line = codec->toUnicode(readBufferPtr, pos);
|
||||||
|
|
27
utf8.cc
27
utf8.cc
|
@ -208,4 +208,31 @@ char const* getEncodingNameFor(Encoding e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the byte sequence that encodes a line feed (0x0A) in encoding e,
/// together with its length in bytes.
///
/// UTF-16LE and UTF-16BE yield a two-byte sequence; every other encoding,
/// including values not listed below, yields the single byte 0x0A.
///
/// The returned pointer refers to storage with static duration that is shared
/// by every caller: it must not be freed or written through. Earlier code
/// allocated a new array with new[] on each call, and LineFeed has no
/// destructor to release it.
LineFeed initLineFeed(Encoding e)
{
  // Non-const only because LineFeed::lineFeed is declared as char*.
  static char utf16LeFeed[ 2 ] = { 0x0A, 0 };
  static char utf16BeFeed[ 2 ] = { 0, 0x0A };
  static char singleByteFeed[ 1 ] = { 0x0A };

  LineFeed lf;
  switch ( e )
  {
    case Utf8::Utf16LE:
      lf.lineFeed = utf16LeFeed;
      lf.length = 2;
      break;
    case Utf8::Utf16BE:
      lf.lineFeed = utf16BeFeed;
      lf.length = 2;
      break;
    case Utf8::Windows1252:
    case Utf8::Windows1251:
    case Utf8::Utf8:
    case Utf8::Windows1250:
    default:
      // All single-byte and UTF-8 encodings share the plain 0x0A byte.
      lf.lineFeed = singleByteFeed;
      lf.length = 1;
      break;
  }
  return lf;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
11
utf8.hh
11
utf8.hh
|
@ -2,6 +2,7 @@
|
||||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||||
#ifndef __UTF8_HH_INCLUDED__
|
#ifndef __UTF8_HH_INCLUDED__
|
||||||
#define __UTF8_HH_INCLUDED__
|
#define __UTF8_HH_INCLUDED__
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "cpp_features.hh"
|
#include "cpp_features.hh"
|
||||||
|
@ -55,5 +56,15 @@ bool isspace( int c );
|
||||||
//get the first line in string s1. -1 if not found
|
//get the first line in string s1. -1 if not found
|
||||||
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length);
|
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length);
|
||||||
char const* getEncodingNameFor(Encoding e);
|
char const* getEncodingNameFor(Encoding e);
|
||||||
|
|
||||||
|
/// Byte sequence that marks a line feed in a particular text encoding,
/// as produced by initLineFeed() and consumed by findFirstLinePosition().
struct LineFeed
{
  /// Number of bytes in the sequence pointed to by lineFeed.
  int length = 0;
  /// First byte of the line-feed sequence; null until one is assigned.
  char* lineFeed = nullptr;
};
|
||||||
|
|
||||||
|
LineFeed initLineFeed(Encoding e);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue