refactor encoding method

This commit is contained in:
xiaoyifang 2021-11-06 16:55:51 +08:00
parent f0a3df3d6f
commit 8405035d2f
5 changed files with 47 additions and 58 deletions

View file

@ -895,7 +895,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
//iconv.reinit( encoding ); //iconv.reinit( encoding );
codec = QTextCodec::codecForName(getEncodingNameFor(encoding)); codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
initLineFeed(encoding); lineFeed=Utf8::initLineFeed(encoding);
// We now can use our own readNextLine() function // We now can use our own readNextLine() function
wstring str; wstring str;
@ -1025,7 +1025,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
if(readBufferLeft<=0) if(readBufferLeft<=0)
return false; return false;
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength); int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length);
if(pos==-1) if(pos==-1)
return false; return false;
QString line = codec->toUnicode(readBufferPtr, pos); QString line = codec->toUnicode(readBufferPtr, pos);
@ -1083,31 +1083,6 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b
/////////////// DslScanner /////////////// DslScanner
/// Selects the byte sequence that encodes a line feed (0x0A) for encoding e
/// and stores it, together with its length in bytes, in the lineFeed and
/// lineFeedLength members.
///
/// UTF-16LE uses the two bytes { 0x0A, 0x00 }, UTF-16BE uses { 0x00, 0x0A },
/// and every other encoding uses the single byte { 0x0A }.
///
/// The sequences live in static constant storage instead of being allocated
/// with new[] on every call, so nothing has to be released afterwards and
/// calling this function repeatedly does not leak memory.
void DslScanner::initLineFeed(Utf8::Encoding e)
{
  static const char utf16LeFeed[ 2 ] = { 0x0A, 0 };
  static const char utf16BeFeed[ 2 ] = { 0, 0x0A };
  static const char singleByteFeed[ 1 ] = { 0x0A };

  switch (e)
  {
    case Utf8::Utf16LE:
      lineFeed = utf16LeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Utf16BE:
      lineFeed = utf16BeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Windows1252:
    case Utf8::Windows1251:
    case Utf8::Utf8:
    case Utf8::Windows1250:
    default:
      // All remaining encodings represent the line feed as one byte.
      lineFeed = singleByteFeed;
      lineFeedLength = 1;
      break;
  }
}
void processUnsortedParts( wstring & str, bool strip ) void processUnsortedParts( wstring & str, bool strip )
{ {
int refCount = 0; int refCount = 0;

View file

@ -24,6 +24,7 @@ using gd::wchar;
using std::list; using std::list;
using std::vector; using std::vector;
using Utf8::Encoding; using Utf8::Encoding;
using Utf8::LineFeed;
@ -110,8 +111,7 @@ class DslScanner
char readBuffer[ 65536 ]; char readBuffer[ 65536 ];
QTextStream* fragStream; QTextStream* fragStream;
char * readBufferPtr; char * readBufferPtr;
const char* lineFeed; LineFeed lineFeed;
int lineFeedLength;
size_t readBufferLeft; size_t readBufferLeft;
//qint64 pos; //qint64 pos;
unsigned linesRead; unsigned linesRead;
@ -131,7 +131,6 @@ public:
/// Returns the detected encoding of this file. /// Returns the detected encoding of this file.
Encoding getEncoding() const Encoding getEncoding() const
{ return encoding; } { return encoding; }
void initLineFeed(Encoding e);
/// Returns the dictionary's name, as was read from file's headers. /// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const wstring const & getDictionaryName() const

33
gls.cc
View file

@ -59,6 +59,7 @@ using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo; using BtreeIndexing::IndexInfo;
using Utf8::Encoding; using Utf8::Encoding;
using Utf8::LineFeed;
/////////////// GlsScanner /////////////// GlsScanner
@ -73,8 +74,7 @@ class GlsScanner
char readBuffer[ 10000 ]; char readBuffer[ 10000 ];
char * readBufferPtr; char * readBufferPtr;
size_t readBufferLeft; size_t readBufferLeft;
const char* lineFeed; LineFeed lineFeed;
int lineFeedLength;
unsigned linesRead; unsigned linesRead;
public: public:
@ -119,7 +119,6 @@ public:
/// Reading begins from the first line after the headers (ones which end /// Reading begins from the first line after the headers (ones which end
/// by the "### Glossary section:" line). /// by the "### Glossary section:" line).
bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex ); bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex );
void initLineFeed(Utf8::Encoding e);
/// Returns the number of lines read so far from the file. /// Returns the number of lines read so far from the file.
unsigned getLinesRead() const unsigned getLinesRead() const
{ return linesRead; } { return linesRead; }
@ -178,6 +177,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding)); codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding));
// We now can use our own readNextLine() function // We now can use our own readNextLine() function
lineFeed = Utf8::initLineFeed(encoding);
wstring str; wstring str;
wstring *currentField = 0; wstring *currentField = 0;
@ -243,30 +243,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
} }
} }
} }
/// Selects the byte sequence that encodes a line feed (0x0A) for encoding e
/// and stores it, together with its length in bytes, in the lineFeed and
/// lineFeedLength members.
///
/// UTF-16LE uses the two bytes { 0x0A, 0x00 }, UTF-16BE uses { 0x00, 0x0A },
/// and every other encoding uses the single byte { 0x0A }.
///
/// The sequences live in static constant storage instead of being allocated
/// with new[] on every call, so nothing has to be released afterwards and
/// calling this function repeatedly does not leak memory.
void GlsScanner::initLineFeed(Utf8::Encoding e)
{
  static const char utf16LeFeed[ 2 ] = { 0x0A, 0 };
  static const char utf16BeFeed[ 2 ] = { 0, 0x0A };
  static const char singleByteFeed[ 1 ] = { 0x0A };

  switch (e)
  {
    case Utf8::Utf16LE:
      lineFeed = utf16LeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Utf16BE:
      lineFeed = utf16BeFeed;
      lineFeedLength = 2;
      break;
    case Utf8::Windows1252:
    case Utf8::Windows1251:
    case Utf8::Utf8:
    case Utf8::Windows1250:
    default:
      // All remaining encodings represent the line feed as one byte.
      lineFeed = singleByteFeed;
      lineFeedLength = 1;
      break;
  }
}
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex, bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
Iconv::Ex ) Iconv::Ex )
{ {
@ -286,7 +263,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
int result = gzread( f, readBuffer + readBufferLeft, int result = gzread( f, readBuffer + readBufferLeft,
sizeof( readBuffer ) - readBufferLeft ); sizeof( readBuffer ) - readBufferLeft );
if (result == -1) if (result == -1)
throw exCantReadGlsFile(); throw exCantReadGlsFile();
readBufferPtr = readBuffer; readBufferPtr = readBuffer;
@ -296,7 +273,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
if(readBufferLeft<=0) if(readBufferLeft<=0)
return false; return false;
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength); int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length);
if(pos==-1) if(pos==-1)
return false; return false;
QString line = codec->toUnicode(readBufferPtr, pos); QString line = codec->toUnicode(readBufferPtr, pos);

27
utf8.cc
View file

@ -208,4 +208,31 @@ char const* getEncodingNameFor(Encoding e)
} }
} }
/// Returns the byte sequence that encodes a line feed (0x0A) in encoding e,
/// together with its length in bytes.
///
/// UTF-16LE yields the two bytes { 0x0A, 0x00 }, UTF-16BE yields
/// { 0x00, 0x0A }, and every other encoding yields the single byte { 0x0A }.
///
/// The returned pointer refers to static storage shared by all callers: it
/// must be treated as read-only and must not be passed to delete[]. Using
/// static buffers instead of a fresh new[] per call means the result never
/// has to be freed, so callers that simply keep the LineFeed do not leak.
LineFeed initLineFeed(Encoding e)
{
  // Non-const only because LineFeed::lineFeed is declared as char*.
  static char utf16LeFeed[ 2 ] = { 0x0A, 0 };
  static char utf16BeFeed[ 2 ] = { 0, 0x0A };
  static char singleByteFeed[ 1 ] = { 0x0A };

  LineFeed lf;
  switch (e)
  {
    case Utf8::Utf16LE:
      lf.lineFeed = utf16LeFeed;
      lf.length = 2;
      break;
    case Utf8::Utf16BE:
      lf.lineFeed = utf16BeFeed;
      lf.length = 2;
      break;
    case Utf8::Windows1252:
    case Utf8::Windows1251:
    case Utf8::Utf8:
    case Utf8::Windows1250:
    default:
      // All remaining encodings represent the line feed as one byte.
      lf.lineFeed = singleByteFeed;
      lf.length = 1;
      break;
  }
  return lf;
}
} }

11
utf8.hh
View file

@ -2,6 +2,7 @@
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef __UTF8_HH_INCLUDED__ #ifndef __UTF8_HH_INCLUDED__
#define __UTF8_HH_INCLUDED__ #define __UTF8_HH_INCLUDED__
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include "cpp_features.hh" #include "cpp_features.hh"
@ -55,5 +56,15 @@ bool isspace( int c );
//get the first line in string s1. -1 if not found //get the first line in string s1. -1 if not found
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length); int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length);
char const* getEncodingNameFor(Encoding e); char const* getEncodingNameFor(Encoding e);
/// Describes the byte sequence used as a line terminator in some encoding.
/// Default-constructed instances are empty (zero length, null pointer), so a
/// LineFeed member is never read in an indeterminate state before it is
/// assigned.
struct LineFeed
{
  /// Number of bytes in the sequence pointed to by lineFeed.
  int length = 0;
  /// Pointer to the first byte of the line-feed sequence; this struct does
  /// not free it.
  char* lineFeed = nullptr;
};
LineFeed initLineFeed(Encoding e);
} }
#endif #endif