mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-24 04:24:09 +00:00
refactor encoding method
This commit is contained in:
parent
f0a3df3d6f
commit
8405035d2f
|
@ -895,7 +895,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
|
||||
//iconv.reinit( encoding );
|
||||
codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
|
||||
initLineFeed(encoding);
|
||||
lineFeed=Utf8::initLineFeed(encoding);
|
||||
// We now can use our own readNextLine() function
|
||||
|
||||
wstring str;
|
||||
|
@ -1025,7 +1025,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
|
|||
if(readBufferLeft<=0)
|
||||
return false;
|
||||
|
||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length);
|
||||
if(pos==-1)
|
||||
return false;
|
||||
QString line = codec->toUnicode(readBufferPtr, pos);
|
||||
|
@ -1083,31 +1083,6 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b
|
|||
|
||||
/////////////// DslScanner
|
||||
|
||||
/// Selects the byte sequence that marks a line feed for encoding @p e and
/// stores it in the scanner's `lineFeed` / `lineFeedLength` members.
///
/// Utf16LE gets the two bytes { 0x0A, 0x00 }, Utf16BE gets { 0x00, 0x0A };
/// every other encoding gets the single byte 0x0A.
///
/// NOTE(review): the buffer comes from `new[]` and nothing in this function
/// releases it (nor a buffer installed by an earlier call) — confirm who is
/// expected to `delete[]` it.
void DslScanner::initLineFeed(Utf8::Encoding e)
{
  if ( e == Utf8::Utf16LE )
  {
    // Low byte first.
    lineFeedLength = 2;
    lineFeed = new char[2] {0x0A,0};
  }
  else if ( e == Utf8::Utf16BE )
  {
    // High byte first.
    lineFeedLength = 2;
    lineFeed = new char[2] { 0,0x0A};
  }
  else
  {
    // Windows1252, Windows1251, Utf8, Windows1250 and anything unrecognised.
    lineFeedLength = 1;
    lineFeed = new char[1] {0x0A};
  }
}
|
||||
|
||||
void processUnsortedParts( wstring & str, bool strip )
|
||||
{
|
||||
int refCount = 0;
|
||||
|
|
|
@ -24,6 +24,7 @@ using gd::wchar;
|
|||
using std::list;
|
||||
using std::vector;
|
||||
using Utf8::Encoding;
|
||||
using Utf8::LineFeed;
|
||||
|
||||
|
||||
|
||||
|
@ -110,8 +111,7 @@ class DslScanner
|
|||
char readBuffer[ 65536 ];
|
||||
QTextStream* fragStream;
|
||||
char * readBufferPtr;
|
||||
const char* lineFeed;
|
||||
int lineFeedLength;
|
||||
LineFeed lineFeed;
|
||||
size_t readBufferLeft;
|
||||
//qint64 pos;
|
||||
unsigned linesRead;
|
||||
|
@ -131,7 +131,6 @@ public:
|
|||
/// Returns the detected encoding of this file.
|
||||
Encoding getEncoding() const
|
||||
{ return encoding; }
|
||||
void initLineFeed(Encoding e);
|
||||
|
||||
/// Returns the dictionary's name, as was read from file's headers.
|
||||
wstring const & getDictionaryName() const
|
||||
|
|
33
gls.cc
33
gls.cc
|
@ -59,6 +59,7 @@ using BtreeIndexing::WordArticleLink;
|
|||
using BtreeIndexing::IndexedWords;
|
||||
using BtreeIndexing::IndexInfo;
|
||||
using Utf8::Encoding;
|
||||
using Utf8::LineFeed;
|
||||
|
||||
/////////////// GlsScanner
|
||||
|
||||
|
@ -73,8 +74,7 @@ class GlsScanner
|
|||
char readBuffer[ 10000 ];
|
||||
char * readBufferPtr;
|
||||
size_t readBufferLeft;
|
||||
const char* lineFeed;
|
||||
int lineFeedLength;
|
||||
LineFeed lineFeed;
|
||||
unsigned linesRead;
|
||||
|
||||
public:
|
||||
|
@ -119,7 +119,6 @@ public:
|
|||
/// Reading begins from the first line after the headers (ones which end
|
||||
/// by the "### Glossary section:" line).
|
||||
bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex );
|
||||
void initLineFeed(Utf8::Encoding e);
|
||||
/// Returns the number of lines read so far from the file.
|
||||
unsigned getLinesRead() const
|
||||
{ return linesRead; }
|
||||
|
@ -178,6 +177,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
|
||||
codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding));
|
||||
// We now can use our own readNextLine() function
|
||||
lineFeed = Utf8::initLineFeed(encoding);
|
||||
|
||||
wstring str;
|
||||
wstring *currentField = 0;
|
||||
|
@ -243,30 +243,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
}
|
||||
}
|
||||
}
|
||||
/// Selects the byte sequence that marks a line feed for encoding @p e and
/// stores it in the scanner's `lineFeed` / `lineFeedLength` members.
///
/// Utf16LE gets the two bytes { 0x0A, 0x00 }, Utf16BE gets { 0x00, 0x0A };
/// every other encoding gets the single byte 0x0A.
///
/// NOTE(review): the buffer comes from `new[]` and nothing in this function
/// releases it (nor a buffer installed by an earlier call) — confirm who is
/// expected to `delete[]` it.
void GlsScanner::initLineFeed(Utf8::Encoding e)
{
  const bool littleEndian16 = ( e == Utf8::Utf16LE );
  const bool bigEndian16 = ( e == Utf8::Utf16BE );

  if ( !littleEndian16 && !bigEndian16 )
  {
    // Windows1252, Windows1251, Utf8, Windows1250 and anything unrecognised.
    lineFeedLength = 1;
    lineFeed = new char[1] {0x0A};
    return;
  }

  // Both UTF-16 flavours use two bytes; only the byte order differs.
  lineFeedLength = 2;
  if ( littleEndian16 )
    lineFeed = new char[2] {0x0A,0};
  else
    lineFeed = new char[2] { 0,0x0A};
}
|
||||
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
||||
Iconv::Ex )
|
||||
{
|
||||
|
@ -286,7 +263,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
|||
int result = gzread( f, readBuffer + readBufferLeft,
|
||||
sizeof( readBuffer ) - readBufferLeft );
|
||||
|
||||
if (result == -1)
|
||||
if (result == -1)
|
||||
throw exCantReadGlsFile();
|
||||
|
||||
readBufferPtr = readBuffer;
|
||||
|
@ -296,7 +273,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
|||
if(readBufferLeft<=0)
|
||||
return false;
|
||||
|
||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length);
|
||||
if(pos==-1)
|
||||
return false;
|
||||
QString line = codec->toUnicode(readBufferPtr, pos);
|
||||
|
|
27
utf8.cc
27
utf8.cc
|
@ -208,4 +208,31 @@ char const* getEncodingNameFor(Encoding e)
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the byte sequence that represents a line feed in encoding @p e.
///
/// Utf16LE yields the two bytes { 0x0A, 0x00 }, Utf16BE yields { 0x00, 0x0A };
/// every other encoding (Windows1252, Windows1251, Utf8, Windows1250 and any
/// unrecognised value) yields the single byte 0x0A.
///
/// @param e  Encoding to build the line-feed sequence for.
/// @return   A LineFeed whose `lineFeed` points at a freshly `new[]`-allocated
///           buffer holding `length` bytes.
///
/// NOTE(review): every call allocates a new buffer and nothing here releases
/// it, so ownership passes to the caller — confirm callers `delete[]` it.
LineFeed initLineFeed(Encoding e)
{
  LineFeed result;

  if ( e == Utf8::Utf16LE )
  {
    // Low byte first.
    result.length = 2;
    result.lineFeed = new char[2]{ 0x0A,0 };
  }
  else if ( e == Utf8::Utf16BE )
  {
    // High byte first.
    result.length = 2;
    result.lineFeed = new char[2]{ 0,0x0A };
  }
  else
  {
    // Single-byte line feed for all remaining encodings.
    result.length = 1;
    result.lineFeed = new char[1]{ 0x0A };
  }

  return result;
}
|
||||
|
||||
}
|
||||
|
|
11
utf8.hh
11
utf8.hh
|
@ -2,6 +2,7 @@
|
|||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
#ifndef __UTF8_HH_INCLUDED__
|
||||
#define __UTF8_HH_INCLUDED__
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include "cpp_features.hh"
|
||||
|
@ -55,5 +56,15 @@ bool isspace( int c );
|
|||
//get the first line in string s1. -1 if not found
|
||||
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length);
|
||||
char const* getEncodingNameFor(Encoding e);
|
||||
|
||||
/// Byte sequence that marks a line feed in some text encoding, together with
/// its size in bytes.
///
/// Both members carry default initialisers so that a default-constructed
/// LineFeed (for example a class member that has not been assigned yet) is in
/// a well-defined "empty" state rather than holding an indeterminate pointer.
///
/// NOTE(review): the struct does not release `lineFeed` and copies share the
/// same pointer — who owns the buffer is not expressed here; confirm against
/// the code that fills it in.
struct LineFeed
{
  /// Number of bytes pointed to by `lineFeed`; 0 while unset.
  int length = 0;

  /// Pointer to the first byte of the line-feed sequence; null while unset.
  char* lineFeed = nullptr;
};
|
||||
|
||||
LineFeed initLineFeed(Encoding e);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue