mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-12-18 03:14:06 +00:00
refactor encoding method
This commit is contained in:
parent
02a88c98ad
commit
f0a3df3d6f
5
dsl.cc
5
dsl.cc
|
@ -75,6 +75,7 @@ using gd::wstring;
|
|||
using gd::wchar;
|
||||
using std::vector;
|
||||
using std::list;
|
||||
using Utf8::Encoding;
|
||||
|
||||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
|
@ -597,7 +598,7 @@ void DslDictionary::loadArticle( uint32_t address,
|
|||
{
|
||||
articleData =
|
||||
Iconv::toWstring(
|
||||
getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ),
|
||||
Utf8::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ),
|
||||
articleBody, articleSize );
|
||||
free( articleBody );
|
||||
|
||||
|
@ -1361,7 +1362,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
|||
{
|
||||
articleData =
|
||||
Iconv::toWstring(
|
||||
getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ),
|
||||
getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ),
|
||||
articleBody, articleSize );
|
||||
free( articleBody );
|
||||
|
||||
|
|
116
dsl_details.cc
116
dsl_details.cc
|
@ -19,6 +19,7 @@ namespace Details {
|
|||
|
||||
using gd::wstring;
|
||||
using std::list;
|
||||
using Utf8::Encoding;
|
||||
|
||||
#ifndef __linux__
|
||||
|
||||
|
@ -41,18 +42,6 @@ int wcscasecmp( const wchar *s1, const wchar *s2 )
|
|||
|
||||
#endif
|
||||
|
||||
//get the first line in string s1. -1 if not found
|
||||
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length)
|
||||
{
|
||||
char* pos = std::search(s1,s1+s1length, s2, s2+s2length);
|
||||
|
||||
if (pos == s1 + s1length)
|
||||
return pos-s1;
|
||||
|
||||
//the line size.
|
||||
return pos- s1+ s2length;
|
||||
}
|
||||
|
||||
static DSLLangCode LangCodes[] =
|
||||
{
|
||||
{ 1, "en" },
|
||||
|
@ -159,25 +148,7 @@ bool isAtSignFirst( wstring const & str )
|
|||
return reg.indexIn( gd::toQString( str ) ) == 0;
|
||||
}
|
||||
|
||||
char const* getEncodingNameFor(DslEncoding e)
|
||||
{
|
||||
switch (e)
|
||||
{
|
||||
case Utf16LE:
|
||||
return "UTF-16LE";
|
||||
case Utf16BE:
|
||||
return "UTF-16BE";
|
||||
case Windows1252:
|
||||
return "WINDOWS-1252";
|
||||
case Windows1251:
|
||||
return "WINDOWS-1251";
|
||||
case Details::Utf8:
|
||||
return "UTF-8";
|
||||
case Windows1250:
|
||||
default:
|
||||
return "WINDOWS-1250";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/////////////// ArticleDom
|
||||
|
||||
|
@ -811,38 +782,36 @@ void ArticleDom::closeTag( wstring const & name,
|
|||
|
||||
void ArticleDom::nextChar() THROW_SPEC( eot )
|
||||
{
|
||||
if ( !*stringPos )
|
||||
throw eot();
|
||||
else{
|
||||
ch = *stringPos++;
|
||||
if ( !*stringPos )
|
||||
throw eot();
|
||||
|
||||
if ( ch == L'\\' )
|
||||
{
|
||||
ch = *stringPos++;
|
||||
|
||||
if ( ch == L'\\' )
|
||||
{
|
||||
if ( !*stringPos )
|
||||
throw eot();
|
||||
throw eot();
|
||||
|
||||
ch = *stringPos++;
|
||||
|
||||
escaped = true;
|
||||
}
|
||||
else
|
||||
if ( ch == L'[' && *stringPos == L'[' )
|
||||
{
|
||||
}
|
||||
else if ( ch == L'[' && *stringPos == L'[' )
|
||||
{
|
||||
++stringPos;
|
||||
escaped = true;
|
||||
}
|
||||
else
|
||||
if ( ch == L']' && *stringPos == L']' )
|
||||
{
|
||||
}
|
||||
else if ( ch == L']' && *stringPos == L']' )
|
||||
{
|
||||
++stringPos;
|
||||
escaped = true;
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
escaped = false;
|
||||
|
||||
if( ch == '\n' || ch == '\r' )
|
||||
if( ch == '\n' || ch == '\r' )
|
||||
lineStartPos = stringPos;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool ArticleDom::atSignFirstInLine()
|
||||
|
@ -857,7 +826,7 @@ bool ArticleDom::atSignFirstInLine()
|
|||
/////////////// DslScanner
|
||||
|
||||
DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
||||
encoding( Windows1252 ), readBufferPtr( readBuffer ),
|
||||
encoding( Utf8::Windows1252 ), readBufferPtr( readBuffer ),
|
||||
readBufferLeft( 0 ), linesRead( 0 )
|
||||
{
|
||||
// Since .dz is backwards-compatible with .gz, we use gz- functions to
|
||||
|
@ -884,10 +853,10 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
// If the file begins with the dedicated Unicode marker, we just consume
|
||||
// it. If, on the other hand, it's not, we return the bytes back
|
||||
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE )
|
||||
encoding = Utf16LE;
|
||||
encoding = Utf8::Utf16LE;
|
||||
else
|
||||
if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF )
|
||||
encoding = Utf16BE;
|
||||
encoding = Utf8::Utf16BE;
|
||||
else
|
||||
if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB )
|
||||
{
|
||||
|
@ -899,22 +868,22 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
throw exMalformedDslFile( fileName );
|
||||
}
|
||||
|
||||
encoding = Utf8;
|
||||
encoding = Utf8::Utf8;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( firstBytes[ 0 ] && !firstBytes[ 1 ] )
|
||||
encoding = Utf16LE;
|
||||
encoding = Utf8::Utf16LE;
|
||||
else
|
||||
if ( !firstBytes[ 0 ] && firstBytes[ 1 ] )
|
||||
encoding = Utf16BE;
|
||||
encoding = Utf8::Utf16BE;
|
||||
else
|
||||
{
|
||||
// Ok, this doesn't look like 16-bit Unicode. We will start with a
|
||||
// 8-bit encoding with an intent to find out the exact one from
|
||||
// the header.
|
||||
needExactEncoding = true;
|
||||
encoding = Windows1251;
|
||||
encoding = Utf8::Windows1251;
|
||||
}
|
||||
|
||||
if ( gzrewind( f ) )
|
||||
|
@ -995,13 +964,13 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
}
|
||||
else
|
||||
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Latin" ) ) )
|
||||
encoding = Windows1252;
|
||||
encoding = Utf8::Windows1252;
|
||||
else
|
||||
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Cyrillic" ) ) )
|
||||
encoding = Windows1251;
|
||||
encoding = Utf8::Windows1251;
|
||||
else
|
||||
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"EasternEuropean" ) ) )
|
||||
encoding = Windows1250;
|
||||
encoding = Utf8::Windows1250;
|
||||
else
|
||||
{
|
||||
gzclose( f );
|
||||
|
@ -1036,8 +1005,6 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
|
|||
// Check that we have bytes to read
|
||||
if ( readBufferLeft < 5000 )
|
||||
{
|
||||
//readBufferPtr+=pos;
|
||||
//readBufferLeft-=pos;
|
||||
if ( !gzeof( f ) )
|
||||
{
|
||||
// To avoid having to deal with ring logic, we move the remaining bytes
|
||||
|
@ -1053,19 +1020,12 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
|
|||
|
||||
readBufferPtr = readBuffer;
|
||||
readBufferLeft += (size_t) result;
|
||||
/*QByteArray frag = QByteArray::fromRawData(readBuffer, readBufferLeft);
|
||||
fragStream = new QTextStream(frag) ;
|
||||
fragStream->setCodec(codec);*/
|
||||
}
|
||||
}
|
||||
|
||||
//if(fragStream->atEnd())
|
||||
// return false;
|
||||
|
||||
if(readBufferLeft<=0)
|
||||
return false;
|
||||
//QString line=fragStream->readLine();
|
||||
int pos = findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
||||
|
||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
||||
if(pos==-1)
|
||||
return false;
|
||||
QString line = codec->toUnicode(readBufferPtr, pos);
|
||||
|
@ -1123,25 +1083,25 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b
|
|||
|
||||
/////////////// DslScanner
|
||||
|
||||
void DslScanner::initLineFeed(DslEncoding e)
|
||||
void DslScanner::initLineFeed(Utf8::Encoding e)
|
||||
{
|
||||
switch (e)
|
||||
{
|
||||
case Utf16LE:
|
||||
case Utf8::Utf16LE:
|
||||
lineFeed= new char[2] {0x0A,0};
|
||||
lineFeedLength = 2;
|
||||
break;
|
||||
case Utf16BE:
|
||||
case Utf8::Utf16BE:
|
||||
lineFeed = new char[2] { 0,0x0A};
|
||||
lineFeedLength = 2;
|
||||
break;
|
||||
case Windows1252:
|
||||
case Utf8::Windows1252:
|
||||
|
||||
case Windows1251:
|
||||
case Utf8::Windows1251:
|
||||
|
||||
case Details::Utf8:
|
||||
case Utf8::Utf8:
|
||||
|
||||
case Windows1250:
|
||||
case Utf8::Windows1250:
|
||||
default:
|
||||
lineFeedLength = 1;
|
||||
lineFeed = new char[1] {0x0A};
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include "iconv.hh"
|
||||
#include <QTextCodec>
|
||||
#include <QByteArray>
|
||||
#include "utf8.hh"
|
||||
|
||||
// Implementation details for Dsl, not part of its interface
|
||||
namespace Dsl {
|
||||
|
@ -22,17 +23,9 @@ using gd::wstring;
|
|||
using gd::wchar;
|
||||
using std::list;
|
||||
using std::vector;
|
||||
using Utf8::Encoding;
|
||||
|
||||
|
||||
// Those are possible encodings for .dsl files
|
||||
enum DslEncoding
|
||||
{
|
||||
Utf16LE,
|
||||
Utf16BE,
|
||||
Windows1252,
|
||||
Windows1251,
|
||||
Windows1250,
|
||||
Utf8 // This is an extension. Detected solely by the UTF8 BOM.
|
||||
};
|
||||
|
||||
struct DSLLangCode
|
||||
{
|
||||
|
@ -44,8 +37,6 @@ string findCodeForDslId( int id );
|
|||
|
||||
bool isAtSignFirst( wstring const & str );
|
||||
|
||||
char const* getEncodingNameFor(DslEncoding e);
|
||||
|
||||
/// Parses the DSL language, representing it in its structural DOM form.
|
||||
struct ArticleDom
|
||||
{
|
||||
|
@ -111,7 +102,7 @@ private:
|
|||
class DslScanner
|
||||
{
|
||||
gzFile f;
|
||||
DslEncoding encoding;
|
||||
Encoding encoding;
|
||||
QTextCodec* codec;
|
||||
wstring dictionaryName;
|
||||
wstring langFrom, langTo;
|
||||
|
@ -138,9 +129,9 @@ public:
|
|||
~DslScanner() throw();
|
||||
|
||||
/// Returns the detected encoding of this file.
|
||||
DslEncoding getEncoding() const
|
||||
Encoding getEncoding() const
|
||||
{ return encoding; }
|
||||
void initLineFeed(DslEncoding e);
|
||||
void initLineFeed(Encoding e);
|
||||
|
||||
/// Returns the dictionary's name, as was read from file's headers.
|
||||
wstring const & getDictionaryName() const
|
||||
|
@ -207,8 +198,8 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
|
|||
{
|
||||
switch( encoding )
|
||||
{
|
||||
case Utf16LE:
|
||||
case Utf16BE:
|
||||
case Utf8::Utf16LE:
|
||||
case Utf8::Utf16BE:
|
||||
return x*2;
|
||||
default:
|
||||
return x;
|
||||
|
|
137
gls.cc
137
gls.cc
|
@ -58,13 +58,7 @@ using gd::wchar;
|
|||
using BtreeIndexing::WordArticleLink;
|
||||
using BtreeIndexing::IndexedWords;
|
||||
using BtreeIndexing::IndexInfo;
|
||||
|
||||
enum Encoding
|
||||
{
|
||||
Utf8,
|
||||
Utf16LE,
|
||||
Utf16BE
|
||||
};
|
||||
using Utf8::Encoding;
|
||||
|
||||
/////////////// GlsScanner
|
||||
|
||||
|
@ -73,15 +67,14 @@ class GlsScanner
|
|||
gzFile f;
|
||||
Encoding encoding;
|
||||
QTextCodec* codec;
|
||||
Iconv iconv;
|
||||
wstring dictionaryName;
|
||||
wstring dictionaryDecription, dictionaryAuthor;
|
||||
wstring langFrom, langTo;
|
||||
char readBuffer[ 10000 ];
|
||||
char * readBufferPtr;
|
||||
size_t readBufferLeft;
|
||||
QTextStream* fragStream;
|
||||
qint64 pos;
|
||||
const char* lineFeed;
|
||||
int lineFeedLength;
|
||||
unsigned linesRead;
|
||||
|
||||
public:
|
||||
|
@ -126,30 +119,15 @@ public:
|
|||
/// Reading begins from the first line after the headers (ones which end
|
||||
/// by the "### Glossary section:" line).
|
||||
bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex );
|
||||
|
||||
void initLineFeed(Utf8::Encoding e);
|
||||
/// Returns the number of lines read so far from the file.
|
||||
unsigned getLinesRead() const
|
||||
{ return linesRead; }
|
||||
|
||||
/// Returns a name to be passed to iconv for the given encoding.
|
||||
static char const * getEncodingNameFor( Encoding e )
|
||||
{
|
||||
switch( e )
|
||||
{
|
||||
case Utf16LE:
|
||||
return Iconv::Utf16Le;
|
||||
case Utf16BE:
|
||||
return "UTF-16BE";
|
||||
case Utf8:
|
||||
default:
|
||||
return Iconv::Utf8;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
||||
encoding( Utf8 ), iconv( Iconv::GdWchar, Iconv::Utf8 ), readBufferPtr( readBuffer ),
|
||||
readBufferLeft( 0 ), linesRead( 0 ), pos(0)
|
||||
encoding( Utf8::Utf8 ), readBufferPtr( readBuffer ),
|
||||
readBufferLeft( 0 ), linesRead( 0 )
|
||||
{
|
||||
// Since .dz is backwards-compatible with .gz, we use gz- functions to
|
||||
// read it -- they are much nicer than the dict_data- ones.
|
||||
|
@ -172,10 +150,10 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
// If the file begins with the dedicated Unicode marker, we just consume
|
||||
// it. If, on the other hand, it's not, we return the bytes back
|
||||
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE )
|
||||
encoding = Utf16LE;
|
||||
encoding = Utf8::Utf16LE;
|
||||
else
|
||||
if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF )
|
||||
encoding = Utf16BE;
|
||||
encoding = Utf8::Utf16BE;
|
||||
else
|
||||
if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB )
|
||||
{
|
||||
|
@ -186,7 +164,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
gzclose( f );
|
||||
throw exMalformedGlsFile( fileName );
|
||||
}
|
||||
encoding = Utf8;
|
||||
encoding = Utf8::Utf8;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -195,12 +173,10 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
gzclose( f );
|
||||
throw exCantOpen( fileName );
|
||||
}
|
||||
encoding = Utf8;
|
||||
encoding = Utf8::Utf8;
|
||||
}
|
||||
|
||||
if( encoding != Utf8 )
|
||||
iconv.reinit( Iconv::GdWchar, getEncodingNameFor( encoding ) );
|
||||
codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
|
||||
codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding));
|
||||
// We now can use our own readNextLine() function
|
||||
|
||||
wstring str;
|
||||
|
@ -267,45 +243,74 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
|
|||
}
|
||||
}
|
||||
}
|
||||
/// Chooses the byte sequence that marks a line feed (U+000A) in encoding `e`
/// and stores it in `lineFeed`, with its size in bytes in `lineFeedLength`.
///
/// UTF-16 encodings get a two-byte sequence in the matching byte order; every
/// other encoding, including unrecognised values, gets the single byte 0x0A.
///
/// NOTE(review): the sequence is allocated with new[] on every call; where it
/// is released is not visible from here -- confirm that the owner frees it and
/// that this is called only once per scanner.
void GlsScanner::initLineFeed(Utf8::Encoding e)
{
  if ( e == Utf8::Utf16LE )
  {
    // Little-endian 16-bit unit: low byte first.
    lineFeed = new char[2] { 0x0A, 0 };
    lineFeedLength = 2;
    return;
  }

  if ( e == Utf8::Utf16BE )
  {
    // Big-endian 16-bit unit: high byte first.
    lineFeed = new char[2] { 0, 0x0A };
    lineFeedLength = 2;
    return;
  }

  // Windows-1250/1251/1252, UTF-8 and anything unknown: one byte per line feed.
  lineFeedLength = 1;
  lineFeed = new char[1] { 0x0A };
}
|
||||
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
||||
Iconv::Ex )
|
||||
{
|
||||
offset = (size_t)(gztell(f) - readBufferLeft + pos);
|
||||
offset = (size_t)(gztell(f) - readBufferLeft);
|
||||
|
||||
{
|
||||
// Check that we have bytes to read
|
||||
if (readBufferLeft - pos < 2000)
|
||||
{
|
||||
readBufferPtr += pos;
|
||||
readBufferLeft -= pos;
|
||||
if (!gzeof(f))
|
||||
{
|
||||
// To avoid having to deal with ring logic, we move the remaining bytes
|
||||
// to the beginning
|
||||
memmove(readBuffer, readBufferPtr, readBufferLeft);
|
||||
{
|
||||
// Check that we have bytes to read
|
||||
if ( readBufferLeft < 5000 )
|
||||
{
|
||||
if ( !gzeof( f ) )
|
||||
{
|
||||
// To avoid having to deal with ring logic, we move the remaining bytes
|
||||
// to the beginning
|
||||
memmove( readBuffer, readBufferPtr, readBufferLeft );
|
||||
|
||||
// Read some more bytes to readBuffer
|
||||
int result = gzread(f, readBuffer + readBufferLeft,
|
||||
sizeof(readBuffer) - readBufferLeft);
|
||||
// Read some more bytes to readBuffer
|
||||
int result = gzread( f, readBuffer + readBufferLeft,
|
||||
sizeof( readBuffer ) - readBufferLeft );
|
||||
|
||||
if (result == -1)
|
||||
throw exCantReadGlsFile();
|
||||
if (result == -1)
|
||||
throw exCantReadGlsFile();
|
||||
|
||||
readBufferPtr = readBuffer;
|
||||
readBufferLeft += (size_t)result;
|
||||
QByteArray frag = QByteArray::fromRawData(readBuffer, readBufferLeft);
|
||||
fragStream = new QTextStream(frag);
|
||||
fragStream->setCodec(codec);
|
||||
}
|
||||
}
|
||||
readBufferPtr = readBuffer;
|
||||
readBufferLeft += (size_t) result;
|
||||
}
|
||||
}
|
||||
if(readBufferLeft<=0)
|
||||
return false;
|
||||
|
||||
if (fragStream->atEnd())
|
||||
return false;
|
||||
int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
|
||||
if(pos==-1)
|
||||
return false;
|
||||
QString line = codec->toUnicode(readBufferPtr, pos);
|
||||
if(line.endsWith("\n"))
|
||||
line.chop(1);
|
||||
if(line.endsWith("\r"))
|
||||
line.chop(1);
|
||||
|
||||
QString line = fragStream->readLine();
|
||||
pos = fragStream->pos();
|
||||
linesRead++;
|
||||
if(pos>readBufferLeft){
|
||||
pos=readBufferLeft;
|
||||
}
|
||||
readBufferLeft -= pos;
|
||||
readBufferPtr += pos;
|
||||
linesRead++;
|
||||
|
||||
#ifdef __WIN32
|
||||
out = line.toStdU32String();
|
||||
|
@ -314,7 +319,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
|
|||
#endif
|
||||
return true;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GlsScanner::~GlsScanner() throw()
|
||||
|
@ -669,7 +674,7 @@ void GlsDictionary::loadArticleText( uint32_t address,
|
|||
}
|
||||
else
|
||||
{
|
||||
string articleData = Iconv::toUtf8( GlsScanner::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
|
||||
string articleData = Iconv::toUtf8( Utf8::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
|
||||
string::size_type start_pos = 0, end_pos = 0;
|
||||
|
||||
for( ; ; )
|
||||
|
|
33
utf8.cc
33
utf8.cc
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include "utf8.hh"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace Utf8 {
|
||||
|
||||
|
@ -175,4 +176,36 @@ bool isspace( int c )
|
|||
}
|
||||
}
|
||||
|
||||
/// Measures the first line held in the raw (still encoded) byte buffer s1.
///
/// @param s1        start of the bytes to scan
/// @param s1length  number of valid bytes at s1
/// @param s2        byte sequence that terminates a line
/// @param s2length  number of bytes in s2
/// @return the size in bytes of the first line, terminator included. When the
///         terminator does not occur (or no usable terminator was supplied),
///         the whole remaining length s1length is returned, so a final line
///         without a trailing terminator is still delivered. Returns -1 when
///         there is nothing to scan (null buffer or non-positive length).
///
/// NOTE(review): the search is byte-wise, so a multi-byte terminator can also
/// match at an offset that is not a multiple of s2length -- confirm whether
/// the UTF-16 callers need code-unit-aligned matching.
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length)
{
  // Nothing to read: report it with the sentinel the callers test for.
  if ( s1 == nullptr || s1length <= 0 )
    return -1;

  // Without a terminator every byte belongs to one line. Returning 0 here
  // would hand the caller a zero-length line it could never advance past.
  if ( s2 == nullptr || s2length <= 0 )
    return s1length;

  char * const bufferEnd = s1 + s1length;
  char * const hit = std::search( s1, bufferEnd, s2, s2 + s2length );

  // No terminator found: the rest of the buffer is the (last) line.
  if ( hit == bufferEnd )
    return s1length;

  // The line size, terminator included.
  return static_cast< int >( hit - s1 ) + s2length;
}
|
||||
|
||||
/// Maps an Encoding value to the textual name of that encoding.
///
/// @param e  encoding to name
/// @return a pointer to a string literal; Windows1250 and any value not
///         listed below yield "WINDOWS-1250".
char const* getEncodingNameFor(Encoding e)
{
  if ( e == Utf16LE )
    return "UTF-16LE";

  if ( e == Utf16BE )
    return "UTF-16BE";

  if ( e == Windows1252 )
    return "WINDOWS-1252";

  if ( e == Windows1251 )
    return "WINDOWS-1251";

  if ( e == Utf8 )
    return "UTF-8";

  // Windows1250 itself, and the fallback for anything unrecognised.
  return "WINDOWS-1250";
}
|
||||
|
||||
}
|
||||
|
|
18
utf8.hh
18
utf8.hh
|
@ -1,6 +1,7 @@
|
|||
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||
|
||||
#ifndef __UTF8_HH_INCLUDED__
|
||||
#define __UTF8_HH_INCLUDED__
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include "cpp_features.hh"
|
||||
|
@ -13,6 +14,17 @@
|
|||
/// places.
|
||||
namespace Utf8 {
|
||||
|
||||
// Possible encodings of dictionary source files; shared by the .dsl and .gls readers
|
||||
enum Encoding
|
||||
{
|
||||
Utf16LE,
|
||||
Utf16BE,
|
||||
Windows1252,
|
||||
Windows1251,
|
||||
Windows1250,
|
||||
Utf8 // This is an extension. Detected solely by the UTF8 BOM.
|
||||
};
|
||||
|
||||
using std::string;
|
||||
using gd::wstring;
|
||||
using gd::wchar;
|
||||
|
@ -40,4 +52,8 @@ wstring decode( string const & ) THROW_SPEC( exCantDecode );
|
|||
/// Linux but was messing up strings under Windows.
|
||||
bool isspace( int c );
|
||||
|
||||
// Returns the size in bytes of the first line in the non-empty buffer s1, including its terminator s2; if s2 does not occur, s1length is returned.
|
||||
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length);
|
||||
char const* getEncodingNameFor(Encoding e);
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue