refactor encoding method

This commit is contained in:
xiaoyifang 2021-11-06 16:26:30 +08:00
parent 02a88c98ad
commit f0a3df3d6f
6 changed files with 170 additions and 164 deletions

5
dsl.cc
View file

@ -75,6 +75,7 @@ using gd::wstring;
using gd::wchar; using gd::wchar;
using std::vector; using std::vector;
using std::list; using std::list;
using Utf8::Encoding;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
@ -597,7 +598,7 @@ void DslDictionary::loadArticle( uint32_t address,
{ {
articleData = articleData =
Iconv::toWstring( Iconv::toWstring(
getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ), Utf8::getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ),
articleBody, articleSize ); articleBody, articleSize );
free( articleBody ); free( articleBody );
@ -1361,7 +1362,7 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
{ {
articleData = articleData =
Iconv::toWstring( Iconv::toWstring(
getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ), getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ),
articleBody, articleSize ); articleBody, articleSize );
free( articleBody ); free( articleBody );

View file

@ -19,6 +19,7 @@ namespace Details {
using gd::wstring; using gd::wstring;
using std::list; using std::list;
using Utf8::Encoding;
#ifndef __linux__ #ifndef __linux__
@ -41,18 +42,6 @@ int wcscasecmp( const wchar *s1, const wchar *s2 )
#endif #endif
// Returns the length in bytes of the first line in s1, including the
// terminating delimiter s2. When s2 does not occur, the whole buffer is
// treated as one unterminated line and s1length is returned; -1 is never
// returned.
//
// A match is only accepted at an offset that is a multiple of s2length, so a
// multi-byte delimiter (such as the two-byte UTF-16 line feed) cannot match
// across the boundary of two adjacent code units. s1 is assumed to start on a
// code-unit boundary.
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length)
{
  // Nothing to scan.
  if( s1length <= 0 )
    return 0;

  // No usable delimiter: the whole buffer counts as a single line.
  if( s2length <= 0 )
    return s1length;

  char* const end = s1 + s1length;
  char* from = s1;

  for( ;; )
  {
    char* hit = std::search( from, end, s2, s2 + s2length );

    if( hit == end )
      return s1length;

    // Skip matches that straddle two code units and keep searching.
    if( ( hit - s1 ) % s2length == 0 )
      return static_cast< int >( hit - s1 ) + s2length;

    from = hit + 1;
  }
}
static DSLLangCode LangCodes[] = static DSLLangCode LangCodes[] =
{ {
{ 1, "en" }, { 1, "en" },
@ -159,25 +148,7 @@ bool isAtSignFirst( wstring const & str )
return reg.indexIn( gd::toQString( str ) ) == 0; return reg.indexIn( gd::toQString( str ) ) == 0;
} }
// Maps a DslEncoding value to its encoding name string; dsl.cc hands the
// result to Iconv::toWstring. Windows1250 and any unrecognised value map to
// "WINDOWS-1250".
char const* getEncodingNameFor(DslEncoding e)
{
  if( e == Utf16LE )
    return "UTF-16LE";

  if( e == Utf16BE )
    return "UTF-16BE";

  if( e == Windows1252 )
    return "WINDOWS-1252";

  if( e == Windows1251 )
    return "WINDOWS-1251";

  if( e == Details::Utf8 )
    return "UTF-8";

  // Windows1250 and the fallback share one name.
  return "WINDOWS-1250";
}
/////////////// ArticleDom /////////////// ArticleDom
@ -811,38 +782,36 @@ void ArticleDom::closeTag( wstring const & name,
void ArticleDom::nextChar() THROW_SPEC( eot ) void ArticleDom::nextChar() THROW_SPEC( eot )
{ {
if ( !*stringPos ) if ( !*stringPos )
throw eot(); throw eot();
else{
ch = *stringPos++;
if ( ch == L'\\' ) ch = *stringPos++;
{
if ( ch == L'\\' )
{
if ( !*stringPos ) if ( !*stringPos )
throw eot(); throw eot();
ch = *stringPos++; ch = *stringPos++;
escaped = true; escaped = true;
} }
else else if ( ch == L'[' && *stringPos == L'[' )
if ( ch == L'[' && *stringPos == L'[' ) {
{
++stringPos; ++stringPos;
escaped = true; escaped = true;
} }
else else if ( ch == L']' && *stringPos == L']' )
if ( ch == L']' && *stringPos == L']' ) {
{
++stringPos; ++stringPos;
escaped = true; escaped = true;
} }
else else
escaped = false; escaped = false;
if( ch == '\n' || ch == '\r' ) if( ch == '\n' || ch == '\r' )
lineStartPos = stringPos; lineStartPos = stringPos;
}
} }
bool ArticleDom::atSignFirstInLine() bool ArticleDom::atSignFirstInLine()
@ -857,7 +826,7 @@ bool ArticleDom::atSignFirstInLine()
/////////////// DslScanner /////////////// DslScanner
DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ): DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
encoding( Windows1252 ), readBufferPtr( readBuffer ), encoding( Utf8::Windows1252 ), readBufferPtr( readBuffer ),
readBufferLeft( 0 ), linesRead( 0 ) readBufferLeft( 0 ), linesRead( 0 )
{ {
// Since .dz is backwards-compatible with .gz, we use gz- functions to // Since .dz is backwards-compatible with .gz, we use gz- functions to
@ -884,10 +853,10 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
// If the file begins with the dedicated Unicode marker, we just consume // If the file begins with the dedicated Unicode marker, we just consume
// it. If, on the other hand, it's not, we return the bytes back // it. If, on the other hand, it's not, we return the bytes back
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE )
encoding = Utf16LE; encoding = Utf8::Utf16LE;
else else
if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF )
encoding = Utf16BE; encoding = Utf8::Utf16BE;
else else
if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB )
{ {
@ -899,22 +868,22 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
throw exMalformedDslFile( fileName ); throw exMalformedDslFile( fileName );
} }
encoding = Utf8; encoding = Utf8::Utf8;
} }
else else
{ {
if ( firstBytes[ 0 ] && !firstBytes[ 1 ] ) if ( firstBytes[ 0 ] && !firstBytes[ 1 ] )
encoding = Utf16LE; encoding = Utf8::Utf16LE;
else else
if ( !firstBytes[ 0 ] && firstBytes[ 1 ] ) if ( !firstBytes[ 0 ] && firstBytes[ 1 ] )
encoding = Utf16BE; encoding = Utf8::Utf16BE;
else else
{ {
// Ok, this doesn't look like 16-bit Unicode. We will start with a // Ok, this doesn't look like 16-bit Unicode. We will start with a
// 8-bit encoding with an intent to find out the exact one from // 8-bit encoding with an intent to find out the exact one from
// the header. // the header.
needExactEncoding = true; needExactEncoding = true;
encoding = Windows1251; encoding = Utf8::Windows1251;
} }
if ( gzrewind( f ) ) if ( gzrewind( f ) )
@ -995,13 +964,13 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
} }
else else
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Latin" ) ) ) if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Latin" ) ) )
encoding = Windows1252; encoding = Utf8::Windows1252;
else else
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Cyrillic" ) ) ) if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"Cyrillic" ) ) )
encoding = Windows1251; encoding = Utf8::Windows1251;
else else
if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"EasternEuropean" ) ) ) if ( !wcscasecmp( arg.c_str(), GD_NATIVE_TO_WS( L"EasternEuropean" ) ) )
encoding = Windows1250; encoding = Utf8::Windows1250;
else else
{ {
gzclose( f ); gzclose( f );
@ -1036,8 +1005,6 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
// Check that we have bytes to read // Check that we have bytes to read
if ( readBufferLeft < 5000 ) if ( readBufferLeft < 5000 )
{ {
//readBufferPtr+=pos;
//readBufferLeft-=pos;
if ( !gzeof( f ) ) if ( !gzeof( f ) )
{ {
// To avoid having to deal with ring logic, we move the remaining bytes // To avoid having to deal with ring logic, we move the remaining bytes
@ -1053,19 +1020,12 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo
readBufferPtr = readBuffer; readBufferPtr = readBuffer;
readBufferLeft += (size_t) result; readBufferLeft += (size_t) result;
/*QByteArray frag = QByteArray::fromRawData(readBuffer, readBufferLeft);
fragStream = new QTextStream(frag) ;
fragStream->setCodec(codec);*/
} }
} }
//if(fragStream->atEnd())
// return false;
if(readBufferLeft<=0) if(readBufferLeft<=0)
return false; return false;
//QString line=fragStream->readLine();
int pos = findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength); int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
if(pos==-1) if(pos==-1)
return false; return false;
QString line = codec->toUnicode(readBufferPtr, pos); QString line = codec->toUnicode(readBufferPtr, pos);
@ -1123,25 +1083,25 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b
/////////////// DslScanner /////////////// DslScanner
void DslScanner::initLineFeed(DslEncoding e) void DslScanner::initLineFeed(Utf8::Encoding e)
{ {
switch (e) switch (e)
{ {
case Utf16LE: case Utf8::Utf16LE:
lineFeed= new char[2] {0x0A,0}; lineFeed= new char[2] {0x0A,0};
lineFeedLength = 2; lineFeedLength = 2;
break; break;
case Utf16BE: case Utf8::Utf16BE:
lineFeed = new char[2] { 0,0x0A}; lineFeed = new char[2] { 0,0x0A};
lineFeedLength = 2; lineFeedLength = 2;
break; break;
case Windows1252: case Utf8::Windows1252:
case Windows1251: case Utf8::Windows1251:
case Details::Utf8: case Utf8::Utf8:
case Windows1250: case Utf8::Windows1250:
default: default:
lineFeedLength = 1; lineFeedLength = 1;
lineFeed = new char[1] {0x0A}; lineFeed = new char[1] {0x0A};

View file

@ -12,6 +12,7 @@
#include "iconv.hh" #include "iconv.hh"
#include <QTextCodec> #include <QTextCodec>
#include <QByteArray> #include <QByteArray>
#include "utf8.hh"
// Implementation details for Dsl, not part of its interface // Implementation details for Dsl, not part of its interface
namespace Dsl { namespace Dsl {
@ -22,17 +23,9 @@ using gd::wstring;
using gd::wchar; using gd::wchar;
using std::list; using std::list;
using std::vector; using std::vector;
using Utf8::Encoding;
// Encodings a .dsl file may use. The numeric values are written out because
// dsl.cc converts the integer field idxHeader.dslEncoding back to this type
// with a cast, so the numbering must stay as it is.
enum DslEncoding
{
  Utf16LE     = 0,
  Utf16BE     = 1,
  Windows1252 = 2,
  Windows1251 = 3,
  Windows1250 = 4,
  Utf8        = 5 // Extension: detected solely by the UTF-8 BOM.
};
struct DSLLangCode struct DSLLangCode
{ {
@ -44,8 +37,6 @@ string findCodeForDslId( int id );
bool isAtSignFirst( wstring const & str ); bool isAtSignFirst( wstring const & str );
char const* getEncodingNameFor(DslEncoding e);
/// Parses the DSL language, representing it in its structural DOM form. /// Parses the DSL language, representing it in its structural DOM form.
struct ArticleDom struct ArticleDom
{ {
@ -111,7 +102,7 @@ private:
class DslScanner class DslScanner
{ {
gzFile f; gzFile f;
DslEncoding encoding; Encoding encoding;
QTextCodec* codec; QTextCodec* codec;
wstring dictionaryName; wstring dictionaryName;
wstring langFrom, langTo; wstring langFrom, langTo;
@ -138,9 +129,9 @@ public:
~DslScanner() throw(); ~DslScanner() throw();
/// Returns the detected encoding of this file. /// Returns the detected encoding of this file.
DslEncoding getEncoding() const Encoding getEncoding() const
{ return encoding; } { return encoding; }
void initLineFeed(DslEncoding e); void initLineFeed(Encoding e);
/// Returns the dictionary's name, as was read from file's headers. /// Returns the dictionary's name, as was read from file's headers.
wstring const & getDictionaryName() const wstring const & getDictionaryName() const
@ -207,8 +198,8 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
{ {
switch( encoding ) switch( encoding )
{ {
case Utf16LE: case Utf8::Utf16LE:
case Utf16BE: case Utf8::Utf16BE:
return x*2; return x*2;
default: default:
return x; return x;

137
gls.cc
View file

@ -58,13 +58,7 @@ using gd::wchar;
using BtreeIndexing::WordArticleLink; using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo; using BtreeIndexing::IndexInfo;
using Utf8::Encoding;
// Encodings recognised for .gls files. Values are written out because
// idxHeader.glsEncoding is converted back to this type with a cast.
enum Encoding
{
  Utf8    = 0,
  Utf16LE = 1,
  Utf16BE = 2
};
/////////////// GlsScanner /////////////// GlsScanner
@ -73,15 +67,14 @@ class GlsScanner
gzFile f; gzFile f;
Encoding encoding; Encoding encoding;
QTextCodec* codec; QTextCodec* codec;
Iconv iconv;
wstring dictionaryName; wstring dictionaryName;
wstring dictionaryDecription, dictionaryAuthor; wstring dictionaryDecription, dictionaryAuthor;
wstring langFrom, langTo; wstring langFrom, langTo;
char readBuffer[ 10000 ]; char readBuffer[ 10000 ];
char * readBufferPtr; char * readBufferPtr;
size_t readBufferLeft; size_t readBufferLeft;
QTextStream* fragStream; const char* lineFeed;
qint64 pos; int lineFeedLength;
unsigned linesRead; unsigned linesRead;
public: public:
@ -126,30 +119,15 @@ public:
/// Reading begins from the first line after the headers (ones which end /// Reading begins from the first line after the headers (ones which end
/// by the "### Glossary section:" line). /// by the "### Glossary section:" line).
bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex ); bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex );
void initLineFeed(Utf8::Encoding e);
/// Returns the number of lines read so far from the file. /// Returns the number of lines read so far from the file.
unsigned getLinesRead() const unsigned getLinesRead() const
{ return linesRead; } { return linesRead; }
/// Maps an encoding to the name that should be handed to iconv. Utf8 and
/// any unrecognised value yield Iconv::Utf8.
static char const * getEncodingNameFor( Encoding e )
{
  if( e == Utf16LE )
    return Iconv::Utf16Le;

  if( e == Utf16BE )
    return "UTF-16BE";

  return Iconv::Utf8;
}
}; };
GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ): GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
encoding( Utf8 ), iconv( Iconv::GdWchar, Iconv::Utf8 ), readBufferPtr( readBuffer ), encoding( Utf8::Utf8 ), readBufferPtr( readBuffer ),
readBufferLeft( 0 ), linesRead( 0 ), pos(0) readBufferLeft( 0 ), linesRead( 0 )
{ {
// Since .dz is backwards-compatible with .gz, we use gz- functions to // Since .dz is backwards-compatible with .gz, we use gz- functions to
// read it -- they are much nicer than the dict_data- ones. // read it -- they are much nicer than the dict_data- ones.
@ -172,10 +150,10 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
// If the file begins with the dedicated Unicode marker, we just consume // If the file begins with the dedicated Unicode marker, we just consume
// it. If, on the other hand, it's not, we return the bytes back // it. If, on the other hand, it's not, we return the bytes back
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE )
encoding = Utf16LE; encoding = Utf8::Utf16LE;
else else
if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF )
encoding = Utf16BE; encoding = Utf8::Utf16BE;
else else
if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB )
{ {
@ -186,7 +164,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
gzclose( f ); gzclose( f );
throw exMalformedGlsFile( fileName ); throw exMalformedGlsFile( fileName );
} }
encoding = Utf8; encoding = Utf8::Utf8;
} }
else else
{ {
@ -195,12 +173,10 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
gzclose( f ); gzclose( f );
throw exCantOpen( fileName ); throw exCantOpen( fileName );
} }
encoding = Utf8; encoding = Utf8::Utf8;
} }
if( encoding != Utf8 ) codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding));
iconv.reinit( Iconv::GdWchar, getEncodingNameFor( encoding ) );
codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
// We now can use our own readNextLine() function // We now can use our own readNextLine() function
wstring str; wstring str;
@ -267,45 +243,74 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
} }
} }
} }
/// Sets lineFeed / lineFeedLength to the byte sequence that encodes a line
/// feed (0x0A) in the given encoding: two bytes for the UTF-16 variants, one
/// byte for everything else.
// NOTE(review): each call allocates a fresh array with new[]; no matching
// delete[] is visible from here -- confirm who releases it.
void GlsScanner::initLineFeed(Utf8::Encoding e)
{
  if( e == Utf8::Utf16LE )
  {
    // Little-endian: low byte first.
    lineFeedLength = 2;
    lineFeed = new char[2] {0x0A,0};
  }
  else if( e == Utf8::Utf16BE )
  {
    // Big-endian: high byte first.
    lineFeedLength = 2;
    lineFeed = new char[2] { 0,0x0A};
  }
  else
  {
    // Windows1252, Windows1251, Windows1250, Utf8 and unknown values.
    lineFeedLength = 1;
    lineFeed = new char[1] {0x0A};
  }
}
bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex, bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
Iconv::Ex ) Iconv::Ex )
{ {
offset = (size_t)(gztell(f) - readBufferLeft + pos); offset = (size_t)(gztell(f) - readBufferLeft);
{ {
// Check that we have bytes to read // Check that we have bytes to read
if (readBufferLeft - pos < 2000) if ( readBufferLeft < 5000 )
{ {
readBufferPtr += pos; if ( !gzeof( f ) )
readBufferLeft -= pos; {
if (!gzeof(f)) // To avoid having to deal with ring logic, we move the remaining bytes
{ // to the beginning
// To avoid having to deal with ring logic, we move the remaining bytes memmove( readBuffer, readBufferPtr, readBufferLeft );
// to the beginning
memmove(readBuffer, readBufferPtr, readBufferLeft);
// Read some more bytes to readBuffer // Read some more bytes to readBuffer
int result = gzread(f, readBuffer + readBufferLeft, int result = gzread( f, readBuffer + readBufferLeft,
sizeof(readBuffer) - readBufferLeft); sizeof( readBuffer ) - readBufferLeft );
if (result == -1) if (result == -1)
throw exCantReadGlsFile(); throw exCantReadGlsFile();
readBufferPtr = readBuffer; readBufferPtr = readBuffer;
readBufferLeft += (size_t)result; readBufferLeft += (size_t) result;
QByteArray frag = QByteArray::fromRawData(readBuffer, readBufferLeft); }
fragStream = new QTextStream(frag); }
fragStream->setCodec(codec); if(readBufferLeft<=0)
} return false;
}
if (fragStream->atEnd()) int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength);
return false; if(pos==-1)
return false;
QString line = codec->toUnicode(readBufferPtr, pos);
if(line.endsWith("\n"))
line.chop(1);
if(line.endsWith("\r"))
line.chop(1);
QString line = fragStream->readLine(); if(pos>readBufferLeft){
pos = fragStream->pos(); pos=readBufferLeft;
linesRead++; }
readBufferLeft -= pos;
readBufferPtr += pos;
linesRead++;
#ifdef __WIN32 #ifdef __WIN32
out = line.toStdU32String(); out = line.toStdU32String();
@ -314,7 +319,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex,
#endif #endif
return true; return true;
} }
} }
GlsScanner::~GlsScanner() throw() GlsScanner::~GlsScanner() throw()
@ -669,7 +674,7 @@ void GlsDictionary::loadArticleText( uint32_t address,
} }
else else
{ {
string articleData = Iconv::toUtf8( GlsScanner::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize ); string articleData = Iconv::toUtf8( Utf8::getEncodingNameFor( Encoding( idxHeader.glsEncoding ) ), articleBody, articleSize );
string::size_type start_pos = 0, end_pos = 0; string::size_type start_pos = 0, end_pos = 0;
for( ; ; ) for( ; ; )

33
utf8.cc
View file

@ -3,6 +3,7 @@
#include "utf8.hh" #include "utf8.hh"
#include <vector> #include <vector>
#include <algorithm>
namespace Utf8 { namespace Utf8 {
@ -175,4 +176,36 @@ bool isspace( int c )
} }
} }
// Returns the length in bytes of the first line in s1, including the
// terminating delimiter s2. When s2 does not occur, the whole buffer is
// treated as one unterminated line and s1length is returned; -1 is never
// returned.
//
// A match is only accepted at an offset that is a multiple of s2length, so a
// multi-byte delimiter (such as the two-byte UTF-16 line feed) cannot match
// across the boundary of two adjacent code units. s1 is assumed to start on a
// code-unit boundary.
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length)
{
  // Nothing to scan.
  if( s1length <= 0 )
    return 0;

  // No usable delimiter: the whole buffer counts as a single line.
  if( s2length <= 0 )
    return s1length;

  char* const end = s1 + s1length;
  char* from = s1;

  for( ;; )
  {
    char* hit = std::search( from, end, s2, s2 + s2length );

    if( hit == end )
      return s1length;

    // Skip matches that straddle two code units and keep searching.
    if( ( hit - s1 ) % s2length == 0 )
      return static_cast< int >( hit - s1 ) + s2length;

    from = hit + 1;
  }
}
// Maps an Encoding value to its encoding name string; callers pass the
// result to Iconv conversions and to QTextCodec::codecForName. Windows1250
// and any unrecognised value map to "WINDOWS-1250".
char const* getEncodingNameFor(Encoding e)
{
  if( e == Utf16LE )
    return "UTF-16LE";

  if( e == Utf16BE )
    return "UTF-16BE";

  if( e == Windows1252 )
    return "WINDOWS-1252";

  if( e == Windows1251 )
    return "WINDOWS-1251";

  if( e == Utf8 )
    return "UTF-8";

  // Windows1250 and the fallback share one name.
  return "WINDOWS-1250";
}
} }

18
utf8.hh
View file

@ -1,6 +1,7 @@
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org> /* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef __UTF8_HH_INCLUDED__
#define __UTF8_HH_INCLUDED__
#include <cstdio> #include <cstdio>
#include <string> #include <string>
#include "cpp_features.hh" #include "cpp_features.hh"
@ -13,6 +14,17 @@
/// places. /// places.
namespace Utf8 { namespace Utf8 {
// Text encodings shared by the .dsl and .gls readers. The numeric values are
// written out because integer header fields (idxHeader.dslEncoding,
// idxHeader.glsEncoding) are converted back to this type with a cast.
// NOTE(review): gls.cc previously had its own enum numbered Utf8 = 0,
// Utf16LE = 1, Utf16BE = 2. If glsEncoding values written with that numbering
// still exist in index files they would now decode differently -- confirm the
// index format version was bumped.
enum Encoding
{
  Utf16LE     = 0,
  Utf16BE     = 1,
  Windows1252 = 2,
  Windows1251 = 3,
  Windows1250 = 4,
  Utf8        = 5 // Extension: detected solely by the UTF-8 BOM.
};
using std::string; using std::string;
using gd::wstring; using gd::wstring;
using gd::wchar; using gd::wchar;
@ -40,4 +52,8 @@ wstring decode( string const & ) THROW_SPEC( exCantDecode );
/// Linux but was messing up strings under Windows. /// Linux but was messing up strings under Windows.
bool isspace( int c ); bool isspace( int c );
// Returns the byte length of the first line in s1, including the delimiter s2; returns s1length (not -1) when s2 does not occur.
int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length);
char const* getEncodingNameFor(Encoding e);
} }
#endif