diff --git a/dsl_details.cc b/dsl_details.cc index 540377d3..9de71395 100644 --- a/dsl_details.cc +++ b/dsl_details.cc @@ -895,7 +895,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ): //iconv.reinit( encoding ); codec = QTextCodec::codecForName(getEncodingNameFor(encoding)); - initLineFeed(encoding); + lineFeed=Utf8::initLineFeed(encoding); // We now can use our own readNextLine() function wstring str; @@ -1025,7 +1025,7 @@ bool DslScanner::readNextLine( wstring & out, size_t & offset, bool only_head_wo if(readBufferLeft<=0) return false; - int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength); + int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length); if(pos==-1) return false; QString line = codec->toUnicode(readBufferPtr, pos); @@ -1083,31 +1083,6 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b /////////////// DslScanner -void DslScanner::initLineFeed(Utf8::Encoding e) -{ - switch (e) - { - case Utf8::Utf16LE: - lineFeed= new char[2] {0x0A,0}; - lineFeedLength = 2; - break; - case Utf8::Utf16BE: - lineFeed = new char[2] { 0,0x0A}; - lineFeedLength = 2; - break; - case Utf8::Windows1252: - - case Utf8::Windows1251: - - case Utf8::Utf8: - - case Utf8::Windows1250: - default: - lineFeedLength = 1; - lineFeed = new char[1] {0x0A}; - } -} - void processUnsortedParts( wstring & str, bool strip ) { int refCount = 0; diff --git a/dsl_details.hh b/dsl_details.hh index 5b698a74..0b22335d 100644 --- a/dsl_details.hh +++ b/dsl_details.hh @@ -24,6 +24,7 @@ using gd::wchar; using std::list; using std::vector; using Utf8::Encoding; +using Utf8::LineFeed; @@ -110,8 +111,7 @@ class DslScanner char readBuffer[ 65536 ]; QTextStream* fragStream; char * readBufferPtr; - const char* lineFeed; - int lineFeedLength; + LineFeed lineFeed; size_t readBufferLeft; //qint64 pos; unsigned linesRead; @@ -131,7 +131,6 @@ public: /// Returns the detected encoding of this file. Encoding getEncoding() const { return encoding; } - void initLineFeed(Encoding e); /// Returns the dictionary's name, as was read from file's headers. wstring const & getDictionaryName() const diff --git a/gls.cc b/gls.cc index 59ecf360..8080c96e 100644 --- a/gls.cc +++ b/gls.cc @@ -59,6 +59,7 @@ using BtreeIndexing::WordArticleLink; using BtreeIndexing::IndexedWords; using BtreeIndexing::IndexInfo; using Utf8::Encoding; +using Utf8::LineFeed; /////////////// GlsScanner @@ -73,8 +74,7 @@ class GlsScanner char readBuffer[ 10000 ]; char * readBufferPtr; size_t readBufferLeft; - const char* lineFeed; - int lineFeedLength; + LineFeed lineFeed; unsigned linesRead; public: @@ -119,7 +119,6 @@ public: /// Reading begins from the first line after the headers (ones which end /// by the "### Glossary section:" line). bool readNextLine( wstring &, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex ); - void initLineFeed(Utf8::Encoding e); /// Returns the number of lines read so far from the file. unsigned getLinesRead() const { return linesRead; } @@ -178,6 +177,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ): codec = QTextCodec::codecForName(Utf8::getEncodingNameFor(encoding)); // We now can use our own readNextLine() function + lineFeed = Utf8::initLineFeed(encoding); wstring str; wstring *currentField = 0; @@ -243,30 +243,7 @@ GlsScanner::GlsScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ): } } } -void GlsScanner::initLineFeed(Utf8::Encoding e) -{ - switch (e) - { - case Utf8::Utf16LE: - lineFeed= new char[2] {0x0A,0}; - lineFeedLength = 2; - break; - case Utf8::Utf16BE: - lineFeed = new char[2] { 0,0x0A}; - lineFeedLength = 2; - break; - case Utf8::Windows1252: - case Utf8::Windows1251: - - case Utf8::Utf8: - - case Utf8::Windows1250: - default: - lineFeedLength = 1; - lineFeed = new char[1] {0x0A}; - } -} bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex, Iconv::Ex ) { @@ -286,7 +263,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex, int result = gzread( f, readBuffer + readBufferLeft, sizeof( readBuffer ) - readBufferLeft ); - if (result == -1) + if (result == -1) throw exCantReadGlsFile(); readBufferPtr = readBuffer; @@ -296,7 +273,7 @@ bool GlsScanner::readNextLine( wstring & out, size_t & offset ) THROW_SPEC( Ex, if(readBufferLeft<=0) return false; - int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed,lineFeedLength); + int pos = Utf8::findFirstLinePosition(readBufferPtr,readBufferLeft, lineFeed.lineFeed,lineFeed.length); if(pos==-1) return false; QString line = codec->toUnicode(readBufferPtr, pos); diff --git a/utf8.cc b/utf8.cc index a1370d21..ee5dd1f6 100644 --- a/utf8.cc +++ b/utf8.cc @@ -208,4 +208,31 @@ char const* getEncodingNameFor(Encoding e) } } +LineFeed initLineFeed(Encoding e) +{ + LineFeed lf; + switch (e) + { + case Utf8::Utf16LE: + lf.lineFeed= new char[2]{ 0x0A,0 }; + lf.length = 2; + break; + case Utf8::Utf16BE: + lf.lineFeed = new char[2]{ 0,0x0A }; + lf.length = 2; + break; + case Utf8::Windows1252: + + case Utf8::Windows1251: + + case Utf8::Utf8: + + case Utf8::Windows1250: + default: + lf.length = 1; + lf.lineFeed = new char[1]{ 0x0A }; + } + return lf; +} + } diff --git a/utf8.hh b/utf8.hh index daf06907..ef1bc08f 100644 --- a/utf8.hh +++ b/utf8.hh @@ -2,6 +2,7 @@ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ #ifndef __UTF8_HH_INCLUDED__ #define __UTF8_HH_INCLUDED__ + #include #include #include "cpp_features.hh" @@ -55,5 +56,15 @@ bool isspace( int c ); //get the first line in string s1. -1 if not found int findFirstLinePosition( char* s1,int s1length, const char* s2,int s2length); char const* getEncodingNameFor(Encoding e); + +struct LineFeed +{ + int length; + char* lineFeed; + +}; + +LineFeed initLineFeed(Encoding e); } + #endif