Improve DSL dictionary loading performance

This commit is contained in:
xiaoyifang 2021-11-06 15:45:36 +08:00
parent 2633b32458
commit 02a88c98ad
3 changed files with 28 additions and 52 deletions

8
dsl.cc
View file

@ -596,8 +596,8 @@ void DslDictionary::loadArticle( uint32_t address,
try
{
articleData =
DslIconv::toWstring(
DslIconv::getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ),
Iconv::toWstring(
getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ),
articleBody, articleSize );
free( articleBody );
@ -1360,8 +1360,8 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
try
{
articleData =
DslIconv::toWstring(
DslIconv::getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ),
Iconv::toWstring(
getEncodingNameFor( DslEncoding( idxHeader.dslEncoding ) ),
articleBody, articleSize );
free( articleBody );

View file

@ -159,6 +159,26 @@ bool isAtSignFirst( wstring const & str )
return reg.indexIn( gd::toQString( str ) ) == 0;
}
/// Maps a DSL encoding value to its textual encoding name (for example
/// "UTF-16LE" or "UTF-8"). Windows1250 and any value without a dedicated
/// name both yield "WINDOWS-1250".
char const* getEncodingNameFor(DslEncoding e)
{
  if ( e == Utf16LE )
    return "UTF-16LE";

  if ( e == Utf16BE )
    return "UTF-16BE";

  if ( e == Windows1252 )
    return "WINDOWS-1252";

  if ( e == Windows1251 )
    return "WINDOWS-1251";

  if ( e == Details::Utf8 )
    return "UTF-8";

  // Windows1250 itself and every unrecognised value share this fallback.
  return "WINDOWS-1250";
}
/////////////// ArticleDom
wstring ArticleDom::Node::renderAsText( bool stripTrsTag ) const
@ -837,7 +857,7 @@ bool ArticleDom::atSignFirstInLine()
/////////////// DslScanner
DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
encoding( Windows1252 ), iconv( encoding ), readBufferPtr( readBuffer ),
encoding( Windows1252 ), readBufferPtr( readBuffer ),
readBufferLeft( 0 ), linesRead( 0 )
{
// Since .dz is backwards-compatible with .gz, we use gz- functions to
@ -905,7 +925,7 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
}
//iconv.reinit( encoding );
codec = QTextCodec::codecForName(iconv.getEncodingNameFor(encoding));
codec = QTextCodec::codecForName(getEncodingNameFor(encoding));
initLineFeed(encoding);
// We now can use our own readNextLine() function
@ -999,9 +1019,6 @@ DslScanner::DslScanner( string const & fileName ) THROW_SPEC( Ex, Iconv::Ex ):
gzseek( f, offset, SEEK_SET );
readBufferPtr = readBuffer;
readBufferLeft = 0;
if ( needExactEncoding )
iconv.reinit( encoding );
}
DslScanner::~DslScanner() throw()
@ -1106,37 +1123,6 @@ bool DslScanner::readNextLineWithoutComments( wstring & out, size_t & offset , b
/////////////// DslScanner
/// Constructs the converter by forwarding Iconv::GdWchar and the encoding
/// name looked up for `e` to the Iconv base-class constructor. The body is
/// intentionally empty: all the work happens in the base initialiser.
DslIconv::DslIconv( DslEncoding e ) THROW_SPEC( Iconv::Ex ):
Iconv( Iconv::GdWchar, getEncodingNameFor( e ) )
{
}
/// Re-initialises the underlying Iconv with Iconv::GdWchar and the encoding
/// name that corresponds to `e`.
void DslIconv::reinit( DslEncoding e ) THROW_SPEC( Iconv::Ex )
{
  char const * const encodingName = getEncodingNameFor( e );

  Iconv::reinit( Iconv::GdWchar, encodingName );
}
/// Translates a DSL encoding value into the matching encoding name string.
/// Windows1250 and any value not listed explicitly both map to
/// "WINDOWS-1250".
char const * DslIconv::getEncodingNameFor( DslEncoding e )
{
  if ( e == Utf16LE )
    return "UTF-16LE";

  if ( e == Utf16BE )
    return "UTF-16BE";

  if ( e == Windows1252 )
    return "WINDOWS-1252";

  if ( e == Windows1251 )
    return "WINDOWS-1251";

  if ( e == Details::Utf8 )
    return "UTF-8";

  // Shared result for Windows1250 and for anything unrecognised.
  return "WINDOWS-1250";
}
void DslScanner::initLineFeed(DslEncoding e)
{
switch (e)

View file

@ -44,6 +44,8 @@ string findCodeForDslId( int id );
bool isAtSignFirst( wstring const & str );
/// Returns the encoding name string (e.g. "UTF-16LE", "UTF-8") for the given
/// DSL encoding; values without a dedicated name yield "WINDOWS-1250".
char const* getEncodingNameFor(DslEncoding e);
/// Parses the DSL language, representing it in its structural DOM form.
struct ArticleDom
{
@ -104,17 +106,6 @@ private:
wstring headword;
};
/// An adapted version of Iconv which takes Dsl encoding and decodes to wchar.
class DslIconv: public Iconv
{
public:
/// Constructs the converter for the given Dsl encoding; the encoding is
/// translated to a name via getEncodingNameFor() and handed to Iconv.
DslIconv( DslEncoding ) THROW_SPEC( Iconv::Ex );
/// Re-initialises the converter for a different Dsl encoding.
void reinit( DslEncoding ) THROW_SPEC( Iconv::Ex );
/// Returns a name to be passed to iconv for the given dsl encoding.
static char const * getEncodingNameFor( DslEncoding );
};
/// Opens the .dsl or .dsl.dz file and allows line-by-line reading. Auto-detects
/// the encoding, and reads all headers by itself.
class DslScanner
@ -122,7 +113,6 @@ class DslScanner
gzFile f;
DslEncoding encoding;
QTextCodec* codec;
DslIconv iconv;
wstring dictionaryName;
wstring langFrom, langTo;
wstring soundDictionary;