mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
+ Ignore abbreviation files based on the _abrv suffix in their names.
*! Properly identify source and target languages.
This commit is contained in:
parent
c08805f728
commit
bde25bb8a3
21
src/dsl.cc
21
src/dsl.cc
|
@ -63,7 +63,7 @@ DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
|
|||
enum
|
||||
{
|
||||
Signature = 0x584c5344, // DSLX on little-endian, XLSD on big-endian
|
||||
CurrentFormatVersion = 8 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
CurrentFormatVersion = 9 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
};
|
||||
|
||||
struct IdxHeader
|
||||
|
@ -1147,12 +1147,23 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
{
|
||||
// Try .dsl and .dsl.dz suffixes
|
||||
|
||||
if ( ( i->size() < 4 ||
|
||||
strcasecmp( i->c_str() + ( i->size() - 4 ), ".dsl" ) != 0 ) &&
|
||||
bool uncompressedDsl = ( i->size() >= 4 &&
|
||||
strcasecmp( i->c_str() + ( i->size() - 4 ), ".dsl" ) == 0 );
|
||||
if ( !uncompressedDsl &&
|
||||
( i->size() < 7 ||
|
||||
strcasecmp( i->c_str() + ( i->size() - 7 ), ".dsl.dz" ) != 0 ) )
|
||||
continue;
|
||||
|
||||
// Make sure it's not an abbreviation file
|
||||
|
||||
int extSize = ( uncompressedDsl ? 4 : 7 );
|
||||
if ( i->size() - extSize >= 5 &&
|
||||
strncasecmp( i->c_str() + i->size() - extSize - 5, "_abrv", 5 ) == 0 )
|
||||
{
|
||||
// It is, skip it
|
||||
continue;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
vector< string > dictFiles( 1, *i );
|
||||
|
@ -1403,8 +1414,8 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
idxHeader.articleCount = articleCount;
|
||||
idxHeader.wordCount = wordCount;
|
||||
|
||||
idxHeader.langFrom = LangCoder::code3toInt( scanner.getLangFrom() );
|
||||
idxHeader.langTo = LangCoder::code3toInt( scanner.getLangTo() );
|
||||
idxHeader.langFrom = dslLanguageToId( scanner.getLangFrom() );
|
||||
idxHeader.langTo = dslLanguageToId( scanner.getLangTo() );
|
||||
|
||||
idx.rewind();
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include "dsl_details.hh"
|
||||
#include "folding.hh"
|
||||
#include "utf8.hh"
|
||||
#include "langcoder.hh"
|
||||
#include <wctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
|
@ -440,9 +440,9 @@ DslScanner::DslScanner( string const & fileName ) throw( Ex, Iconv::Ex ):
|
|||
if ( isName )
|
||||
dictionaryName = arg;
|
||||
else if ( isLangFrom )
|
||||
langFrom = Utf8::encode(arg);
|
||||
langFrom = arg;
|
||||
else if ( isLangTo )
|
||||
langTo = Utf8::encode(arg);
|
||||
langTo = arg;
|
||||
else
|
||||
{
|
||||
// The encoding
|
||||
|
@ -802,5 +802,37 @@ void unescapeDsl( wstring & str )
|
|||
str.erase( x, 1 ); // ++x would skip the next char without processing it
|
||||
}
|
||||
|
||||
namespace
{

/// Strips @p ending from the tail of @p where, modifying @p where in place.
///
/// The cut is made only when @p where is strictly longer than @p ending and
/// its last ending.size() characters compare equal to @p ending. A string
/// that consists of the ending alone is therefore left untouched, so this
/// function never reduces a non-empty string to an empty one.
///
/// @param where   String to trim; unchanged when the ending does not match.
/// @param ending  Suffix to look for.
void cutEnding( wstring & where, wstring const & ending )
{
  if ( where.size() > ending.size() &&
       where.compare( where.size() - ending.size(),
                      ending.size(), ending ) == 0 )
    where.erase( where.size() - ending.size() );
}

}
|
||||
|
||||
/// Maps a language name taken from a Dsl header to a language id.
///
/// The name is first passed through Folding::apply(), then each known
/// qualifier ending is cut from its tail via cutEnding(), in the order the
/// endings are listed below. Whatever remains is handed to
/// LangCoder::findIdForLanguage(), whose result is returned as is.
///
/// @param name  Language name as read from the Dsl header.
/// @return The id LangCoder::findIdForLanguage() gives for the stripped name.
quint32 dslLanguageToId( wstring const & name )
{
  // Any of those endings are to be removed. They are spelled in lower case
  // and without spaces -- presumably the form Folding::apply() produces;
  // NOTE(review): confirm against Folding.
  static wstring const endings[] =
  {
    GD_NATIVE_TO_WS( L"newspelling" ),
    GD_NATIVE_TO_WS( L"standard" ),
    GD_NATIVE_TO_WS( L"modernsort" ),
    GD_NATIVE_TO_WS( L"traditionalsort" ),
    GD_NATIVE_TO_WS( L"prc" )
  };

  wstring nameStripped = Folding::apply( name );

  // The endings are tried one after another, in table order, so several
  // stacked qualifiers can be removed in a single pass.
  for ( size_t x = 0; x < sizeof( endings ) / sizeof( *endings ); ++x )
    cutEnding( nameStripped, endings[ x ] );

  return LangCoder::findIdForLanguage( nameStripped );
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,7 +97,7 @@ class DslScanner
|
|||
DslEncoding encoding;
|
||||
DslIconv iconv;
|
||||
wstring dictionaryName;
|
||||
string langFrom, langTo;
|
||||
wstring langFrom, langTo;
|
||||
char readBuffer[ 65536 ];
|
||||
char * readBufferPtr;
|
||||
size_t readBufferLeft;
|
||||
|
@ -124,11 +124,11 @@ public:
|
|||
{ return dictionaryName; }
|
||||
|
||||
/// Returns the dictionary's source language, as was read from file's headers.
|
||||
string const & getLangFrom() const
|
||||
wstring const & getLangFrom() const
|
||||
{ return langFrom; }
|
||||
|
||||
/// Returns the dictionary's target language, as was read from file's headers.
|
||||
string const & getLangTo() const
|
||||
wstring const & getLangTo() const
|
||||
{ return langTo; }
|
||||
|
||||
/// Reads next line from the file. Returns true if reading succeeded --
|
||||
|
@ -175,6 +175,10 @@ inline size_t DslScanner::distanceToBytes( size_t x ) const
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts the given language name taken from Dsl header (i.e. getLangFrom(),
|
||||
/// getLangTo()) to its proper language id.
|
||||
quint32 dslLanguageToId( wstring const & name );
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue