mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-30 13:24:05 +00:00
Added language read support for the StarDict format.
Added smart language extraction from the file name to LangCoder.
This commit is contained in:
parent
99c8c8642e
commit
ec67732e36
|
@ -12,7 +12,7 @@ LangCoder::LangCoder()
|
||||||
LangStruct ls;
|
LangStruct ls;
|
||||||
for (int i = 0; true; i++) {
|
for (int i = 0; true; i++) {
|
||||||
const LangCode &lc = LangCodes[i];
|
const LangCode &lc = LangCodes[i];
|
||||||
if (lc.lang.isEmpty())
|
if (lc.lang[0] == 0)
|
||||||
break;
|
break;
|
||||||
//ls.order = i;
|
//ls.order = i;
|
||||||
//ls.icon = QIcon(":/flags/" + QString(lc.code) + ".png");
|
//ls.icon = QIcon(":/flags/" + QString(lc.code) + ".png");
|
||||||
|
@ -23,8 +23,8 @@ LangCoder::LangCoder()
|
||||||
/// Returns the English name of the language identified by the given id,
/// or an empty string if the id is not present in the code map.
QString LangCoder::decode(quint32 code)
{
  // temp!
  // Goes through the global langCoder instance, as decode() is static.
  return langCoder.codeMap.contains( code )
           ? QString( LangCodes[ langCoder.codeMap[ code ] ].lang )
           : QString();
}
|
||||||
|
@ -72,6 +72,50 @@ quint32 LangCoder::findIdForLanguage( gd::wstring const & lang )
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Makes a best-effort guess of the language id denoted by a free-form string.
///
/// The input is whitespace-simplified and lower-cased first. Strings of three
/// or more characters are compared against the English language names listed
/// in LangCodes. If no name matches (or the string is only two characters
/// long), its first two characters are looked up as a two-letter code in the
/// same table.
///
/// @param lang  Language name or code, e.g. a fragment of a file name.
/// @return      code2toInt() of the matching table entry's code, or 0 when the
///              string is shorter than two characters or matches no entry.
quint32 LangCoder::guessId( const QString & lang )
{
  QString const lstr = lang.simplified().toLower();

  // too small to guess
  if ( lstr.size() < 2 )
    return 0;

  // check if it could be the whole language name
  if ( lstr.size() >= 3 )
  {
    for( LangCode const * lc = LangCodes; lc->code[ 0 ]; ++lc )
    {
      // lstr was lower-cased above, so the comparison has to ignore case:
      // a table name containing upper-case letters could never match
      // otherwise. (The table contents are not visible from here.)
      if ( lstr.compare( QString( lc->lang ), Qt::CaseInsensitive ) == 0 )
      {
        // We've got a match
        return code2toInt( lc->code );
      }
    }
  }

  // still not found - try to match by 2-symbol code. Only codes that are
  // actually listed in the table are accepted, so that arbitrary two-letter
  // prefixes yield 0 ("unknown") instead of a bogus id.
  QString const prefix = lstr.left( 2 );

  for( LangCode const * lc = LangCodes; lc->code[ 0 ]; ++lc )
  {
    if ( prefix == QLatin1String( lc->code ) )
      return code2toInt( lc->code );
  }

  return 0;
}
|
||||||
|
|
||||||
|
/// Tries to extract a "from-to" language pair from a dictionary file name.
///
/// Only the file-name component of the path is examined, case-folded. The
/// function looks for two 2- or 3-letter tokens joined by '-' and enclosed by
/// '-', '_' or '.' on both sides (e.g. "dict_en-ru.ifo"), and passes each
/// token to guessId().
///
/// @param name  File name or full path of the dictionary.
/// @return      (from, to) ids of the first candidate for which guessId()
///              returns non-zero for both tokens; (0, 0) if there is none.
QPair<quint32,quint32> LangCoder::findIdsForFilename( QString const & name )
{
  QString const nameFolded = QFileInfo( name ).fileName().toCaseFolded();

  QRegExp reg( "[-_.]([a-z]{2,3})-([a-z]{2,3})[-_.]" );
  reg.setMinimal( true );

  int off = 0;

  for( ; ; )
  {
    int const pos = reg.indexIn( nameFolded, off );

    if ( pos < 0 )
      break;

    quint32 const from = guessId( reg.cap( 1 ) );
    quint32 const to = guessId( reg.cap( 2 ) );

    if ( from && to )
      return QPair<quint32,quint32>( from, to );

    // Continue relative to where this match was found, not relative to the
    // previous offset, so the same match is not scanned again. Resume ON the
    // trailing delimiter (hence the -1) so that it can also serve as the
    // leading delimiter of an immediately following candidate. A match is at
    // least 7 characters long, so the offset always advances.
    off = pos + reg.matchedLength() - 1;
  }

  return QPair<quint32,quint32>( 0, 0 );
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
LangStruct& LangCoder::CodeToLangStruct(const QString &code)
|
LangStruct& LangCoder::CodeToLangStruct(const QString &code)
|
||||||
{
|
{
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
/// One entry of the language table: a language code plus its English name.
/// Code in this commit walks the table until it reaches an entry whose code
/// (or name) starts with a NUL character.
struct LangCode
{
  char code[ 3 ]; // ISO 639-1
  // Pointer-to-const: the name is only ever read, and a pointer to non-const
  // cannot legally be initialised from a string literal in standard C++.
  const char * lang; // Language name in English
};
|
||||||
|
|
||||||
// Language codes
|
// Language codes
|
||||||
|
@ -230,9 +230,15 @@ public:
|
||||||
/// is case- and punctuation insensitive.
|
/// is case- and punctuation insensitive.
|
||||||
static quint32 findIdForLanguage( gd::wstring const & );
|
static quint32 findIdForLanguage( gd::wstring const & );
|
||||||
|
|
||||||
//const QMap<quint32, int>& codes() { return codeMap; }
|
|
||||||
|
|
||||||
QString decode(quint32 code);
|
static QPair<quint32,quint32> findIdsForFilename( QString const & );
|
||||||
|
|
||||||
|
static quint32 guessId( const QString & lang );
|
||||||
|
|
||||||
|
/// Returns decoded name of language or empty string if not found.
|
||||||
|
static QString decode(quint32 code);
|
||||||
|
|
||||||
|
//const QMap<quint32, int>& codes() { return codeMap; }
|
||||||
|
|
||||||
LangStruct langStruct(quint32 code);
|
LangStruct langStruct(quint32 code);
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
#include "dictzip.h"
|
#include "dictzip.h"
|
||||||
#include "xdxf2html.hh"
|
#include "xdxf2html.hh"
|
||||||
#include "htmlescape.hh"
|
#include "htmlescape.hh"
|
||||||
|
#include "langcoder.hh"
|
||||||
|
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -69,7 +71,7 @@ struct Ifo
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
|
Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
|
||||||
CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
|
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
|
||||||
};
|
};
|
||||||
|
|
||||||
struct IdxHeader
|
struct IdxHeader
|
||||||
|
@ -83,6 +85,8 @@ struct IdxHeader
|
||||||
uint32_t synWordCount; // Saved from Ifo::synwordcount
|
uint32_t synWordCount; // Saved from Ifo::synwordcount
|
||||||
uint32_t bookNameSize; // Book name's length. Used to read it then.
|
uint32_t bookNameSize; // Book name's length. Used to read it then.
|
||||||
uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
|
uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
|
||||||
|
uint32_t langFrom; // Source language
|
||||||
|
uint32_t langTo; // Target language
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
bool indexIsOldOrBad( string const & indexFile )
|
bool indexIsOldOrBad( string const & indexFile )
|
||||||
|
@ -130,6 +134,12 @@ public:
|
||||||
virtual QIcon getIcon() throw()
|
virtual QIcon getIcon() throw()
|
||||||
{ return QIcon(":/icons/icon32_stardict.png"); }
|
{ return QIcon(":/icons/icon32_stardict.png"); }
|
||||||
|
|
||||||
|
/// Returns the source language id kept in the index header.
virtual quint32 getLangFrom() const
{
  return idxHeader.langFrom;
}
|
||||||
|
|
||||||
|
/// Returns the target language id kept in the index header.
virtual quint32 getLangTo() const
{
  return idxHeader.langTo;
}
|
||||||
|
|
||||||
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
|
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
|
||||||
throw( std::exception );
|
throw( std::exception );
|
||||||
|
|
||||||
|
@ -1068,6 +1078,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
||||||
idxHeader.bookNameSize = ifo.bookname.size();
|
idxHeader.bookNameSize = ifo.bookname.size();
|
||||||
idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
|
idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
|
||||||
|
|
||||||
|
QPair<quint32,quint32> langs =
|
||||||
|
LangCoder::findIdsForFilename( QString::fromStdString( dictFileName ) );
|
||||||
|
idxHeader.langFrom = langs.first;
|
||||||
|
idxHeader.langTo = langs.second;
|
||||||
|
|
||||||
idx.rewind();
|
idx.rewind();
|
||||||
|
|
||||||
idx.write( &idxHeader, sizeof( idxHeader ) );
|
idx.write( &idxHeader, sizeof( idxHeader ) );
|
||||||
|
|
Loading…
Reference in a new issue