mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
Added language read support for the StarDict format.
Added smart language extraction from the filename to LangCoder.
This commit is contained in:
parent
99c8c8642e
commit
ec67732e36
|
@ -12,7 +12,7 @@ LangCoder::LangCoder()
|
|||
LangStruct ls;
|
||||
for (int i = 0; true; i++) {
|
||||
const LangCode &lc = LangCodes[i];
|
||||
if (lc.lang.isEmpty())
|
||||
if (lc.lang[0] == 0)
|
||||
break;
|
||||
//ls.order = i;
|
||||
//ls.icon = QIcon(":/flags/" + QString(lc.code) + ".png");
|
||||
|
@ -23,8 +23,8 @@ LangCoder::LangCoder()
|
|||
QString LangCoder::decode(quint32 code)
|
||||
{
|
||||
// temp!
|
||||
if (codeMap.contains(code))
|
||||
return LangCodes[codeMap[code]].lang;
|
||||
if (langCoder.codeMap.contains(code))
|
||||
return LangCodes[langCoder.codeMap[code]].lang;
|
||||
|
||||
return QString();
|
||||
}
|
||||
|
@ -72,6 +72,50 @@ quint32 LangCoder::findIdForLanguage( gd::wstring const & lang )
|
|||
return 0;
|
||||
}
|
||||
|
||||
/// Tries to guess a language id from a free-form string, such as a language
/// name or a short language code.
///
/// The input is whitespace-simplified and lower-cased first. Strings shorter
/// than two characters yield 0. Strings of three or more characters are
/// compared, ignoring case, against the language names in LangCodes; on a
/// match the id of that entry's two-letter code is returned. Otherwise the
/// first two characters are passed to code2toInt() as a two-letter code.
quint32 LangCoder::guessId( const QString & lang )
{
  QString const lstr = lang.simplified().toLower();

  // too small to guess
  if ( lstr.size() < 2 )
    return 0;

  // check if it could be the whole language name
  if ( lstr.size() >= 3 )
  {
    for( LangCode const * lc = LangCodes; lc->code[ 0 ]; ++lc )
    {
      // lstr has been lower-cased, so an exact comparison would never match a
      // table name that contains upper-case letters; compare ignoring case.
      if ( lstr.compare( QString( lc->lang ), Qt::CaseInsensitive ) == 0 )
      {
        // We've got a match
        return code2toInt( lc->code );
      }
    }
  }

  // still not found - try to match by 2-symbol code
  // NOTE(review): the two-letter prefix is not checked against LangCodes, so
  // this presumably produces an id even for letter pairs that are not real
  // language codes - confirm whether callers rely on that.
  return code2toInt( lstr.left( 2 ).toAscii().data() );
}
|
||||
|
||||
/// Extracts a (source, target) language id pair from a dictionary file name.
///
/// Only the file-name component of @p name is examined, after case folding.
/// It is searched for a "<from>-<to>" fragment made of two 2-3 letter tokens
/// that is delimited on both sides by '-', '_' or '.'. Each token is passed to
/// guessId(); the first fragment for which both ids are non-zero is returned.
/// If there is no such fragment, the pair (0, 0) is returned.
QPair<quint32,quint32> LangCoder::findIdsForFilename( QString const & name )
{
  QString const nameFolded = QFileInfo( name ).fileName().toCaseFolded();

  QRegExp reg( "[-_.]([a-z]{2,3})-([a-z]{2,3})[-_.]" );
  reg.setMinimal( true );

  int off = 0;
  int pos;
  while ( ( pos = reg.indexIn( nameFolded, off ) ) >= 0 )
  {
    quint32 const from = guessId( reg.cap( 1 ) );
    quint32 const to = guessId( reg.cap( 2 ) );
    if ( from && to )
      return QPair<quint32,quint32>( from, to );

    // Resume right after the start of the rejected match. Advancing relative
    // to the match position (rather than adding the match length to the old
    // offset) guarantees progress past this match while still letting a
    // later candidate share a delimiter or token with it.
    off = pos + 1;
  }

  return QPair<quint32,quint32>( 0, 0 );
}
|
||||
|
||||
/*
|
||||
LangStruct& LangCoder::CodeToLangStruct(const QString &code)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
struct LangCode
|
||||
{
|
||||
char code[ 3 ]; // ISO 639-1
|
||||
QString lang; // Language name in English
|
||||
char *lang; // Language name in English
|
||||
};
|
||||
|
||||
// Language codes
|
||||
|
@ -230,9 +230,15 @@ public:
|
|||
/// is case- and punctuation insensitive.
|
||||
static quint32 findIdForLanguage( gd::wstring const & );
|
||||
|
||||
//const QMap<quint32, int>& codes() { return codeMap; }
|
||||
|
||||
QString decode(quint32 code);
|
||||
static QPair<quint32,quint32> findIdsForFilename( QString const & );
|
||||
|
||||
static quint32 guessId( const QString & lang );
|
||||
|
||||
/// Returns decoded name of language or empty string if not found.
|
||||
static QString decode(quint32 code);
|
||||
|
||||
//const QMap<quint32, int>& codes() { return codeMap; }
|
||||
|
||||
LangStruct langStruct(quint32 code);
|
||||
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#include "dictzip.h"
|
||||
#include "xdxf2html.hh"
|
||||
#include "htmlescape.hh"
|
||||
#include "langcoder.hh"
|
||||
|
||||
#include <zlib.h>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
@ -69,7 +71,7 @@ struct Ifo
|
|||
enum
|
||||
{
|
||||
Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
|
||||
CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
|
||||
};
|
||||
|
||||
struct IdxHeader
|
||||
|
@ -83,6 +85,8 @@ struct IdxHeader
|
|||
uint32_t synWordCount; // Saved from Ifo::synwordcount
|
||||
uint32_t bookNameSize; // Book name's length. Used to read it then.
|
||||
uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
|
||||
uint32_t langFrom; // Source language
|
||||
uint32_t langTo; // Target language
|
||||
} __attribute__((packed));
|
||||
|
||||
bool indexIsOldOrBad( string const & indexFile )
|
||||
|
@ -130,6 +134,12 @@ public:
|
|||
virtual QIcon getIcon() throw()
|
||||
{ return QIcon(":/icons/icon32_stardict.png"); }
|
||||
|
||||
/// Returns the source-language id stored in this dictionary's index header
/// (idxHeader.langFrom).
inline virtual quint32 getLangFrom() const
|
||||
{ return idxHeader.langFrom; }
|
||||
|
||||
/// Returns the target-language id stored in this dictionary's index header
/// (idxHeader.langTo).
inline virtual quint32 getLangTo() const
|
||||
{ return idxHeader.langTo; }
|
||||
|
||||
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
|
||||
throw( std::exception );
|
||||
|
||||
|
@ -1068,6 +1078,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
|
|||
idxHeader.bookNameSize = ifo.bookname.size();
|
||||
idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
|
||||
|
||||
QPair<quint32,quint32> langs =
|
||||
LangCoder::findIdsForFilename( QString::fromStdString( dictFileName ) );
|
||||
idxHeader.langFrom = langs.first;
|
||||
idxHeader.langTo = langs.second;
|
||||
|
||||
idx.rewind();
|
||||
|
||||
idx.write( &idxHeader, sizeof( idxHeader ) );
|
||||
|
|
Loading…
Reference in a new issue