added language read support for stardict format

added smart language extraction from filename to LangCoder
This commit is contained in:
ars_goldendict 2009-04-23 19:57:39 +00:00
parent 99c8c8642e
commit ec67732e36
3 changed files with 72 additions and 7 deletions

View file

@ -12,7 +12,7 @@ LangCoder::LangCoder()
LangStruct ls; LangStruct ls;
for (int i = 0; true; i++) { for (int i = 0; true; i++) {
const LangCode &lc = LangCodes[i]; const LangCode &lc = LangCodes[i];
if (lc.lang.isEmpty()) if (lc.lang[0] == 0)
break; break;
//ls.order = i; //ls.order = i;
//ls.icon = QIcon(":/flags/" + QString(lc.code) + ".png"); //ls.icon = QIcon(":/flags/" + QString(lc.code) + ".png");
@ -23,8 +23,8 @@ LangCoder::LangCoder()
/// Returns the English name of the language identified by @p code, or an
/// empty string when @p code is not present in the code map.
QString LangCoder::decode(quint32 code)
{
  // temp!
  // Unknown code: nothing to decode.
  if (!langCoder.codeMap.contains(code))
    return QString();

  // The map yields the entry's index in the LangCodes table.
  return LangCodes[langCoder.codeMap[code]].lang;
}
@ -72,6 +72,50 @@ quint32 LangCoder::findIdForLanguage( gd::wstring const & lang )
return 0; return 0;
} }
/// Tries to derive a language id from a free-form fragment.
///
/// The fragment is whitespace-simplified and lower-cased first. Fragments of
/// three or more characters are compared against the language names in
/// LangCodes; if none matches (or the fragment has exactly two characters),
/// the first two characters are treated as a two-letter code.
///
/// @param lang  Fragment to interpret, e.g. a two-letter code or a name.
/// @return The id produced by code2toInt(), or 0 when the fragment is shorter
///         than two characters.
quint32 LangCoder::guessId( const QString & lang )
{
  QString lstr = lang.simplified().toLower();

  // too small to guess
  if ( lstr.size() < 2 )
    return 0;

  // check if it could be the whole language name
  if ( lstr.size() >= 3 )
  {
    for( LangCode const * lc = LangCodes; lc->code[ 0 ]; ++lc )
    {
      // lstr was lower-cased above, so the comparison must ignore case;
      // otherwise a table name containing capitals could never match.
      if ( lstr.compare( QString( lc->lang ), Qt::CaseInsensitive ) == 0 )
      {
        // We've got a match
        return code2toInt( lc->code );
      }
    }
  }

  // still not found - try to match by 2-symbol code
  // NOTE(review): the two characters are not checked against LangCodes here;
  // confirm whether code2toInt() rejects unknown codes.
  return code2toInt( lstr.left( 2 ).toAscii().data() );
}
/// Extracts a source/target language pair from a dictionary file name.
///
/// Only the file-name component of @p name is examined, after case folding.
/// It is scanned for "xx-yy" fragments (two or three letters on each side)
/// that are delimited on both ends by '-', '_' or '.'. Each half is passed to
/// guessId(); the first fragment for which both halves yield a non-zero id
/// wins.
///
/// @param name  File name or path of the dictionary.
/// @return (from, to) language ids, or (0, 0) when no usable pair is found.
QPair<quint32,quint32> LangCoder::findIdsForFilename( QString const & name )
{
  QString nameFolded = QFileInfo( name ).fileName().toCaseFolded();

  QRegExp reg( "[-_.]([a-z]{2,3})-([a-z]{2,3})[-_.]" );
  reg.setMinimal( true );

  int off = 0;
  int pos;
  while ( ( pos = reg.indexIn( nameFolded, off ) ) >= 0 )
  {
    quint32 from = guessId( reg.cap( 1 ) );
    quint32 to = guessId( reg.cap( 2 ) );
    if ( from && to )
      return QPair<quint32,quint32>( from, to );

    // Resume relative to where the match was found, not to the previous
    // offset; otherwise the same rejected candidate is matched again.
    // Stepping back one character lets the trailing delimiter also serve as
    // the leading delimiter of the next candidate.
    off = pos + reg.matchedLength() - 1;
  }

  return QPair<quint32,quint32>( 0, 0 );
}
/* /*
LangStruct& LangCoder::CodeToLangStruct(const QString &code) LangStruct& LangCoder::CodeToLangStruct(const QString &code)
{ {

View file

@ -7,7 +7,7 @@
/// One entry of the language table: a two-letter code and a readable name.
struct LangCode
{
  char code[ 3 ]; // ISO 639-1
  // Pointer-to-const so entries can be initialised from string literals
  // without a deprecated (ill-formed since C++11) literal-to-char* conversion.
  const char *lang; // Language name in English
};
// Language codes // Language codes
@ -230,9 +230,15 @@ public:
/// is case- and punctuation insensitive. /// is case- and punctuation insensitive.
static quint32 findIdForLanguage( gd::wstring const & ); static quint32 findIdForLanguage( gd::wstring const & );
//const QMap<quint32, int>& codes() { return codeMap; }
QString decode(quint32 code); static QPair<quint32,quint32> findIdsForFilename( QString const & );
static quint32 guessId( const QString & lang );
/// Returns decoded name of language or empty string if not found.
static QString decode(quint32 code);
//const QMap<quint32, int>& codes() { return codeMap; }
LangStruct langStruct(quint32 code); LangStruct langStruct(quint32 code);

View file

@ -9,6 +9,8 @@
#include "dictzip.h" #include "dictzip.h"
#include "xdxf2html.hh" #include "xdxf2html.hh"
#include "htmlescape.hh" #include "htmlescape.hh"
#include "langcoder.hh"
#include <zlib.h> #include <zlib.h>
#include <map> #include <map>
#include <set> #include <set>
@ -69,7 +71,7 @@ struct Ifo
enum enum
{ {
Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version CurrentFormatVersion = 6 + BtreeIndexing::FormatVersion + Folding::Version
}; };
struct IdxHeader struct IdxHeader
@ -83,6 +85,8 @@ struct IdxHeader
uint32_t synWordCount; // Saved from Ifo::synwordcount uint32_t synWordCount; // Saved from Ifo::synwordcount
uint32_t bookNameSize; // Book name's length. Used to read it then. uint32_t bookNameSize; // Book name's length. Used to read it then.
uint32_t sameTypeSequenceSize; // That string's size. Used to read it then. uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
uint32_t langFrom; // Source language
uint32_t langTo; // Target language
} __attribute__((packed)); } __attribute__((packed));
bool indexIsOldOrBad( string const & indexFile ) bool indexIsOldOrBad( string const & indexFile )
@ -130,6 +134,12 @@ public:
virtual QIcon getIcon() throw() virtual QIcon getIcon() throw()
{ return QIcon(":/icons/icon32_stardict.png"); } { return QIcon(":/icons/icon32_stardict.png"); }
/// Returns the source language id held in the index header (langFrom).
inline virtual quint32 getLangFrom() const
{ return idxHeader.langFrom; }
/// Returns the target language id held in the index header (langTo).
inline virtual quint32 getLangTo() const
{ return idxHeader.langTo; }
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
throw( std::exception ); throw( std::exception );
@ -1068,6 +1078,11 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
idxHeader.bookNameSize = ifo.bookname.size(); idxHeader.bookNameSize = ifo.bookname.size();
idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size(); idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
QPair<quint32,quint32> langs =
LangCoder::findIdsForFilename( QString::fromStdString( dictFileName ) );
idxHeader.langFrom = langs.first;
idxHeader.langTo = langs.second;
idx.rewind(); idx.rewind();
idx.write( &idxHeader, sizeof( idxHeader ) ); idx.write( &idxHeader, sizeof( idxHeader ) );