+ Support for transliterations added. For now basic Russian translit and

Hepburn Romaji are supported.
This commit is contained in:
Konstantin Isakov 2009-05-06 14:39:08 +00:00
parent 4e0b2626fb
commit 197ccf35d4
19 changed files with 853 additions and 77 deletions

View file

@ -98,6 +98,16 @@ Preferences::Preferences():
{
}
/// Default romaji settings: the feature as a whole starts switched off, with
/// the Hepburn system and both syllabaries preselected for when it gets
/// turned on.
Romaji::Romaji()
  : enable( false )
  , enableHepburn( true )
  , enableNihonShiki( false )
  , enableKunreiShiki( false )
  , enableHiragana( true )
  , enableKatakana( true )
{}
namespace {
MediaWikis makeDefaultMediaWikis( bool enable )
@ -317,6 +327,26 @@ Class load() throw( exError )
c.hunspell.enabledDictionaries.push_back( nl.item( x ).toElement().text() );
}
// Transliteration settings live under <transliteration>; the romaji flags are
// grouped one level deeper under <romaji>. Either level is only read when its
// node is present in the document.
QDomNode transliteration = root.namedItem( "transliteration" );

if ( !transliteration.isNull() )
{
  applyBoolOption( c.transliteration.enableRussianTransliteration,
                   transliteration.namedItem( "enableRussianTransliteration" ) );

  QDomNode romajiNode = transliteration.namedItem( "romaji" );

  if ( !romajiNode.isNull() )
  {
    // Every child element is named after the flag it carries
    struct { char const * tag; bool * flag; } const romajiOptions[] =
    {
      { "enable",            &c.transliteration.romaji.enable },
      { "enableHepburn",     &c.transliteration.romaji.enableHepburn },
      { "enableNihonShiki",  &c.transliteration.romaji.enableNihonShiki },
      { "enableKunreiShiki", &c.transliteration.romaji.enableKunreiShiki },
      { "enableHiragana",    &c.transliteration.romaji.enableHiragana },
      { "enableKatakana",    &c.transliteration.romaji.enableKatakana }
    };

    for( unsigned x = 0; x < sizeof( romajiOptions ) / sizeof( romajiOptions[ 0 ] ); ++x )
      applyBoolOption( *romajiOptions[ x ].flag,
                       romajiNode.namedItem( romajiOptions[ x ].tag ) );
  }
}
QDomNode mws = root.namedItem( "mediawikis" );
if ( !mws.isNull() )
@ -549,6 +579,42 @@ void save( Class const & c ) throw( exError )
}
}
{
  // Writes the <transliteration> section: one child element per flag, with
  // "1" or "0" as its text. The romaji flags go into a nested <romaji> element.
  QDomElement transliteration = dd.createElement( "transliteration" );
  root.appendChild( transliteration );

  QDomElement russian = dd.createElement( "enableRussianTransliteration" );
  russian.appendChild( dd.createTextNode( c.transliteration.enableRussianTransliteration ? "1":"0" ) );
  transliteration.appendChild( russian );

  QDomElement romaji = dd.createElement( "romaji" );
  transliteration.appendChild( romaji );

  // Element name -> value, in the order the elements are written out
  struct { char const * tag; bool value; } const romajiOptions[] =
  {
    { "enable",            c.transliteration.romaji.enable },
    { "enableHepburn",     c.transliteration.romaji.enableHepburn },
    { "enableNihonShiki",  c.transliteration.romaji.enableNihonShiki },
    { "enableKunreiShiki", c.transliteration.romaji.enableKunreiShiki },
    { "enableHiragana",    c.transliteration.romaji.enableHiragana },
    { "enableKatakana",    c.transliteration.romaji.enableKatakana }
  };

  for( unsigned x = 0; x < sizeof( romajiOptions ) / sizeof( romajiOptions[ 0 ] ); ++x )
  {
    QDomElement opt = dd.createElement( romajiOptions[ x ].tag );
    opt.appendChild( dd.createTextNode( romajiOptions[ x ].value ? "1":"0" ) );
    romaji.appendChild( opt );
  }
}
{
QDomElement mws = dd.createElement( "mediawikis" );
root.appendChild( mws );

View file

@ -199,6 +199,48 @@ struct Hunspell
/// All the MediaWikis
typedef vector< MediaWiki > MediaWikis;
/// Settings for Japanese romaji transliteration: a master switch, one flag
/// per romanization system and one flag per target syllabary.
struct Romaji
{
  bool enable;            // Master switch for the whole feature

  // Romanization systems
  bool enableHepburn;
  bool enableNihonShiki;
  bool enableKunreiShiki;

  // Target syllabaries
  bool enableHiragana;
  bool enableKatakana;

  Romaji();

  /// Two configurations are equal when every flag matches.
  bool operator == ( Romaji const & other ) const
  {
    if ( enable != other.enable )
      return false;

    if ( enableHepburn != other.enableHepburn ||
         enableNihonShiki != other.enableNihonShiki ||
         enableKunreiShiki != other.enableKunreiShiki )
      return false;

    if ( enableHiragana != other.enableHiragana ||
         enableKatakana != other.enableKatakana )
      return false;

    return true;
  }

  bool operator != ( Romaji const & other ) const
  {
    return !( *this == other );
  }
};
/// All transliteration settings: the Russian transliteration switch plus the
/// romaji configuration.
struct Transliteration
{
  bool enableRussianTransliteration;
  Romaji romaji;

  /// Russian transliteration starts disabled; romaji takes its own defaults.
  Transliteration()
    : enableRussianTransliteration( false )
  {
  }

  bool operator == ( Transliteration const & other ) const
  {
    if ( enableRussianTransliteration != other.enableRussianTransliteration )
      return false;

    return romaji == other.romaji;
  }

  bool operator != ( Transliteration const & other ) const
  {
    return !( *this == other );
  }
};
struct Class
{
Paths paths;
@ -207,6 +249,7 @@ struct Class
Preferences preferences;
MediaWikis mediawikis;
Hunspell hunspell;
Transliteration transliteration;
unsigned lastMainGroupId; // Last used group in main window
unsigned lastPopupGroupId; // Last used group in popup window

View file

@ -125,6 +125,12 @@ sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & )
return new WordSearchRequestInstant();
}
/// Default implementation: contributes no alternate writings for any word.
vector< wstring > Class::getAlternateWritings( wstring const & )
  throw()
{
  vector< wstring > none;

  return none;
}
sptr< DataRequest > Class::getResource( string const & /*name*/ )
throw( std::exception )
{

View file

@ -298,6 +298,14 @@ public:
virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & )
throw( std::exception );
/// For a given word, provides alternate writings of it which are to be looked
/// up alongside it. Transliteration dictionaries implement this. The
/// default implementation returns an empty list. Note that this function is
/// supposed to be very fast and simple, and the results are thus returned
/// synchronously.
virtual vector< wstring > getAlternateWritings( wstring const & )
throw();
/// Returns a definition for the given word. The definition should
/// be an html fragment (without html/head/body tags) in an utf8 encoding.
/// The 'alts' vector could contain a list of words the definitions of which

View file

@ -13,7 +13,8 @@ EditDictionaries::EditDictionaries( QWidget * parent, Config::Class & cfg_,
QDialog( parent ), cfg( cfg_ ), dictionaries( dictionaries_ ),
dictNetMgr( dictNetMgr_ ),
origCfg( cfg ),
sources( this, cfg.paths, cfg.soundDirs, cfg.hunspell, cfg.mediawikis ),
sources( this, cfg.paths, cfg.soundDirs, cfg.hunspell, cfg.transliteration,
cfg.mediawikis ),
groups( new Groups( this, dictionaries, cfg.groups ) ),
dictionariesChanged( false ),
groupsChanged( false ),
@ -106,6 +107,7 @@ bool EditDictionaries::isSourcesChanged() const
return sources.getPaths() != cfg.paths ||
sources.getSoundDirs() != cfg.soundDirs ||
sources.getHunspell() != cfg.hunspell ||
sources.getTransliteration() != cfg.transliteration ||
sources.getMediaWikis() != cfg.mediawikis;
}
@ -116,6 +118,7 @@ void EditDictionaries::acceptChangedSources()
cfg.paths = sources.getPaths();
cfg.soundDirs = sources.getSoundDirs();
cfg.hunspell = sources.getHunspell();
cfg.transliteration = sources.getTransliteration();
cfg.mediawikis = sources.getMediaWikis();
loadDictionaries( this, true, cfg, dictionaries, dictNetMgr );

View file

@ -117,7 +117,10 @@ HEADERS += folding.hh \
hotkeyedit.hh \
langcoder.hh \
editdictionaries.hh \
loaddictionaries.hh
loaddictionaries.hh \
transliteration.hh \
romaji.hh \
russiantranslit.hh
FORMS += groups.ui \
dictgroupwidget.ui \
@ -180,7 +183,10 @@ SOURCES += folding.cc \
hotkeyedit.cc \
langcoder.cc \
editdictionaries.cc \
loaddictionaries.cc
loaddictionaries.cc \
transliteration.cc \
romaji.cc \
russiantranslit.cc
win32 {
SOURCES += mouseover_win32/ThTypes.c

View file

@ -11,6 +11,8 @@
#include "sounddir.hh"
#include "hunspell.hh"
#include "dictdfiles.hh"
#include "romaji.hh"
#include "russiantranslit.hh"
#include <QMessageBox>
#include <QDir>
@ -23,7 +25,8 @@ using std::string;
using std::vector;
LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
paths( cfg.paths ), soundDirs( cfg.soundDirs ), hunspell( cfg.hunspell )
paths( cfg.paths ), soundDirs( cfg.soundDirs ), hunspell( cfg.hunspell ),
transliteration( cfg.transliteration )
{
}
@ -51,7 +54,19 @@ void LoadDictionaries::run()
dictionaries.insert( dictionaries.end(), hunspellDictionaries.begin(),
hunspellDictionaries.end() );
}
// Make romaji
{
vector< sptr< Dictionary::Class > > romajiDictionaries =
Romaji::makeDictionaries( transliteration.romaji );
dictionaries.insert( dictionaries.end(), romajiDictionaries.begin(),
romajiDictionaries.end() );
}
// Make Russian transliteration
if ( transliteration.enableRussianTransliteration )
dictionaries.push_back( RussianTranslit::makeDictionary() );
}
catch( std::exception & e )
{

View file

@ -19,6 +19,7 @@ class LoadDictionaries: public QThread, public Dictionary::Initializing
Config::Paths const & paths;
Config::SoundDirs const & soundDirs;
Config::Hunspell const & hunspell;
Config::Transliteration const & transliteration;
std::vector< sptr< Dictionary::Class > > dictionaries;
std::string exceptionText;

106
src/romaji.cc Normal file
View file

@ -0,0 +1,106 @@
#include "romaji.hh"
#include <QCoreApplication>
namespace Romaji {
/// Conversion table from Hepburn romaji spellings to hiragana.
class HepburnHiragana: public Transliteration::Table
{
public:
  HepburnHiragana();
};

/// Registers the Hepburn spelling of every hiragana syllable and of the
/// two-kana combinations (kya, sha, cha, ...). Every entry must map to a
/// non-empty kana string: an empty target would make the matching romaji
/// syllable vanish from the converted word.
HepburnHiragana::HepburnHiragana()
{
  // Raw UTF8 -- handle with care. We'd better remap those to \xAB hex encoding
  ins( "a", "あ" ); ins( "i", "い" ); ins( "u", "う" ); ins( "e", "え" ); ins( "o", "お" );
  ins( "ka", "か" ); ins( "ki", "き" ); ins( "ku", "く" ); ins( "ke", "け" ); ins( "ko", "こ" ); ins( "kya", "きゃ" ); ins( "kyu", "きゅ" ); ins( "kyo", "きょ" );
  ins( "sa", "さ" ); ins( "shi", "し" ); ins( "su", "す" ); ins( "se", "せ" ); ins( "so", "そ" ); ins( "sha", "しゃ" ); ins( "shu", "しゅ" ); ins( "sho", "しょ" );
  ins( "ta", "た" ); ins( "chi", "ち" ); ins( "tsu", "つ" ); ins( "te", "て" ); ins( "to", "と" ); ins( "cha", "ちゃ" ); ins( "chu", "ちゅ" ); ins( "cho", "ちょ" );
  ins( "na", "な" ); ins( "ni", "に" ); ins( "nu", "ぬ" ); ins( "ne", "ね" ); ins( "no", "の" ); ins( "nya", "にゃ" ); ins( "nyu", "にゅ" ); ins( "nyo", "にょ" );
  ins( "ha", "は" ); ins( "hi", "ひ" ); ins( "fu", "ふ" ); ins( "he", "へ" ); ins( "ho", "ほ" ); ins( "hya", "ひゃ" ); ins( "hyu", "ひゅ" ); ins( "hyo", "ひょ" );
  ins( "ma", "ま" ); ins( "mi", "み" ); ins( "mu", "む" ); ins( "me", "め" ); ins( "mo", "も" ); ins( "mya", "みゃ" ); ins( "myu", "みゅ" ); ins( "myo", "みょ" );
  ins( "ya", "や" ); ins( "yu", "ゆ" ); ins( "yo", "よ" );
  ins( "ra", "ら" ); ins( "ri", "り" ); ins( "ru", "る" ); ins( "re", "れ" ); ins( "ro", "ろ" ); ins( "rya", "りゃ" ); ins( "ryu", "りゅ" ); ins( "ryo", "りょ" );
  ins( "wa", "わ" ); /*ゐ wi† ゑ we† を wo‡ */
  ins( "n", "ん" );
  ins( "ga", "が" ); ins( "gi", "ぎ" ); ins( "gu", "ぐ" ); ins( "ge", "げ" ); ins( "go", "ご" ); ins( "gya", "ぎゃ" ); ins( "gyu", "ぎゅ" ); ins( "gyo", "ぎょ" );
  ins( "za", "ざ" ); ins( "ji", "じ" ); ins( "zu", "ず" ); ins( "ze", "ぜ" ); ins( "zo", "ぞ" ); ins( "ja", "じゃ" ); ins( "ju", "じゅ" ); ins( "jo", "じょ" );
  // The parenthesized keys keep the di/du row apart from ji/zu above, which
  // share the same Hepburn spelling
  ins( "da", "だ" ); ins( "(ji)", "ぢ" ); ins( "(zu)", "づ" ); ins( "de", "で" ); ins( "do", "ど" ); ins( "(ja)", "ぢゃ" ); ins( "(ju)", "ぢゅ" ); ins( "(jo)", "ぢょ" );
  ins( "ba", "ば" ); ins( "bi", "び" ); ins( "bu", "ぶ" ); ins( "be", "べ" ); ins( "bo", "ぼ" ); ins( "bya", "びゃ" ); ins( "byu", "びゅ" ); ins( "byo", "びょ" );
  ins( "pa", "ぱ" ); ins( "pi", "ぴ" ); ins( "pu", "ぷ" ); ins( "pe", "ぺ" ); ins( "po", "ぽ" ); ins( "pya", "ぴゃ" ); ins( "pyu", "ぴゅ" ); ins( "pyo", "ぴょ" );
}
/// Conversion table from Hepburn romaji spellings to katakana.
class HepburnKatakana: public Transliteration::Table
{
public:
  HepburnKatakana();
};

/// Registers the Hepburn spelling of every katakana syllable, of the two-kana
/// combinations (kya, sha, cha, ...) and of the extended combinations used
/// for foreign sounds (ti, fa, va, ...). Every entry must map to a non-empty
/// kana string: an empty target would make the matching romaji syllable
/// vanish from the converted word.
HepburnKatakana::HepburnKatakana()
{
  // Raw UTF8 -- handle with care. We'd better remap those to \xAB hex encoding
  ins( "a", "ア" ); ins( "i", "イ" ); ins( "u", "ウ" ); ins( "e", "エ" ); ins( "o", "オ" );
  ins( "ka", "カ" ); ins( "ki", "キ" ); ins( "ku", "ク" ); ins( "ke", "ケ" ); ins( "ko", "コ" ); ins( "kya", "キャ" ); ins( "kyu", "キュ" ); ins( "kyo", "キョ" );
  ins( "sa", "サ" ); ins( "shi", "シ" ); ins( "su", "ス" ); ins( "se", "セ" ); ins( "so", "ソ" ); ins( "sha", "シャ" ); ins( "shu", "シュ" ); ins( "sho", "ショ" );
  ins( "ta", "タ" ); ins( "chi", "チ" ); ins( "tsu", "ツ" ); ins( "te", "テ" ); ins( "to", "ト" ); ins( "cha", "チャ" ); ins( "chu", "チュ" ); ins( "cho", "チョ" );
  ins( "na", "ナ" ); ins( "ni", "ニ" ); ins( "nu", "ヌ" ); ins( "ne", "ネ" ); ins( "no", "ノ" ); ins( "nya", "ニャ" ); ins( "nyu", "ニュ" ); ins( "nyo", "ニョ" );
  ins( "ha", "ハ" ); ins( "hi", "ヒ" ); ins( "fu", "フ" ); ins( "he", "ヘ" ); ins( "ho", "ホ" ); ins( "hya", "ヒャ" ); ins( "hyu", "ヒュ" ); ins( "hyo", "ヒョ" );
  ins( "ma", "マ" ); ins( "mi", "ミ" ); ins( "mu", "ム" ); ins( "me", "メ" ); ins( "mo", "モ" ); ins( "mya", "ミャ" ); ins( "myu", "ミュ" ); ins( "myo", "ミョ" );
  ins( "ya", "ヤ" ); ins( "yu", "ユ" ); ins( "yo", "ヨ" );
  ins( "ra", "ラ" ); ins( "ri", "リ" ); ins( "ru", "ル" ); ins( "re", "レ" ); ins( "ro", "ロ" ); ins( "rya", "リャ" ); ins( "ryu", "リュ" ); ins( "ryo", "リョ" );
  ins( "wa", "ワ" ); /*ヰ wi† ヱ we† ヲ wo‡ */
  ins( "n", "ン" );
  ins( "ga", "ガ" ); ins( "gi", "ギ" ); ins( "gu", "グ" ); ins( "ge", "ゲ" ); ins( "go", "ゴ" ); ins( "gya", "ギャ" ); ins( "gyu", "ギュ" ); ins( "gyo", "ギョ" );
  ins( "za", "ザ" ); ins( "ji", "ジ" ); ins( "zu", "ズ" ); ins( "ze", "ゼ" ); ins( "zo", "ゾ" ); ins( "ja", "ジャ" ); ins( "ju", "ジュ" ); ins( "jo", "ジョ" );
  // The parenthesized keys keep the di/du row apart from ji/zu above, which
  // share the same Hepburn spelling
  ins( "da", "ダ" ); ins( "(ji)", "ヂ" ); ins( "(zu)", "ヅ" ); ins( "de", "デ" ); ins( "do", "ド" ); ins( "(ja)", "ヂャ" ); ins( "(ju)", "ヂュ" ); ins( "(jo)", "ヂョ" );
  ins( "ba", "バ" ); ins( "bi", "ビ" ); ins( "bu", "ブ" ); ins( "be", "ベ" ); ins( "bo", "ボ" ); ins( "bya", "ビャ" ); ins( "byu", "ビュ" ); ins( "byo", "ビョ" );
  ins( "pa", "パ" ); ins( "pi", "ピ" ); ins( "pu", "プ" ); ins( "pe", "ペ" ); ins( "po", "ポ" ); ins( "pya", "ピャ" ); ins( "pyu", "ピュ" ); ins( "pyo", "ピョ" );
  ins( "ye", "イェ" );
  ins( "wi", "ウィ" ); ins( "we", "ウェ" ); ins( "wo", "ウォ" );
  // The obsolete single-kana forms are deliberately not registered: ins()
  // never replaces an existing key, so listing them first would make the
  // "va" and "vo" entries below have no effect.
  /* ヷ va† ヸ vi† ヹ ve† ヺ vo† */
  ins( "va", "ヴァ" ); ins( "vi", "ヴィ" ); ins( "vu", "ヴ" ); ins( "ve", "ヴェ" ); ins( "vo", "ヴォ" );
  ins( "she", "シェ" );
  ins( "je", "ジェ" );
  ins( "che", "チェ" );
  ins( "ti", "ティ" ); ins( "tu", "トゥ" );
  ins( "tyu", "テュ" );
  ins( "di", "ディ" ); ins( "du", "ドゥ" );
  ins( "dyu", "デュ" );
  ins( "tsa", "ツァ" ); ins( "tse", "ツェ" ); ins( "tso", "ツォ" );
  ins( "fa", "ファ" ); ins( "fi", "フィ" ); ins( "fe", "フェ" ); ins( "fo", "フォ" );
  ins( "fyu", "フュ" );
}
/// Builds the romaji dictionaries selected by the configuration. Nothing is
/// produced unless both the master switch and the Hepburn system are on;
/// then one dictionary is added per enabled syllabary, hiragana first.
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & r )
  throw( std::exception )
{
  vector< sptr< Dictionary::Class > > dicts;

  if ( !r.enable || !r.enableHepburn )
    return dicts;

  if ( r.enableHiragana )
  {
    // The table is built once, on first use, and shared by every dictionary
    // created afterwards
    static HepburnHiragana hiraganaTable;

    std::string const title(
      QCoreApplication::translate( "Romaji", "Hepburn Romaji for Hiragana" ).toUtf8().data() );

    dicts.push_back( new Transliteration::TransliterationDictionary(
                       "94eae5a5aaf5b0a900490f4d6b36aac0", title, hiraganaTable ) );
  }

  if ( r.enableKatakana )
  {
    static HepburnKatakana katakanaTable;

    std::string const title(
      QCoreApplication::translate( "Romaji", "Hepburn Romaji for Katakana" ).toUtf8().data() );

    dicts.push_back( new Transliteration::TransliterationDictionary(
                       "3252a35767d3f6e85e3e39069800dd2f", title, katakanaTable ) );
  }

  return dicts;
}
}

20
src/romaji.hh Normal file
View file

@ -0,0 +1,20 @@
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef __ROMAJI_HH_INCLUDED__
#define __ROMAJI_HH_INCLUDED__
#include "transliteration.hh"
#include "config.hh"
/// Japanese romanization (Romaji) support.
namespace Romaji {
using std::vector;
/// Creates the romaji transliteration dictionaries selected by the given
/// configuration. The returned list may be empty.
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & )
throw( std::exception );
}
#endif

111
src/russiantranslit.cc Normal file
View file

@ -0,0 +1,111 @@
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "russiantranslit.hh"
#include "transliteration.hh"
#include <QCoreApplication>
namespace RussianTranslit {
/// Conversion table from Latin spellings to Cyrillic letters.
class RussianTable: public Transliteration::Table
{
public:
  RussianTable();
};

/// Fills the table from a static list of { Latin spelling, Cyrillic letter }
/// pairs. Multi-letter uppercase spellings are listed both fully capitalized
/// and with only the first letter capitalized.
RussianTable::RussianTable()
{
  // Utf8
  static char const * const entries[][ 2 ] =
  {
    // Lowercase
    { "a", "а" },
    { "b", "б" },
    { "v", "в" },
    { "w", "в" },
    { "g", "г" },
    { "d", "д" },
    { "e", "е" },
    { "yo", "ё" },
    { "zh", "ж" },
    { "z", "з" },
    { "i", "и" },
    { "j", "й" },
    { "k", "к" },
    { "l", "л" },
    { "m", "м" },
    { "n", "н" },
    { "o", "о" },
    { "p", "п" },
    { "r", "р" },
    { "s", "с" },
    { "t", "т" },
    { "u", "у" },
    { "f", "ф" },
    { "h", "х" },
    { "ts", "ц" },
    { "c", "ц" },
    { "ch", "ч" },
    { "sh", "ш" },
    { "shch", "щ" },
    { "\"", "ъ" },
    { "y", "ы" },
    { "'", "ь" },
    { "'e", "э" },
    { "yu", "ю" },
    { "ya", "я" },

    // Uppercase
    { "A", "А" },
    { "B", "Б" },
    { "V", "В" },
    { "W", "В" },
    { "G", "Г" },
    { "D", "Д" },
    { "E", "Е" },
    { "YO", "Ё" },
    { "Yo", "Ё" },
    { "ZH", "Ж" },
    { "Zh", "Ж" },
    { "Z", "З" },
    { "I", "И" },
    { "J", "Й" },
    { "K", "К" },
    { "L", "Л" },
    { "M", "М" },
    { "N", "Н" },
    { "O", "О" },
    { "P", "П" },
    { "R", "Р" },
    { "S", "С" },
    { "T", "Т" },
    { "U", "У" },
    { "F", "Ф" },
    { "H", "Х" },
    { "TS", "Ц" },
    { "Ts", "Ц" },
    { "C", "Ц" },
    { "CH", "Ч" },
    { "Ch", "Ч" },
    { "SH", "Ш" },
    { "Sh", "Ш" },
    { "SHCH", "Щ" },
    { "ShCh", "Щ" },
    { "Y", "Ы" },
    { "'E", "Э" },
    { "YU", "Ю" },
    { "Yu", "Ю" },
    { "YA", "Я" },
    { "Ya", "Я" }
  };

  for( unsigned x = 0; x < sizeof( entries ) / sizeof( entries[ 0 ] ); ++x )
    ins( entries[ x ][ 0 ], entries[ x ][ 1 ] );
}
/// Creates the Russian transliteration dictionary. The conversion table is
/// built once, on the first call, and shared by every dictionary returned.
sptr< Dictionary::Class > makeDictionary() throw( std::exception )
{
  static RussianTable table;

  std::string const title(
    QCoreApplication::translate( "RussianTranslit", "Russian Transliteration" ).toUtf8().data() );

  return new Transliteration::TransliterationDictionary(
           "cf1b74acd98adea9b2bba16af38f1086", title, table );
}
}

17
src/russiantranslit.hh Normal file
View file

@ -0,0 +1,17 @@
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef __RUSSIANTRANSLIT_HH_INCLUDED__
#define __RUSSIANTRANSLIT_HH_INCLUDED__
#include "dictionary.hh"
// Support for Russian transliteration
namespace RussianTranslit {
/// Creates the dictionary object which provides Russian transliteration.
sptr< Dictionary::Class > makeDictionary() throw( std::exception );
}
#endif

View file

@ -10,6 +10,7 @@
Sources::Sources( QWidget * parent, Config::Paths const & paths,
Config::SoundDirs const & soundDirs,
Config::Hunspell const & hunspell,
Config::Transliteration const & tr,
Config::MediaWikis const & mediawikis ): QWidget( parent ),
mediawikisModel( this, mediawikis ), pathsModel( this, paths ),
soundDirsModel( this, soundDirs ),
@ -38,6 +39,14 @@ Sources::Sources( QWidget * parent, Config::Paths const & paths,
ui.hunspellDictionaries->setModel( &hunspellDictsModel );
fitHunspellDictsColumns();
ui.enableRussianTransliteration->setChecked( tr.enableRussianTransliteration );
ui.enableRomaji->setChecked( tr.romaji.enable );
ui.enableHepburn->setChecked( tr.romaji.enableHepburn );
ui.enableNihonShiki->setChecked( tr.romaji.enableNihonShiki );
ui.enableKunreiShiki->setChecked( tr.romaji.enableKunreiShiki );
ui.enableHiragana->setChecked( tr.romaji.enableHiragana );
ui.enableKatakana->setChecked( tr.romaji.enableKatakana );
}
void Sources::fitPathsColumns()
@ -159,6 +168,20 @@ Config::Hunspell Sources::getHunspell() const
return h;
}
/// Collects the transliteration settings currently shown in the dialog by
/// reading the checked state of each corresponding widget.
Config::Transliteration Sources::getTransliteration() const
{
  Config::Transliteration current;

  current.enableRussianTransliteration = ui.enableRussianTransliteration->isChecked();

  Config::Romaji & romaji = current.romaji;

  romaji.enable = ui.enableRomaji->isChecked();
  romaji.enableHepburn = ui.enableHepburn->isChecked();
  romaji.enableNihonShiki = ui.enableNihonShiki->isChecked();
  romaji.enableKunreiShiki = ui.enableKunreiShiki->isChecked();
  romaji.enableHiragana = ui.enableHiragana->isChecked();
  romaji.enableKatakana = ui.enableKatakana->isChecked();

  return current;
}
////////// MediaWikisModel

View file

@ -138,6 +138,7 @@ public:
Sources( QWidget * parent, Config::Paths const &,
Config::SoundDirs const &,
Config::Hunspell const &,
Config::Transliteration const &,
Config::MediaWikis const & );
Config::Paths const & getPaths() const
@ -150,6 +151,8 @@ public:
{ return mediawikisModel.getCurrentWikis(); }
Config::Hunspell getHunspell() const;
Config::Transliteration getTransliteration() const;
signals:

View file

@ -271,6 +271,131 @@ of the appropriate groups to use them.</string>
</item>
</layout>
</widget>
<widget class="QWidget" name="tab_3">
<attribute name="title">
<string>Transliteration</string>
</attribute>
<layout class="QVBoxLayout" name="verticalLayout_9">
<item>
<spacer name="verticalSpacer_5">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QCheckBox" name="enableRussianTransliteration">
<property name="text">
<string>Russian transliteration</string>
</property>
</widget>
</item>
<item>
<widget class="QGroupBox" name="enableRomaji">
<property name="toolTip">
<string>Enables the use of the Latin alphabet to write the Japanese language</string>
</property>
<property name="title">
<string>Japanese Romaji</string>
</property>
<property name="checkable">
<bool>true</bool>
</property>
<property name="checked">
<bool>true</bool>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QLabel" name="label_7">
<property name="text">
<string>Systems:</string>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QCheckBox" name="enableHepburn">
<property name="toolTip">
<string>The most widely used method of transcription of Japanese,
based on English phonology</string>
</property>
<property name="text">
<string>Hepburn</string>
</property>
</widget>
</item>
<item row="0" column="2">
<widget class="QCheckBox" name="enableNihonShiki">
<property name="toolTip">
<string>The most regular system, having a one-to-one relation to the
kana writing systems. Standardized as ISO 3602</string>
</property>
<property name="text">
<string>Nihon-shiki</string>
</property>
</widget>
</item>
<item row="0" column="3">
<widget class="QCheckBox" name="enableKunreiShiki">
<property name="toolTip">
<string>Based on Nihon-shiki system, but modified for modern standard Japanese.
Standardized as ISO 3602</string>
</property>
<property name="text">
<string>Kunrei-shiki</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label_8">
<property name="text">
<string>Syllabaries:</string>
</property>
</widget>
</item>
<item row="1" column="2">
<widget class="QCheckBox" name="enableHiragana">
<property name="toolTip">
<string>Hiragana Japanese syllabary</string>
</property>
<property name="text">
<string>Hiragana</string>
</property>
</widget>
</item>
<item row="1" column="3">
<widget class="QCheckBox" name="enableKatakana">
<property name="toolTip">
<string>Katakana Japanese syllabary</string>
</property>
<property name="text">
<string>Katakana</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="verticalSpacer_4">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>80</height>
</size>
</property>
</spacer>
</item>
</layout>
</widget>
</widget>
</item>
</layout>

119
src/transliteration.cc Normal file
View file

@ -0,0 +1,119 @@
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "transliteration.hh"
#include "utf8.hh"
#include "folding.hh"
namespace Transliteration {
using gd::wchar;
void Table::ins( char const * from, char const * to )
{
wstring fr = Utf8::decode( std::string( from ) );
if ( fr.size() > maxEntrySize )
maxEntrySize = fr.size();
insert( std::pair< wstring, wstring >( fr,
Utf8::decode( std::string( to ) ) ) );
}
/// Sets up a dictionary with the given id and display name and no dictionary
/// files. The table is kept by reference rather than copied.
TransliterationDictionary::TransliterationDictionary( string const & id,
                                                      string const & name_,
                                                      Table const & table_ ):
  Dictionary::Class( id, vector< string >() ),
  name( name_ ),
  table( table_ )
{
}

string TransliterationDictionary::getName() throw()
{
  return name;
}

/// No properties are reported.
map< Dictionary::Property, string > TransliterationDictionary::getProperties() throw()
{
  map< Dictionary::Property, string > none;

  return none;
}

/// Always zero.
unsigned long TransliterationDictionary::getArticleCount() throw()
{
  return 0;
}

/// Always zero.
unsigned long TransliterationDictionary::getWordCount() throw()
{
  return 0;
}

/// Always answers with a fresh, default-constructed instant request.
sptr< Dictionary::WordSearchRequest > TransliterationDictionary::prefixMatch( wstring const &,
                                                                              unsigned long )
  throw( std::exception )
{
  return new Dictionary::WordSearchRequestInstant();
}

/// Always answers with an instant request constructed with false.
sptr< Dictionary::DataRequest > TransliterationDictionary::getArticle( wstring const &,
                                                                       vector< wstring > const & )
  throw( std::exception )
{
  return new Dictionary::DataRequestInstant( false );
}
/// Converts the folded form of str through the table, always taking the
/// longest table key that matches at the current position. A character at
/// which no key matches is dropped from the output. Returns the converted
/// string as the only element, or an empty list if nothing was produced.
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str )
  throw()
{
  vector< wstring > writings;

  wstring const folded = Folding::apply( str );
  size_t const longestKey = table.getMaxEntrySize();

  wstring converted;

  for( size_t pos = 0; pos < folded.size(); )
  {
    size_t const remaining = folded.size() - pos;
    size_t consumed = 0;

    // Try the candidates from the longest that still fits down to one char
    for( size_t len = longestKey < remaining ? longestKey : remaining; len >= 1; --len )
    {
      Table::const_iterator hit = table.find( folded.substr( pos, len ) );

      if ( hit != table.end() )
      {
        converted.append( hit->second );
        consumed = len;
        break;
      }
    }

    // No matches -- skip one char
    pos += consumed ? consumed : 1;
  }

  if ( !converted.empty() )
    writings.push_back( converted );

  return writings;
}
/// Returns an instant request whose matches are the alternate writings of
/// str, in the order getAlternateWritings() produced them.
sptr< Dictionary::WordSearchRequest > TransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
  throw( std::exception )
{
  sptr< Dictionary::WordSearchRequestInstant > result = new Dictionary::WordSearchRequestInstant();

  vector< wstring > alts = getAlternateWritings( str );

  for( unsigned x = 0; x < alts.size(); ++x )
    result->getMatches().push_back( alts[ x ] );

  return result;
}
}

71
src/transliteration.hh Normal file
View file

@ -0,0 +1,71 @@
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef __TRANSLITERATION_HH_INCLUDED__
#define __TRANSLITERATION_HH_INCLUDED__
#include "dictionary.hh"
#include <map>
namespace Transliteration {
using std::map;
using gd::wstring;
using std::string;
using std::vector;
/// A from -> to replacement map which also remembers the length of its
/// longest key. Derived classes fill it through ins().
class Table: public map< wstring, wstring >
{
private:

  unsigned maxEntrySize; // Length of the longest key passed to ins() so far

public:

  Table()
    : maxEntrySize( 0 )
  {
  }

  /// Length of the longest key inserted via ins(), or 0 if there were none.
  unsigned getMaxEntrySize() const
  {
    return maxEntrySize;
  }

protected:

  /// Inserts new entry into index. from and to are UTF8-encoded strings.
  /// Also updates maxEntrySize.
  void ins( char const * from, char const * to );
};
/// This is a base dictionary class for simple transliterations. It performs
/// its conversions using the Table passed to the constructor.
class TransliterationDictionary: public Dictionary::Class
{
string name;
// Held by reference, not copied: the table passed to the constructor must
// outlive this dictionary.
Table const & table;
public:
/// id and name are the dictionary's identifier and display name; table
/// supplies the replacements used by getAlternateWritings().
TransliterationDictionary( string const & id, string const & name,
Table const & table );
virtual string getName() throw();
virtual map< Dictionary::Property, string > getProperties() throw();
virtual unsigned long getArticleCount() throw();
virtual unsigned long getWordCount() throw();
/// Returns the table-converted form of the given word, if any.
virtual vector< wstring > getAlternateWritings( wstring const & )
throw();
/// Returns the alternate writings of the given word as search matches.
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
throw( std::exception );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &,
unsigned long ) throw( std::exception );
virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const & )
throw( std::exception );
};
}
#endif

View file

@ -87,19 +87,36 @@ void WordFinder::startSearch()
searchQueued = false;
searchInProgress = true;
wstring word = gd::toWString( inputWord );
// Gather all writings of the word
if ( allWordWritings.size() != 1 )
allWordWritings.resize( 1 );
allWordWritings[ 0 ] = gd::toWString( inputWord );
for( size_t x = 0; x < inputDicts->size(); ++x )
{
sptr< Dictionary::WordSearchRequest > sr =
( searchType == PrefixMatch ) ?
(*inputDicts)[ x ]->prefixMatch( word, 40 ) :
(*inputDicts)[ x ]->stemmedMatch( word, 3, 3, 30 );
vector< wstring > writings = (*inputDicts)[ x ]->getAlternateWritings( allWordWritings[ 0 ] );
connect( sr.get(), SIGNAL( finished() ),
this, SLOT( requestFinished() ), Qt::QueuedConnection );
allWordWritings.insert( allWordWritings.end(), writings.begin(), writings.end() );
}
queuedRequests.push_back( sr );
// Query each dictionary for all word writings
for( size_t x = 0; x < inputDicts->size(); ++x )
{
for( size_t y = 0; y < allWordWritings.size(); ++y )
{
sptr< Dictionary::WordSearchRequest > sr =
( searchType == PrefixMatch ) ?
(*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], 40 ) :
(*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], 3, 3, 30 );
connect( sr.get(), SIGNAL( finished() ),
this, SLOT( requestFinished() ), Qt::QueuedConnection );
queuedRequests.push_back( sr );
}
}
// Handle any requests finished already
@ -257,7 +274,7 @@ void WordFinder::updateResults()
resultsArray.push_back( OneResult() );
resultsArray.back().word = match;
resultsArray.back().rank = -1;
resultsArray.back().rank = INT_MAX;
resultsArray.back().wasSuggested = ( weight != 0 );
insertResult.first->second = --resultsArray.end();
@ -291,57 +308,65 @@ void WordFinder::updateResults()
WorstMatch,
Multiplier = 256 // Categories should be multiplied by Multiplier
};
wstring target = Folding::applySimpleCaseOnly( gd::toWString( inputWord ) );
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
wstring::size_type matchPos = 0;
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
i != j; ++i )
for( unsigned wr = 0; wr < allWordWritings.size(); ++wr )
{
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
if ( i->first == target )
i->second->rank = ExactMatch * Multiplier;
else
if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase )
i->second->rank = ExactNoFullCaseMatch * Multiplier;
else
if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia )
i->second->rank = ExactNoDiaMatch * Multiplier;
else
if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct )
i->second->rank = ExactNoPunctMatch * Multiplier;
else
if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs )
i->second->rank = ExactNoWsMatch * Multiplier;
else
if ( hasSurroundedWithWs( i->first, target, matchPos ) )
i->second->rank = ExactInsideMatch * Multiplier + matchPos;
else
if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) )
i->second->rank = ExactNoDiaInsideMatch * Multiplier + matchPos;
else
if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) )
i->second->rank = ExactNoPunctInsideMatch * Multiplier + matchPos;
else
if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 )
i->second->rank = PrefixMatch * Multiplier + saturated( i->first.size() );
else
if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 )
i->second->rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() );
else
if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 )
i->second->rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() );
else
if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 )
i->second->rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() );
else
i->second->rank = WorstMatch * Multiplier;
wstring target = Folding::applySimpleCaseOnly( allWordWritings[ wr ] );
wstring targetNoFullCase = Folding::applyFullCaseOnly( target );
wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase );
wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia );
wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct );
wstring::size_type matchPos = 0;
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
i != j; ++i )
{
wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs;
int rank;
if ( i->first == target )
rank = ExactMatch * Multiplier;
else
if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase )
rank = ExactNoFullCaseMatch * Multiplier;
else
if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia )
rank = ExactNoDiaMatch * Multiplier;
else
if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct )
rank = ExactNoPunctMatch * Multiplier;
else
if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs )
rank = ExactNoWsMatch * Multiplier;
else
if ( hasSurroundedWithWs( i->first, target, matchPos ) )
rank = ExactInsideMatch * Multiplier + matchPos;
else
if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) )
rank = ExactNoDiaInsideMatch * Multiplier + matchPos;
else
if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) )
rank = ExactNoPunctInsideMatch * Multiplier + matchPos;
else
if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 )
rank = PrefixMatch * Multiplier + saturated( i->first.size() );
else
if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 )
rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() );
else
if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 )
rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() );
else
if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 )
rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() );
else
rank = WorstMatch * Multiplier;
if ( i->second->rank > rank )
i->second->rank = rank; // We store the best rank of any writing
}
}
resultsArray.sort( SortByRank() );
@ -354,23 +379,29 @@ void WordFinder::updateResults()
// in their beginnings, and second, the length of the strings. Here we assign
// only the first one, storing it in rank. Then we sort the results using
// SortByRankAndLength.
wstring target = Folding::apply( gd::toWString( inputWord ) );
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
i != j; ++i )
for( unsigned wr = 0; wr < allWordWritings.size(); ++wr )
{
wstring resultFolded = Folding::apply( i->first );
wstring target = Folding::apply( allWordWritings[ wr ] );
for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end();
i != j; ++i )
{
wstring resultFolded = Folding::apply( i->first );
int charsInCommon = 0;
for( wchar const * t = target.c_str(), * r = resultFolded.c_str();
*t && *t == *r; ++t, ++r, ++charsInCommon ) ;
int rank = -charsInCommon; // Negated so the lesser-than
// comparison would yield right
// results.
int charsInCommon = 0;
for( wchar const * t = target.c_str(), * r = resultFolded.c_str();
*t && *t == *r; ++t, ++r, ++charsInCommon ) ;
i->second->rank = -charsInCommon; // Negated so the lesser-than
// comparison would yield right
// results.
if ( i->second->rank > rank )
i->second->rank = rank; // We store the best rank of any writing
}
}
resultsArray.sort( SortByRankAndLength() );
maxSearchResults = 15;

View file

@ -45,6 +45,8 @@ private:
std::vector< sptr< Dictionary::Class > > const * inputDicts;
std::vector< gd::wstring > allWordWritings; // All writings of the inputWord
struct OneResult
{
gd::wstring word;