From 197ccf35d413df123138b71e1648d4bc059f0d6b Mon Sep 17 00:00:00 2001 From: Konstantin Isakov Date: Wed, 6 May 2009 14:39:08 +0000 Subject: [PATCH] + Support for transliterations added. For now basic Russian translit and Hepburn Romaji are supported. --- src/config.cc | 66 +++++++++++++++ src/config.hh | 43 ++++++++++ src/dictionary.cc | 6 ++ src/dictionary.hh | 8 ++ src/editdictionaries.cc | 5 +- src/goldendict.pro | 10 ++- src/loaddictionaries.cc | 17 +++- src/loaddictionaries.hh | 1 + src/romaji.cc | 106 ++++++++++++++++++++++++ src/romaji.hh | 20 +++++ src/russiantranslit.cc | 111 +++++++++++++++++++++++++ src/russiantranslit.hh | 17 ++++ src/sources.cc | 23 ++++++ src/sources.hh | 3 + src/sources.ui | 125 ++++++++++++++++++++++++++++ src/transliteration.cc | 119 +++++++++++++++++++++++++++ src/transliteration.hh | 71 ++++++++++++++++ src/wordfinder.cc | 177 +++++++++++++++++++++++----------------- src/wordfinder.hh | 2 + 19 files changed, 853 insertions(+), 77 deletions(-) create mode 100644 src/romaji.cc create mode 100644 src/romaji.hh create mode 100644 src/russiantranslit.cc create mode 100644 src/russiantranslit.hh create mode 100644 src/transliteration.cc create mode 100644 src/transliteration.hh diff --git a/src/config.cc b/src/config.cc index 27a99a15..d4f93312 100644 --- a/src/config.cc +++ b/src/config.cc @@ -98,6 +98,16 @@ Preferences::Preferences(): { } +Romaji::Romaji(): + enable( false ), + enableHepburn( true ), + enableNihonShiki( false ), + enableKunreiShiki( false ), + enableHiragana( true ), + enableKatakana( true ) +{ +} + namespace { MediaWikis makeDefaultMediaWikis( bool enable ) @@ -317,6 +327,26 @@ Class load() throw( exError ) c.hunspell.enabledDictionaries.push_back( nl.item( x ).toElement().text() ); } + QDomNode transliteration = root.namedItem( "transliteration" ); + + if ( !transliteration.isNull() ) + { + applyBoolOption( c.transliteration.enableRussianTransliteration, + transliteration.namedItem( "enableRussianTransliteration" 
) ); + + QDomNode romaji = transliteration.namedItem( "romaji" ); + + if ( !romaji.isNull() ) + { + applyBoolOption( c.transliteration.romaji.enable, romaji.namedItem( "enable" ) ); + applyBoolOption( c.transliteration.romaji.enableHepburn, romaji.namedItem( "enableHepburn" ) ); + applyBoolOption( c.transliteration.romaji.enableNihonShiki, romaji.namedItem( "enableNihonShiki" ) ); + applyBoolOption( c.transliteration.romaji.enableKunreiShiki, romaji.namedItem( "enableKunreiShiki" ) ); + applyBoolOption( c.transliteration.romaji.enableHiragana, romaji.namedItem( "enableHiragana" ) ); + applyBoolOption( c.transliteration.romaji.enableKatakana, romaji.namedItem( "enableKatakana" ) ); + } + } + QDomNode mws = root.namedItem( "mediawikis" ); if ( !mws.isNull() ) @@ -549,6 +579,42 @@ void save( Class const & c ) throw( exError ) } } + { + QDomElement transliteration = dd.createElement( "transliteration" ); + root.appendChild( transliteration ); + + QDomElement opt = dd.createElement( "enableRussianTransliteration" ); + opt.appendChild( dd.createTextNode( c.transliteration.enableRussianTransliteration ? "1":"0" ) ); + transliteration.appendChild( opt ); + + QDomElement romaji = dd.createElement( "romaji" ); + transliteration.appendChild( romaji ); + + opt = dd.createElement( "enable" ); + opt.appendChild( dd.createTextNode( c.transliteration.romaji.enable ? "1":"0" ) ); + romaji.appendChild( opt ); + + opt = dd.createElement( "enableHepburn" ); + opt.appendChild( dd.createTextNode( c.transliteration.romaji.enableHepburn ? "1":"0" ) ); + romaji.appendChild( opt ); + + opt = dd.createElement( "enableNihonShiki" ); + opt.appendChild( dd.createTextNode( c.transliteration.romaji.enableNihonShiki ? "1":"0" ) ); + romaji.appendChild( opt ); + + opt = dd.createElement( "enableKunreiShiki" ); + opt.appendChild( dd.createTextNode( c.transliteration.romaji.enableKunreiShiki ? 
"1":"0" ) ); + romaji.appendChild( opt ); + + opt = dd.createElement( "enableHiragana" ); + opt.appendChild( dd.createTextNode( c.transliteration.romaji.enableHiragana ? "1":"0" ) ); + romaji.appendChild( opt ); + + opt = dd.createElement( "enableKatakana" ); + opt.appendChild( dd.createTextNode( c.transliteration.romaji.enableKatakana ? "1":"0" ) ); + romaji.appendChild( opt ); + } + { QDomElement mws = dd.createElement( "mediawikis" ); root.appendChild( mws ); diff --git a/src/config.hh b/src/config.hh index bfc2e1df..d3b2d36d 100644 --- a/src/config.hh +++ b/src/config.hh @@ -199,6 +199,48 @@ struct Hunspell /// All the MediaWikis typedef vector< MediaWiki > MediaWikis; +/// Romaji transliteration configuration +struct Romaji +{ + bool enable; + + bool enableHepburn; + bool enableNihonShiki; + bool enableKunreiShiki; + bool enableHiragana; + bool enableKatakana; + + Romaji(); + + bool operator == ( Romaji const & other ) const + { return enable == other.enable && + enableHepburn == other.enableHepburn && + enableNihonShiki == other.enableNihonShiki && + enableKunreiShiki == other.enableKunreiShiki && + enableHiragana == other.enableHiragana && + enableKatakana == other.enableKatakana; } + + bool operator != ( Romaji const & other ) const + { return ! operator == ( other ); } + +}; + +struct Transliteration +{ + bool enableRussianTransliteration; + Romaji romaji; + + bool operator == ( Transliteration const & other ) const + { return enableRussianTransliteration == other.enableRussianTransliteration && + romaji == other.romaji; } + + bool operator != ( Transliteration const & other ) const + { return ! 
operator == ( other ); } + + Transliteration(): enableRussianTransliteration( false ) + {} +}; + struct Class { Paths paths; @@ -207,6 +249,7 @@ struct Class Preferences preferences; MediaWikis mediawikis; Hunspell hunspell; + Transliteration transliteration; unsigned lastMainGroupId; // Last used group in main window unsigned lastPopupGroupId; // Last used group in popup window diff --git a/src/dictionary.cc b/src/dictionary.cc index 4926b5c0..4e2cdf3f 100644 --- a/src/dictionary.cc +++ b/src/dictionary.cc @@ -125,6 +125,12 @@ sptr< WordSearchRequest > Class::findHeadwordsForSynonym( wstring const & ) return new WordSearchRequestInstant(); } +vector< wstring > Class::getAlternateWritings( wstring const & ) + throw() +{ + return vector< wstring >(); +} + sptr< DataRequest > Class::getResource( string const & /*name*/ ) throw( std::exception ) { diff --git a/src/dictionary.hh b/src/dictionary.hh index 9f209614..69c87b54 100644 --- a/src/dictionary.hh +++ b/src/dictionary.hh @@ -298,6 +298,14 @@ public: virtual sptr< WordSearchRequest > findHeadwordsForSynonym( wstring const & ) throw( std::exception ); + /// For a given word, provides alternate writings of it which are to be looked + /// up alongside it. Transliteration dictionaries implement this. The + /// default implementation returns an empty list. Note that this function is + /// supposed to be very fast and simple, and the results are thus returned + /// synchronously. + virtual vector< wstring > getAlternateWritings( wstring const & ) + throw(); + /// Returns a definition for the given word. The definition should /// be an html fragment (without html/head/body tags) in an utf8 encoding. 
/// The 'alts' vector could contain a list of words the definitions of which diff --git a/src/editdictionaries.cc b/src/editdictionaries.cc index 626cc20d..0ab182d1 100644 --- a/src/editdictionaries.cc +++ b/src/editdictionaries.cc @@ -13,7 +13,8 @@ EditDictionaries::EditDictionaries( QWidget * parent, Config::Class & cfg_, QDialog( parent ), cfg( cfg_ ), dictionaries( dictionaries_ ), dictNetMgr( dictNetMgr_ ), origCfg( cfg ), - sources( this, cfg.paths, cfg.soundDirs, cfg.hunspell, cfg.mediawikis ), + sources( this, cfg.paths, cfg.soundDirs, cfg.hunspell, cfg.transliteration, + cfg.mediawikis ), groups( new Groups( this, dictionaries, cfg.groups ) ), dictionariesChanged( false ), groupsChanged( false ), @@ -106,6 +107,7 @@ bool EditDictionaries::isSourcesChanged() const return sources.getPaths() != cfg.paths || sources.getSoundDirs() != cfg.soundDirs || sources.getHunspell() != cfg.hunspell || + sources.getTransliteration() != cfg.transliteration || sources.getMediaWikis() != cfg.mediawikis; } @@ -116,6 +118,7 @@ void EditDictionaries::acceptChangedSources() cfg.paths = sources.getPaths(); cfg.soundDirs = sources.getSoundDirs(); cfg.hunspell = sources.getHunspell(); + cfg.transliteration = sources.getTransliteration(); cfg.mediawikis = sources.getMediaWikis(); loadDictionaries( this, true, cfg, dictionaries, dictNetMgr ); diff --git a/src/goldendict.pro b/src/goldendict.pro index 1efdcfd7..60a80686 100644 --- a/src/goldendict.pro +++ b/src/goldendict.pro @@ -117,7 +117,10 @@ HEADERS += folding.hh \ hotkeyedit.hh \ langcoder.hh \ editdictionaries.hh \ - loaddictionaries.hh + loaddictionaries.hh \ + transliteration.hh \ + romaji.hh \ + russiantranslit.hh FORMS += groups.ui \ dictgroupwidget.ui \ @@ -180,7 +183,10 @@ SOURCES += folding.cc \ hotkeyedit.cc \ langcoder.cc \ editdictionaries.cc \ - loaddictionaries.cc + loaddictionaries.cc \ + transliteration.cc \ + romaji.cc \ + russiantranslit.cc win32 { SOURCES += mouseover_win32/ThTypes.c diff --git 
a/src/loaddictionaries.cc b/src/loaddictionaries.cc index 886682fe..4f8f9c04 100644 --- a/src/loaddictionaries.cc +++ b/src/loaddictionaries.cc @@ -11,6 +11,8 @@ #include "sounddir.hh" #include "hunspell.hh" #include "dictdfiles.hh" +#include "romaji.hh" +#include "russiantranslit.hh" #include #include @@ -23,7 +25,8 @@ using std::string; using std::vector; LoadDictionaries::LoadDictionaries( Config::Class const & cfg ): - paths( cfg.paths ), soundDirs( cfg.soundDirs ), hunspell( cfg.hunspell ) + paths( cfg.paths ), soundDirs( cfg.soundDirs ), hunspell( cfg.hunspell ), + transliteration( cfg.transliteration ) { } @@ -51,7 +54,19 @@ void LoadDictionaries::run() dictionaries.insert( dictionaries.end(), hunspellDictionaries.begin(), hunspellDictionaries.end() ); } + + // Make romaji + { + vector< sptr< Dictionary::Class > > romajiDictionaries = + Romaji::makeDictionaries( transliteration.romaji ); + dictionaries.insert( dictionaries.end(), romajiDictionaries.begin(), + romajiDictionaries.end() ); + } + + // Make Russian transliteration + if ( transliteration.enableRussianTransliteration ) + dictionaries.push_back( RussianTranslit::makeDictionary() ); } catch( std::exception & e ) { diff --git a/src/loaddictionaries.hh b/src/loaddictionaries.hh index ffac2d89..6497c5cb 100644 --- a/src/loaddictionaries.hh +++ b/src/loaddictionaries.hh @@ -19,6 +19,7 @@ class LoadDictionaries: public QThread, public Dictionary::Initializing Config::Paths const & paths; Config::SoundDirs const & soundDirs; Config::Hunspell const & hunspell; + Config::Transliteration const & transliteration; std::vector< sptr< Dictionary::Class > > dictionaries; std::string exceptionText; diff --git a/src/romaji.cc b/src/romaji.cc new file mode 100644 index 00000000..0869dea7 --- /dev/null +++ b/src/romaji.cc @@ -0,0 +1,106 @@ +#include "romaji.hh" +#include + +namespace Romaji { + +class HepburnHiragana: public Transliteration::Table +{ +public: + + HepburnHiragana(); +}; + 
+HepburnHiragana::HepburnHiragana() +{ + // Raw UTF8 -- handle with care. We'd better remap those to \xAB hex encoding + ins( "a", "あ" ); ins( "i", "い" ); ins( "u", "う" ); ins( "e", "え" ); ins( "o", "お" ); + ins( "ka", "か" ); ins( "ki", "き" ); ins( "ku", "く" ); ins( "ke", "け" ); ins( "ko", "こ" ); ins( "kya", "きゃ" ); ins( "kyu", "きゅ" ); ins( "kyo", "きょ" ); + ins( "sa", "さ" ); ins( "shi", "し" ); ins( "su", "す" ); ins( "se", "せ" ); ins( "so", "そ" ); ins( "sha", "しゃ" ); ins( "shu", "しゅ" ); ins( "sho", "しょ" ); + ins( "ta", "た" ); ins( "chi", "ち" ); ins( "tsu", "つ" ); ins( "te", "て" ); ins( "to", "と" ); ins( "cha", "ちゃ" ); ins( "chu", "ちゅ" ); ins( "cho", "ちょ" ); + ins( "na", "な" ); ins( "ni", "に" ); ins( "nu", "ぬ" ); ins( "ne", "ね" ); ins( "no", "の" ); ins( "nya", "にゃ" ); ins( "nyu", "にゅ" ); ins( "nyo", "にょ" ); + ins( "ha", "は" ); ins( "hi", "ひ" ); ins( "fu", "ふ" ); ins( "he", "へ" ); ins( "ho", "ほ" ); ins( "hya", "ひゃ" ); ins( "hyu", "ひゅ" ); ins( "hyo", "ひょ" ); + ins( "ma", "ま" ); ins( "mi", "み" ); ins( "mu", "む" ); ins( "me", "め" ); ins( "mo", "も" ); ins( "mya", "みゃ" ); ins( "myu", "みゅ" ); ins( "myo", "みょ" ); + ins( "ya", "や" ); ins( "yu", "ゆ" ); ins( "yo", "よ" ); + ins( "ra", "ら" ); ins( "ri", "り" ); ins( "ru", "る" ); ins( "re", "れ" ); ins( "ro", "ろ" ); ins( "rya", "りゃ" ); ins( "ryu", "りゅ" ); ins( "ryo", "りょ" ); + ins( "wa", "わ" ); /*ゐ wi† ゑ we† を wo‡ */ + ins( "n", "ん" ); + ins( "ga", "が" ); ins( "gi", "ぎ" ); ins( "gu", "ぐ" ); ins( "ge", "げ" ); ins( "go", "ご" ); ins( "gya", "ぎゃ" ); ins( "gyu", "ぎゅ" ); ins( "gyo", "ぎょ" ); + ins( "za", "ざ" ); ins( "ji", "じ" ); ins( "zu", "ず" ); ins( "ze", "ぜ" ); ins( "zo", "ぞ" ); ins( "ja", "じゃ" ); ins( "ju", "じゅ" ); ins( "jo", "じょ" ); + ins( "da", "だ" ); ins( "(ji)", "ぢ" ); ins( "(zu)", "づ" ); ins( "de", "で" ); ins( "do", "ど" ); ins( "(ja)", "ぢゃ" ); ins( "(ju)", "ぢゅ" ); ins( "(jo)", "ぢょ" ); + ins( "ba", "ば" ); ins( "bi", "び" ); ins( "bu", "ぶ" ); ins( "be", "べ" ); ins( "bo", "ぼ" ); ins( "bya", "びゃ" ); ins( "byu", "びゅ" ); ins( "byo", "びょ" 
); + ins( "pa", "ぱ" ); ins( "pi", "ぴ" ); ins( "pu", "ぷ" ); ins( "pe", "ぺ" ); ins( "po", "ぽ" ); ins( "pya", "ぴゃ" ); ins( "pyu", "ぴゅ" ); ins( "pyo", "ぴょ" ); +} + +class HepburnKatakana: public Transliteration::Table +{ +public: + + HepburnKatakana(); +}; + +HepburnKatakana::HepburnKatakana() +{ + // Raw UTF8 -- handle with care. We'd better remap those to \xAB hex encoding + ins( "a", "ア" ); ins( "i", "イ" ); ins( "u", "ウ" ); ins( "e", "エ" ); ins( "o", "オ" ); + ins( "ka", "カ" ); ins( "ki", "キ" ); ins( "ku", "ク" ); ins( "ke", "ケ" ); ins( "ko", "コ" ); ins( "kya", "キャ" ); ins( "kyu", "キュ" ); ins( "kyo", "キョ" ); + ins( "sa", "サ" ); ins( "shi", "シ" ); ins( "su", "ス" ); ins( "se", "セ" ); ins( "so", "ソ" ); ins( "sha", "シャ" ); ins( "shu", "シュ" ); ins( "sho", "ショ" ); + ins( "ta", "タ" ); ins( "chi", "チ" ); ins( "tsu", "ツ" ); ins( "te", "テ" ); ins( "to", "ト" ); ins( "cha", "チャ" ); ins( "chu", "チュ" ); ins( "cho", "チョ" ); + ins( "na", "ナ" ); ins( "ni", "ニ" ); ins( "nu", "ヌ" ); ins( "ne", "ネ" ); ins( "no", "ノ" ); ins( "nya", "ニャ" ); ins( "nyu", "ニュ" ); ins( "nyo", "ニョ" ); + ins( "ha", "ハ" ); ins( "hi", "ヒ" ); ins( "fu", "フ" ); ins( "he", "ヘ" ); ins( "ho", "ホ" ); ins( "hya", "ヒャ" ); ins( "hyu", "ヒュ" ); ins( "hyo", "ヒョ" ); + ins( "ma", "マ" ); ins( "mi", "ミ" ); ins( "mu", "ム" ); ins( "me", "メ" ); ins( "mo", "モ" ); ins( "mya", "ミャ" ); ins( "myu", "ミュ" ); ins( "myo", "ミョ" ); + ins( "ya", "ヤ" ); ins( "yu", "ユ" ); ins( "yo", "ヨ" ); + ins( "ra", "ラ" ); ins( "ri", "リ" ); ins( "ru", "ル" ); ins( "re", "レ" ); ins( "ro", "ロ" ); ins( "rya", "リャ" ); ins( "ryu", "リュ" ); ins( "ryo", "リョ" ); + ins( "wa", "ワ" ); /*ヰ wi† ヱ we† ヲ wo‡ */ + ins( "n", "ン" ); + ins( "ga", "ガ" ); ins( "gi", "ギ" ); ins( "gu", "グ" ); ins( "ge", "ゲ" ); ins( "go", "ゴ" ); ins( "gya", "ギャ" ); ins( "gyu", "ギュ" ); ins( "gyo", "ギョ" ); + ins( "za", "ザ" ); ins( "ji", "ジ" ); ins( "zu", "ズ" ); ins( "ze", "ゼ" ); ins( "zo", "ゾ" ); ins( "ja", "ジャ" ); ins( "ju", "ジュ" ); ins( "jo", "ジョ" ); + ins( "da", "ダ" ); ins( "(ji)", "ヂ" ); ins( "(zu)", 
"ヅ" ); ins( "de", "デ" ); ins( "do", "ド" ); ins( "(ja)", "ヂャ" ); ins( "(ju)", "ヂュ" ); ins( "(jo)", "ヂョ" ); + ins( "ba", "バ" ); ins( "bi", "ビ" ); ins( "bu", "ブ" ); ins( "be", "ベ" ); ins( "bo", "ボ" ); ins( "bya", "ビャ" ); ins( "byu", "ビュ" ); ins( "byo", "ビョ" ); + ins( "pa", "パ" ); ins( "pi", "ピ" ); ins( "pu", "プ" ); ins( "pe", "ペ" ); ins( "po", "ポ" ); ins( "pya", "ピャ" ); ins( "pyu", "ピュ" ); ins( "pyo", "ピョ" ); + ins( "ye", "イェ" ); + ins( "wi", "ウィ" ); ins( "we", "ウェ" ); ins( "wo", "ウォ" ); + ins( "va", "ヷ" ); /*ヸ vi† ヹ ve†*/ ins( "vo", "ヺ" ); + ins( "va", "ヴァ" ); ins( "vi", "ヴィ" ); ins( "vu", "ヴ" ); ins( "ve", "ヴェ" ); ins( "vo", "ヴォ" ); + ins( "she", "シェ" ); + ins( "je", "ジェ" ); + ins( "che", "チェ" ); + ins( "ti", "ティ" ); ins( "tu", "トゥ" ); + ins( "tyu", "テュ" ); + ins( "di", "ディ" ); ins( "du", "ドゥ" ); + ins( "dyu", "デュ" ); + ins( "tsa", "ツァ" ); ins( "tse", "ツェ" ); ins( "tso", "ツォ" ); + ins( "fa", "ファ" ); ins( "fi", "フィ" ); ins( "fe", "フェ" ); ins( "fo", "フォ" ); + ins( "fyu", "フュ" ); +} + +vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & r ) + throw( std::exception ) +{ + vector< sptr< Dictionary::Class > > result; + + if ( r.enable ) + { + if ( r.enableHepburn ) + { + if ( r.enableHiragana ) + { + static HepburnHiragana t; + + result.push_back( new Transliteration::TransliterationDictionary( "94eae5a5aaf5b0a900490f4d6b36aac0", + QCoreApplication::translate( "Romaji", "Hepburn Romaji for Hiragana" ).toUtf8().data(), t ) ); + } + + if ( r.enableKatakana ) + { + static HepburnKatakana t; + + result.push_back( new Transliteration::TransliterationDictionary( "3252a35767d3f6e85e3e39069800dd2f", + QCoreApplication::translate( "Romaji", "Hepburn Romaji for Katakana" ).toUtf8().data(), t ) ); + } + } + } + + return result; +} + +} diff --git a/src/romaji.hh b/src/romaji.hh new file mode 100644 index 00000000..70049e62 --- /dev/null +++ b/src/romaji.hh @@ -0,0 +1,20 @@ +/* This file is (c) 2008-2009 Konstantin Isakov + * Part of GoldenDict. 
Licensed under GPLv3 or later, see the LICENSE file */ + +#ifndef __ROMAJI_HH_INCLUDED__ +#define __ROMAJI_HH_INCLUDED__ + +#include "transliteration.hh" +#include "config.hh" + +/// Japanese romanization (Romaji) support. +namespace Romaji { + +using std::vector; + +vector< sptr< Dictionary::Class > > makeDictionaries( Config::Romaji const & ) + throw( std::exception ); + +} + +#endif diff --git a/src/russiantranslit.cc b/src/russiantranslit.cc new file mode 100644 index 00000000..20b9957d --- /dev/null +++ b/src/russiantranslit.cc @@ -0,0 +1,111 @@ +/* This file is (c) 2008-2009 Konstantin Isakov + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#include "russiantranslit.hh" +#include "transliteration.hh" +#include + +namespace RussianTranslit { + +class RussianTable: public Transliteration::Table +{ +public: + + RussianTable(); +}; + +RussianTable::RussianTable() +{ + // Utf8 + + // Lowercase + ins( "a", "а" ); + ins( "b", "б" ); + ins( "v", "в" ); + ins( "w", "в" ); + ins( "g", "г" ); + ins( "d", "д" ); + ins( "e", "е" ); + ins( "yo", "ё" ); + ins( "zh", "ж" ); + ins( "z", "з" ); + ins( "i", "и" ); + ins( "j", "й" ); + ins( "k", "к" ); + ins( "l", "л" ); + ins( "m", "м" ); + ins( "n", "н" ); + ins( "o", "о" ); + ins( "p", "п" ); + ins( "r", "р" ); + ins( "s", "с" ); + ins( "t", "т" ); + ins( "u", "у" ); + ins( "f", "ф" ); + ins( "h", "х" ); + ins( "ts", "ц" ); + ins( "c", "ц" ); + ins( "ch", "ч" ); + ins( "sh", "ш" ); + ins( "shch", "щ" ); + ins( "\"", "ъ" ); + ins( "y", "ы" ); + ins( "'", "ь" ); + ins( "'e", "э" ); + ins( "yu", "ю" ); + ins( "ya", "я" ); + + // Uppercase + ins( "A", "А" ); + ins( "B", "Б" ); + ins( "V", "В" ); + ins( "W", "В" ); + ins( "G", "Г" ); + ins( "D", "Д" ); + ins( "E", "Е" ); + ins( "YO", "Ё" ); + ins( "Yo", "Ё" ); + ins( "ZH", "Ж" ); + ins( "Zh", "Ж" ); + ins( "Z", "З" ); + ins( "I", "И" ); + ins( "J", "Й" ); + ins( "K", "К" ); + ins( "L", "Л" ); + ins( "M", "М" ); + ins( "N", "Н" ); + ins( "O", "О" ); 
+ ins( "P", "П" ); + ins( "R", "Р" ); + ins( "S", "С" ); + ins( "T", "Т" ); + ins( "U", "У" ); + ins( "F", "Ф" ); + ins( "H", "Х" ); + ins( "TS", "Ц" ); + ins( "Ts", "Ц" ); + ins( "C", "Ц" ); + ins( "CH", "Ч" ); + ins( "Ch", "Ч" ); + ins( "SH", "Ш" ); + ins( "Sh", "Ш" ); + ins( "SHCH", "Щ" ); + ins( "ShCh", "Щ" ); + ins( "Y", "Ы" ); + ins( "'E", "Э" ); + ins( "YU", "Ю" ); + ins( "Yu", "Ю" ); + ins( "YA", "Я" ); + ins( "Ya", "Я" ); +} + +sptr< Dictionary::Class > makeDictionary() throw( std::exception ) +{ + static RussianTable t; + + return new Transliteration::TransliterationDictionary( "cf1b74acd98adea9b2bba16af38f1086", + QCoreApplication::translate( "RussianTranslit", "Russian Transliteration" ).toUtf8().data(), t ); +} + +} + diff --git a/src/russiantranslit.hh b/src/russiantranslit.hh new file mode 100644 index 00000000..8b93f1dc --- /dev/null +++ b/src/russiantranslit.hh @@ -0,0 +1,17 @@ +/* This file is (c) 2008-2009 Konstantin Isakov + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#ifndef __RUSSIANTRANSLIT_HH_INCLUDED__ +#define __RUSSIANTRANSLIT_HH_INCLUDED__ + +#include "dictionary.hh" + +// Support for Russian transliteration +namespace RussianTranslit { + +sptr< Dictionary::Class > makeDictionary() throw( std::exception ); + +} + +#endif + diff --git a/src/sources.cc b/src/sources.cc index a9ea7890..83e2b7b4 100644 --- a/src/sources.cc +++ b/src/sources.cc @@ -10,6 +10,7 @@ Sources::Sources( QWidget * parent, Config::Paths const & paths, Config::SoundDirs const & soundDirs, Config::Hunspell const & hunspell, + Config::Transliteration const & tr, Config::MediaWikis const & mediawikis ): QWidget( parent ), mediawikisModel( this, mediawikis ), pathsModel( this, paths ), soundDirsModel( this, soundDirs ), @@ -38,6 +39,14 @@ Sources::Sources( QWidget * parent, Config::Paths const & paths, ui.hunspellDictionaries->setModel( &hunspellDictsModel ); fitHunspellDictsColumns(); + + ui.enableRussianTransliteration->setChecked( 
tr.enableRussianTransliteration ); + ui.enableRomaji->setChecked( tr.romaji.enable ); + ui.enableHepburn->setChecked( tr.romaji.enableHepburn ); + ui.enableNihonShiki->setChecked( tr.romaji.enableNihonShiki ); + ui.enableKunreiShiki->setChecked( tr.romaji.enableKunreiShiki ); + ui.enableHiragana->setChecked( tr.romaji.enableHiragana ); + ui.enableKatakana->setChecked( tr.romaji.enableKatakana ); } void Sources::fitPathsColumns() @@ -159,6 +168,20 @@ Config::Hunspell Sources::getHunspell() const return h; } +Config::Transliteration Sources::getTransliteration() const +{ + Config::Transliteration tr; + + tr.enableRussianTransliteration = ui.enableRussianTransliteration->isChecked(); + tr.romaji.enable = ui.enableRomaji->isChecked(); + tr.romaji.enableHepburn = ui.enableHepburn->isChecked(); + tr.romaji.enableNihonShiki = ui.enableNihonShiki->isChecked(); + tr.romaji.enableKunreiShiki = ui.enableKunreiShiki->isChecked(); + tr.romaji.enableHiragana = ui.enableHiragana->isChecked(); + tr.romaji.enableKatakana = ui.enableKatakana->isChecked(); + + return tr; +} ////////// MediaWikisModel diff --git a/src/sources.hh b/src/sources.hh index f092829a..3b927e93 100644 --- a/src/sources.hh +++ b/src/sources.hh @@ -138,6 +138,7 @@ public: Sources( QWidget * parent, Config::Paths const &, Config::SoundDirs const &, Config::Hunspell const &, + Config::Transliteration const &, Config::MediaWikis const & ); Config::Paths const & getPaths() const @@ -150,6 +151,8 @@ public: { return mediawikisModel.getCurrentWikis(); } Config::Hunspell getHunspell() const; + + Config::Transliteration getTransliteration() const; signals: diff --git a/src/sources.ui b/src/sources.ui index 7eedf925..bc418061 100644 --- a/src/sources.ui +++ b/src/sources.ui @@ -271,6 +271,131 @@ of the appropriate groups to use them. 
+ + + Transliteration + + + + + + Qt::Vertical + + + + 20 + 40 + + + + + + + + Russian transliteration + + + + + + + Enables to use the Latin alphabet to write the Japanese language + + + Japanese Romaji + + + true + + + true + + + + + + Systems: + + + + + + + The most widely used method of transcription of Japanese, +based on English phonology + + + Hepburn + + + + + + + The most regular system, having a one-to-one relation to the +kana writing systems. Standardized as ISO 3602 + + + Nihon-shiki + + + + + + + Based on Nihon-shiki system, but modified for modern standard Japanese. +Standardized as ISO 3602 + + + Kunrei-shiki + + + + + + + Syllabaries: + + + + + + + Hiragana Japanese syllabary + + + Hiragana + + + + + + + Hiragana Japanese syllabary + + + Katakana + + + + + + + + + + Qt::Vertical + + + + 20 + 80 + + + + + + diff --git a/src/transliteration.cc b/src/transliteration.cc new file mode 100644 index 00000000..76e7261a --- /dev/null +++ b/src/transliteration.cc @@ -0,0 +1,119 @@ +/* This file is (c) 2008-2009 Konstantin Isakov + * Part of GoldenDict. 
Licensed under GPLv3 or later, see the LICENSE file */ + +#include "transliteration.hh" +#include "utf8.hh" +#include "folding.hh" + +namespace Transliteration { + +using gd::wchar; + +void Table::ins( char const * from, char const * to ) +{ + wstring fr = Utf8::decode( std::string( from ) ); + + if ( fr.size() > maxEntrySize ) + maxEntrySize = fr.size(); + + insert( std::pair< wstring, wstring >( fr, + Utf8::decode( std::string( to ) ) ) ); +} + +TransliterationDictionary::TransliterationDictionary( string const & id, + string const & name_, + Table const & table_ ): + Dictionary::Class( id, vector< string >() ), + name( name_ ), table( table_ ) +{} + +string TransliterationDictionary::getName() throw() +{ return name; } + +map< Dictionary::Property, string > TransliterationDictionary::getProperties() throw() +{ return map< Dictionary::Property, string >(); } + +unsigned long TransliterationDictionary::getArticleCount() throw() +{ return 0; } + +unsigned long TransliterationDictionary::getWordCount() throw() +{ return 0; } + +sptr< Dictionary::WordSearchRequest > TransliterationDictionary::prefixMatch( wstring const &, + unsigned long ) throw( std::exception ) +{ return new Dictionary::WordSearchRequestInstant(); } + +sptr< Dictionary::DataRequest > TransliterationDictionary::getArticle( wstring const &, + vector< wstring > const & ) + throw( std::exception ) +{ return new Dictionary::DataRequestInstant( false ); } + + +vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str ) + throw() +{ + vector< wstring > results; + + wstring folded = Folding::apply( str ); + + if ( folded.empty() ) + return results; + + wstring result; + + wchar const * ptr = folded.c_str(); + + size_t left = folded.size(); + + Table::const_iterator i; + + while( left ) + { + unsigned x; + + for( x = table.getMaxEntrySize(); x >= 1; --x ) + { + if ( left >= x ) + { + i = table.find( wstring( ptr, x ) ); + + if ( i != table.end() ) + { + result.append( i->second 
); + ptr += x; + left -= x; + break; + } + } + } + + if ( !x ) + { + // No matches -- skip one char + --left; + ++ptr; + } + } + + if ( result.size() ) + results.push_back( result ); + + return results; +} + +sptr< Dictionary::WordSearchRequest > TransliterationDictionary::findHeadwordsForSynonym( wstring const & str ) + throw( std::exception ) +{ + sptr< Dictionary::WordSearchRequestInstant > result = new Dictionary::WordSearchRequestInstant(); + + vector< wstring > alts = getAlternateWritings( str ); + + printf( "alts = %u\n", alts.size() ); + + for( unsigned x = 0; x < alts.size(); ++x ) + result->getMatches().push_back( alts[ x ] ); + + return result; +} + +} diff --git a/src/transliteration.hh b/src/transliteration.hh new file mode 100644 index 00000000..2cc798fc --- /dev/null +++ b/src/transliteration.hh @@ -0,0 +1,71 @@ +/* This file is (c) 2008-2009 Konstantin Isakov + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#ifndef __TRANSLITERATION_HH_INCLUDED__ +#define __TRANSLITERATION_HH_INCLUDED__ + +#include "dictionary.hh" +#include + +namespace Transliteration { + +using std::map; +using gd::wstring; +using std::string; +using std::vector; + +class Table: public map< wstring, wstring > +{ + unsigned maxEntrySize; + +public: + + Table(): maxEntrySize( 0 ) + {} + + unsigned getMaxEntrySize() const + { return maxEntrySize; } + +protected: + + /// Inserts new entry into index. from and to are UTF8-encoded strings. + /// Also updates maxEntrySize. 
+ void ins( char const * from, char const * to ); +}; + +/// This is a base dictionary class for simple transliterations +class TransliterationDictionary: public Dictionary::Class +{ + string name; + Table const & table; + +public: + + TransliterationDictionary( string const & id, string const & name, + Table const & table ); + + virtual string getName() throw(); + + virtual map< Dictionary::Property, string > getProperties() throw(); + + virtual unsigned long getArticleCount() throw(); + + virtual unsigned long getWordCount() throw(); + + virtual vector< wstring > getAlternateWritings( wstring const & ) + throw(); + + virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & ) + throw( std::exception ); + + virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &, + unsigned long ) throw( std::exception ); + + virtual sptr< Dictionary::DataRequest > getArticle( wstring const &, + vector< wstring > const & ) + throw( std::exception ); +}; + +} + +#endif diff --git a/src/wordfinder.cc b/src/wordfinder.cc index 50dbf4e5..006f090a 100644 --- a/src/wordfinder.cc +++ b/src/wordfinder.cc @@ -87,19 +87,36 @@ void WordFinder::startSearch() searchQueued = false; searchInProgress = true; - wstring word = gd::toWString( inputWord ); + // Gather all writings of the word + + if ( allWordWritings.size() != 1 ) + allWordWritings.resize( 1 ); + + allWordWritings[ 0 ] = gd::toWString( inputWord ); for( size_t x = 0; x < inputDicts->size(); ++x ) { - sptr< Dictionary::WordSearchRequest > sr = - ( searchType == PrefixMatch ) ? 
- (*inputDicts)[ x ]->prefixMatch( word, 40 ) : - (*inputDicts)[ x ]->stemmedMatch( word, 3, 3, 30 ); + vector< wstring > writings = (*inputDicts)[ x ]->getAlternateWritings( allWordWritings[ 0 ] ); - connect( sr.get(), SIGNAL( finished() ), - this, SLOT( requestFinished() ), Qt::QueuedConnection ); + allWordWritings.insert( allWordWritings.end(), writings.begin(), writings.end() ); + } - queuedRequests.push_back( sr ); + // Query each dictionary for all word writings + + for( size_t x = 0; x < inputDicts->size(); ++x ) + { + for( size_t y = 0; y < allWordWritings.size(); ++y ) + { + sptr< Dictionary::WordSearchRequest > sr = + ( searchType == PrefixMatch ) ? + (*inputDicts)[ x ]->prefixMatch( allWordWritings[ y ], 40 ) : + (*inputDicts)[ x ]->stemmedMatch( allWordWritings[ y ], 3, 3, 30 ); + + connect( sr.get(), SIGNAL( finished() ), + this, SLOT( requestFinished() ), Qt::QueuedConnection ); + + queuedRequests.push_back( sr ); + } } // Handle any requests finished already @@ -257,7 +274,7 @@ void WordFinder::updateResults() resultsArray.push_back( OneResult() ); resultsArray.back().word = match; - resultsArray.back().rank = -1; + resultsArray.back().rank = INT_MAX; resultsArray.back().wasSuggested = ( weight != 0 ); insertResult.first->second = --resultsArray.end(); @@ -291,57 +308,65 @@ void WordFinder::updateResults() WorstMatch, Multiplier = 256 // Categories should be multiplied by Multiplier }; - - wstring target = Folding::applySimpleCaseOnly( gd::toWString( inputWord ) ); - wstring targetNoFullCase = Folding::applyFullCaseOnly( target ); - wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase ); - wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia ); - wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct ); - - wstring::size_type matchPos = 0; - - for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end(); - i != j; ++i ) + + for( unsigned wr = 0; wr < allWordWritings.size(); ++wr ) { - wstring 
resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs; - - if ( i->first == target ) - i->second->rank = ExactMatch * Multiplier; - else - if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase ) - i->second->rank = ExactNoFullCaseMatch * Multiplier; - else - if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia ) - i->second->rank = ExactNoDiaMatch * Multiplier; - else - if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct ) - i->second->rank = ExactNoPunctMatch * Multiplier; - else - if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs ) - i->second->rank = ExactNoWsMatch * Multiplier; - else - if ( hasSurroundedWithWs( i->first, target, matchPos ) ) - i->second->rank = ExactInsideMatch * Multiplier + matchPos; - else - if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) ) - i->second->rank = ExactNoDiaInsideMatch * Multiplier + matchPos; - else - if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) ) - i->second->rank = ExactNoPunctInsideMatch * Multiplier + matchPos; - else - if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 ) - i->second->rank = PrefixMatch * Multiplier + saturated( i->first.size() ); - else - if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 ) - i->second->rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() ); - else - if ( resultNoPunct.size() > targetNoPunct.size() && resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 ) - i->second->rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() ); - else - if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 ) - i->second->rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() ); - else - i->second->rank = WorstMatch * Multiplier; + wstring 
target = Folding::applySimpleCaseOnly( allWordWritings[ wr ] ); + wstring targetNoFullCase = Folding::applyFullCaseOnly( target ); + wstring targetNoDia = Folding::applyDiacriticsOnly( targetNoFullCase ); + wstring targetNoPunct = Folding::applyPunctOnly( targetNoDia ); + wstring targetNoWs = Folding::applyWhitespaceOnly( targetNoPunct ); + + wstring::size_type matchPos = 0; + + for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end(); + i != j; ++i ) + { + wstring resultNoFullCase, resultNoDia, resultNoPunct, resultNoWs; + + int rank; + + if ( i->first == target ) + rank = ExactMatch * Multiplier; + else + if ( ( resultNoFullCase = Folding::applyFullCaseOnly( i->first ) ) == targetNoFullCase ) + rank = ExactNoFullCaseMatch * Multiplier; + else + if ( ( resultNoDia = Folding::applyDiacriticsOnly( resultNoFullCase ) ) == targetNoDia ) + rank = ExactNoDiaMatch * Multiplier; + else + if ( ( resultNoPunct = Folding::applyPunctOnly( resultNoDia ) ) == targetNoPunct ) + rank = ExactNoPunctMatch * Multiplier; + else + if ( ( resultNoWs = Folding::applyWhitespaceOnly( resultNoPunct ) ) == targetNoWs ) + rank = ExactNoWsMatch * Multiplier; + else + if ( hasSurroundedWithWs( i->first, target, matchPos ) ) + rank = ExactInsideMatch * Multiplier + matchPos; + else + if ( hasSurroundedWithWs( resultNoDia, targetNoDia, matchPos ) ) + rank = ExactNoDiaInsideMatch * Multiplier + matchPos; + else + if ( hasSurroundedWithWs( resultNoPunct, targetNoPunct, matchPos ) ) + rank = ExactNoPunctInsideMatch * Multiplier + matchPos; + else + if ( i->first.size() > target.size() && i->first.compare( 0, target.size(), target ) == 0 ) + rank = PrefixMatch * Multiplier + saturated( i->first.size() ); + else + if ( resultNoDia.size() > targetNoDia.size() && resultNoDia.compare( 0, targetNoDia.size(), targetNoDia ) == 0 ) + rank = PrefixNoDiaMatch * Multiplier + saturated( i->first.size() ); + else + if ( resultNoPunct.size() > targetNoPunct.size() && 
resultNoPunct.compare( 0, targetNoPunct.size(), targetNoPunct ) == 0 ) + rank = PrefixNoPunctMatch * Multiplier + saturated( i->first.size() ); + else + if ( resultNoWs.size() > targetNoWs.size() && resultNoWs.compare( 0, targetNoWs.size(), targetNoWs ) == 0 ) + rank = PrefixNoWsMatch * Multiplier + saturated( i->first.size() ); + else + rank = WorstMatch * Multiplier; + + if ( i->second->rank > rank ) + i->second->rank = rank; // We store the best rank of any writing + } } resultsArray.sort( SortByRank() ); @@ -354,23 +379,29 @@ void WordFinder::updateResults() // in their beginnings, and second, the length of the strings. Here we assign // only the first one, storing it in rank. Then we sort the results using // SortByRankAndLength. - wstring target = Folding::apply( gd::toWString( inputWord ) ); - - for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end(); - i != j; ++i ) + for( unsigned wr = 0; wr < allWordWritings.size(); ++wr ) { - wstring resultFolded = Folding::apply( i->first ); + wstring target = Folding::apply( allWordWritings[ wr ] ); + + for( ResultsIndex::const_iterator i = resultsIndex.begin(), j = resultsIndex.end(); + i != j; ++i ) + { + wstring resultFolded = Folding::apply( i->first ); + + int charsInCommon = 0; + + for( wchar const * t = target.c_str(), * r = resultFolded.c_str(); + *t && *t == *r; ++t, ++r, ++charsInCommon ) ; + + int rank = -charsInCommon; // Negated so the lesser-than + // comparison would yield right + // results. - int charsInCommon = 0; - - for( wchar const * t = target.c_str(), * r = resultFolded.c_str(); - *t && *t == *r; ++t, ++r, ++charsInCommon ) ; - - i->second->rank = -charsInCommon; // Negated so the lesser-than - // comparison would yield right - // results. 
+ if ( i->second->rank > rank ) + i->second->rank = rank; // We store the best rank of any writing + } } - + resultsArray.sort( SortByRankAndLength() ); maxSearchResults = 15; diff --git a/src/wordfinder.hh b/src/wordfinder.hh index 5e58fc99..fe7aad1f 100644 --- a/src/wordfinder.hh +++ b/src/wordfinder.hh @@ -45,6 +45,8 @@ private: std::vector< sptr< Dictionary::Class > > const * inputDicts; + std::vector< gd::wstring > allWordWritings; // All writings of the inputWord + struct OneResult { gd::wstring word;