Support conversion between simplified and traditional Chinese characters

This commit is contained in:
Zhe Wang 2015-10-19 21:52:23 +08:00
parent 8ac7eddf2f
commit 12f67a79a4
10 changed files with 327 additions and 74 deletions

75
chinese.cc Normal file
View file

@ -0,0 +1,75 @@
/* This file is (c) 2015 Zhe Wang <0x1997@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#include "chinese.hh"
#include <QCoreApplication>
#include <opencc/Export.hpp>
#include <opencc/SimpleConverter.hpp>
#include "folding.hh"
#include "transliteration.hh"
#include "utf8.hh"
namespace Chinese {
class CharacterConversionDictionary: public Transliteration::BaseTransliterationDictionary
{
opencc::SimpleConverter converter;
public:
CharacterConversionDictionary( std::string const & id, std::string const & name,
QIcon icon, std::string const & openccConfig);
std::vector< std::wstring > getAlternateWritings( std::wstring const & )
throw();
};
/// Initializes the base dictionary as case-insensitive (caseSensitive = false)
/// and constructs the converter from the given OpenCC configuration name.
CharacterConversionDictionary::CharacterConversionDictionary(
  std::string const & dictionaryId,
  std::string const & dictionaryName,
  QIcon dictionaryIcon,
  std::string const & configName ):
  Transliteration::BaseTransliterationDictionary( dictionaryId, dictionaryName,
                                                  dictionaryIcon, false ),
  converter( configName )
{}
std::vector< std::wstring > CharacterConversionDictionary::getAlternateWritings( std::wstring const & str )
throw()
{
std::vector< std::wstring > results;
std::wstring folded = Folding::applySimpleCaseOnly( str );
std::wstring result = Utf8::decode( converter.Convert( Utf8::encode( folded ) ) );
if ( result != folded )
results.push_back( result );
return results;
}
/// Creates the character conversion dictionaries selected by cfg.
///
/// Nothing is created unless cfg.enable is set. Otherwise one dictionary is
/// added for each enabled direction, simplified-to-traditional first.
std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Chinese const & cfg )
  throw( std::exception )
{
  std::vector< sptr< Dictionary::Class > > dictionaries;

  // Master switch is off: the per-direction flags are ignored.
  if ( !cfg.enable )
    return dictionaries;

  if ( cfg.enableSimpToTradConversion )
  {
    std::string const title =
      QCoreApplication::translate( "Chinese", "Simplified to traditional Chinese conversion" ).toUtf8().data();

    dictionaries.push_back( new CharacterConversionDictionary( "abbd22460acb11992bb089b2ccda7a0c",
                                                               title,
                                                               QIcon( ":/flags/cn.png" ),
                                                               "s2t.json" ) );
  }

  if ( cfg.enableTradToSimpConversion )
  {
    std::string const title =
      QCoreApplication::translate( "Chinese", "Traditional to simplified Chinese conversion" ).toUtf8().data();

    dictionaries.push_back( new CharacterConversionDictionary( "43d783892e6cd3fa973e4232287cce72",
                                                               title,
                                                               QIcon( ":/flags/cn.png" ),
                                                               "t2s.json" ) );
  }

  return dictionaries;
}
}

19
chinese.hh Normal file
View file

@ -0,0 +1,19 @@
/* This file is (c) 2015 Zhe Wang <0x1997@gmail.com>
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
#ifndef __CHINESE_HH_INCLUDED__
#define __CHINESE_HH_INCLUDED__
#include <map>
#include "config.hh"
#include "dictionary.hh"
/// Chinese character conversion support.
namespace Chinese {
/// Builds the character conversion dictionaries selected by the given
/// configuration: none when conversion is disabled, otherwise one per
/// enabled direction (simplified to traditional, traditional to simplified).
std::vector< sptr< Dictionary::Class > > makeDictionaries( Config::Chinese const & )
throw( std::exception );
}
#endif

View file

@ -137,6 +137,13 @@ Preferences::Preferences():
{
}
/// Default Chinese conversion settings: the feature itself is off, while both
/// conversion directions are pre-selected.
Chinese::Chinese():
enable( false ),
enableSimpToTradConversion( true ),
enableTradToSimpConversion( true )
{
}
Romaji::Romaji():
enable( false ),
enableHepburn( true ),
@ -515,6 +522,15 @@ Class load() throw( exError )
applyBoolOption( c.transliteration.enableBelarusianTransliteration,
transliteration.namedItem( "enableBelarusianTransliteration" ) );
QDomNode chinese = transliteration.namedItem( "chinese" );
if ( !chinese.isNull() )
{
applyBoolOption( c.transliteration.chinese.enable, chinese.namedItem( "enable" ) );
applyBoolOption( c.transliteration.chinese.enableSimpToTradConversion, chinese.namedItem( "enableSimpToTradConversion" ) );
applyBoolOption( c.transliteration.chinese.enableTradToSimpConversion, chinese.namedItem( "enableTradToSimpConversion" ) );
}
QDomNode romaji = transliteration.namedItem( "romaji" );
if ( !romaji.isNull() )
@ -1199,6 +1215,23 @@ void save( Class const & c ) throw( exError )
opt.appendChild( dd.createTextNode( c.transliteration.enableBelarusianTransliteration ? "1":"0" ) );
transliteration.appendChild( opt );
// Chinese
// Written as a <chinese> child of the transliteration element, with one
// "1"/"0" option element per setting. The element names must match the ones
// looked up when the configuration is loaded.
QDomElement chinese = dd.createElement( "chinese" );
transliteration.appendChild( chinese );
opt = dd.createElement( "enable" );
opt.appendChild( dd.createTextNode( c.transliteration.chinese.enable ? "1":"0" ) );
chinese.appendChild( opt );
opt = dd.createElement( "enableSimpToTradConversion" );
opt.appendChild( dd.createTextNode( c.transliteration.chinese.enableSimpToTradConversion ? "1":"0" ) );
chinese.appendChild( opt );
// The traditional-to-simplified flag gets its own element; saving the
// simplified-to-traditional flag a second time here would lose this setting
// on the next load.
opt = dd.createElement( "enableTradToSimpConversion" );
opt.appendChild( dd.createTextNode( c.transliteration.chinese.enableTradToSimpConversion ? "1":"0" ) );
chinese.appendChild( opt );
// Romaji
QDomElement romaji = dd.createElement( "romaji" );

View file

@ -339,6 +339,26 @@ struct Hunspell
/// All the MediaWikis
typedef QVector< MediaWiki > MediaWikis;
/// Settings for the conversion between simplified and traditional Chinese
struct Chinese
{
  /// Master switch for the whole feature
  bool enable;
  /// Offer the simplified-to-traditional conversion
  bool enableSimpToTradConversion;
  /// Offer the traditional-to-simplified conversion
  bool enableTradToSimpConversion;

  Chinese();

  /// Two configurations are equal when all three flags agree.
  bool operator == ( Chinese const & rhs ) const
  {
    if ( enable != rhs.enable )
      return false;

    if ( enableSimpToTradConversion != rhs.enableSimpToTradConversion )
      return false;

    return enableTradToSimpConversion == rhs.enableTradToSimpConversion;
  }

  bool operator != ( Chinese const & rhs ) const
  { return !( *this == rhs ); }
};
/// Romaji transliteration configuration
struct Romaji
{
@ -371,14 +391,15 @@ struct Transliteration
bool enableGermanTransliteration;
bool enableGreekTransliteration;
bool enableBelarusianTransliteration;
Chinese chinese;
Romaji romaji;
bool operator == ( Transliteration const & other ) const
{ return enableRussianTransliteration == other.enableRussianTransliteration &&
romaji == other.romaji &&
enableGermanTransliteration == other.enableGermanTransliteration &&
enableGreekTransliteration == other.enableGreekTransliteration &&
enableBelarusianTransliteration == other.enableBelarusianTransliteration;
enableBelarusianTransliteration == other.enableBelarusianTransliteration &&
chinese == other.chinese && romaji == other.romaji;
}
bool operator != ( Transliteration const & other ) const

View file

@ -473,6 +473,16 @@ CONFIG( no_epwing_support ) {
LIBS += -leb
}
# Chinese character conversion is built by default. Adding
# no_chinese_conversion_support to CONFIG defines
# NO_CHINESE_CONVERSION_SUPPORT for the sources instead.
CONFIG( no_chinese_conversion_support ) {
DEFINES += NO_CHINESE_CONVERSION_SUPPORT
}
# Default build: compile the conversion sources and link against OpenCC.
!CONFIG( no_chinese_conversion_support ) {
HEADERS += chinese.hh
SOURCES += chinese.cc
LIBS += -lopencc
}
RESOURCES += resources.qrc \
flags.qrc
TRANSLATIONS += locale/ru_RU.ts \

View file

@ -35,6 +35,10 @@
#include "epwing.hh"
#endif
#ifndef NO_CHINESE_CONVERSION_SUPPORT
#include "chinese.hh"
#endif
#include <QMessageBox>
#include <QDir>
@ -273,6 +277,15 @@ void loadDictionaries( QWidget * parent, bool showInitially,
///// We create transliterations synchronously since they are very simple
// Make Chinese conversion
// chinese.hh is only included when NO_CHINESE_CONVERSION_SUPPORT is not
// defined, so the code using it has to be compiled out under the same
// condition; otherwise a build without Chinese conversion does not compile.
#ifndef NO_CHINESE_CONVERSION_SUPPORT
{
  vector< sptr< Dictionary::Class > > chineseDictionaries =
    Chinese::makeDictionaries( cfg.transliteration.chinese );

  // Append the conversion dictionaries to the list being built.
  dictionaries.insert( dictionaries.end(), chineseDictionaries.begin(),
                       chineseDictionaries.end() );
}
#endif
// Make Romaji
{
vector< sptr< Dictionary::Class > > romajiDictionaries =

View file

@ -93,6 +93,9 @@ Sources::Sources( QWidget * parent, Config::Class const & cfg):
ui.enableGermanTransliteration->setChecked( trs.enableGermanTransliteration );
ui.enableGreekTransliteration->setChecked( trs.enableGreekTransliteration );
ui.enableBelarusianTransliteration->setChecked( trs.enableBelarusianTransliteration );
ui.enableChineseConversion->setChecked( trs.chinese.enable );
ui.enableSimpToTradConversion->setChecked( trs.chinese.enableSimpToTradConversion );
ui.enableTradToSimpConversion->setChecked( trs.chinese.enableTradToSimpConversion );
ui.enableRomaji->setChecked( trs.romaji.enable );
ui.enableHepburn->setChecked( trs.romaji.enableHepburn );
ui.enableNihonShiki->setChecked( trs.romaji.enableNihonShiki );
@ -349,6 +352,9 @@ Config::Transliteration Sources::getTransliteration() const
tr.enableGermanTransliteration = ui.enableGermanTransliteration->isChecked();
tr.enableGreekTransliteration = ui.enableGreekTransliteration->isChecked();
tr.enableBelarusianTransliteration = ui.enableBelarusianTransliteration->isChecked();
tr.chinese.enable = ui.enableChineseConversion->isChecked();
tr.chinese.enableSimpToTradConversion = ui.enableSimpToTradConversion->isChecked();
tr.chinese.enableTradToSimpConversion = ui.enableTradToSimpConversion->isChecked();
tr.romaji.enable = ui.enableRomaji->isChecked();
tr.romaji.enableHepburn = ui.enableHepburn->isChecked();
tr.romaji.enableNihonShiki = ui.enableNihonShiki->isChecked();

View file

@ -17,7 +17,7 @@
<item>
<widget class="QTabWidget" name="tabWidget">
<property name="currentIndex">
<number>0</number>
<number>8</number>
</property>
<property name="iconSize">
<size>
@ -721,6 +721,54 @@ in the future, or register on the site to get your own key.</string>
</property>
</spacer>
</item>
<item>
<widget class="QGroupBox" name="enableChineseConversion">
<property name="toolTip">
<string>Enable conversion between simplified and traditional Chinese characters</string>
</property>
<property name="title">
<string>Chinese Conversion</string>
</property>
<property name="checkable">
<bool>true</bool>
</property>
<layout class="QGridLayout" name="gridLayout_4">
<item row="0" column="1">
<widget class="QCheckBox" name="enableTradToSimpConversion">
<property name="toolTip">
<string>Enable conversion from traditional characters to simplified characters</string>
</property>
<property name="text">
<string>Traditional to simplified conversion</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="enableSimpToTradConversion">
<property name="toolTip">
<string>Enable conversion from simplified characters to traditional characters</string>
</property>
<property name="text">
<string>Simplified to traditional conversion</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="verticalSpacer_13">
<property name="orientation">
<enum>Qt::Vertical</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>20</width>
<height>40</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QGroupBox" name="enableRomaji">
<property name="toolTip">

View file

@ -10,52 +10,77 @@ namespace Transliteration {
using gd::wchar;
void Table::ins( char const * from, char const * to )
{
wstring fr = Utf8::decode( std::string( from ) );
if ( fr.size() > maxEntrySize )
maxEntrySize = fr.size();
insert( std::pair< wstring, wstring >( fr,
Utf8::decode( std::string( to ) ) ) );
}
TransliterationDictionary::TransliterationDictionary( string const & id,
string const & name_,
QIcon icon_,
Table const & table_,
bool caseSensitive_ ):
BaseTransliterationDictionary::BaseTransliterationDictionary( string const & id,
string const & name_,
QIcon icon_,
bool caseSensitive_ ):
Dictionary::Class( id, vector< string >() ),
name( name_ ), table( table_ ),
name( name_ ),
caseSensitive( caseSensitive_ )
{
dictionaryIcon = dictionaryNativeIcon = icon_;
dictionaryIconLoaded = true;
}
string TransliterationDictionary::getName() throw()
string BaseTransliterationDictionary::getName() throw()
{ return name; }
map< Dictionary::Property, string > TransliterationDictionary::getProperties() throw()
map< Dictionary::Property, string > BaseTransliterationDictionary::getProperties() throw()
{ return map< Dictionary::Property, string >(); }
unsigned long TransliterationDictionary::getArticleCount() throw()
unsigned long BaseTransliterationDictionary::getArticleCount() throw()
{ return 0; }
unsigned long TransliterationDictionary::getWordCount() throw()
unsigned long BaseTransliterationDictionary::getWordCount() throw()
{ return 0; }
sptr< Dictionary::WordSearchRequest > TransliterationDictionary::prefixMatch( wstring const &,
unsigned long ) throw( std::exception )
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::prefixMatch( wstring const &,
unsigned long ) throw( std::exception )
{ return new Dictionary::WordSearchRequestInstant(); }
sptr< Dictionary::DataRequest > TransliterationDictionary::getArticle( wstring const &,
vector< wstring > const &,
wstring const & )
sptr< Dictionary::DataRequest > BaseTransliterationDictionary::getArticle( wstring const &,
vector< wstring > const &,
wstring const & )
throw( std::exception )
{ return new Dictionary::DataRequestInstant( false ); }
/// Answers a synonym lookup with the alternate writings of str: every string
/// returned by getAlternateWritings() is added as a match of the
/// already-finished request.
sptr< Dictionary::WordSearchRequest > BaseTransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
  throw( std::exception )
{
  sptr< Dictionary::WordSearchRequestInstant > request = new Dictionary::WordSearchRequestInstant();

  vector< wstring > const alternates = getAlternateWritings( str );

  GD_DPRINTF( "alts = %u\n", static_cast< unsigned >( alternates.size() ) );

  for( vector< wstring >::const_iterator alt = alternates.begin();
       alt != alternates.end(); ++alt )
    request->getMatches().push_back( *alt );

  return request;
}
void Table::ins( char const * from, char const * to )
{
wstring fr = Utf8::decode( std::string( from ) );
if ( fr.size() > maxEntrySize )
maxEntrySize = fr.size();
insert( std::pair< wstring, wstring >( fr,
Utf8::decode( std::string( to ) ) ) );
}
/// Forwards the common dictionary attributes to the base class and keeps a
/// reference to the transliteration table. The table is not copied.
TransliterationDictionary::TransliterationDictionary(
  string const & dictionaryId,
  string const & dictionaryName,
  QIcon dictionaryIcon,
  Table const & sourceTable,
  bool isCaseSensitive ):
  BaseTransliterationDictionary( dictionaryId, dictionaryName, dictionaryIcon, isCaseSensitive ),
  table( sourceTable )
{}
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str )
throw()
@ -80,17 +105,17 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
size_t left = target->size();
Table::const_iterator i;
while( left )
{
unsigned x;
for( x = table.getMaxEntrySize(); x >= 1; --x )
{
if ( left >= x )
{
i = table.find( wstring( ptr, x ) );
if ( i != table.end() )
{
result.append( i->second );
@ -108,26 +133,11 @@ vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const
--left;
}
}
if ( result != *target )
results.push_back( result );
return results;
}
sptr< Dictionary::WordSearchRequest > TransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
throw( std::exception )
{
sptr< Dictionary::WordSearchRequestInstant > result = new Dictionary::WordSearchRequestInstant();
vector< wstring > alts = getAlternateWritings( str );
GD_DPRINTF( "alts = %u\n", (unsigned) alts.size() );
for( unsigned x = 0; x < alts.size(); ++x )
result->getMatches().push_back( alts[ x ] );
return result;
}
}

View file

@ -13,7 +13,45 @@ using std::map;
using gd::wstring;
using std::string;
using std::vector;
/// This is a base dictionary class for simple transliterations.
/// It has no articles or words of its own: the only thing it contributes is
/// the set of alternate writings of a word, reported as synonym headwords.
/// Derived classes supply the writings through getAlternateWritings().
class BaseTransliterationDictionary: public Dictionary::Class
{
// Name reported by getName()
string name;
protected:
// Set from the constructor argument and made available to derived classes
bool caseSensitive;
public:
/// Stores the name and the case sensitivity flag, and uses the icon as the
/// dictionary icon.
BaseTransliterationDictionary( string const & id, string const & name,
QIcon icon, bool caseSensitive = true );
/// Returns the name given at construction.
virtual string getName() throw();
/// Always returns an empty property map.
virtual map< Dictionary::Property, string > getProperties() throw();
/// Always returns 0.
virtual unsigned long getArticleCount() throw();
/// Always returns 0.
virtual unsigned long getWordCount() throw();
/// Returns the alternate writings of the given word. Must be implemented by
/// the derived class.
virtual vector< wstring > getAlternateWritings( wstring const & )
throw() = 0;
/// Returns a request whose matches are the results of
/// getAlternateWritings() for the given word.
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
throw( std::exception );
/// Returns a default-constructed instant request; no matches are added.
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &,
unsigned long ) throw( std::exception );
/// Returns an instant data request constructed with 'false'.
virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const &,
wstring const & )
throw( std::exception );
};
class Table: public map< wstring, wstring >
{
unsigned maxEntrySize;
@ -25,7 +63,7 @@ public:
unsigned getMaxEntrySize() const
{ return maxEntrySize; }
protected:
/// Inserts new entry into index. from and to are UTF8-encoded strings.
@ -33,13 +71,12 @@ protected:
void ins( char const * from, char const * to );
};
/// This is a base dictionary class for simple transliteratons
class TransliterationDictionary: public Dictionary::Class
/// A base dictionary class for table-based transliterations
class TransliterationDictionary: public BaseTransliterationDictionary
{
string name;
Table const & table;
bool caseSensitive;
public:
TransliterationDictionary( string const & id, string const & name,
@ -47,27 +84,8 @@ public:
Table const & table,
bool caseSensitive = true );
virtual string getName() throw();
virtual map< Dictionary::Property, string > getProperties() throw();
virtual unsigned long getArticleCount() throw();
virtual unsigned long getWordCount() throw();
virtual vector< wstring > getAlternateWritings( wstring const & )
throw();
virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
throw( std::exception );
virtual sptr< Dictionary::WordSearchRequest > prefixMatch( wstring const &,
unsigned long ) throw( std::exception );
virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
vector< wstring > const &,
wstring const & )
throw( std::exception );
};
}