2012-02-20 21:47:14 +00:00
|
|
|
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
2009-05-06 14:39:08 +00:00
|
|
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
|
|
|
|
|
|
|
#include "transliteration.hh"
|
|
|
|
#include "utf8.hh"
|
|
|
|
#include "folding.hh"
|
2013-11-16 18:34:09 +00:00
|
|
|
#include "gddebug.hh"
|
2009-05-06 14:39:08 +00:00
|
|
|
|
|
|
|
namespace Transliteration {
|
|
|
|
|
|
|
|
using gd::wchar;
|
|
|
|
|
2015-10-19 13:52:23 +00:00
|
|
|
/// Builds the common part of a transliteration dictionary.
///
/// The base Dictionary::Class is initialised with the given id and an empty
/// list of strings. The supplied icon is stored as the dictionary icon and
/// flagged as already loaded.
///
/// @param id             passed to Dictionary::Class
/// @param name_          value later returned by getName()
/// @param icon_          icon stored in dictionaryIcon
/// @param caseSensitive_ stored in caseSensitive
BaseTransliterationDictionary::BaseTransliterationDictionary(
  string const & id,
  string const & name_,
  QIcon icon_,
  bool caseSensitive_ ):
  Dictionary::Class( id, vector< string >() ),
  name( name_ ),
  caseSensitive( caseSensitive_ )
{
  // The icon is handed in by the caller, so it is marked as loaded right away.
  dictionaryIcon       = icon_;
  dictionaryIconLoaded = true;
}
|
2009-05-06 14:39:08 +00:00
|
|
|
|
2022-06-03 13:28:41 +00:00
|
|
|
/// Returns the name given to the constructor.
string BaseTransliterationDictionary::getName() noexcept
{
  return name;
}
|
|
|
|
|
2022-06-03 13:28:41 +00:00
|
|
|
/// Returns an empty property map.
map< Dictionary::Property, string > BaseTransliterationDictionary::getProperties() noexcept
{
  return {};
}
|
|
|
|
|
2022-06-03 13:28:41 +00:00
|
|
|
/// Always reports zero articles.
unsigned long BaseTransliterationDictionary::getArticleCount() noexcept
{
  return 0;
}
|
|
|
|
|
2022-06-03 13:28:41 +00:00
|
|
|
/// Always reports zero words.
unsigned long BaseTransliterationDictionary::getWordCount() noexcept
{
  return 0;
}
|
|
|
|
|
2015-10-19 13:52:23 +00:00
|
|
|
/// Ignores both arguments and returns a freshly default-constructed
/// Dictionary::WordSearchRequestInstant.
sptr< Dictionary::WordSearchRequest >
BaseTransliterationDictionary::prefixMatch( wstring const &, unsigned long )
{
  return std::make_shared< Dictionary::WordSearchRequestInstant >();
}
|
2009-05-06 14:39:08 +00:00
|
|
|
|
2015-10-19 13:52:23 +00:00
|
|
|
/// Ignores all arguments and returns a Dictionary::DataRequestInstant
/// constructed with `false`.
// NOTE(review): `false` presumably means "no article found" -- confirm against
// the DataRequestInstant constructor.
sptr< Dictionary::DataRequest >
BaseTransliterationDictionary::getArticle( wstring const &,
                                           vector< wstring > const &,
                                           wstring const &,
                                           bool )
{
  return std::make_shared< Dictionary::DataRequestInstant >( false );
}
|
2009-05-06 14:39:08 +00:00
|
|
|
|
2015-10-19 13:52:23 +00:00
|
|
|
/// Wraps the alternate writings of `str` in an instant word-search request.
///
/// Every string returned by getAlternateWritings( str ) is appended, in order,
/// to the matches of a new Dictionary::WordSearchRequestInstant.
///
/// @param str the word whose alternate writings are requested
/// @return the request holding one match per alternate writing
sptr< Dictionary::WordSearchRequest >
BaseTransliterationDictionary::findHeadwordsForSynonym( wstring const & str )
{
  sptr< Dictionary::WordSearchRequestInstant > request =
    std::make_shared< Dictionary::WordSearchRequestInstant >();

  vector< wstring > writings = getAlternateWritings( str );

  GD_DPRINTF( "alts = %u\n", (unsigned) writings.size() );

  for ( auto const & writing : writings )
    request->getMatches().push_back( writing );

  return request;
}
|
|
|
|
|
|
|
|
|
|
|
|
void Table::ins( char const * from, char const * to )
|
|
|
|
{
|
|
|
|
wstring fr = Utf8::decode( std::string( from ) );
|
|
|
|
|
|
|
|
if ( fr.size() > maxEntrySize )
|
|
|
|
maxEntrySize = fr.size();
|
|
|
|
|
|
|
|
insert( std::pair< wstring, wstring >( fr,
|
|
|
|
Utf8::decode( std::string( to ) ) ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates a table-driven transliteration dictionary.
///
/// id, name, icon and case-sensitivity are forwarded unchanged to
/// BaseTransliterationDictionary. The `table` member is initialised from
/// `table_`.
TransliterationDictionary::TransliterationDictionary(
  string const & id,
  string const & name_,
  QIcon icon_,
  Table const & table_,
  bool caseSensitive_ ):
  BaseTransliterationDictionary( id, name_, icon_, caseSensitive_ ),
  table( table_ )
{
}
|
2009-05-06 14:39:08 +00:00
|
|
|
|
|
|
|
/// Transliterates `str` through the table and returns the result as the only
/// alternate writing, or nothing if transliteration changed nothing.
///
/// When the dictionary is not case-sensitive, the input is first passed
/// through Folding::applySimpleCaseOnly. The text is then scanned left to
/// right. At each position the longest table key that matches is replaced by
/// its mapped value; a character that starts no key is copied through as is.
///
/// @param str the word to transliterate
/// @return a vector with the transliterated word, or an empty vector when the
///         result equals the (possibly case-folded) input
vector< wstring > TransliterationDictionary::getAlternateWritings( wstring const & str )
  noexcept
{
  // For a case-sensitive table the input is used untouched; otherwise the
  // case-folded copy is what gets matched and compared against.
  wstring folded;
  if ( !caseSensitive )
    folded = Folding::applySimpleCaseOnly( str );

  wstring const & source = caseSensitive ? str : folded;

  wstring converted;
  size_t const total = source.size();
  size_t pos = 0;

  while ( pos < total )
  {
    // A key can be no longer than the longest table entry nor than what is
    // left of the input.
    size_t const remaining = total - pos;
    size_t len = table.getMaxEntrySize();
    if ( len > remaining )
      len = remaining;

    // Try candidates from the longest down to a single character.
    Table::const_iterator hit = table.end();
    for ( ; len >= 1; --len )
    {
      hit = table.find( wstring( source, pos, len ) );
      if ( hit != table.end() )
        break;
    }

    if ( hit != table.end() )
    {
      converted.append( hit->second );
      pos += len;
    }
    else
    {
      // No matches -- add this char as it is
      converted.push_back( source[ pos ] );
      ++pos;
    }
  }

  vector< wstring > results;

  if ( converted != source )
    results.push_back( converted );

  return results;
}
|
|
|
|
|
|
|
|
}
|