mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
refactor: merge some encoding and encoding names related code
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
Some checks are pending
SonarCloud / Build and analyze (push) Waiting to run
This commit is contained in:
parent
52a9427b8b
commit
dda91a30dd
|
@ -26,6 +26,7 @@ Checks: >
|
||||||
-google-readability-casting,
|
-google-readability-casting,
|
||||||
-hicpp-deprecated-headers,
|
-hicpp-deprecated-headers,
|
||||||
-hicpp-no-array-decay,
|
-hicpp-no-array-decay,
|
||||||
|
-misc-confusable-identifiers,
|
||||||
-misc-const-correctness,
|
-misc-const-correctness,
|
||||||
-misc-include-cleaner,
|
-misc-include-cleaner,
|
||||||
-misc-non-private-member-variables-in-classes,
|
-misc-non-private-member-variables-in-classes,
|
||||||
|
@ -33,6 +34,7 @@ Checks: >
|
||||||
-modernize-deprecated-headers,
|
-modernize-deprecated-headers,
|
||||||
-modernize-use-nodiscard,
|
-modernize-use-nodiscard,
|
||||||
-modernize-use-trailing-return-type,
|
-modernize-use-trailing-return-type,
|
||||||
|
-performance-enum-size,
|
||||||
-readability-function-cognitive-complexity,
|
-readability-function-cognitive-complexity,
|
||||||
-readability-identifier-length,
|
-readability-identifier-length,
|
||||||
-readability-magic-numbers,
|
-readability-magic-numbers,
|
||||||
|
|
|
@ -6,12 +6,8 @@
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
char const * const Iconv::GdWchar = "UTF-32LE";
|
|
||||||
char const * const Iconv::Utf16Le = "UTF-16LE";
|
|
||||||
char const * const Iconv::Utf8 = "UTF-8";
|
|
||||||
|
|
||||||
Iconv::Iconv( char const * from ):
|
Iconv::Iconv( char const * from ):
|
||||||
state( iconv_open( Utf8, from ) )
|
state( iconv_open( Text::utf8, from ) )
|
||||||
{
|
{
|
||||||
if ( state == (iconv_t)-1 ) {
|
if ( state == (iconv_t)-1 ) {
|
||||||
throw exCantInit( strerror( errno ) );
|
throw exCantInit( strerror( errno ) );
|
||||||
|
|
|
@ -3,14 +3,11 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <QString>
|
|
||||||
|
|
||||||
#include "text.hh"
|
|
||||||
#include "ex.hh"
|
#include "ex.hh"
|
||||||
|
#include "text.hh"
|
||||||
|
#include <QString>
|
||||||
#include <iconv.h>
|
#include <iconv.h>
|
||||||
|
|
||||||
|
|
||||||
/// "Internationalization conversion" for char encoding conversion, currently implemented with iconv()
|
/// "Internationalization conversion" for char encoding conversion, currently implemented with iconv()
|
||||||
/// Only supports converting from a known "from" to UTF8
|
/// Only supports converting from a known "from" to UTF8
|
||||||
class Iconv
|
class Iconv
|
||||||
|
@ -22,12 +19,6 @@ public:
|
||||||
DEF_EX( Ex, "Iconv exception", std::exception )
|
DEF_EX( Ex, "Iconv exception", std::exception )
|
||||||
DEF_EX_STR( exCantInit, "Can't initialize iconv conversion:", Ex )
|
DEF_EX_STR( exCantInit, "Can't initialize iconv conversion:", Ex )
|
||||||
|
|
||||||
// Some predefined character sets' names
|
|
||||||
|
|
||||||
static char const * const GdWchar;
|
|
||||||
static char const * const Utf16Le;
|
|
||||||
static char const * const Utf8;
|
|
||||||
|
|
||||||
explicit Iconv( char const * from );
|
explicit Iconv( char const * from );
|
||||||
|
|
||||||
~Iconv();
|
~Iconv();
|
||||||
|
|
|
@ -10,6 +10,60 @@
|
||||||
|
|
||||||
namespace Text {
|
namespace Text {
|
||||||
|
|
||||||
|
/// Returns the name constant that corresponds to the encoding \p e.
/// Encoding::Utf8 and any value without a dedicated entry both yield `utf8`.
const char * getEncodingNameFor( Encoding e )
{
  // Start from the fallback so only the non-UTF-8 encodings need a case.
  const char * label = utf8;

  switch ( e ) {
    case Encoding::Utf16LE:
      label = utf16_le;
      break;
    case Encoding::Utf16BE:
      label = utf16_be;
      break;
    case Encoding::Windows1252:
      label = windows_1252;
      break;
    case Encoding::Windows1251:
      label = windows_1251;
      break;
    case Encoding::Windows1250:
      label = windows_1250;
      break;
    case Encoding::Utf32BE:
      label = utf32_be;
      break;
    case Encoding::Utf32LE:
      label = utf32_le;
      break;
    case Encoding::Utf32:
      label = utf32;
      break;
    case Encoding::Utf8:
    default:
      // Keep the UTF-8 fallback chosen above.
      break;
  }

  return label;
}
|
||||||
|
|
||||||
|
/// Maps an encoding name to its Encoding value.
/// The name is upper-cased before it is compared against the known name
/// constants; anything that matches none of them yields Encoding::Utf8.
Encoding getEncodingForName( const QByteArray & name )
{
  struct NamedEncoding
  {
    const char * label;
    Encoding value;
  };

  // Every name that maps to something other than the UTF-8 fallback.
  static constexpr NamedEncoding known[] = {
    { utf32_le, Encoding::Utf32LE },
    { utf32_be, Encoding::Utf32BE },
    { utf32, Encoding::Utf32 },
    { utf16_le, Encoding::Utf16LE },
    { utf16_be, Encoding::Utf16BE },
    { windows_1252, Encoding::Windows1252 },
    { windows_1251, Encoding::Windows1251 },
    { windows_1250, Encoding::Windows1250 },
  };

  const QByteArray upper = name.toUpper();

  for ( const auto & candidate : known ) {
    if ( upper == candidate.label ) {
      return candidate.value;
    }
  }

  return Encoding::Utf8;
}
|
||||||
|
|
||||||
/// Encodes the given UTF-32 into UTF-8. The inSize specifies the number
|
/// Encodes the given UTF-32 into UTF-8. The inSize specifies the number
|
||||||
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
|
/// of wide characters the 'in' pointer points to. The 'out' buffer must be
|
||||||
|
@ -200,87 +254,31 @@ int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2lengt
|
||||||
return pos - s1 + s2length;
|
return pos - s1 + s2length;
|
||||||
}
|
}
|
||||||
|
|
||||||
char const * getEncodingNameFor( Encoding e )
|
|
||||||
{
|
|
||||||
switch ( e ) {
|
|
||||||
case Utf32LE:
|
|
||||||
return "UTF-32LE";
|
|
||||||
case Utf32BE:
|
|
||||||
return "UTF-32BE";
|
|
||||||
case Utf16LE:
|
|
||||||
return "UTF-16LE";
|
|
||||||
case Utf16BE:
|
|
||||||
return "UTF-16BE";
|
|
||||||
case Windows1252:
|
|
||||||
return "WINDOWS-1252";
|
|
||||||
case Windows1251:
|
|
||||||
return "WINDOWS-1251";
|
|
||||||
case Utf8:
|
|
||||||
return "UTF-8";
|
|
||||||
case Windows1250:
|
|
||||||
return "WINDOWS-1250";
|
|
||||||
default:
|
|
||||||
return "UTF-8";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Encoding getEncodingForName( const QByteArray & _name )
|
|
||||||
{
|
|
||||||
const auto name = _name.toUpper();
|
|
||||||
if ( name == "UTF-32LE" ) {
|
|
||||||
return Utf32LE;
|
|
||||||
}
|
|
||||||
if ( name == "UTF-32BE" ) {
|
|
||||||
return Utf32BE;
|
|
||||||
}
|
|
||||||
if ( name == "UTF-16LE" ) {
|
|
||||||
return Utf16LE;
|
|
||||||
}
|
|
||||||
if ( name == "UTF-16BE" ) {
|
|
||||||
return Utf16BE;
|
|
||||||
}
|
|
||||||
if ( name == "WINDOWS-1252" ) {
|
|
||||||
return Windows1252;
|
|
||||||
}
|
|
||||||
if ( name == "WINDOWS-1251" ) {
|
|
||||||
return Windows1251;
|
|
||||||
}
|
|
||||||
if ( name == "UTF-8" ) {
|
|
||||||
return Utf8;
|
|
||||||
}
|
|
||||||
if ( name == "WINDOWS-1250" ) {
|
|
||||||
return Windows1250;
|
|
||||||
}
|
|
||||||
return Utf8;
|
|
||||||
}
|
|
||||||
|
|
||||||
LineFeed initLineFeed( const Encoding e )
|
LineFeed initLineFeed( const Encoding e )
|
||||||
{
|
{
|
||||||
LineFeed lf{};
|
LineFeed lf{};
|
||||||
switch ( e ) {
|
switch ( e ) {
|
||||||
case Utf32LE:
|
case Encoding::Utf32LE:
|
||||||
lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 };
|
lf.lineFeed = new char[ 4 ]{ 0x0A, 0, 0, 0 };
|
||||||
lf.length = 4;
|
lf.length = 4;
|
||||||
break;
|
break;
|
||||||
case Utf32BE:
|
case Encoding::Utf32BE:
|
||||||
lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A };
|
lf.lineFeed = new char[ 4 ]{ 0, 0, 0, 0x0A };
|
||||||
lf.length = 4;
|
lf.length = 4;
|
||||||
break;
|
break;
|
||||||
case Utf16LE:
|
case Encoding::Utf16LE:
|
||||||
lf.lineFeed = new char[ 2 ]{ 0x0A, 0 };
|
lf.lineFeed = new char[ 2 ]{ 0x0A, 0 };
|
||||||
lf.length = 2;
|
lf.length = 2;
|
||||||
break;
|
break;
|
||||||
case Utf16BE:
|
case Encoding::Utf16BE:
|
||||||
lf.lineFeed = new char[ 2 ]{ 0, 0x0A };
|
lf.lineFeed = new char[ 2 ]{ 0, 0x0A };
|
||||||
lf.length = 2;
|
lf.length = 2;
|
||||||
break;
|
break;
|
||||||
case Windows1252:
|
case Encoding::Windows1252:
|
||||||
|
case Encoding::Windows1251:
|
||||||
case Windows1251:
|
case Encoding::Windows1250:
|
||||||
|
case Encoding::Utf8:
|
||||||
case Utf8:
|
|
||||||
|
|
||||||
case Windows1250:
|
|
||||||
default:
|
default:
|
||||||
lf.length = 1;
|
lf.length = 1;
|
||||||
lf.lineFeed = new char[ 1 ]{ 0x0A };
|
lf.lineFeed = new char[ 1 ]{ 0x0A };
|
||||||
|
|
|
@ -2,18 +2,18 @@
|
||||||
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <cstdio>
|
#include "ex.hh"
|
||||||
#include <QByteArray>
|
#include <QByteArray>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "ex.hh"
|
|
||||||
|
|
||||||
/// Facilities to process Text, focusing on Unicode
|
/// Facilities to process Text, focusing on Unicode
|
||||||
namespace Text {
|
namespace Text {
|
||||||
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
|
DEF_EX_STR( exCantDecode, "Can't decode the given string from Utf8:", std::exception )
|
||||||
|
|
||||||
// Those are possible encodings for .dsl files
|
/// Encoding names. Ref -> IANA's encoding names https://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||||
enum Encoding {
|
/// Notice: the order of the enumerators up to and including Utf32LE must not be changed, because the current .dsl index file format stores their numeric values. Add new encodings only after Utf32LE.
|
||||||
Utf16LE,
|
enum class Encoding {
|
||||||
|
Utf16LE = 0,
|
||||||
Utf16BE,
|
Utf16BE,
|
||||||
Windows1252,
|
Windows1252,
|
||||||
Windows1251,
|
Windows1251,
|
||||||
|
@ -21,9 +21,25 @@ enum Encoding {
|
||||||
Utf8,
|
Utf8,
|
||||||
Utf32BE,
|
Utf32BE,
|
||||||
Utf32LE,
|
Utf32LE,
|
||||||
|
Utf32,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline constexpr auto utf16_be = "UTF-16BE";
|
||||||
|
inline constexpr auto utf16_le = "UTF-16LE";
|
||||||
|
inline constexpr auto utf32 = "UTF-32";
|
||||||
|
inline constexpr auto utf32_be = "UTF-32BE";
|
||||||
|
inline constexpr auto utf32_le = "UTF-32LE";
|
||||||
|
inline constexpr auto utf8 = "UTF-8";
|
||||||
|
inline constexpr auto windows_1250 = "WINDOWS-1250";
|
||||||
|
inline constexpr auto windows_1251 = "WINDOWS-1251";
|
||||||
|
inline constexpr auto windows_1252 = "WINDOWS-1252";
|
||||||
|
|
||||||
|
const char * getEncodingNameFor( Encoding e );
|
||||||
|
Encoding getEncodingForName( const QByteArray & name );
|
||||||
|
|
||||||
|
/// utf32 -> utf8
|
||||||
std::string toUtf8( std::u32string const & ) noexcept;
|
std::string toUtf8( std::u32string const & ) noexcept;
|
||||||
|
/// utf8 -> utf32
|
||||||
std::u32string toUtf32( std::string const & );
|
std::u32string toUtf32( std::string const & );
|
||||||
|
|
||||||
/// Since the standard isspace() is locale-specific, we need something
|
/// Since the standard isspace() is locale-specific, we need something
|
||||||
|
@ -33,8 +49,6 @@ bool isspace( int c );
|
||||||
|
|
||||||
//get the first line in string s1. -1 if not found
|
//get the first line in string s1. -1 if not found
|
||||||
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
|
int findFirstLinePosition( char * s1, int s1length, const char * s2, int s2length );
|
||||||
char const * getEncodingNameFor( Encoding e );
|
|
||||||
Encoding getEncodingForName( const QByteArray & name );
|
|
||||||
|
|
||||||
struct LineFeed
|
struct LineFeed
|
||||||
{
|
{
|
||||||
|
|
|
@ -1144,8 +1144,9 @@ void DslDictionary::getArticleText( uint32_t articleAddress, QString & headword,
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
try {
|
try {
|
||||||
articleData =
|
articleData = Iconv::toWstring( getEncodingNameFor( static_cast< Encoding >( idxHeader.dslEncoding ) ),
|
||||||
Iconv::toWstring( getEncodingNameFor( Encoding( idxHeader.dslEncoding ) ), articleBody, articleSize );
|
articleBody,
|
||||||
|
articleSize );
|
||||||
free( articleBody );
|
free( articleBody );
|
||||||
|
|
||||||
// Strip DSL comments
|
// Strip DSL comments
|
||||||
|
@ -1789,7 +1790,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
idx.write( soundDictName.data(), soundDictName.size() );
|
idx.write( soundDictName.data(), soundDictName.size() );
|
||||||
}
|
}
|
||||||
|
|
||||||
idxHeader.dslEncoding = scanner.getEncoding();
|
idxHeader.dslEncoding = static_cast< uint32_t >( scanner.getEncoding() );
|
||||||
|
|
||||||
IndexedWords indexedWords;
|
IndexedWords indexedWords;
|
||||||
|
|
||||||
|
|
|
@ -844,7 +844,7 @@ bool ArticleDom::atSignFirstInLine()
|
||||||
/////////////// DslScanner
|
/////////////// DslScanner
|
||||||
|
|
||||||
DslScanner::DslScanner( string const & fileName ):
|
DslScanner::DslScanner( string const & fileName ):
|
||||||
encoding( Text::Utf8 ),
|
encoding( Text::Encoding::Utf8 ),
|
||||||
readBufferPtr( readBuffer ),
|
readBufferPtr( readBuffer ),
|
||||||
readBufferLeft( 0 ),
|
readBufferLeft( 0 ),
|
||||||
linesRead( 0 )
|
linesRead( 0 )
|
||||||
|
@ -875,19 +875,19 @@ DslScanner::DslScanner( string const & fileName ):
|
||||||
guessedEncoding.has_value() ) {
|
guessedEncoding.has_value() ) {
|
||||||
switch ( guessedEncoding.value() ) {
|
switch ( guessedEncoding.value() ) {
|
||||||
case QStringConverter::Utf8:
|
case QStringConverter::Utf8:
|
||||||
encoding = Text::Utf8;
|
encoding = Text::Encoding::Utf8;
|
||||||
break;
|
break;
|
||||||
case QStringConverter::Utf16LE:
|
case QStringConverter::Utf16LE:
|
||||||
encoding = Text::Utf16LE;
|
encoding = Text::Encoding::Utf16LE;
|
||||||
break;
|
break;
|
||||||
case QStringConverter::Utf16BE:
|
case QStringConverter::Utf16BE:
|
||||||
encoding = Text::Utf16BE;
|
encoding = Text::Encoding::Utf16BE;
|
||||||
break;
|
break;
|
||||||
case QStringConverter::Utf32LE:
|
case QStringConverter::Utf32LE:
|
||||||
encoding = Text::Utf16LE;
|
encoding = Text::Encoding::Utf16LE;
|
||||||
break;
|
break;
|
||||||
case QStringConverter::Utf32BE:
|
case QStringConverter::Utf32BE:
|
||||||
encoding = Text::Utf32BE;
|
encoding = Text::Encoding::Utf32BE;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
@ -976,13 +976,13 @@ DslScanner::DslScanner( string const & fileName ):
|
||||||
qWarning( "Warning: encoding was specified in a Unicode file, ignoring." );
|
qWarning( "Warning: encoding was specified in a Unicode file, ignoring." );
|
||||||
}
|
}
|
||||||
else if ( !arg.compare( U"Latin" ) ) {
|
else if ( !arg.compare( U"Latin" ) ) {
|
||||||
encoding = Text::Windows1252;
|
encoding = Text::Encoding::Windows1252;
|
||||||
}
|
}
|
||||||
else if ( !arg.compare( U"Cyrillic" ) ) {
|
else if ( !arg.compare( U"Cyrillic" ) ) {
|
||||||
encoding = Text::Windows1251;
|
encoding = Text::Encoding::Windows1251;
|
||||||
}
|
}
|
||||||
else if ( !arg.compare( U"EasternEuropean" ) ) {
|
else if ( !arg.compare( U"EasternEuropean" ) ) {
|
||||||
encoding = Text::Windows1250;
|
encoding = Text::Encoding::Windows1250;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
gzclose( f );
|
gzclose( f );
|
||||||
|
|
|
@ -207,8 +207,8 @@ void stripComments( std::u32string &, bool & );
|
||||||
inline size_t DslScanner::distanceToBytes( size_t x ) const
|
inline size_t DslScanner::distanceToBytes( size_t x ) const
|
||||||
{
|
{
|
||||||
switch ( encoding ) {
|
switch ( encoding ) {
|
||||||
case Text::Utf16LE:
|
case Encoding::Utf16LE:
|
||||||
case Text::Utf16BE:
|
case Encoding::Utf16BE:
|
||||||
return x * 2;
|
return x * 2;
|
||||||
default:
|
default:
|
||||||
return x;
|
return x;
|
||||||
|
|
|
@ -123,7 +123,7 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
GlsScanner::GlsScanner( string const & fileName ):
|
GlsScanner::GlsScanner( string const & fileName ):
|
||||||
encoding( Text::Utf8 ),
|
encoding( Encoding::Utf8 ),
|
||||||
readBufferPtr( readBuffer ),
|
readBufferPtr( readBuffer ),
|
||||||
readBufferLeft( 0 ),
|
readBufferLeft( 0 ),
|
||||||
linesRead( 0 )
|
linesRead( 0 )
|
||||||
|
@ -149,10 +149,10 @@ GlsScanner::GlsScanner( string const & fileName ):
|
||||||
// If the file begins with the dedicated Unicode marker, we just consume
|
// If the file begins with the dedicated Unicode marker, we just consume
|
||||||
// it. If, on the other hand, it's not, we return the bytes back
|
// it. If, on the other hand, it's not, we return the bytes back
|
||||||
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) {
|
if ( firstBytes[ 0 ] == 0xFF && firstBytes[ 1 ] == 0xFE ) {
|
||||||
encoding = Text::Utf16LE;
|
encoding = Encoding::Utf16LE;
|
||||||
}
|
}
|
||||||
else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) {
|
else if ( firstBytes[ 0 ] == 0xFE && firstBytes[ 1 ] == 0xFF ) {
|
||||||
encoding = Text::Utf16BE;
|
encoding = Encoding::Utf16BE;
|
||||||
}
|
}
|
||||||
else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) {
|
else if ( firstBytes[ 0 ] == 0xEF && firstBytes[ 1 ] == 0xBB ) {
|
||||||
// Looks like Utf8, read one more byte
|
// Looks like Utf8, read one more byte
|
||||||
|
@ -161,14 +161,14 @@ GlsScanner::GlsScanner( string const & fileName ):
|
||||||
gzclose( f );
|
gzclose( f );
|
||||||
throw exMalformedGlsFile( fileName );
|
throw exMalformedGlsFile( fileName );
|
||||||
}
|
}
|
||||||
encoding = Text::Utf8;
|
encoding = Encoding::Utf8;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ( gzrewind( f ) ) {
|
if ( gzrewind( f ) ) {
|
||||||
gzclose( f );
|
gzclose( f );
|
||||||
throw exCantOpen( fileName );
|
throw exCantOpen( fileName );
|
||||||
}
|
}
|
||||||
encoding = Text::Utf8;
|
encoding = Encoding::Utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
codec = QTextCodec::codecForName( Text::getEncodingNameFor( encoding ) );
|
codec = QTextCodec::codecForName( Text::getEncodingNameFor( encoding ) );
|
||||||
|
@ -1259,7 +1259,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( vector< string > const & f
|
||||||
idx.write( (uint32_t)dictionaryName.size() );
|
idx.write( (uint32_t)dictionaryName.size() );
|
||||||
idx.write( dictionaryName.data(), dictionaryName.size() );
|
idx.write( dictionaryName.data(), dictionaryName.size() );
|
||||||
|
|
||||||
idxHeader.glsEncoding = scanner.getEncoding();
|
idxHeader.glsEncoding = static_cast< uint32_t >( scanner.getEncoding() );
|
||||||
|
|
||||||
IndexedWords indexedWords;
|
IndexedWords indexedWords;
|
||||||
|
|
||||||
|
|
|
@ -207,7 +207,7 @@ void HunspellArticleRequest::run()
|
||||||
|
|
||||||
QMutexLocker _( &hunspellMutex );
|
QMutexLocker _( &hunspellMutex );
|
||||||
|
|
||||||
string trimmedWord_utf8 = Iconv::toUtf8( Iconv::GdWchar, trimmedWord.data(), trimmedWord.size() );
|
string trimmedWord_utf8 = Iconv::toUtf8( Text::utf32, trimmedWord.data(), trimmedWord.size() );
|
||||||
|
|
||||||
if ( hunspell.spell( trimmedWord_utf8 ) ) {
|
if ( hunspell.spell( trimmedWord_utf8 ) ) {
|
||||||
// Good word -- no spelling suggestions then.
|
// Good word -- no spelling suggestions then.
|
||||||
|
@ -361,7 +361,7 @@ QList< std::u32string > suggest( std::u32string & word, QMutex & hunspellMutex,
|
||||||
try {
|
try {
|
||||||
QMutexLocker _( &hunspellMutex );
|
QMutexLocker _( &hunspellMutex );
|
||||||
|
|
||||||
auto suggestions = hunspell.analyze( Iconv::toUtf8( Iconv::GdWchar, word.data(), word.size() ) );
|
auto suggestions = hunspell.analyze( Iconv::toUtf8( Text::utf32, word.data(), word.size() ) );
|
||||||
if ( !suggestions.empty() ) {
|
if ( !suggestions.empty() ) {
|
||||||
// There were some suggestions made for us. Make an appropriate output.
|
// There were some suggestions made for us. Make an appropriate output.
|
||||||
|
|
||||||
|
@ -464,7 +464,7 @@ void HunspellPrefixMatchRequest::run()
|
||||||
|
|
||||||
QMutexLocker _( &hunspellMutex );
|
QMutexLocker _( &hunspellMutex );
|
||||||
|
|
||||||
if ( hunspell.spell( Iconv::toUtf8( Iconv::GdWchar, trimmedWord.data(), trimmedWord.size() ) ) ) {
|
if ( hunspell.spell( Iconv::toUtf8( Text::utf32, trimmedWord.data(), trimmedWord.size() ) ) ) {
|
||||||
// Known word -- add it to the result
|
// Known word -- add it to the result
|
||||||
|
|
||||||
QMutexLocker _( &dataMutex );
|
QMutexLocker _( &dataMutex );
|
||||||
|
|
|
@ -143,7 +143,7 @@ Entry::Entry( File::Index & f )
|
||||||
// Read the size of the recording, in samples
|
// Read the size of the recording, in samples
|
||||||
samplesLength = f.read< uint32_t >();
|
samplesLength = f.read< uint32_t >();
|
||||||
|
|
||||||
name = Iconv::toUtf8( Iconv::Utf16Le, &filenameBuffer.front(), read * sizeof( uint16_t ) );
|
name = Iconv::toUtf8( Text::utf16_le, &filenameBuffer.front(), read * sizeof( uint16_t ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
class LsaDictionary: public BtreeIndexing::BtreeDictionary
|
class LsaDictionary: public BtreeIndexing::BtreeDictionary
|
||||||
|
|
Loading…
Reference in a new issue