From ac43018f6824eddbf830bad57a60ef4221eb9f2c Mon Sep 17 00:00:00 2001 From: shenleban tongying Date: Fri, 30 Aug 2024 04:09:05 -0400 Subject: [PATCH 1/2] clean: port away from QTextCodec in slob.cc --- src/common/iconv.cc | 10 ++++++++++ src/common/iconv.hh | 2 ++ src/dict/slob.cc | 47 +++++++++++++++++++++++---------------------- 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/common/iconv.cc b/src/common/iconv.cc index 206cdbd7..76e36cd5 100644 --- a/src/common/iconv.cc +++ b/src/common/iconv.cc @@ -126,3 +126,13 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData, siz const QString outStr = ic.convert( fromData, dataSize ); return outStr.toStdString(); } + +QString Iconv::toQString( char const * fromEncoding, void const * fromData, size_t dataSize ) +{ + if ( dataSize == 0 ) { + return {}; + } + + Iconv ic( fromEncoding ); + return ic.convert( fromData, dataSize ); +} diff --git a/src/common/iconv.hh b/src/common/iconv.hh index ec2312eb..3f8fa227 100644 --- a/src/common/iconv.hh +++ b/src/common/iconv.hh @@ -47,6 +47,8 @@ public: // string. static std::string toUtf8( char const * fromEncoding, void const * fromData, size_t dataSize ); + static QString toQString( char const * fromEncoding, void const * fromData, size_t dataSize ); + private: // Copying/assigning not supported diff --git a/src/dict/slob.cc b/src/dict/slob.cc index 3ccdc3ce..72c11437 100644 --- a/src/dict/slob.cc +++ b/src/dict/slob.cc @@ -21,11 +21,12 @@ #include #endif +#include "iconv.hh" + #include #include #include #include -#include #include #include #include @@ -127,9 +128,8 @@ private: QFile file; QString fileName, dictionaryName; Compressions compression; - QString encoding; + std::string encoding; unsigned char uuid[ 16 ]; - QTextCodec * codec; QMap< QString, QString > tags; QVector< QString > contentTypes; quint32 blobCount; @@ -149,7 +149,6 @@ private: public: SlobFile(): compression( UNKNOWN ), - codec( 0 ), blobCount( 0 ), storeOffset( 0 ), fileSize( 0 ), @@ -170,7 +169,7 @@ public: return compression; } - QString const & getEncoding() const + std::string const & getEncoding() const { return encoding; } @@ -200,11 +199,6 @@ public: return contentTypesCount; } - QTextCodec * getCodec() const - { - return codec; - } - const RefOffsetsVector & getSortedRefOffsets(); void clearRefOffsets() @@ -241,10 +235,17 @@ QString SlobFile::readString( unsigned length ) QByteArray data = file.read( length ); QString str; - if ( codec != 0 && !data.isEmpty() ) - str = codec->toUnicode( data ); - else + if ( !encoding.empty() && !data.isEmpty() ) { + try { + str = Iconv::toQString( encoding.c_str(), data.data(), data.size() ); + } + catch ( Iconv::Ex & e ) { + qDebug() << QString( R"(slob decoding failed: %1)" ).arg( e.what() ); + } + } + else { str = QString( data ); + } char term = 0; int n = str.indexOf( term ); @@ -317,13 +318,7 @@ void SlobFile::open( const QString & name ) // Read encoding - encoding = readTinyText(); - - codec = QTextCodec::codecForName( encoding.toLatin1() ); - if ( codec == nullptr ) { - error = QString( R"(for encoding "%1")" ).arg( encoding ); - throw exNoCodecFound( string( error.toUtf8().data() ) ); - } + encoding = readTinyText().toStdString(); // Read compression type @@ -865,9 +860,15 @@ quint32 SlobDictionary::readArticle( quint32 articleNumber, std::string & result || contentType.contains( "/css", Qt::CaseInsensitive ) || contentType.contains( "/javascript", Qt::CaseInsensitive ) || contentType.contains( "/json", Qt::CaseInsensitive ) ) { - QTextCodec * codec = sf.getCodec(); - QString content = codec->toUnicode( data.c_str(), data.size() ); - result = string( content.toUtf8().data() ); + QString content; + try { + content = Iconv::toQString( sf.getEncoding().c_str(), data.data(), data.size() ); + } + catch ( Iconv::Ex & e ) { + qDebug() << QString( R"(slob decoding failed: %1)" ).arg( e.what() ); + } + + result = string( content.toUtf8().data() ); } else result = data; From 9a2c96ca5692e872e1c63d4eda65d3cd8f2bedb9 Mon Sep 17 00:00:00 2001 From: shenlebantongying Date: Wed, 28 Aug 2024 02:55:01 -0400 Subject: [PATCH 2/2] clean: remove unused code of iconv.cc and minor update code style --- CMakeLists.txt | 1 - src/common/iconv.cc | 27 +++++---------------------- src/common/iconv.hh | 24 ++++++++---------------- 3 files changed, 13 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 267ad144..a1df18cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,7 +186,6 @@ endif () target_compile_definitions(${GOLDENDICT} PUBLIC CMAKE_USED_HACK # temporal hack to avoid breaking qmake build - USE_ICONV MAKE_QTMULTIMEDIA_PLAYER MAKE_CHINESE_CONVERSION_SUPPORT ) diff --git a/src/common/iconv.cc b/src/common/iconv.cc index 76e36cd5..db677304 100644 --- a/src/common/iconv.cc +++ b/src/common/iconv.cc @@ -11,33 +11,20 @@ char const * const Iconv::GdWchar = "UTF-32LE"; char const * const Iconv::Utf16Le = "UTF-16LE"; char const * const Iconv::Utf8 = "UTF-8"; -using gd::wchar; - -Iconv::Iconv( char const * from ) -#ifdef USE_ICONV - // the to encoding must be UTF8 - : +Iconv::Iconv( char const * from ): state( iconv_open( Utf8, from ) ) -#endif { -#ifdef USE_ICONV if ( state == (iconv_t)-1 ) throw exCantInit( strerror( errno ) ); -#else - codec = QTextCodec::codecForName( from ); -#endif } Iconv::~Iconv() { -#ifdef USE_ICONV iconv_close( state ); -#endif } QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft ) { -#ifdef USE_ICONV size_t dsz = inBytesLeft; //avoid most realloc std::vector< char > outBuf( dsz + 32 ); @@ -90,12 +77,6 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft ) size_t datasize = outBuf.size() - outBufLeft; // QByteArray ba( &outBuf.front(), datasize ); return QString::fromUtf8( &outBuf.front(), datasize ); -#else - if ( codec ) - return codec->toUnicode( static_cast< const char * >( inBuf ), inBytesLeft ); - QByteArray ba( static_cast< const char * >( inBuf ), inBytesLeft ); - return QString( ba ); -#endif } gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize ) @@ -104,8 +85,9 @@ gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, /// Special-case the dataSize == 0 to avoid any kind of iconv-specific /// behaviour in that regard. - if ( !dataSize ) + if ( dataSize == 0 ) { return {}; + } Iconv ic( fromEncoding ); @@ -118,8 +100,9 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData, siz { // Similar to toWstring - if ( !dataSize ) + if ( dataSize == 0 ) { return {}; + } Iconv ic( fromEncoding ); diff --git a/src/common/iconv.hh b/src/common/iconv.hh index 3f8fa227..ca35354a 100644 --- a/src/common/iconv.hh +++ b/src/common/iconv.hh @@ -4,24 +4,19 @@ #ifndef __ICONV_HH_INCLUDED__ #define __ICONV_HH_INCLUDED__ -#include +#include #include "wstring.hh" #include "ex.hh" -#ifdef USE_ICONV - #include -#endif +#include -/// A wrapper for the iconv() character set conversion functions + +/// "Internationalization conversion" for char encoding conversion, currently implemented with iconv() +/// Only supports converting from a known "from" to UTF8 class Iconv { -#ifdef USE_ICONV iconv_t state; -#else - QTextCodec * codec; - -#endif public: @@ -34,7 +29,7 @@ public: static char const * const Utf16Le; static char const * const Utf8; - Iconv( char const * from ); + explicit Iconv( char const * from ); ~Iconv(); @@ -49,11 +44,8 @@ public: static QString toQString( char const * fromEncoding, void const * fromData, size_t dataSize ); -private: - - // Copying/assigning not supported - Iconv( Iconv const & ); - Iconv & operator=( Iconv const & ); + // Copying/assigning isn't supported + Q_DISABLE_COPY_MOVE( Iconv ); }; #endif