Merge pull request #1734 from shenlebantongying/clean/qtextcodec
Some checks failed
SonarCloud / Build and analyze (push) Has been cancelled

clean: port away from `QTextCodec` in `slob.cc` and clean up iconv
This commit is contained in:
xiaoyifang 2024-08-31 11:09:01 +08:00 committed by GitHub
commit 5549d36a2d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 48 additions and 61 deletions

View file

@ -186,7 +186,6 @@ endif ()
target_compile_definitions(${GOLDENDICT} PUBLIC target_compile_definitions(${GOLDENDICT} PUBLIC
CMAKE_USED_HACK # temporary hack to avoid breaking qmake build CMAKE_USED_HACK # temporary hack to avoid breaking qmake build
USE_ICONV
MAKE_QTMULTIMEDIA_PLAYER MAKE_QTMULTIMEDIA_PLAYER
MAKE_CHINESE_CONVERSION_SUPPORT MAKE_CHINESE_CONVERSION_SUPPORT
) )

View file

@ -11,33 +11,20 @@ char const * const Iconv::GdWchar = "UTF-32LE";
char const * const Iconv::Utf16Le = "UTF-16LE"; char const * const Iconv::Utf16Le = "UTF-16LE";
char const * const Iconv::Utf8 = "UTF-8"; char const * const Iconv::Utf8 = "UTF-8";
using gd::wchar; Iconv::Iconv( char const * from ):
Iconv::Iconv( char const * from )
#ifdef USE_ICONV
// the to encoding must be UTF8
:
state( iconv_open( Utf8, from ) ) state( iconv_open( Utf8, from ) )
#endif
{ {
#ifdef USE_ICONV
if ( state == (iconv_t)-1 ) if ( state == (iconv_t)-1 )
throw exCantInit( strerror( errno ) ); throw exCantInit( strerror( errno ) );
#else
codec = QTextCodec::codecForName( from );
#endif
} }
Iconv::~Iconv() Iconv::~Iconv()
{ {
#ifdef USE_ICONV
iconv_close( state ); iconv_close( state );
#endif
} }
QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft ) QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
{ {
#ifdef USE_ICONV
size_t dsz = inBytesLeft; size_t dsz = inBytesLeft;
//avoid most realloc //avoid most realloc
std::vector< char > outBuf( dsz + 32 ); std::vector< char > outBuf( dsz + 32 );
@ -90,12 +77,6 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
size_t datasize = outBuf.size() - outBufLeft; size_t datasize = outBuf.size() - outBufLeft;
// QByteArray ba( &outBuf.front(), datasize ); // QByteArray ba( &outBuf.front(), datasize );
return QString::fromUtf8( &outBuf.front(), datasize ); return QString::fromUtf8( &outBuf.front(), datasize );
#else
if ( codec )
return codec->toUnicode( static_cast< const char * >( inBuf ), inBytesLeft );
QByteArray ba( static_cast< const char * >( inBuf ), inBytesLeft );
return QString( ba );
#endif
} }
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize ) gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
@ -104,8 +85,9 @@ gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,
/// Special-case the dataSize == 0 to avoid any kind of iconv-specific /// Special-case the dataSize == 0 to avoid any kind of iconv-specific
/// behaviour in that regard. /// behaviour in that regard.
if ( !dataSize ) if ( dataSize == 0 ) {
return {}; return {};
}
Iconv ic( fromEncoding ); Iconv ic( fromEncoding );
@ -118,11 +100,22 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData, siz
{ {
// Similar to toWstring // Similar to toWstring
if ( !dataSize ) if ( dataSize == 0 ) {
return {}; return {};
}
Iconv ic( fromEncoding ); Iconv ic( fromEncoding );
const QString outStr = ic.convert( fromData, dataSize ); const QString outStr = ic.convert( fromData, dataSize );
return outStr.toStdString(); return outStr.toStdString();
} }
/// Converts dataSize bytes starting at fromData, interpreted in the encoding
/// named by fromEncoding, and returns the result as a QString.
/// A temporary Iconv is constructed for fromEncoding; its constructor throws
/// exCantInit when iconv_open() fails, so callers should be prepared to catch it.
QString Iconv::toQString( char const * fromEncoding, void const * fromData, size_t dataSize )
{
// Empty input returns an empty QString before any Iconv object is created.
if ( dataSize == 0 ) {
return {};
}
// The converter only lives for this call; convert() produces the QString.
Iconv ic( fromEncoding );
return ic.convert( fromData, dataSize );
}

View file

@ -4,24 +4,19 @@
#ifndef __ICONV_HH_INCLUDED__ #ifndef __ICONV_HH_INCLUDED__
#define __ICONV_HH_INCLUDED__ #define __ICONV_HH_INCLUDED__
#include <QTextCodec> #include <QString>
#include "wstring.hh" #include "wstring.hh"
#include "ex.hh" #include "ex.hh"
#ifdef USE_ICONV #include <iconv.h>
#include <iconv.h>
#endif
/// A wrapper for the iconv() character set conversion functions
/// "Internationalization conversion": character-encoding conversion, currently implemented with iconv().
/// Only supports converting from a named source encoding to UTF-8.
class Iconv class Iconv
{ {
#ifdef USE_ICONV
iconv_t state; iconv_t state;
#else
QTextCodec * codec;
#endif
public: public:
@ -34,7 +29,7 @@ public:
static char const * const Utf16Le; static char const * const Utf16Le;
static char const * const Utf8; static char const * const Utf8;
Iconv( char const * from ); explicit Iconv( char const * from );
~Iconv(); ~Iconv();
@ -47,11 +42,10 @@ public:
// string. // string.
static std::string toUtf8( char const * fromEncoding, void const * fromData, size_t dataSize ); static std::string toUtf8( char const * fromEncoding, void const * fromData, size_t dataSize );
private: static QString toQString( char const * fromEncoding, void const * fromData, size_t dataSize );
// Copying/assigning not supported // Copying/assigning isn't supported
Iconv( Iconv const & ); Q_DISABLE_COPY_MOVE( Iconv );
Iconv & operator=( Iconv const & );
}; };
#endif #endif

View file

@ -21,11 +21,12 @@
#include <stub_msvc.h> #include <stub_msvc.h>
#endif #endif
#include "iconv.hh"
#include <QString> #include <QString>
#include <QFile> #include <QFile>
#include <QFileInfo> #include <QFileInfo>
#include <QDir> #include <QDir>
#include <QTextCodec>
#include <QMap> #include <QMap>
#include <QPair> #include <QPair>
#include <QProcess> #include <QProcess>
@ -127,9 +128,8 @@ private:
QFile file; QFile file;
QString fileName, dictionaryName; QString fileName, dictionaryName;
Compressions compression; Compressions compression;
QString encoding; std::string encoding;
unsigned char uuid[ 16 ]; unsigned char uuid[ 16 ];
QTextCodec * codec;
QMap< QString, QString > tags; QMap< QString, QString > tags;
QVector< QString > contentTypes; QVector< QString > contentTypes;
quint32 blobCount; quint32 blobCount;
@ -149,7 +149,6 @@ private:
public: public:
SlobFile(): SlobFile():
compression( UNKNOWN ), compression( UNKNOWN ),
codec( 0 ),
blobCount( 0 ), blobCount( 0 ),
storeOffset( 0 ), storeOffset( 0 ),
fileSize( 0 ), fileSize( 0 ),
@ -170,7 +169,7 @@ public:
return compression; return compression;
} }
QString const & getEncoding() const std::string const & getEncoding() const
{ {
return encoding; return encoding;
} }
@ -200,11 +199,6 @@ public:
return contentTypesCount; return contentTypesCount;
} }
QTextCodec * getCodec() const
{
return codec;
}
const RefOffsetsVector & getSortedRefOffsets(); const RefOffsetsVector & getSortedRefOffsets();
void clearRefOffsets() void clearRefOffsets()
@ -241,10 +235,17 @@ QString SlobFile::readString( unsigned length )
QByteArray data = file.read( length ); QByteArray data = file.read( length );
QString str; QString str;
if ( codec != 0 && !data.isEmpty() ) if ( !encoding.empty() && !data.isEmpty() ) {
str = codec->toUnicode( data ); try {
else str = Iconv::toQString( encoding.c_str(), data.data(), data.size() );
}
catch ( Iconv::Ex & e ) {
qDebug() << QString( R"(slob decoding failed: %1)" ).arg( e.what() );
}
}
else {
str = QString( data ); str = QString( data );
}
char term = 0; char term = 0;
int n = str.indexOf( term ); int n = str.indexOf( term );
@ -317,13 +318,7 @@ void SlobFile::open( const QString & name )
// Read encoding // Read encoding
encoding = readTinyText(); encoding = readTinyText().toStdString();
codec = QTextCodec::codecForName( encoding.toLatin1() );
if ( codec == nullptr ) {
error = QString( R"(for encoding "%1")" ).arg( encoding );
throw exNoCodecFound( string( error.toUtf8().data() ) );
}
// Read compression type // Read compression type
@ -865,9 +860,15 @@ quint32 SlobDictionary::readArticle( quint32 articleNumber, std::string & result
|| contentType.contains( "/css", Qt::CaseInsensitive ) || contentType.contains( "/css", Qt::CaseInsensitive )
|| contentType.contains( "/javascript", Qt::CaseInsensitive ) || contentType.contains( "/javascript", Qt::CaseInsensitive )
|| contentType.contains( "/json", Qt::CaseInsensitive ) ) { || contentType.contains( "/json", Qt::CaseInsensitive ) ) {
QTextCodec * codec = sf.getCodec(); QString content;
QString content = codec->toUnicode( data.c_str(), data.size() ); try {
result = string( content.toUtf8().data() ); content = Iconv::toQString( sf.getEncoding().c_str(), data.data(), data.size() );
}
catch ( Iconv::Ex & e ) {
qDebug() << QString( R"(slob decoding failed: %1)" ).arg( e.what() );
}
result = string( content.toUtf8().data() );
} }
else else
result = data; result = data;