Merge pull request #1734 from shenlebantongying/clean/qtextcodec
Some checks failed
SonarCloud / Build and analyze (push) Has been cancelled

clean: port away from `QTextCodec` in `slob.cc` and clean up iconv
This commit is contained in:
xiaoyifang 2024-08-31 11:09:01 +08:00 committed by GitHub
commit 5549d36a2d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 48 additions and 61 deletions

View file

@ -186,7 +186,6 @@ endif ()
# Preprocessor definitions attached to the ${GOLDENDICT} target with PUBLIC scope.
target_compile_definitions(${GOLDENDICT} PUBLIC
CMAKE_USED_HACK # temporary hack to avoid breaking qmake build
USE_ICONV
MAKE_QTMULTIMEDIA_PLAYER
MAKE_CHINESE_CONVERSION_SUPPORT
)

View file

@ -11,33 +11,20 @@ char const * const Iconv::GdWchar = "UTF-32LE";
char const * const Iconv::Utf16Le = "UTF-16LE";
char const * const Iconv::Utf8 = "UTF-8";
using gd::wchar;
Iconv::Iconv( char const * from )
#ifdef USE_ICONV
// The target ("to") encoding is always UTF-8; only the source encoding is chosen by the caller.
:
Iconv::Iconv( char const * from ):
state( iconv_open( Utf8, from ) )
#endif
{
#ifdef USE_ICONV
// iconv_open() reports failure by returning (iconv_t)-1; the errno text is passed on in the exception.
if ( state == (iconv_t)-1 )
throw exCantInit( strerror( errno ) );
#else
codec = QTextCodec::codecForName( from );
#endif
}
// Closes the conversion descriptor held in `state`; does nothing unless built with USE_ICONV.
Iconv::~Iconv()
{
#ifdef USE_ICONV
iconv_close( state );
#endif
}
QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
{
#ifdef USE_ICONV
size_t dsz = inBytesLeft;
//avoid most realloc
std::vector< char > outBuf( dsz + 32 );
@ -90,12 +77,6 @@ QString Iconv::convert( void const *& inBuf, size_t & inBytesLeft )
size_t datasize = outBuf.size() - outBufLeft;
// QByteArray ba( &outBuf.front(), datasize );
return QString::fromUtf8( &outBuf.front(), datasize );
#else
if ( codec )
return codec->toUnicode( static_cast< const char * >( inBuf ), inBytesLeft );
QByteArray ba( static_cast< const char * >( inBuf ), inBytesLeft );
return QString( ba );
#endif
}
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData, size_t dataSize )
@ -104,8 +85,9 @@ gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,
/// Special-case the dataSize == 0 to avoid any kind of iconv-specific
/// behaviour in that regard.
if ( !dataSize )
if ( dataSize == 0 ) {
return {};
}
Iconv ic( fromEncoding );
@ -118,11 +100,22 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData, siz
{
// Similar to toWstring
if ( !dataSize )
if ( dataSize == 0 ) {
return {};
}
Iconv ic( fromEncoding );
const QString outStr = ic.convert( fromData, dataSize );
return outStr.toStdString();
}
/// Decodes `dataSize` bytes starting at `fromData`, interpreted as `fromEncoding`,
/// and returns the result as a QString.
/// A zero `dataSize` returns a default-constructed QString without creating a converter.
QString Iconv::toQString( char const * fromEncoding, void const * fromData, size_t dataSize )
{
  if ( dataSize != 0 ) {
    // convert() takes its buffer pointer and byte count by reference; the by-value
    // parameters serve as the local cursor it advances.
    Iconv converter( fromEncoding );
    return converter.convert( fromData, dataSize );
  }

  return {};
}

View file

@ -4,24 +4,19 @@
#ifndef __ICONV_HH_INCLUDED__
#define __ICONV_HH_INCLUDED__
#include <QTextCodec>
#include <QString>
#include "wstring.hh"
#include "ex.hh"
#ifdef USE_ICONV
#include <iconv.h>
#endif
/// A wrapper for the iconv() character set conversion functions
/// "Internationalization conversion" for char encoding conversion, currently implemented with iconv()
/// Only supports converting from a known "from" to UTF8
class Iconv
{
#ifdef USE_ICONV
iconv_t state;
#else
QTextCodec * codec;
#endif
public:
@ -34,7 +29,7 @@ public:
static char const * const Utf16Le;
static char const * const Utf8;
Iconv( char const * from );
explicit Iconv( char const * from );
~Iconv();
@ -47,11 +42,10 @@ public:
// string.
static std::string toUtf8( char const * fromEncoding, void const * fromData, size_t dataSize );
private:
static QString toQString( char const * fromEncoding, void const * fromData, size_t dataSize );
// Copying/assigning not supported
Iconv( Iconv const & );
Iconv & operator=( Iconv const & );
// Copying/assigning isn't supported
Q_DISABLE_COPY_MOVE( Iconv );
};
#endif

View file

@ -21,11 +21,12 @@
#include <stub_msvc.h>
#endif
#include "iconv.hh"
#include <QString>
#include <QFile>
#include <QFileInfo>
#include <QDir>
#include <QTextCodec>
#include <QMap>
#include <QPair>
#include <QProcess>
@ -127,9 +128,8 @@ private:
QFile file;
QString fileName, dictionaryName;
Compressions compression;
QString encoding;
std::string encoding;
unsigned char uuid[ 16 ];
QTextCodec * codec;
QMap< QString, QString > tags;
QVector< QString > contentTypes;
quint32 blobCount;
@ -149,7 +149,6 @@ private:
public:
SlobFile():
compression( UNKNOWN ),
codec( 0 ),
blobCount( 0 ),
storeOffset( 0 ),
fileSize( 0 ),
@ -170,7 +169,7 @@ public:
return compression;
}
QString const & getEncoding() const
std::string const & getEncoding() const
{
return encoding;
}
@ -200,11 +199,6 @@ public:
return contentTypesCount;
}
QTextCodec * getCodec() const
{
return codec;
}
const RefOffsetsVector & getSortedRefOffsets();
void clearRefOffsets()
@ -241,10 +235,17 @@ QString SlobFile::readString( unsigned length )
QByteArray data = file.read( length );
QString str;
if ( codec != 0 && !data.isEmpty() )
str = codec->toUnicode( data );
else
if ( !encoding.empty() && !data.isEmpty() ) {
try {
str = Iconv::toQString( encoding.c_str(), data.data(), data.size() );
}
catch ( Iconv::Ex & e ) {
qDebug() << QString( R"(slob decoding failed: %1)" ).arg( e.what() );
}
}
else {
str = QString( data );
}
char term = 0;
int n = str.indexOf( term );
@ -317,13 +318,7 @@ void SlobFile::open( const QString & name )
// Read encoding
encoding = readTinyText();
codec = QTextCodec::codecForName( encoding.toLatin1() );
if ( codec == nullptr ) {
error = QString( R"(for encoding "%1")" ).arg( encoding );
throw exNoCodecFound( string( error.toUtf8().data() ) );
}
encoding = readTinyText().toStdString();
// Read compression type
@ -865,8 +860,14 @@ quint32 SlobDictionary::readArticle( quint32 articleNumber, std::string & result
|| contentType.contains( "/css", Qt::CaseInsensitive )
|| contentType.contains( "/javascript", Qt::CaseInsensitive )
|| contentType.contains( "/json", Qt::CaseInsensitive ) ) {
QTextCodec * codec = sf.getCodec();
QString content = codec->toUnicode( data.c_str(), data.size() );
QString content;
try {
content = Iconv::toQString( sf.getEncoding().c_str(), data.data(), data.size() );
}
catch ( Iconv::Ex & e ) {
qDebug() << QString( R"(slob decoding failed: %1)" ).arg( e.what() );
}
result = string( content.toUtf8().data() );
}
else