Merge remote-tracking branch 'origin/feature/remove-iconv' into staged

This commit is contained in:
yifang 2022-03-04 22:32:15 +08:00
commit e1730ab5f8
12 changed files with 117 additions and 423 deletions

View file

@ -0,0 +1,23 @@
# qmake project for the console unit test in test-qtextcodec-convert.cpp.
# The test is built together with the Iconv and wstring_qt sources from the
# parent directory, so no separate library is needed.
QT -= gui
CONFIG += c++11 console
CONFIG -= app_bundle
# Qt Test is a Qt module and is requested through QT; the older
# "CONFIG += qtestlib" spelling is deprecated.
QT += testlib
# You can make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
HEADERS+= \
../iconv.hh \
../wstring.hh \
../wstring_qt.hh
SOURCES += \
test-qtextcodec-convert.cpp \
../iconv.cc \
../wstring_qt.cc
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target

View file

@ -0,0 +1,49 @@
#include <QTest>
#include <QDate>
#include "../iconv.hh"
#include <string>
#include "../wstring_qt.hh"
// Unit tests for the Iconv wrapper implemented in iconv.cc.
// Every private slot declared below is one test function; the QTEST_MAIN
// invocation at the end of this file supplies the main() for the test binary.
class testQTextCodec : public QObject
{
Q_OBJECT
private slots:
// Decodes a buffer in the Iconv::GdWchar encoding through Iconv::convert().
void testConvert();
// Decodes UTF-32BE input through Iconv::toWstring().
void testToWstring();
// Decodes UTF-32BE input through Iconv::toUtf8().
void testToUtf8();
};
void testQTextCodec::testConvert()
{
Iconv conv( "utf-8", Iconv::GdWchar );
const char s[] = { 0x61, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x63, 0x00, 0x00, 0x00 };
void const * in = &s[ 0 ];
size_t len = 12;
QString r = conv.convert( in, len );
QCOMPARE( r, "abc" );
}
void testQTextCodec::testToWstring()
{
const char s[] = { 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x63 };
gd::wstring r1 = Iconv::toWstring( "UTF-32BE", s, 12 );
QCOMPARE( r1.size(), 3 );
QCOMPARE( r1, U"abc" );
char32_t * arr = (char32_t*)r1.c_str ();
QCOMPARE( arr[ 0 ], 0x00000061 );
}
/// Verifies that Iconv::toUtf8() decodes UTF-32BE input into the
/// three-byte UTF-8 string "abc".
void testQTextCodec::testToUtf8()
{
// 'a', 'b', 'c' laid out as 32-bit units with the most significant byte first.
const char s[] = { 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, 0x63 };
std::string r1 = Iconv::toUtf8( "UTF-32BE", s, sizeof( s ) );
// QCOMPARE expects both operands to have the same type, so every expected
// value below is given the exact type of the actual value.
QCOMPARE( r1.size(), std::string::size_type( 3 ) );
// A plain literal is used instead of u8"abc": the text is pure ASCII, and
// u8 literals stop being char-based from C++20 on.
QCOMPARE( r1, std::string( "abc" ) );
// Indexing is safe here: QCOMPARE returns from the test on a size mismatch.
QCOMPARE( r1[ 0 ], 'a' );
}
QTEST_MAIN(testQTextCodec)
#include "test-qtextcodec-convert.moc"

View file

@ -27,7 +27,6 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <iconv.h>
#include <QTextDocument>
#include "gddebug.hh"
#include "ufile.hh"
@ -36,6 +35,7 @@
#include <QString>
#include <QDebug>
#include "dictionary.hh"
#include "wstring_qt.hh"
#ifdef _WIN32
#include <io.h>
@ -198,6 +198,7 @@ bool Babylon::read(std::string &source_charset, std::string &target_charset)
{
headword.clear();
definition.clear();
switch( block.type )
{
case 0:
@ -534,7 +535,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
{
try
{
transcription = Iconv::toUtf8( "CP1252", block.data + pos + 3, length );
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 3, length );
}
catch( Iconv::Ex & e )
{
@ -565,7 +566,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
{
try
{
transcription = Iconv::toUtf8( "CP1252", block.data + pos + 4, length );
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 4, length );
}
catch( Iconv::Ex & e )
{
@ -759,42 +760,14 @@ void Babylon::convertToUtf8( std::string &s, unsigned int type )
if( charset == "UTF-8" )
return;
iconv_t cd = iconv_open( "UTF-8", charset.c_str() );
if( cd == (iconv_t)(-1) )
throw exIconv();
Iconv conv_("UTF-8", charset.c_str());
char *outbuf, *defbuf;
size_t inbufbytes, outbufbytes;
inbufbytes = s.size();
outbufbytes = s.size() * 6;
size_t inbufbytes = s.size();
char *inbuf;
inbuf = (char *)s.data();
outbuf = (char*)malloc( outbufbytes + 1 );
if( !outbuf )
{
iconv_close( cd );
throw exAllocation();
}
const void* test = inbuf;
memset( outbuf, '\0', outbufbytes + 1 );
defbuf = outbuf;
while (inbufbytes) {
if (iconv(cd, &inbuf, &inbufbytes, &outbuf, &outbufbytes) == (size_t)-1) {
gdWarning( "\"%s\" - error in iconv conversion (%s)\n", inbuf, strerror( errno ) );
break;
// inbuf++;
// inbufbytes--;
}
}
// Flush the state. This fixes CP1255 problems.
iconv( cd, 0, 0, &outbuf, &outbufbytes );
if( inbufbytes == 0 )
s = std::string( defbuf );
free( defbuf );
iconv_close( cd );
QString convStr = conv_.convert(test,inbufbytes);
s = gd::toStdString(convStr);
}

View file

@ -37,7 +37,6 @@ DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x050F00
DEFINES += MAKE_QTMULTIMEDIA_PLAYER
}
!CONFIG( no_ffmpeg_player ) {
DEFINES += MAKE_FFMPEG_PLAYER
}
@ -78,8 +77,7 @@ win32 {
HUNSPELL_LIB = hunspell
}
LIBS += -liconv \
-lwsock32 \
LIBS += -lwsock32 \
-lpsapi \
-lole32 \
-loleaut32 \
@ -138,7 +136,7 @@ unix:!mac {
libswresample \
}
arm {
LIBS += -liconv
#LIBS += -liconv
} else {
LIBS += -lX11 -lXtst
}
@ -171,7 +169,7 @@ unix:!mac {
INSTALLS += helps
}
freebsd {
LIBS += -liconv -lexecinfo
LIBS += -lexecinfo
}
mac {
TARGET = GoldenDict
@ -181,7 +179,6 @@ mac {
# CONFIG += x86 x86_64 ppc
LIBS = -lz \
-lbz2 \
-liconv \
-lvorbisfile \
-lvorbis \
-logg \

View file

@ -734,10 +734,8 @@ string encodeToHunspell( Hunspell & hunspell, wstring const & str )
void * out = &result.front();
size_t outLeft = result.size();
if ( conv.convert( in, inLeft, out, outLeft ) != Iconv::Success )
throw Iconv::Ex();
return string( &result.front(), result.size() - outLeft );
QString convStr= conv.convert( in, inLeft);
return FsEncoding::encode(convStr);
}
wstring decodeFromHunspell( Hunspell & hunspell, char const * str )
@ -752,12 +750,9 @@ wstring decodeFromHunspell( Hunspell & hunspell, char const * str )
void * out = &result.front();
size_t outLeft = result.size() * sizeof( wchar );
if ( conv.convert( in, inLeft, out, outLeft ) != Iconv::Success )
throw Iconv::Ex();
return wstring( &result.front(), result.size() - outLeft/sizeof( wchar ) );
QString convStr= conv.convert( in, inLeft);
return gd::toWString(convStr);
}
}
vector< sptr< Dictionary::Class > > makeDictionaries( Config::Hunspell const & cfg )

107
iconv.cc
View file

@ -6,69 +6,28 @@
#include <errno.h>
#include <string.h>
#include <QDebug>
#include "wstring_qt.hh"
#ifdef __WIN32
char const * const Iconv::GdWchar = "UCS-4LE";
#else
char const * const Iconv::GdWchar = "WCHAR_T";
#endif
char const * const Iconv::GdWchar = "UTF-32LE";
char const * const Iconv::Utf16Le = "UTF-16LE";
char const * const Iconv::Utf8 = "UTF-8";
using gd::wchar;
Iconv::Iconv( char const * to, char const * from ) :
state( iconv_open( to, from ) )
Iconv::Iconv( char const * to, char const * from )
{
if ( state == (iconv_t) -1 )
throw exCantInit( strerror( errno ) );
codec = QTextCodec::codecForName(from);
}
void Iconv::reinit( char const * to, char const * from )
Iconv::~Iconv()
{
iconv_close( state );
state = iconv_open( to, from );
if ( state == (iconv_t) -1 )
throw exCantInit( strerror( errno ) );
}
Iconv::~Iconv() throw()
QString Iconv::convert(void const* & inBuf, size_t& inBytesLeft)
{
iconv_close( state );
}
return codec->toUnicode(static_cast<const char*>(inBuf), inBytesLeft);
Iconv::Result Iconv::convert( void const * & inBuf, size_t & inBytesLeft,
void * & outBuf, size_t & outBytesLeft )
{
size_t result = iconv( state,
// #ifdef __WIN32
// (char const **)&inBuf,
// #else
(char **)&inBuf,
// #endif
&inBytesLeft,
(char **)&outBuf, &outBytesLeft );
if ( result == (size_t) -1 )
{
switch( errno )
{
case EILSEQ:
throw exIncorrectSeq();
case EINVAL:
return NeedMoreIn;
case E2BIG:
return NeedMoreOut;
default:
throw exOther( strerror( errno ) );
}
}
return Success;
}
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,
@ -86,30 +45,8 @@ gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,
/// This size is usually enough, but may be enlarged during the conversion
std::vector< wchar > outBuf( dataSize );
void * outBufPtr = &outBuf.front();
size_t outBufLeft = outBuf.size() * sizeof( wchar );
for( ; ; )
{
switch( ic.convert( fromData, dataSize, outBufPtr, outBufLeft ) )
{
case Success:
return gd::wstring( &outBuf.front(),
outBuf.size() - outBufLeft / sizeof( wchar ) );
case NeedMoreIn:
throw exPrematureEnd();
case NeedMoreOut:
{
// Grow the buffer and retry
// The pointer may get invalidated so we save the diff and restore it
size_t offset = (wchar *)outBufPtr - &outBuf.front();
outBuf.resize( outBuf.size() + 256 );
outBufPtr = &outBuf.front() + offset;
outBufLeft += 256;
}
}
}
QString outStr = ic.convert(fromData, dataSize);
return gd::toWString(outStr);
}
std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData,
@ -125,29 +62,7 @@ std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData,
std::vector< char > outBuf( dataSize );
void * outBufPtr = &outBuf.front();
size_t outBufLeft = outBuf.size();
for( ; ; )
{
switch( ic.convert( fromData, dataSize, outBufPtr, outBufLeft ) )
{
case Success:
return std::string( &outBuf.front(),
outBuf.size() - outBufLeft );
case NeedMoreIn:
throw exPrematureEnd();
case NeedMoreOut:
{
// Grow the buffer and retry
// The pointer may get invalidated so we save the diff and restore it
size_t offset = (char *)outBufPtr - &outBuf.front();
outBuf.resize( outBuf.size() + 256 );
outBufPtr = &outBuf.front() + offset;
outBufLeft += 256;
}
}
}
QString outStr = ic.convert(fromData, dataSize);
return gd::toStdString(outStr);
}

View file

@ -4,22 +4,20 @@
#ifndef __ICONV_HH_INCLUDED__
#define __ICONV_HH_INCLUDED__
#include <iconv.h>
#include <QTextCodec>
#include "wstring.hh"
#include "ex.hh"
/// A wrapper for character set conversion; formerly built on iconv(), now backed by QTextCodec
class Iconv
{
iconv_t state;
//iconv_t state;
QTextCodec* codec;
public:
DEF_EX( Ex, "Iconv exception", std::exception )
DEF_EX_STR( exCantInit, "Can't initialize iconv conversion:", Ex )
DEF_EX( exIncorrectSeq, "Invalid character sequence encountered during character conversion", Ex )
DEF_EX( exPrematureEnd, "Character sequence ended prematurely during character conversion", Ex )
DEF_EX_STR( exOther, "An error has occurred during character conversion:", Ex )
// Some predefined character sets' names
@ -29,32 +27,18 @@ public:
Iconv( char const * to, char const * from );
// Changes to another pair of encodings. All the internal state is reset.
void reinit( char const * to, char const * from ) ;
~Iconv();
~Iconv() throw();
enum Result
{
Success, // All the data was successfully converted
NeedMoreIn, // Input has an incomplete multibyte character at its end
NeedMoreOut // The output buffer can't hold the result
};
Result convert( void const * & inBuf, size_t & inBytesLeft,
void * & outBuf, size_t & outBytesLeft );
QString convert(void const* & inBuf, size_t& inBytesLeft);
// Converts a given block of data from the given encoding to a wide string.
static gd::wstring toWstring( char const * fromEncoding, void const * fromData,
size_t dataSize )
;
size_t dataSize );
// Converts a given block of data from the given encoding to an utf8-encoded
// string.
static std::string toUtf8( char const * fromEncoding, void const * fromData,
size_t dataSize )
;
size_t dataSize );
private:
// Copying/assigning not supported

View file

@ -22,7 +22,6 @@
#include <errno.h>
#include <zlib.h>
#include <iconv.h>
#include <lzo/lzo1x.h>
#include <QtEndian>

View file

@ -1,247 +0,0 @@
/* Copyright (C) 1999-2003, 2005-2006, 2008-2011 Free Software Foundation, Inc.
This file is part of the GNU LIBICONV Library.
The GNU LIBICONV Library is free software; you can redistribute it
and/or modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
The GNU LIBICONV Library is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU LIBICONV Library; see the file COPYING.LIB.
If not, see <http://www.gnu.org/licenses/>. */
/* When installed, this file is called "iconv.h". */
#ifndef _LIBICONV_H
#define _LIBICONV_H
#define _LIBICONV_VERSION 0x010F /* version number: (major<<8) + minor */
#if 1 && BUILDING_LIBICONV
#define LIBICONV_DLL_EXPORTED __attribute__((__visibility__("default")))
#else
#define LIBICONV_DLL_EXPORTED
#endif
extern LIBICONV_DLL_EXPORTED __declspec (dllimport) int _libiconv_version; /* Likewise */
/* We would like to #include any system header file which could define
iconv_t, 1. in order to eliminate the risk that the user gets compilation
errors because some other system header file includes /usr/include/iconv.h
which defines iconv_t or declares iconv after this file, 2. when compiling
for LIBICONV_PLUG, we need the proper iconv_t type in order to produce
binary compatible code.
But gcc's #include_next is not portable. Thus, once libiconv's iconv.h
has been installed in /usr/local/include, there is no way any more to
include the original /usr/include/iconv.h. We simply have to get away
without it.
Ad 1. The risk that a system header file does
#include "iconv.h" or #include_next "iconv.h"
is small. They all do #include <iconv.h>.
Ad 2. The iconv_t type is a pointer type in all cases I have seen. (It
has to be a scalar type because (iconv_t)(-1) is a possible return value
from iconv_open().) */
/* Define iconv_t ourselves. */
#undef iconv_t
#define iconv_t libiconv_t
typedef void* iconv_t;
/* Get size_t declaration.
Get wchar_t declaration if it exists. */
#include <stddef.h>
/* Get errno declaration and values. */
#include <errno.h>
/* Some systems, like SunOS 4, don't have EILSEQ. Some systems, like BSD/OS,
have EILSEQ in a different header. On these systems, define EILSEQ
ourselves. */
#ifndef EILSEQ
#define EILSEQ
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Allocates descriptor for code conversion from encoding fromcode to
encoding tocode. */
#ifndef LIBICONV_PLUG
#define iconv_open libiconv_open
#endif
extern LIBICONV_DLL_EXPORTED iconv_t iconv_open (const char* tocode, const char* fromcode);
/* Converts, using conversion descriptor cd, at most *inbytesleft bytes
starting at *inbuf, writing at most *outbytesleft bytes starting at
*outbuf.
Decrements *inbytesleft and increments *inbuf by the same amount.
Decrements *outbytesleft and increments *outbuf by the same amount. */
#ifndef LIBICONV_PLUG
#define iconv libiconv
#endif
extern LIBICONV_DLL_EXPORTED size_t iconv (iconv_t cd, char* * inbuf, size_t *inbytesleft, char* * outbuf, size_t *outbytesleft);
/* Frees resources allocated for conversion descriptor cd. */
#ifndef LIBICONV_PLUG
#define iconv_close libiconv_close
#endif
extern LIBICONV_DLL_EXPORTED int iconv_close (iconv_t cd);
#ifdef __cplusplus
}
#endif
#ifndef LIBICONV_PLUG
/* Nonstandard extensions. */
#if 1
#if 0
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#endif
#include <wchar.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* A type that holds all memory needed by a conversion descriptor.
A pointer to such an object can be used as an iconv_t. */
typedef struct {
void* dummy1[28];
#if 1
mbstate_t dummy2;
#endif
} iconv_allocation_t;
/* Allocates descriptor for code conversion from encoding fromcode to
encoding tocode into preallocated memory. Returns an error indicator
(0 or -1 with errno set). */
#define iconv_open_into libiconv_open_into
extern LIBICONV_DLL_EXPORTED int iconv_open_into (const char* tocode, const char* fromcode,
iconv_allocation_t* resultp);
/* Control of attributes. */
#define iconvctl libiconvctl
extern LIBICONV_DLL_EXPORTED int iconvctl (iconv_t cd, int request, void* argument);
/* Hook performed after every successful conversion of a Unicode character. */
typedef void (*iconv_unicode_char_hook) (unsigned int uc, void* data);
/* Hook performed after every successful conversion of a wide character. */
typedef void (*iconv_wide_char_hook) (wchar_t wc, void* data);
/* Set of hooks. */
struct iconv_hooks {
iconv_unicode_char_hook uc_hook;
iconv_wide_char_hook wc_hook;
void* data;
};
/* Fallback function. Invoked when a small number of bytes could not be
converted to a Unicode character. This function should process all
bytes from inbuf and may produce replacement Unicode characters by calling
the write_replacement callback repeatedly. */
typedef void (*iconv_unicode_mb_to_uc_fallback)
(const char* inbuf, size_t inbufsize,
void (*write_replacement) (const unsigned int *buf, size_t buflen,
void* callback_arg),
void* callback_arg,
void* data);
/* Fallback function. Invoked when a Unicode character could not be converted
to the target encoding. This function should process the character and
may produce replacement bytes (in the target encoding) by calling the
write_replacement callback repeatedly. */
typedef void (*iconv_unicode_uc_to_mb_fallback)
(unsigned int code,
void (*write_replacement) (const char *buf, size_t buflen,
void* callback_arg),
void* callback_arg,
void* data);
#if 1
/* Fallback function. Invoked when a number of bytes could not be converted to
a wide character. This function should process all bytes from inbuf and may
produce replacement wide characters by calling the write_replacement
callback repeatedly. */
typedef void (*iconv_wchar_mb_to_wc_fallback)
(const char* inbuf, size_t inbufsize,
void (*write_replacement) (const wchar_t *buf, size_t buflen,
void* callback_arg),
void* callback_arg,
void* data);
/* Fallback function. Invoked when a wide character could not be converted to
the target encoding. This function should process the character and may
produce replacement bytes (in the target encoding) by calling the
write_replacement callback repeatedly. */
typedef void (*iconv_wchar_wc_to_mb_fallback)
(wchar_t code,
void (*write_replacement) (const char *buf, size_t buflen,
void* callback_arg),
void* callback_arg,
void* data);
#else
/* If the wchar_t type does not exist, these two fallback functions are never
invoked. Their argument list therefore does not matter. */
typedef void (*iconv_wchar_mb_to_wc_fallback) ();
typedef void (*iconv_wchar_wc_to_mb_fallback) ();
#endif
/* Set of fallbacks. */
struct iconv_fallbacks {
iconv_unicode_mb_to_uc_fallback mb_to_uc_fallback;
iconv_unicode_uc_to_mb_fallback uc_to_mb_fallback;
iconv_wchar_mb_to_wc_fallback mb_to_wc_fallback;
iconv_wchar_wc_to_mb_fallback wc_to_mb_fallback;
void* data;
};
/* Requests for iconvctl. */
#define ICONV_TRIVIALP 0 /* int *argument */
#define ICONV_GET_TRANSLITERATE 1 /* int *argument */
#define ICONV_SET_TRANSLITERATE 2 /* const int *argument */
#define ICONV_GET_DISCARD_ILSEQ 3 /* int *argument */
#define ICONV_SET_DISCARD_ILSEQ 4 /* const int *argument */
#define ICONV_SET_HOOKS 5 /* const struct iconv_hooks *argument */
#define ICONV_SET_FALLBACKS 6 /* const struct iconv_fallbacks *argument */
/* Listing of locale independent encodings. */
#define iconvlist libiconvlist
extern LIBICONV_DLL_EXPORTED void iconvlist (int (*do_one) (unsigned int namescount,
const char * const * names,
void* data),
void* data);
/* Canonicalize an encoding name.
The result is either a canonical encoding name, or name itself. */
extern LIBICONV_DLL_EXPORTED const char * iconv_canonicalize (const char * name);
/* Support for relocatable packages. */
/* Sets the original and the current installation prefix of the package.
Relocation simply replaces a pathname starting with the original prefix
by the corresponding pathname with the current prefix instead. Both
prefixes should be directory names without trailing slash (i.e. use ""
instead of "/"). */
extern LIBICONV_DLL_EXPORTED void libiconv_set_relocation_prefix (const char *orig_prefix,
const char *curr_prefix);
#ifdef __cplusplus
}
#endif
#endif
#endif /* _LIBICONV_H */

Binary file not shown.

View file

@ -28,4 +28,9 @@ namespace gd
return gd::toWString( gd::toQString( str ).normalized( QString::NormalizationForm_C ) );
}
std::string toStdString(const QString& str)
{
return str.toStdString();
}
}

View file

@ -15,6 +15,7 @@ namespace gd
QString toQString( wstring const & );
wstring toWString( QString const & );
wstring normalize( wstring const & );
std::string toStdString(const QString& );
}
#endif