2012-02-20 21:47:14 +00:00
|
|
|
/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
|
2009-01-28 20:55:45 +00:00
|
|
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
|
|
|
|
|
|
|
#include "iconv.hh"
|
|
|
|
#include <vector>
|
|
|
|
#include <errno.h>
|
2009-01-30 01:20:37 +00:00
|
|
|
#include <string.h>
|
2021-07-06 13:01:50 +00:00
|
|
|
#include <QDebug>
|
2009-01-28 20:55:45 +00:00
|
|
|
|
2009-04-18 17:20:12 +00:00
|
|
|
#ifdef __WIN32
|
|
|
|
char const * const Iconv::GdWchar = "UCS-4LE";
|
|
|
|
#else
|
|
|
|
char const * const Iconv::GdWchar = "WCHAR_T";
|
|
|
|
#endif
|
|
|
|
|
2009-01-28 20:55:45 +00:00
|
|
|
char const * const Iconv::Utf16Le = "UTF-16LE";
|
|
|
|
char const * const Iconv::Utf8 = "UTF-8";
|
|
|
|
|
2009-04-18 17:20:12 +00:00
|
|
|
using gd::wchar;
|
|
|
|
|
2018-05-21 15:32:04 +00:00
|
|
|
/// Opens a conversion descriptor via iconv_open( to, from ) and stores it in
/// 'state'.
///
/// Throws exCantInit, carrying the strerror() text for the current errno,
/// when iconv_open() returns its (iconv_t) -1 failure value.
Iconv::Iconv( char const * to, char const * from ) THROW_SPEC( exCantInit ):
  state( iconv_open( to, from ) )
{
  iconv_t const openFailed = (iconv_t) -1;

  if ( openFailed == state )
  {
    throw exCantInit( strerror( errno ) );
  }
}
|
|
|
|
|
2018-05-21 15:32:04 +00:00
|
|
|
/// Replaces the current conversion descriptor with one opened via
/// iconv_open( to, from ).
///
/// The new descriptor is opened before the old one is released. If opening
/// fails, exCantInit is thrown (carrying the strerror() text for errno) and
/// 'state' still holds the previous descriptor, so neither the destructor nor
/// a later reinit() ever passes (iconv_t) -1 to iconv_close().
void Iconv::reinit( char const * to, char const * from ) THROW_SPEC( exCantInit )
{
  iconv_t const newState = iconv_open( to, from );

  if ( newState == (iconv_t) -1 )
    throw exCantInit( strerror( errno ) );

  // Only drop the old descriptor once the replacement is known to be good
  iconv_close( state );

  state = newState;
}
|
|
|
|
|
|
|
|
/// Releases the conversion descriptor held in 'state'.
Iconv::~Iconv() throw()
{
  // The result of iconv_close() is deliberately not inspected here
  (void) iconv_close( state );
}
|
|
|
|
|
|
|
|
/// Performs a single iconv() call on the descriptor held in 'state'.
///
/// inBuf / inBytesLeft and outBuf / outBytesLeft are handed to iconv() by
/// address, so whatever updates iconv() makes to them are visible to the
/// caller.
///
/// Returns Success when iconv() does not report an error, NeedMoreIn when it
/// fails with EINVAL and NeedMoreOut when it fails with E2BIG.
/// Throws exIncorrectSeq on EILSEQ and exOther (carrying the strerror() text)
/// for any other errno value.
Iconv::Result Iconv::convert( void const * & inBuf, size_t & inBytesLeft,
                              void * & outBuf, size_t & outBytesLeft )
  THROW_SPEC( exIncorrectSeq, exOther )
{
  // The input pointer is cast to plain char ** for every platform; an earlier
  // __WIN32-only variant used char const ** instead.
  size_t result = iconv( state,
                         (char **)&inBuf,
                         &inBytesLeft,
                         (char **)&outBuf, &outBytesLeft );

  if ( result == (size_t) -1 )
  {
    // Capture errno right away: the logging call below may overwrite it
    int const errorCode = errno;

    // Note: the number must go through a format specifier. Adding it to the
    // string literal would merely offset the literal's pointer.
    qDebug( "iconv convert errno: %d", errorCode );

    switch( errorCode )
    {
      case EILSEQ:
        throw exIncorrectSeq();
      case EINVAL:
        return NeedMoreIn;
      case E2BIG:
        return NeedMoreOut;
      default:
        throw exOther( strerror( errorCode ) );
    }
  }

  return Success;
}
|
|
|
|
|
2009-04-18 17:20:12 +00:00
|
|
|
/// Converts dataSize bytes at fromData, encoded as fromEncoding, into a
/// gd::wstring, using GdWchar as the target encoding.
///
/// Returns an empty string when dataSize is zero. Throws exPrematureEnd when
/// convert() reports NeedMoreIn; exceptions thrown by the Iconv constructor
/// and by convert() propagate to the caller.
gd::wstring Iconv::toWstring( char const * fromEncoding, void const * fromData,
                              size_t dataSize )
  THROW_SPEC( exCantInit, exIncorrectSeq, exPrematureEnd, exOther )
{
  /// Special-case the dataSize == 0 to avoid any kind of iconv-specific
  /// behaviour in that regard.

  if ( !dataSize )
    return gd::wstring();

  Iconv ic( GdWchar, fromEncoding );

  /// This size is usually enough, but may be enlarged during the conversion
  std::vector< wchar > outBuf( dataSize );

  void * outBufPtr = &outBuf.front();

  // Free space is tracked in bytes, while the vector is sized in wchar units
  size_t outBufLeft = outBuf.size() * sizeof( wchar );

  for( ; ; )
  {
    switch( ic.convert( fromData, dataSize, outBufPtr, outBufLeft ) )
    {
      case Success:
        // Used element count = total elements - free elements
        return gd::wstring( &outBuf.front(),
                            outBuf.size() - outBufLeft / sizeof( wchar ) );
      case NeedMoreIn:
        throw exPrematureEnd();
      case NeedMoreOut:
      {
        // Grow the buffer and retry
        size_t const growBy = 256; // in wchar elements, not bytes

        // The pointer may get invalidated so we save the diff and restore it
        size_t offset = (wchar *)outBufPtr - &outBuf.front();
        outBuf.resize( outBuf.size() + growBy );
        outBufPtr = &outBuf.front() + offset;

        // The byte counter has to grow by the byte size of the added
        // elements, otherwise the length computed on Success is too large
        outBufLeft += growBy * sizeof( wchar );
      }
    }
  }
}
|
|
|
|
|
|
|
|
std::string Iconv::toUtf8( char const * fromEncoding, void const * fromData,
|
|
|
|
size_t dataSize )
|
2018-05-21 15:32:04 +00:00
|
|
|
THROW_SPEC( exCantInit, exIncorrectSeq, exPrematureEnd, exOther )
|
2009-01-28 20:55:45 +00:00
|
|
|
{
|
|
|
|
// Similar to toWstring
|
|
|
|
|
|
|
|
if ( !dataSize )
|
|
|
|
return std::string();
|
|
|
|
|
|
|
|
Iconv ic( Utf8, fromEncoding );
|
|
|
|
|
|
|
|
std::vector< char > outBuf( dataSize );
|
|
|
|
|
|
|
|
void * outBufPtr = &outBuf.front();
|
|
|
|
|
|
|
|
size_t outBufLeft = outBuf.size();
|
|
|
|
|
|
|
|
for( ; ; )
|
|
|
|
{
|
|
|
|
switch( ic.convert( fromData, dataSize, outBufPtr, outBufLeft ) )
|
|
|
|
{
|
|
|
|
case Success:
|
|
|
|
return std::string( &outBuf.front(),
|
|
|
|
outBuf.size() - outBufLeft );
|
|
|
|
case NeedMoreIn:
|
|
|
|
throw exPrematureEnd();
|
|
|
|
case NeedMoreOut:
|
|
|
|
{
|
|
|
|
// Grow the buffer and retry
|
|
|
|
// The pointer may get invalidated so we save the diff and restore it
|
|
|
|
size_t offset = (char *)outBufPtr - &outBuf.front();
|
|
|
|
outBuf.resize( outBuf.size() + 256 );
|
|
|
|
outBufPtr = &outBuf.front() + offset;
|
|
|
|
outBufLeft += 256;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|