mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
clean: port DSL encoding guessing away from QTextCodec (#1799)
This commit is contained in:
parent
96ada0737c
commit
1c2f93e393
|
@ -27,6 +27,7 @@ Checks: >
|
||||||
-google-readability-casting,
|
-google-readability-casting,
|
||||||
-hicpp-deprecated-headers,
|
-hicpp-deprecated-headers,
|
||||||
-misc-const-correctness,
|
-misc-const-correctness,
|
||||||
|
-misc-include-cleaner,
|
||||||
-misc-non-private-member-variables-in-classes,
|
-misc-non-private-member-variables-in-classes,
|
||||||
-modernize-avoid-c-arrays,
|
-modernize-avoid-c-arrays,
|
||||||
-modernize-deprecated-headers,
|
-modernize-deprecated-headers,
|
||||||
|
|
|
@ -807,7 +807,7 @@ bool ArticleDom::atSignFirstInLine()
|
||||||
/////////////// DslScanner
|
/////////////// DslScanner
|
||||||
|
|
||||||
DslScanner::DslScanner( string const & fileName ):
|
DslScanner::DslScanner( string const & fileName ):
|
||||||
encoding( Utf8::Windows1252 ),
|
encoding( Utf8::Utf8 ),
|
||||||
readBufferPtr( readBuffer ),
|
readBufferPtr( readBuffer ),
|
||||||
readBufferLeft( 0 ),
|
readBufferLeft( 0 ),
|
||||||
linesRead( 0 )
|
linesRead( 0 )
|
||||||
|
@ -819,11 +819,12 @@ DslScanner::DslScanner( string const & fileName ):
|
||||||
if ( !f )
|
if ( !f )
|
||||||
throw exCantOpen( fileName );
|
throw exCantOpen( fileName );
|
||||||
|
|
||||||
// Now try guessing the encoding by reading the first two bytes
|
// Now try guessing the encoding
|
||||||
|
|
||||||
unsigned char firstBytes[ 50 ];
|
constexpr size_t firstBytesSize = 50;
|
||||||
|
unsigned char firstBytes[ firstBytesSize ];
|
||||||
|
|
||||||
if ( gzread( f, firstBytes, sizeof( firstBytes ) ) != sizeof( firstBytes ) ) {
|
if ( gzread( f, firstBytes, firstBytesSize ) != firstBytesSize ) {
|
||||||
// Apparently the file's too short
|
// Apparently the file's too short
|
||||||
gzclose( f );
|
gzclose( f );
|
||||||
throw exMalformedDslFile( fileName );
|
throw exMalformedDslFile( fileName );
|
||||||
|
@ -831,37 +832,33 @@ DslScanner::DslScanner( string const & fileName ):
|
||||||
|
|
||||||
bool needExactEncoding = false;
|
bool needExactEncoding = false;
|
||||||
|
|
||||||
QByteArray ba = QByteArray::fromRawData( (const char *)firstBytes, 50 );
|
// Note that .dsl format always starts with "#NAME"
|
||||||
codec = QTextCodec::codecForUtfText( ba, nullptr );
|
if ( auto guessedEncoding = QStringConverter::encodingForData( { firstBytes, firstBytesSize }, '#' );
|
||||||
if ( !codec ) {
|
guessedEncoding.has_value() ) {
|
||||||
// the encoding has no bom.
|
switch ( guessedEncoding.value() ) {
|
||||||
// check the first char # (0x23).
|
case QStringConverter::Utf8:
|
||||||
auto hashTag = 0x0023;
|
encoding = Utf8::Utf8;
|
||||||
|
break;
|
||||||
auto uci = qFromUnaligned< uint32_t >( firstBytes );
|
case QStringConverter::Utf16LE:
|
||||||
if ( uci == qToBigEndian( hashTag ) ) {
|
encoding = Utf8::Utf16LE;
|
||||||
codec = QTextCodec::codecForMib( 1018 ); // utf-32 be
|
break;
|
||||||
}
|
case QStringConverter::Utf16BE:
|
||||||
else if ( uci == qToLittleEndian( hashTag ) ) {
|
encoding = Utf8::Utf16BE;
|
||||||
codec = QTextCodec::codecForMib( 1019 ); // utf-32 le
|
break;
|
||||||
}
|
case QStringConverter::Utf32LE:
|
||||||
else {
|
encoding = Utf8::Utf16LE;
|
||||||
auto uc = qFromUnaligned< uint16_t >( firstBytes );
|
break;
|
||||||
if ( uc == qToBigEndian( uint16_t( hashTag ) ) ) {
|
case QStringConverter::Utf32BE:
|
||||||
codec = QTextCodec::codecForMib( 1013 ); // utf16 be
|
encoding = Utf8::Utf32BE;
|
||||||
}
|
break;
|
||||||
else if ( uc == qToLittleEndian( uint16_t( hashTag ) ) ) {
|
default:
|
||||||
codec = QTextCodec::codecForMib( 1014 ); // utf16 le
|
break;
|
||||||
}
|
|
||||||
else {
|
|
||||||
//default encoding
|
|
||||||
codec = QTextCodec::codecForName( "UTF-8" );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
encoding = Utf8::getEncodingForName( codec->name() );
|
codec = QTextCodec::codecForName( getEncodingNameFor( encoding ) );
|
||||||
qDebug() << codec->name();
|
|
||||||
|
qDebug() << "DSL encoding ->" << codec->name();
|
||||||
|
|
||||||
if ( gzrewind( f ) ) {
|
if ( gzrewind( f ) ) {
|
||||||
gzclose( f );
|
gzclose( f );
|
||||||
|
|
Loading…
Reference in a new issue