fix: dsl ann file codec detection

This commit is contained in:
xiaoyifang 2024-11-22 11:24:56 +08:00
parent 5b70a7e081
commit 0f3336977f
4 changed files with 34 additions and 23 deletions

View file

@ -47,6 +47,17 @@ QString unescapeAmps( QString const & str )
result.replace( "&&", "&" ); result.replace( "&&", "&" );
return result; return result;
} }
// Guesses the text encoding of `ba` and translates the answer into the
// QTextCodec::Encoding value used by the rest of the code.
//
// ba                     - raw bytes to inspect; only read here, never modified.
// expectedFirstCharacter - hint forwarded to QStringConverter::encodingForData:
//                          the character the decoded text is expected to start
//                          with. Its default value (0) is supplied by the
//                          declaration in the header; repeating a default
//                          argument on the definition is ill-formed, so it is
//                          intentionally not restated here.
//
// Returns the mapped encoding when detection succeeds and the detected value
// has an entry in encodingMap; otherwise falls back to QTextCodec::Utf8.
//
// NOTE(review): QTextCodec::Encoding / QTextCodec::Utf8 are not part of stock
// Qt's QTextCodec as far as I can tell - presumably a project-provided type;
// confirm it is what callers expect to receive.
QTextCodec::Encoding detectEncoding( QByteArray & ba, char16_t expectedFirstCharacter )
{
  // encodingForData() yields a std::optional: it is empty when the encoding is unclear.
  const auto detectedEncoding = QStringConverter::encodingForData( ba, expectedFirstCharacter );

  if ( detectedEncoding.has_value() ) {
    // mapping the encoding (single lookup; unmapped encodings fall through to the default)
    const auto mapped = encodingMap.constFind( detectedEncoding.value() );
    if ( mapped != encodingMap.constEnd() ) {
      return mapped.value();
    }
  }

  // default utf8
  return QTextCodec::Utf8;
}
} // namespace Utils } // namespace Utils
QString Utils::Path::combine( const QString & path1, const QString & path2 ) QString Utils::Path::combine( const QString & path1, const QString & path2 )

View file

@ -11,6 +11,9 @@
#include <QUrl> #include <QUrl>
#include <QUrlQuery> #include <QUrlQuery>
#include <QWidget> #include <QWidget>
#include <QStringConverter>
#include <QTextCodec>
#include <QMap>
#include "filetype.hh" #include "filetype.hh"
#include <string> #include <string>
using std::string; using std::string;
@ -353,4 +356,21 @@ QString escapeAmps( QString const & str );
QString unescapeAmps( QString const & str ); QString unescapeAmps( QString const & str );
// Encoding mappings between QStringConverter::Encoding and QTextCodec::Encoding.
//
// This lives in a header, so it is declared as a C++17 `inline` variable rather
// than inside an unnamed namespace: an unnamed namespace would give every
// translation unit that includes this header its own private copy of the map
// (each one constructed at start-up), whereas `inline` yields a single shared
// definition for the whole program.
//
// Encodings that have no entry here (see "others" below) are simply absent;
// lookups must handle a missing key.
inline const QMap< QStringConverter::Encoding, QTextCodec::Encoding > encodingMap = {
  { QStringConverter::Latin1, QTextCodec::Latin1 },
  { QStringConverter::Utf8, QTextCodec::Utf8 },
  { QStringConverter::Utf16, QTextCodec::Utf16 },
  { QStringConverter::Utf16LE, QTextCodec::Utf16LE },
  { QStringConverter::Utf16BE, QTextCodec::Utf16BE },
  { QStringConverter::Utf32LE, QTextCodec::Utf32LE },
  { QStringConverter::Utf32BE, QTextCodec::Utf32BE },
  // others
};
// Detects the text encoding of the bytes in `ba` and returns it as a
// QTextCodec::Encoding; the definition falls back to QTextCodec::Utf8 when
// nothing can be mapped.
//
// ba                     - bytes to inspect.
// expectedFirstCharacter - optional detection hint (the character the decoded
//                          text should start with); defaults to 0.
//
// NOTE(review): `ba` is a non-const lvalue reference, so a temporary (for
// example a braced `{ ptr, size }` argument) or a QString cannot bind to it -
// confirm that every call site passes a named QByteArray.
QTextCodec::Encoding detectEncoding( QByteArray & ba, char16_t expectedFirstCharacter = 0 );
} // namespace Utils } // namespace Utils

View file

@ -1042,6 +1042,8 @@ QString const & DslDictionary::getDescription()
QString data, str; QString data, str;
str = annStream.readLine(); str = annStream.readLine();
auto codec = Utils::detectEncoding( str );
annStream.setCodec( codec );
if ( str.left( 10 ).compare( "#LANGUAGE " ) != 0 ) { if ( str.left( 10 ).compare( "#LANGUAGE " ) != 0 ) {
annStream.seek( 0 ); annStream.seek( 0 );

View file

@ -872,30 +872,8 @@ DslScanner::DslScanner( string const & fileName ):
bool needExactEncoding = false; bool needExactEncoding = false;
// Note that .dsl format always starts with "#NAME" // Note that .dsl format always starts with "#NAME"
if ( auto guessedEncoding = QStringConverter::encodingForData( { firstBytes, firstBytesSize }, '#' );
guessedEncoding.has_value() ) {
switch ( guessedEncoding.value() ) {
case QStringConverter::Utf8:
encoding = Utf8::Utf8;
break;
case QStringConverter::Utf16LE:
encoding = Utf8::Utf16LE;
break;
case QStringConverter::Utf16BE:
encoding = Utf8::Utf16BE;
break;
case QStringConverter::Utf32LE:
encoding = Utf8::Utf16LE;
break;
case QStringConverter::Utf32BE:
encoding = Utf8::Utf32BE;
break;
default:
break;
}
}
codec = QTextCodec::codecForName( getEncodingNameFor( encoding ) ); codec = Utils::detectEncoding( { firstBytes, firstBytesSize }, '#' );
qDebug() << "DSL encoding ->" << codec->name(); qDebug() << "DSL encoding ->" << codec->name();