fix: code smells

🎨 apply clang-format changes

fix: code smells

🎨 apply clang-format changes

🎨 apply clang-format changes
This commit is contained in:
YiFang Xiao 2023-06-03 09:42:03 +08:00 committed by xiaoyifang
parent 1391a8fa7e
commit 29403f4263
2 changed files with 184 additions and 185 deletions

View file

@ -52,10 +52,13 @@ DEF_EX( exUserAbort, "User abort", Dictionary::Ex )
DEF_EX( exIconv, "Iconv library error", Dictionary::Ex ) DEF_EX( exIconv, "Iconv library error", Dictionary::Ex )
DEF_EX( exAllocation, "Error memory allocation", Dictionary::Ex ) DEF_EX( exAllocation, "Error memory allocation", Dictionary::Ex )
Babylon::Babylon( std::string filename ) : Babylon::Babylon( const std::string & filename ):
m_filename( filename ) m_filename( filename ),
m_sourceLang( 0 ),
m_targetLang( 0 ),
m_numEntries( 0 )
{ {
file = NULL; file = nullptr;
} }
@ -67,15 +70,13 @@ Babylon::~Babylon()
bool Babylon::open() bool Babylon::open()
{ {
FILE *f; unsigned char buf[ 6 ];
unsigned char buf[6];
int i;
f = gd_fopen( m_filename.c_str(), "rb" ); FILE * f = gd_fopen( m_filename.c_str(), "rb" );
if( f == NULL ) if ( f == nullptr )
return false; return false;
i = fread( buf, 1, 6, f ); int i = fread( buf, 1, 6, f );
/* First four bytes: BGL signature 0x12340001 or 0x12340002 (big-endian) */ /* First four bytes: BGL signature 0x12340001 or 0x12340002 (big-endian) */
if( i < 6 || memcmp( buf, "\x12\x34\x00", 3 ) || buf[3] == 0 || buf[3] > 2 ) if( i < 6 || memcmp( buf, "\x12\x34\x00", 3 ) || buf[3] == 0 || buf[3] > 2 )
@ -112,7 +113,7 @@ bool Babylon::open()
fclose( f ); fclose( f );
if( file == NULL ) if ( file == nullptr )
return false; return false;
return true; return true;
@ -124,14 +125,14 @@ void Babylon::close()
if ( file ) if ( file )
{ {
gzclose( file ); gzclose( file );
file = 0; file = nullptr;
} }
} }
bool Babylon::readBlock( bgl_block &block ) bool Babylon::readBlock( bgl_block &block )
{ {
if ( file == NULL || gzeof( file ) ) if ( file == nullptr || gzeof( file ) )
return false; return false;
block.length = bgl_readnum( 1 ); block.length = bgl_readnum( 1 );
@ -164,12 +165,10 @@ unsigned int Babylon::bgl_readnum( int bytes )
unsigned char buf[4]; unsigned char buf[4];
unsigned val = 0; unsigned val = 0;
if ( bytes < 1 || bytes > 4 ) return (0); if ( bytes < 1 || bytes > 4 )
return 0;
int res = gzread( file, buf, bytes ); if ( const int res = gzread( file, buf, bytes ); res != bytes ) {
if( res != bytes )
{
gzclearerr( file ); gzclearerr( file );
return 4; // Read error - return end of file marker return 4; // Read error - return end of file marker
} }
@ -179,9 +178,10 @@ unsigned int Babylon::bgl_readnum( int bytes )
} }
bool Babylon::read(std::string &source_charset, std::string &target_charset) bool Babylon::read( const std::string & source_charset, const std::string & target_charset )
{ {
if( file == NULL ) return false; if ( file == nullptr )
return false;
bgl_block block; bgl_block block;
unsigned int pos; unsigned int pos;
@ -251,8 +251,7 @@ bool Babylon::read(std::string &source_charset, std::string &target_charset)
//m_sourceLang = headword; //m_sourceLang = headword;
break; break;
case 8: case 8:
m_targetLang = bgl_language[(unsigned char)(block.data[5])]; m_targetLang = bgl_language[ (unsigned char)( block.data[ 5 ] ) ];
//m_targetLang = headword;
break; break;
case 9: case 9:
headword.reserve( block.length - 2 ); headword.reserve( block.length - 2 );
@ -302,9 +301,7 @@ bool Babylon::read(std::string &source_charset, std::string &target_charset)
} }
gzseek( file, 0, SEEK_SET ); gzseek( file, 0, SEEK_SET );
if ( isUtf8File ) if ( isUtf8File ) {
{
//FDPRINTF( stderr, "%s: utf8 file.\n", m_title.c_str() );
m_defaultCharset = "UTF-8"; m_defaultCharset = "UTF-8";
m_sourceCharset = "UTF-8"; m_sourceCharset = "UTF-8";
m_targetCharset = "UTF-8"; m_targetCharset = "UTF-8";
@ -324,8 +321,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
{ {
bgl_entry entry; bgl_entry entry;
if( file == NULL ) if ( file == nullptr ) {
{
entry.headword = ""; entry.headword = "";
return entry; return entry;
} }
@ -355,7 +351,6 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
if ( pos + len > block.length ) if ( pos + len > block.length )
break; break;
std::string filename( block.data + pos, len ); std::string filename( block.data + pos, len );
//if (filename != "8EAF66FD.bmp" && filename != "C2EEF3F6.html") {
pos += len; pos += len;
if ( resourceHandler ) if ( resourceHandler )
resourceHandler->handleBabylonResource( filename, block.data + pos, block.length - pos ); resourceHandler->handleBabylonResource( filename, block.data + pos, block.length - pos );
@ -569,9 +564,8 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
{ {
transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 4, length ); transcription = Iconv::toUtf8( "Windows-1252", block.data + pos + 4, length );
} }
catch( Iconv::Ex & e ) catch ( Iconv::Ex & e ) {
{ qWarning( "Bgl: charset conversion error, no transcription processing's done: %s\n", e.what() );
qWarning( "Bgl: charset conversion error, no trancription processing's done: %s\n", e.what() );
transcription = std::string( block.data + pos + 4, length ); transcription = std::string( block.data + pos + 4, length );
} }
} }
@ -622,29 +616,36 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler )
//pos += len - a; //pos += len - a;
//break; //break;
} }
else else {
if (block.data[pos] == 0x14) { if ( block.data[ pos ] == 0x14 ) {
defBodyEnded = true; // Presumably defBodyEnded = true; // Presumably
pos++; pos++;
} else if ((unsigned char)block.data[pos] == 0x1A){ }
else if ( (unsigned char)block.data[ pos ] == 0x1A ) {
unsigned length = (unsigned char)block.data[ pos + 1 ]; unsigned length = (unsigned char)block.data[ pos + 1 ];
if (length <= 10){// 0x1A identifies two different data types. if ( length <= 10 ) { // 0x1A identifies two different data types.
// data about the Hebrew root should be shorter than // data about the Hebrew root should be shorter than
// 10 bytes, and in the other data type the byte // 10 bytes, and in the other data type the byte
// after 0x1A is > 10 (at least it is in Babylon's // after 0x1A is > 10 (at least it is in Babylon's
// Hebrew dictionaries). // Hebrew dictionaries).
root = std::string( block.data + pos + 2, length ); root = std::string( block.data + pos + 2, length );
std::reverse(root.begin(),root.end()); std::reverse( root.begin(), root.end() );
definition += " (" + root + ")"; definition += " (" + root + ")";
pos += length + 2; pos += length + 2;
a += length + 1; a += length + 1;
} }
else else {
pos++; pos++;
} else { }
definition += block.data[pos++]; }
else {
definition += block.data[ pos++ ];
}
} }
}else definition += block.data[pos++]; }
else {
definition += block.data[ pos++ ];
}
} }
convertToUtf8( definition, BGL_TARGET_CHARSET ); convertToUtf8( definition, BGL_TARGET_CHARSET );
if( !transcription.empty() ) if( !transcription.empty() )

View file

@ -28,119 +28,91 @@
#include <vector> #include <vector>
#include <qglobal.h> #include <qglobal.h>
#include <stdint.h> #include <stdint.h>
#include <array>
//const std::string bgl_language[] = {
#ifndef blgCode2Int #ifndef blgCode2Int
#define blgCode2Int( index, code0, code1 ) (((uint32_t)index) << 16 ) + (((uint32_t)code1) << 8 ) + (uint32_t)code0 #define blgCode2Int( index, code0, code1 ) (((uint32_t)index) << 16 ) + (((uint32_t)code1) << 8 ) + (uint32_t)code0
#endif #endif
const quint32 bgl_language[] = { const std::array< quint32, 60 > bgl_language = {
blgCode2Int( 0, 'e', 'n' ),// "English", blgCode2Int( 0, 'e', 'n' ), // "English",
blgCode2Int( 0, 'f', 'r' ),//"French", blgCode2Int( 0, 'f', 'r' ), //"French",
blgCode2Int( 0, 'i', 't' ),//"Italian", blgCode2Int( 0, 'i', 't' ), //"Italian",
blgCode2Int( 0, 'e', 's' ),//"Spanish", blgCode2Int( 0, 'e', 's' ), //"Spanish",
blgCode2Int( 0, 'n', 'l' ),//"Dutch", blgCode2Int( 0, 'n', 'l' ), //"Dutch",
blgCode2Int( 0, 'p', 't' ),//"Portuguese", blgCode2Int( 0, 'p', 't' ), //"Portuguese",
blgCode2Int( 0, 'd', 'e' ),//"German", blgCode2Int( 0, 'd', 'e' ), //"German",
blgCode2Int( 0, 'r', 'u' ),//"Russian", blgCode2Int( 0, 'r', 'u' ), //"Russian",
blgCode2Int( 0, 'j', 'a' ),//"Japanese", blgCode2Int( 0, 'j', 'a' ), //"Japanese",
blgCode2Int( 1, 'z', 'h' ),//"\x01",//"Traditional Chinese", blgCode2Int( 1, 'z', 'h' ), //"\x01",//"Traditional Chinese",
blgCode2Int( 2, 'z', 'h' ),//"\x02",//"Simplified Chinese", blgCode2Int( 2, 'z', 'h' ), //"\x02",//"Simplified Chinese",
blgCode2Int( 0, 'e', 'l' ),//"Greek", blgCode2Int( 0, 'e', 'l' ), //"Greek",
blgCode2Int( 0, 'k', 'o' ),//"Korean", blgCode2Int( 0, 'k', 'o' ), //"Korean",
blgCode2Int( 0, 't', 'r' ),//"Turkish", blgCode2Int( 0, 't', 'r' ), //"Turkish",
blgCode2Int( 0, 'h', 'e' ),//"Hebrew", blgCode2Int( 0, 'h', 'e' ), //"Hebrew",
blgCode2Int( 0, 'a', 'r' ),//"Arabic", blgCode2Int( 0, 'a', 'r' ), //"Arabic",
blgCode2Int( 0, 't', 'h' ),//"Thai", blgCode2Int( 0, 't', 'h' ), //"Thai",
blgCode2Int( 3, 0, 0 ),//"\x03",//"Other", blgCode2Int( 3, 0, 0 ), //"\x03",//"Other",
blgCode2Int( 4, 'z', 'h' ),//"\x04",//"Other Simplified Chinese dialects", blgCode2Int( 4, 'z', 'h' ), //"\x04",//"Other Simplified Chinese dialects",
blgCode2Int( 5, 'z', 'h' ),//"\x05",//Other Traditional Chinese dialects", blgCode2Int( 5, 'z', 'h' ), //"\x05",//Other Traditional Chinese dialects",
blgCode2Int( 6, 0, 0 ),//"\x06",//Other Eastern-European languages", blgCode2Int( 6, 0, 0 ), //"\x06",//Other Eastern-European languages",
blgCode2Int( 7, 0, 0 ),//"\x07",//Other Western-European languages", blgCode2Int( 7, 0, 0 ), //"\x07",//Other Western-European languages",
blgCode2Int( 8, 'r', 'u' ),//"\x08",//Other Russian languages", blgCode2Int( 8, 'r', 'u' ), //"\x08",//Other Russian languages",
blgCode2Int( 9, 'j', 'a' ),//"\x09",//Other Japanese languages", blgCode2Int( 9, 'j', 'a' ), //"\x09",//Other Japanese languages",
blgCode2Int( 10, 0, 0 ),//"\x0A",//"Other Baltic languages", blgCode2Int( 10, 0, 0 ), //"\x0A",//"Other Baltic languages",
blgCode2Int( 11, 'e', 'l' ),//"\x0B",//Other Greek languages", blgCode2Int( 11, 'e', 'l' ), //"\x0B",//Other Greek languages",
blgCode2Int( 12, 'k', 'o' ),//"\x0C",//"Other Korean dialects", blgCode2Int( 12, 'k', 'o' ), //"\x0C",//"Other Korean dialects",
blgCode2Int( 13, 't', 'r' ),//"\x0D",//Other Turkish dialects", blgCode2Int( 13, 't', 'r' ), //"\x0D",//Other Turkish dialects",
blgCode2Int( 14, 't', 'h' ),//"\x0E",//"Other Thai dialects", blgCode2Int( 14, 't', 'h' ), //"\x0E",//"Other Thai dialects",
blgCode2Int( 0, 'p', 'l' ),//"Polish", blgCode2Int( 0, 'p', 'l' ), //"Polish",
blgCode2Int( 0, 'h', 'u' ),//"Hungarian", blgCode2Int( 0, 'h', 'u' ), //"Hungarian",
blgCode2Int( 0, 'c', 's' ),//"Czech", blgCode2Int( 0, 'c', 's' ), //"Czech",
blgCode2Int( 0, 'l', 't' ),//"Lithuanian", blgCode2Int( 0, 'l', 't' ), //"Lithuanian",
blgCode2Int( 0, 'l', 'v' ),//"Latvian", blgCode2Int( 0, 'l', 'v' ), //"Latvian",
blgCode2Int( 0, 'c', 'a' ),//"Catalan", blgCode2Int( 0, 'c', 'a' ), //"Catalan",
blgCode2Int( 0, 'h', 'r' ),//"Croatian", blgCode2Int( 0, 'h', 'r' ), //"Croatian",
blgCode2Int( 0, 's', 'r' ),//"Serbian", blgCode2Int( 0, 's', 'r' ), //"Serbian",
blgCode2Int( 0, 's', 'k' ),//"Slovak", blgCode2Int( 0, 's', 'k' ), //"Slovak",
blgCode2Int( 0, 's', 'q' ),//"Albanian", blgCode2Int( 0, 's', 'q' ), //"Albanian",
blgCode2Int( 0, 'u', 'r' ),//"Urdu", blgCode2Int( 0, 'u', 'r' ), //"Urdu",
blgCode2Int( 0, 's', 'l' ),//"Slovenian", blgCode2Int( 0, 's', 'l' ), //"Slovenian",
blgCode2Int( 0, 'e', 't' ),//"Estonian", blgCode2Int( 0, 'e', 't' ), //"Estonian",
blgCode2Int( 0, 'b', 'g' ),//"Bulgarian", blgCode2Int( 0, 'b', 'g' ), //"Bulgarian",
blgCode2Int( 0, 'd', 'a' ),//"Danish", blgCode2Int( 0, 'd', 'a' ), //"Danish",
blgCode2Int( 0, 'f', 'i' ),//"Finnish", blgCode2Int( 0, 'f', 'i' ), //"Finnish",
blgCode2Int( 0, 'i', 's' ),//"Icelandic", blgCode2Int( 0, 'i', 's' ), //"Icelandic",
blgCode2Int( 0, 'n', 'o' ),//"Norwegian", blgCode2Int( 0, 'n', 'o' ), //"Norwegian",
blgCode2Int( 0, 'r', 'o' ),//"Romanian", blgCode2Int( 0, 'r', 'o' ), //"Romanian",
blgCode2Int( 0, 's', 'v' ),//"Swedish", blgCode2Int( 0, 's', 'v' ), //"Swedish",
blgCode2Int( 0, 'u', 'k' ),//"Ukrainian", blgCode2Int( 0, 'u', 'k' ), //"Ukrainian",
blgCode2Int( 0, 'b', 'e' ),//"Belarusian", blgCode2Int( 0, 'b', 'e' ), //"Belarusian",
blgCode2Int( 0, 'f', 'a' ),//"Farsi"=Persian, blgCode2Int( 0, 'f', 'a' ), //"Farsi"=Persian,
blgCode2Int( 0, 'e', 'u' ),//"Basque", blgCode2Int( 0, 'e', 'u' ), //"Basque",
blgCode2Int( 0, 'm', 'k' ),//"Macedonian", blgCode2Int( 0, 'm', 'k' ), //"Macedonian",
blgCode2Int( 0, 'a', 'f' ),//"Afrikaans", blgCode2Int( 0, 'a', 'f' ), //"Afrikaans",
blgCode2Int( 0, 'f', 'o' ),//"Faeroese"=Faroese, blgCode2Int( 0, 'f', 'o' ), //"Faeroese"=Faroese,
blgCode2Int( 0, 'l', 'a' ),//"Latin", blgCode2Int( 0, 'l', 'a' ), //"Latin",
blgCode2Int( 0, 'e', 'o' ),//"Esperanto", blgCode2Int( 0, 'e', 'o' ), //"Esperanto",
blgCode2Int( 15, 0, 0 ),//"Tamazight", blgCode2Int( 15, 0, 0 ), //"Tamazight",
blgCode2Int( 0, 'h', 'y' )//"Armenian" blgCode2Int( 0, 'h', 'y' ) //"Armenian"
}; };
// Character encoding names, one per script group named in the trailing
// comment. Entry order matters to any code that indexes this table, so do
// not reorder or insert entries in the middle.
const std::vector< std::string > bgl_charset = {
  "WINDOWS-1252", // Default
  "WINDOWS-1252", // Latin
  "WINDOWS-1250", // Eastern European
  "WINDOWS-1251", // Cyrillic
  "CP932",        // Japanese
  "BIG5",         // Traditional Chinese
  "GB18030",      // Simplified Chinese
  "CP1257",       // Baltic
  "CP1253",       // Greek
  "EUC-KR",       // Korean
  "ISO-8859-9",   // Turkish
  "WINDOWS-1255", // Hebrew
  "CP1256",       // Arabic
  "CP874",        // Thai
};
const std::string bgl_charsetname[] = { const std::array< std::string, 11 > partOfSpeech = {
"Default" , "n.", "adj.", "v.", "adv.", "interj.", "pron.", "prep.", "conj.", "suff.", "pref.", "art." };
"Latin",
"Eastern European",
"Cyrillic",
"Japanese",
"Traditional Chinese",
"Simplified Chinese",
"Baltic",
"Greek",
"Korean",
"Turkish",
"Hebrew",
"Arabic",
"Thai" };
// Character encoding names, one per script group named in the trailing
// comment. Entry order matters to any code that indexes this table, so do
// not reorder or insert entries in the middle.
const std::string bgl_charset[] = {
  "WINDOWS-1252", // Default
  "WINDOWS-1252", // Latin
  "WINDOWS-1250", // Eastern European
  "WINDOWS-1251", // Cyrillic
  "CP932",        // Japanese
  "BIG5",         // Traditional Chinese
  "GB18030",      // Simplified Chinese
  "CP1257",       // Baltic
  "CP1253",       // Greek
  "EUC-KR",       // Korean
  "ISO-8859-9",   // Turkish
  "WINDOWS-1255", // Hebrew
  "CP1256",       // Arabic
  "CP874"         // Thai
};
// Abbreviated part-of-speech labels. Entry order matters to any code that
// indexes this table, so do not reorder entries.
const std::string partOfSpeech[] = {
  "n.",      "adj.",  "v.",    "adv.",
  "interj.", "pron.", "prep.", "conj.",
  "suff.",   "pref.", "art."
};
typedef struct { typedef struct {
unsigned type; unsigned type;
@ -158,20 +130,18 @@ typedef struct {
class Babylon class Babylon
{ {
public: public:
Babylon( std::string ); Babylon( const std::string & );
~Babylon(); ~Babylon();
// Subclass this to store resources // Subclass this to store resources
class ResourceHandler class ResourceHandler
{ {
public: public:
virtual void handleBabylonResource( std::string const & filename, virtual void handleBabylonResource( std::string const & filename, char const * data, size_t size ) = 0;
char const * data, size_t size )=0;
virtual ~ResourceHandler() virtual ~ResourceHandler() {}
{} };
};
/// Sets a prefix string to prepend to each resource reference in hyperlinks. /// Sets a prefix string to prepend to each resource reference in hyperlinks.
void setResourcePrefix( std::string const & prefix ) void setResourcePrefix( std::string const & prefix )
@ -179,31 +149,55 @@ public:
bool open(); bool open();
void close(); void close();
bool readBlock( bgl_block& ); bool readBlock( bgl_block & );
bool read(std::string &source_charset, std::string &target_charset); bool read( const std::string & source_charset, const std::string & target_charset );
bgl_entry readEntry( ResourceHandler * = 0 ); bgl_entry readEntry( ResourceHandler * = 0 );
inline std::string title() const { return m_title; } inline std::string title() const
inline std::string author() const { return m_author; } {
inline std::string email() const { return m_email; } return m_title;
inline std::string description() const { return m_description; } }
inline std::string copyright() const { return m_copyright; } inline std::string author() const
inline quint32 sourceLang() const { return m_sourceLang; }//std::string sourceLang() const { return m_sourceLang; } {
inline quint32 targetLang() const { return m_targetLang; }//inline std::string targetLang() const { return m_targetLang; } return m_author;
inline unsigned int numEntries() const { return m_numEntries; } }
inline std::string charset() const { return m_defaultCharset; } inline std::string email() const
{
return m_email;
}
inline std::string description() const
{
return m_description;
}
inline std::string copyright() const
{
return m_copyright;
}
inline quint32 sourceLang() const
{
return m_sourceLang;
}
inline quint32 targetLang() const
{
return m_targetLang;
}
inline std::string filename() const { return m_filename; } inline std::string filename() const
{
return m_filename;
}
std::vector< char > const & getIcon() const std::vector< char > const & getIcon() const
{ return icon; } {
return icon;
}
enum enum
{ {
ParserVersion = 17 ParserVersion = 17
}; };
private: private:
unsigned int bgl_readnum( int ); unsigned int bgl_readnum( int );
void convertToUtf8( std::string &, unsigned int = 0 ); void convertToUtf8( std::string &, unsigned int = 0 );
@ -215,8 +209,8 @@ private:
std::string m_email; std::string m_email;
std::string m_description; std::string m_description;
std::string m_copyright; std::string m_copyright;
quint32 m_sourceLang; //std::string m_sourceLang; quint32 m_sourceLang;
quint32 m_targetLang;//std::string m_targetLang; quint32 m_targetLang;
unsigned int m_numEntries; unsigned int m_numEntries;
std::string m_defaultCharset; std::string m_defaultCharset;
std::string m_sourceCharset; std::string m_sourceCharset;
@ -225,7 +219,11 @@ private:
std::string m_resourcePrefix; std::string m_resourcePrefix;
enum CHARSET { BGL_DEFAULT_CHARSET, BGL_SOURCE_CHARSET, BGL_TARGET_CHARSET }; enum CHARSET {
BGL_DEFAULT_CHARSET,
BGL_SOURCE_CHARSET,
BGL_TARGET_CHARSET
};
}; };
#endif // BABYLON_H #endif // BABYLON_H