/*************************************************************************** * Copyright (C) 2007 by Raul Fernandes and Karl Grill * * rgbr@yahoo.com.br * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * ***************************************************************************/ #ifndef BABYLON_H #define BABYLON_H #include #include #include #include #include #include #include #ifndef blgCode2Int #define blgCode2Int( index, code0, code1 ) (((uint32_t)index) << 16 ) + (((uint32_t)code1) << 8 ) + (uint32_t)code0 #endif const std::array< quint32, 60 > bgl_language = { blgCode2Int( 0, 'e', 'n' ), // "English", blgCode2Int( 0, 'f', 'r' ), //"French", blgCode2Int( 0, 'i', 't' ), //"Italian", blgCode2Int( 0, 'e', 's' ), //"Spanish", blgCode2Int( 0, 'n', 'l' ), //"Dutch", blgCode2Int( 0, 'p', 't' ), //"Portuguese", blgCode2Int( 0, 'd', 'e' ), //"German", blgCode2Int( 0, 'r', 'u' ), //"Russian", blgCode2Int( 0, 'j', 'a' ), //"Japanese", blgCode2Int( 1, 'z', 'h' ), //"\x01",//"Traditional Chinese", blgCode2Int( 2, 'z', 'h' ), //"\x02",//"Simplified Chinese", blgCode2Int( 0, 'e', 'l' ), //"Greek", blgCode2Int( 0, 'k', 'o' ), //"Korean", blgCode2Int( 0, 't', 'r' ), //"Turkish", blgCode2Int( 0, 'h', 'e' ), //"Hebrew", blgCode2Int( 0, 'a', 'r' ), //"Arabic", blgCode2Int( 0, 't', 'h' ), //"Thai", blgCode2Int( 3, 0, 0 ), //"\x03",//"Other", blgCode2Int( 4, 'z', 'h' ), //"\x04",//"Other Simplified Chinese dialects", blgCode2Int( 5, 'z', 'h' ), //"\x05",//Other Traditional Chinese dialects", blgCode2Int( 6, 0, 0 ), //"\x06",//Other Eastern-European languages", blgCode2Int( 7, 0, 0 ), //"\x07",//Other Western-European languages", blgCode2Int( 8, 'r', 'u' ), //"\x08",//Other Russian languages", blgCode2Int( 9, 'j', 'a' ), //"\x09",//Other Japanese languages", blgCode2Int( 10, 0, 0 ), //"\x0A",//"Other Baltic languages", blgCode2Int( 11, 'e', 'l' ), //"\x0B",//Other Greek languages", blgCode2Int( 12, 'k', 'o' ), //"\x0C",//"Other Korean dialects", blgCode2Int( 13, 't', 'r' ), //"\x0D",//Other Turkish dialects", blgCode2Int( 14, 't', 'h' ), //"\x0E",//"Other Thai dialects", blgCode2Int( 0, 'p', 'l' ), //"Polish", blgCode2Int( 0, 'h', 'u' ), //"Hungarian", blgCode2Int( 0, 'c', 's' ), //"Czech", blgCode2Int( 0, 'l', 't' ), //"Lithuanian", blgCode2Int( 0, 'l', 'v' ), //"Latvian", blgCode2Int( 0, 'c', 'a' ), //"Catalan", blgCode2Int( 0, 'h', 'r' ), //"Croatian", blgCode2Int( 0, 's', 'r' ), //"Serbian", blgCode2Int( 0, 's', 'k' ), //"Slovak", blgCode2Int( 0, 's', 'q' ), //"Albanian", blgCode2Int( 0, 'u', 'r' ), //"Urdu", blgCode2Int( 0, 's', 'l' ), //"Slovenian", blgCode2Int( 0, 'e', 't' ), //"Estonian", blgCode2Int( 0, 'b', 'g' ), //"Bulgarian", blgCode2Int( 0, 'd', 'a' ), //"Danish", blgCode2Int( 0, 'f', 'i' ), //"Finnish", blgCode2Int( 0, 'i', 's' ), //"Icelandic", blgCode2Int( 0, 'n', 'o' ), //"Norwegian", blgCode2Int( 0, 'r', 'o' ), //"Romanian", blgCode2Int( 0, 's', 'v' ), //"Swedish", blgCode2Int( 0, 'u', 'k' ), //"Ukrainian", blgCode2Int( 0, 'b', 'e' ), //"Belarusian", blgCode2Int( 0, 'f', 'a' ), //"Farsi"=Persian, blgCode2Int( 0, 'e', 'u' ), //"Basque", blgCode2Int( 0, 'm', 'k' ), //"Macedonian", blgCode2Int( 0, 'a', 'f' ), //"Afrikaans", blgCode2Int( 0, 'f', 'o' ), //"Faeroese"=Faroese, blgCode2Int( 0, 'l', 'a' ), //"Latin", blgCode2Int( 0, 'e', 'o' ), //"Esperanto", blgCode2Int( 15, 0, 0 ), //"Tamazight", blgCode2Int( 0, 'h', 'y' ) //"Armenian" }; const std::vector< std::string > bgl_charset = { "WINDOWS-1252", /*Default*/ "WINDOWS-1252", /*Latin*/ "WINDOWS-1250", /*Eastern European*/ "WINDOWS-1251", /*Cyriilic*/ "CP932", /*Japanese*/ "BIG5", /*Traditional Chinese*/ "GB18030", /*Simplified Chinese*/ "CP1257", /*Baltic*/ "CP1253", /*Greek*/ "EUC-KR", /*Korean*/ "ISO-8859-9", /*Turkish*/ "WINDOWS-1255", /*Hebrew*/ "CP1256", /*Arabic*/ "CP874" /*Thai*/ }; const std::array< std::string, 11 > partOfSpeech = { "n.", "adj.", "v.", "adv.", "interj.", "pron.", "prep.", "conj.", "suff.", "pref.", "art." }; typedef struct { unsigned type; unsigned length; char * data; } bgl_block; typedef struct { std::string headword; std::string definition; std::string displayedHeadword; std::vector alternates; } bgl_entry; class Babylon { public: Babylon( const std::string & ); ~Babylon(); // Subclass this to store resources class ResourceHandler { public: virtual void handleBabylonResource( std::string const & filename, char const * data, size_t size ) = 0; virtual ~ResourceHandler() {} }; /// Sets a prefix string to append to each resource reference in hyperlinks. void setResourcePrefix( std::string const & prefix ) { m_resourcePrefix = prefix; } bool open(); void close(); bool readBlock( bgl_block & ); bool read( const std::string & source_charset, const std::string & target_charset ); bgl_entry readEntry( ResourceHandler * = 0 ); inline std::string title() const { return m_title; } inline std::string author() const { return m_author; } inline std::string email() const { return m_email; } inline std::string description() const { return m_description; } inline std::string copyright() const { return m_copyright; } inline quint32 sourceLang() const { return m_sourceLang; } inline quint32 targetLang() const { return m_targetLang; } inline std::string filename() const { return m_filename; } std::vector< char > const & getIcon() const { return icon; } enum { ParserVersion = 17 }; private: unsigned int bgl_readnum( int ); void convertToUtf8( std::string &, unsigned int = 0 ); std::string m_filename; gzFile file; std::string m_title; std::string m_author; std::string m_email; std::string m_description; std::string m_copyright; quint32 m_sourceLang; quint32 m_targetLang; unsigned int m_numEntries; std::string m_defaultCharset; std::string m_sourceCharset; std::string m_targetCharset; std::vector< char > icon; std::string m_resourcePrefix; enum CHARSET { BGL_DEFAULT_CHARSET, BGL_SOURCE_CHARSET, BGL_TARGET_CHARSET }; }; #endif // BABYLON_H