From 2763b746355a6e9d2930ffd8c76d1b16f4e7e774 Mon Sep 17 00:00:00 2001 From: Abs62 Date: Fri, 9 Sep 2011 16:05:28 +0400 Subject: [PATCH] Use file names in UTF-8. Fix issue #30. --- bgl_babylon.cc | 5 +++-- dictionary.cc | 9 +++++---- dictzip.c | 6 ++++-- dsl.cc | 6 +++--- dsl_details.cc | 14 +++++++++++++- file.cc | 12 ++++++++++-- fsencoding.cc | 30 ++++++++++++++++++++++++++++++ fsencoding.hh | 11 +++++++++-- goldendict.pro | 10 ++++++++-- groups_widgets.cc | 3 ++- hunspell.cc | 5 +++-- loaddictionaries.cc | 17 +++++++++-------- orderandprops.cc | 3 ++- sounddir.cc | 5 +++-- stardict.cc | 15 ++++++++++++++- ufile.cc | 29 +++++++++++++++++++++++++++++ ufile.hh | 22 ++++++++++++++++++++++ 17 files changed, 169 insertions(+), 33 deletions(-) create mode 100644 ufile.cc create mode 100644 ufile.hh diff --git a/bgl_babylon.cc b/bgl_babylon.cc index fec429f0..329b4c3e 100644 --- a/bgl_babylon.cc +++ b/bgl_babylon.cc @@ -29,6 +29,7 @@ #include #include #include "dprintf.hh" +#include "ufile.hh" #ifdef _WIN32 #include @@ -58,7 +59,7 @@ bool Babylon::open() unsigned char buf[6]; int i; - f = fopen( m_filename.c_str(), "rb" ); + f = gd_fopen( m_filename.c_str(), "rb" ); if( f == NULL ) return false; @@ -325,7 +326,7 @@ bgl_entry Babylon::readEntry( ResourceHandler * resourceHandler ) block.data + pos, block.length - pos ); #if 0 - FILE *ifile = fopen(filename.c_str(), "w"); + FILE *ifile = gd_fopen(filename.c_str(), "w"); fwrite(block.data + pos, 1, block.length -pos, ifile); fclose(ifile); #endif diff --git a/dictionary.cc b/dictionary.cc index 101a3a4c..f7c886d2 100644 --- a/dictionary.cc +++ b/dictionary.cc @@ -17,6 +17,7 @@ #include #include #include +#include "fsencoding.hh" namespace Dictionary { @@ -163,10 +164,10 @@ string makeDictionaryId( vector< string > const & dictionaryFiles ) throw() { string const & full( dictionaryFiles[ x ] ); - QFileInfo fileInfo( QString::fromLocal8Bit( full.c_str() ) ); + QFileInfo fileInfo( FsEncoding::decode( full.c_str() ) ); if ( fileInfo.isAbsolute() ) - sortedList.push_back( dictionariesDir.relativeFilePath( fileInfo.filePath() ).toLocal8Bit().data() ); + sortedList.push_back( FsEncoding::encode( dictionariesDir.relativeFilePath( fileInfo.filePath() ) ) ); else { // Well, it's relative. We don't technically support those, but @@ -201,7 +202,7 @@ bool needToRebuildIndex( vector< string > const & dictionaryFiles, for( std::vector< string >::const_iterator i = dictionaryFiles.begin(); i != dictionaryFiles.end(); ++i ) { - QFileInfo fileInfo( QString::fromLocal8Bit( i->c_str() ) ); + QFileInfo fileInfo( FsEncoding::decode( i->c_str() ) ); if ( !fileInfo.exists() ) return true; @@ -212,7 +213,7 @@ bool needToRebuildIndex( vector< string > const & dictionaryFiles, lastModified = ts; } - QFileInfo fileInfo( QString::fromLocal8Bit( indexFile.c_str() ) ); + QFileInfo fileInfo( FsEncoding::decode( indexFile.c_str() ) ); if ( !fileInfo.exists() ) return true; diff --git a/dictzip.c b/dictzip.c index e78e16ea..cdc63341 100644 --- a/dictzip.c +++ b/dictzip.c @@ -31,6 +31,8 @@ #include #include +#include "ufile.hh" + #define BUFFERSIZE 10240 #define OUT_BUFFER_SIZE 0xffffL @@ -270,7 +272,7 @@ static int dict_read_header( const char *filename, int count; unsigned long offset; - if (!(str = fopen( filename, "rb" ))) + if (!(str = gd_fopen( filename, "rb" ))) err_fatal_errno( __func__, "Cannot open data file \"%s\" for read\n", filename ); @@ -444,7 +446,7 @@ dictData *dict_data_open( const char *filename, int computeCRC ) "\"%s\" not in text or dzip format\n", filename );*/ } - h->fd = fopen( filename, "rb" ); + h->fd = gd_fopen( filename, "rb" ); if ( !h->fd ) { diff --git a/dsl.cc b/dsl.cc index b435da43..fea20d15 100644 --- a/dsl.cc +++ b/dsl.cc @@ -375,7 +375,7 @@ void DslDictionary::doDeferredInit() idx, idxMutex ); QString zipName = QDir::fromNativeSeparators( - QFile::decodeName( getDictionaryFilenames().back().c_str() ) ); + FsEncoding::decode( getDictionaryFilenames().back().c_str() ) ); if ( zipName.endsWith( ".zip", Qt::CaseInsensitive ) ) // Sanity check resourceZip.openZipFile( zipName ); @@ -416,7 +416,7 @@ void DslDictionary::loadIcon() return; QString fileName = - QDir::fromNativeSeparators( QString::fromLocal8Bit( getDictionaryFilenames()[ 0 ].c_str() ) ); + QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ); // Remove the extension @@ -1650,7 +1650,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries( idxHeader.hasZipFile = 1; QFile zipFile( QDir::fromNativeSeparators( - QFile::decodeName( zipFileName.c_str() ) ) ); + FsEncoding::decode( zipFileName.c_str() ) ) ); if ( !zipFile.open( QFile::ReadOnly ) ) throw exCantReadFile( zipFileName ); diff --git a/dsl_details.cc b/dsl_details.cc index a62d27d5..ca4951dd 100644 --- a/dsl_details.cc +++ b/dsl_details.cc @@ -7,6 +7,7 @@ #include #include #include "dprintf.hh" +#include "ufile.hh" namespace Dsl { namespace Details { @@ -499,10 +500,21 @@ DslScanner::DslScanner( string const & fileName ) throw( Ex, Iconv::Ex ): { // Since .dz is backwards-compatible with .gz, we use gz- functions to // read it -- they are much nicer than the dict_data- ones. +#ifdef __WIN32 + int id = gd_open( fileName.c_str() ); + if( id == -1 ) + throw exCantOpen( fileName ); + f = gzdopen( id, "rb"); + if ( !f ) + { + _close( id ); + throw exCantOpen( fileName ); + } +#else f = gzopen( fileName.c_str(), "rb"); - if ( !f ) throw exCantOpen( fileName ); +#endif // Now try guessing the encoding by reading the first two bytes diff --git a/file.cc b/file.cc index 1720f4fb..43194554 100644 --- a/file.cc +++ b/file.cc @@ -10,6 +10,12 @@ #include #include +#ifdef __WIN32 +#include +#endif + +#include "ufile.hh" + namespace File { enum @@ -23,7 +29,9 @@ bool exists( char const * filename ) throw() { #ifdef __WIN32 struct _stat buf; - return _stat( filename, &buf ) == 0; + wchar_t wname[16384]; + MultiByteToWideChar( CP_UTF8, 0, filename, -1, wname, 16384 ); + return _wstat( wname, &buf ) == 0; #else struct stat buf; @@ -34,7 +42,7 @@ bool exists( char const * filename ) throw() void Class::open( char const * filename, char const * mode ) throw( exCantOpen ) { - f = fopen( filename, mode ); + f = gd_fopen( filename, mode ); if ( !f ) throw exCantOpen( std::string( filename ) + ": " + strerror( errno ) ); diff --git a/fsencoding.cc b/fsencoding.cc index a0c3e292..b9a11b04 100644 --- a/fsencoding.cc +++ b/fsencoding.cc @@ -11,17 +11,47 @@ namespace FsEncoding { string encode( wstring const & str ) { +#ifdef __WIN32 + return string( gd::toQString( str ).toUtf8().data() ); +#else return string( gd::toQString( str ).toLocal8Bit().data() ); +#endif } string encode( string const & str ) { +#ifdef __WIN32 + return string( str ); +#else return string( QString::fromUtf8( str.c_str() ).toLocal8Bit().data() ); +#endif +} + +string encode( QString const & str ) +{ +#ifdef __WIN32 + return string( str.toUtf8().data() ); +#else + return string( str.toLocal8Bit().data() ); +#endif } wstring decode( string const & str ) { +#ifdef __WIN32 + return gd::toWString( QString::fromUtf8( str.c_str() ) ); +#else return gd::toWString( QString::fromLocal8Bit( str.c_str() ) ); +#endif +} + +QString decode( const char *str ) +{ +#ifdef __WIN32 + return QString::fromUtf8( str ); +#else + return QString::fromLocal8Bit( str ); +#endif } char separator() diff --git a/fsencoding.hh b/fsencoding.hh index bcd04855..6df36ab7 100644 --- a/fsencoding.hh +++ b/fsencoding.hh @@ -5,6 +5,7 @@ #define __FSENCODING_HH_INCLUDED__ #include "wstring.hh" +#include /// Utilities to convert a wide string or an utf8 string to the local 8bit /// encoding of the file system, and to do other manipulations on the file @@ -14,15 +15,21 @@ namespace FsEncoding { using std::string; using gd::wstring; -/// Encodes the given wide string to the system 8bit encoding. +/// Encodes the given wide string to the utf8 encoding. string encode( wstring const & ); /// Encodes the given string in utf8 to the system 8bit encoding. string encode( string const & ); -/// Decodes the given 8bit-encoded string to a wide string. +/// Encodes the QString to the utf8/local 8-bit encoding. +string encode( QString const & ); + +/// Decodes the given utf8-encoded string to a wide string. wstring decode( string const & str ); +/// Decodes the given utf8/local 8-bit string to a QString. +QString decode( const char *str ); + /// Returns the filesystem separator (/ on Unix and clones, \ on Windows). char separator(); diff --git a/goldendict.pro b/goldendict.pro index 8ef3828f..6c3fc48a 100644 --- a/goldendict.pro +++ b/goldendict.pro @@ -188,7 +188,8 @@ HEADERS += folding.hh \ maintabwidget.hh \ dprintf.hh \ mainstatusbar.hh \ - gdappstyle.hh + gdappstyle.hh \ + ufile.hh FORMS += groups.ui \ dictgroupwidget.ui \ mainwindow.ui \ @@ -275,7 +276,8 @@ SOURCES += folding.cc \ parsecmdline.cc \ maintabwidget.cc \ mainstatusbar.cc \ - gdappstyle.cc + gdappstyle.cc \ + ufile.cc win32 { SOURCES += mouseover_win32/ThTypes.c \ wordbyauto.cc \ @@ -330,3 +332,7 @@ TS_OUT ~= s/.ts/.qm/g PRE_TARGETDEPS += $$TS_OUT include( qtsingleapplication/src/qtsingleapplication.pri ) + + + + diff --git a/groups_widgets.cc b/groups_widgets.cc index 9899ebe2..f4a8d3cc 100644 --- a/groups_widgets.cc +++ b/groups_widgets.cc @@ -7,6 +7,7 @@ #include "config.hh" #include "langcoder.hh" #include "language.hh" +#include "fsencoding.hh" //#include "initializing.hh" @@ -191,7 +192,7 @@ QVariant DictListModel::data( QModelIndex const & index, int role ) const if ( dirs.size() ) { tt += "
"; - tt += QString::fromLocal8Bit( dirs.at( 0 ).c_str() ); + tt += FsEncoding::decode( dirs.at( 0 ).c_str() ); } tt.replace( " ", " " ); diff --git a/hunspell.cc b/hunspell.cc index d13ef2d5..f0478f44 100644 --- a/hunspell.cc +++ b/hunspell.cc @@ -18,6 +18,7 @@ #include #include #include "dprintf.hh" +#include "fsencoding.hh" namespace HunspellMorpho { @@ -673,9 +674,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::Hunspell const & c vector< string > dictFiles; dictFiles.push_back( - QDir::toNativeSeparators( dataFiles[ d ].affFileName ).toLocal8Bit().data() ); + FsEncoding::encode( QDir::toNativeSeparators( dataFiles[ d ].affFileName ) ) ); dictFiles.push_back( - QDir::toNativeSeparators( dataFiles[ d ].dicFileName ).toLocal8Bit().data() ); + FsEncoding::encode( QDir::toNativeSeparators( dataFiles[ d ].dicFileName ) ) ); result.push_back( new HunspellDictionary( Dictionary::makeDictionaryId( dictFiles ), diff --git a/loaddictionaries.cc b/loaddictionaries.cc index aa1fc265..a0492554 100644 --- a/loaddictionaries.cc +++ b/loaddictionaries.cc @@ -19,6 +19,7 @@ #include "forvo.hh" #include "programs.hh" #include "dprintf.hh" +#include "fsencoding.hh" #include #include @@ -51,7 +52,7 @@ void LoadDictionaries::run() // Make soundDirs { vector< sptr< Dictionary::Class > > soundDirDictionaries = - SoundDir::makeDictionaries( soundDirs, Config::getIndexDir().toLocal8Bit().data(), *this ); + SoundDir::makeDictionaries( soundDirs, FsEncoding::encode( Config::getIndexDir() ), *this ); dictionaries.insert( dictionaries.end(), soundDirDictionaries.begin(), soundDirDictionaries.end() ); @@ -95,12 +96,12 @@ void LoadDictionaries::handlePath( Config::Path const & path ) handlePath( Config::Path( fullName, true ) ); } - allFiles.push_back( QDir::toNativeSeparators( fullName ).toLocal8Bit().data() ); + allFiles.push_back( FsEncoding::encode( QDir::toNativeSeparators( fullName ) ) ); } { vector< sptr< Dictionary::Class > > bglDictionaries = - Bgl::makeDictionaries( allFiles, Config::getIndexDir().toLocal8Bit().data(), *this ); + Bgl::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); dictionaries.insert( dictionaries.end(), bglDictionaries.begin(), bglDictionaries.end() ); @@ -108,7 +109,7 @@ void LoadDictionaries::handlePath( Config::Path const & path ) { vector< sptr< Dictionary::Class > > stardictDictionaries = - Stardict::makeDictionaries( allFiles, Config::getIndexDir().toLocal8Bit().data(), *this ); + Stardict::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); dictionaries.insert( dictionaries.end(), stardictDictionaries.begin(), stardictDictionaries.end() ); @@ -116,7 +117,7 @@ void LoadDictionaries::handlePath( Config::Path const & path ) { vector< sptr< Dictionary::Class > > lsaDictionaries = - Lsa::makeDictionaries( allFiles, Config::getIndexDir().toLocal8Bit().data(), *this ); + Lsa::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); dictionaries.insert( dictionaries.end(), lsaDictionaries.begin(), lsaDictionaries.end() ); @@ -124,7 +125,7 @@ void LoadDictionaries::handlePath( Config::Path const & path ) { vector< sptr< Dictionary::Class > > dslDictionaries = - Dsl::makeDictionaries( allFiles, Config::getIndexDir().toLocal8Bit().data(), *this ); + Dsl::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); dictionaries.insert( dictionaries.end(), dslDictionaries.begin(), dslDictionaries.end() ); @@ -132,7 +133,7 @@ void LoadDictionaries::handlePath( Config::Path const & path ) { vector< sptr< Dictionary::Class > > dictdDictionaries = - DictdFiles::makeDictionaries( allFiles, Config::getIndexDir().toLocal8Bit().data(), *this ); + DictdFiles::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); dictionaries.insert( dictionaries.end(), dictdDictionaries.begin(), dictdDictionaries.end() ); @@ -256,7 +257,7 @@ void loadDictionaries( QWidget * parent, bool showInitially, for( QStringList::const_iterator i = allIdxFiles.constBegin(); i != allIdxFiles.constEnd(); ++i ) { - if ( ids.find( i->toLocal8Bit().data() ) == ids.end() && + if ( ids.find( FsEncoding::encode( *i ) ) == ids.end() && i->size() == 32 ) indexDir.remove( *i ); } diff --git a/orderandprops.cc b/orderandprops.cc index e58d44c1..05a5e376 100644 --- a/orderandprops.cc +++ b/orderandprops.cc @@ -5,6 +5,7 @@ #include "instances.hh" #include "langcoder.hh" #include "language.hh" +#include "fsencoding.hh" OrderAndProps::OrderAndProps( QWidget * parent, Config::Group const & dictionaryOrder, @@ -110,7 +111,7 @@ void OrderAndProps::describeDictionary( DictListWidget * lst, QModelIndex const for( unsigned x = 0; x < filenames.size(); x++ ) { - filenamesText += QString::fromLocal8Bit( filenames[ x ].c_str() ); + filenamesText += FsEncoding::decode( filenames[ x ].c_str() ); filenamesText += '\n'; } diff --git a/sounddir.cc b/sounddir.cc index ba744ca9..36c92019 100644 --- a/sounddir.cc +++ b/sounddir.cc @@ -11,6 +11,7 @@ #include "htmlescape.hh" #include "audiolink.hh" #include "wstring_qt.hh" +#include "fsencoding.hh" #include #include #include @@ -252,7 +253,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const & chunk.back() = 0; // It must end with 0 anyway, but just in case - QDir dir( QDir::fromNativeSeparators( QString::fromLocal8Bit( getDictionaryFilenames()[ 0 ].c_str() ) ) ); + QDir dir( QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ) ); QString fileName = QDir::toNativeSeparators( dir.filePath( QString::fromUtf8( articleData ) ) ); @@ -260,7 +261,7 @@ sptr< Dictionary::DataRequest > SoundDirDictionary::getResource( string const & try { - File::Class f( fileName.toLocal8Bit().data(), "rb" ); + File::Class f( FsEncoding::encode( fileName ), "rb" ); sptr< Dictionary::DataRequestInstant > dr = new Dictionary::DataRequestInstant( true ); diff --git a/stardict.cc b/stardict.cc index f1af1699..602e5b8f 100644 --- a/stardict.cc +++ b/stardict.cc @@ -32,6 +32,7 @@ #include #include +#include "ufile.hh" namespace Stardict { @@ -886,10 +887,21 @@ static void handleIdxSynFile( string const & fileName, vector< uint32_t > * articleOffsets, bool isSynFile ) { +#ifdef __WIN32 + int id = gd_open( fileName.c_str() ); + if( id == -1 ) + throw exCantReadFile( fileName ); + gzFile stardictIdx = gzdopen( id, "rb"); + if ( !stardictIdx ) + { + _close( id ); + throw exCantReadFile( fileName ); + } +#else gzFile stardictIdx = gzopen( fileName.c_str(), "rb" ); - if ( !stardictIdx ) throw exCantReadFile( fileName ); +#endif vector< char > image; @@ -913,6 +925,7 @@ static void handleIdxSynFile( string const & fileName, break; } } + gzclose( stardictIdx ); // We append one zero byte to catch runaway string at the end, if any diff --git a/ufile.cc b/ufile.cc new file mode 100644 index 00000000..02fd0b6d --- /dev/null +++ b/ufile.cc @@ -0,0 +1,29 @@ +#ifdef __WIN32 + +#include +#include +#include + +#include "ufile.hh" + +FILE *gd_fopen( const char *filename, const char *mode) +{ + wchar_t wname[16384], wmode[32]; + + if( MultiByteToWideChar( CP_UTF8, 0, filename, -1, wname, 16384 ) == 0 ) + return NULL; + if( MultiByteToWideChar( CP_UTF8, 0, mode, -1, wmode, 32 ) == 0 ) + return NULL; + return _wfopen( wname, wmode ); +} + +int gd_open( const char *filename) +{ + wchar_t wname[16384]; + + if( MultiByteToWideChar( CP_UTF8, 0, filename, -1, wname, 16384 ) == 0 ) + return -1; + return _wopen( wname, _O_RDONLY | _O_BINARY ); +} + +#endif diff --git a/ufile.hh b/ufile.hh new file mode 100644 index 00000000..7eca9938 --- /dev/null +++ b/ufile.hh @@ -0,0 +1,22 @@ +#ifndef UFILE_HH_INCLUDED +#define UFILE_HH_INCLUDED + +#ifdef __WIN32 + +#ifdef __cplusplus +extern "C" +{ +#endif + +FILE *gd_fopen( const char *filename, const char *mode ); +int gd_open( const char *filename); + +#ifdef __cplusplus +} /* end extern "C" */ +#endif + +#else +#define gd_fopen fopen +#endif + +#endif // UFILE_HH