From 81ee55aff7b6e33eb265d2f2645f1e860a8a341f Mon Sep 17 00:00:00 2001 From: Abs62 Date: Thu, 9 Feb 2012 16:50:38 +0400 Subject: [PATCH] Add support for Aard dictionaries (aar-html) --- aard.cc | 910 ++++++++++++++++++++++++++++++++++++++++++ aard.hh | 23 ++ decompress.cc | 72 ++++ decompress.hh | 12 + goldendict.pro | 8 +- icons/icon32_aard.png | Bin 0 -> 3623 bytes loaddictionaries.cc | 10 +- resources.qrc | 1 + sdict.cc | 70 +--- 9 files changed, 1034 insertions(+), 72 deletions(-) create mode 100644 aard.cc create mode 100644 aard.hh create mode 100644 decompress.cc create mode 100644 decompress.hh create mode 100644 icons/icon32_aard.png diff --git a/aard.cc b/aard.cc new file mode 100644 index 00000000..10083a92 --- /dev/null +++ b/aard.cc @@ -0,0 +1,910 @@ +/* This file is (c) 2008-2011 Konstantin Isakov + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#include "aard.hh" +#include "btreeidx.hh" +#include "folding.hh" +#include "utf8.hh" +#include "chunkedstorage.hh" +#include "langcoder.hh" +#include "dprintf.hh" +#include "fsencoding.hh" +#include "decompress.hh" + +#include +#include +#include + +#ifdef _MSC_VER +#include +#endif + +#include +#include +#include +#include +#include + +#include "ufile.hh" +#include "wstring_qt.hh" + +namespace Aard { + +using std::map; +using std::multimap; +using std::pair; +using std::set; +using std::string; +using gd::wstring; + +using BtreeIndexing::WordArticleLink; +using BtreeIndexing::IndexedWords; +using BtreeIndexing::IndexInfo; + +namespace { + +DEF_EX_STR( exNotDctFile, "Not an Sdictionary file", Dictionary::Ex ) +DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex ) +DEF_EX_STR( exWordIsTooLarge, "Enountered a word that is too large:", Dictionary::Ex ) +DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex ) + +#ifdef _MSC_VER +#pragma pack( push, 1 ) +#endif + +// Big-Endian template +// http://habrahabr.ru/blogs/cpp/121811/ + +template +struct BigEndian +{ + union + { + unsigned char bytes[sizeof(T)]; + T raw_value; + }; + + BigEndian(T t = T()) + { + operator =(t); + } + + BigEndian(const BigEndian & t) + { + raw_value = t.raw_value; + } + + operator const T() const + { + T t = T(); + for (unsigned i = 0; i < sizeof(T); i++) + t |= T(bytes[sizeof(T) - 1 - i]) << (i << 3); + return t; + } + + const T operator = (const T t) + { + for (unsigned i = 0; i < sizeof(T); i++) + bytes[sizeof(T) - 1 - i] = (unsigned char)( t >> (i << 3) ); + return t; + } + +} +#ifndef _MSC_VER +__attribute__((packed)) +#endif +; + +typedef BigEndian< uint16_t > uint16_be; +typedef BigEndian< uint32_t > uint32_be; + +/// AAR file header +struct AAR_header +{ + char signature[4]; + char checksum[40]; + uint16_be version; + char uuid[16]; + uint16_be volume; + uint16_be totalVolumes; + uint32_be metaLength; + uint32_be wordsCount; + uint32_be articleOffset; + char indexItemFormat[4]; + char keyLengthFormat[2]; + char articleLengthFormat[2]; +} +#ifndef _MSC_VER +__attribute__((packed)) +#endif +; + +struct IndexElement +{ + uint32_be wordOffset; + uint32_be articleOffset; +} +#ifndef _MSC_VER +__attribute__((packed)) +#endif +; + +enum +{ + Signature = 0x58524141, // AARX on little-endian, XRAA on big-endian + CurrentFormatVersion = 1 + BtreeIndexing::FormatVersion + Folding::Version +}; + +struct IdxHeader +{ + uint32_t signature; // First comes the signature, AARX + uint32_t formatVersion; // File format version (CurrentFormatVersion) + uint32_t chunksOffset; // The offset to chunks' storage + uint32_t indexBtreeMaxElements; // Two fields from IndexInfo + uint32_t indexRootOffset; + uint32_t wordCount; + uint32_t articleCount; + uint32_t langFrom; // Source language + uint32_t langTo; // Target language +} +#ifndef _MSC_VER +__attribute__((packed)) +#endif +; + +#ifdef _MSC_VER +#pragma pack( pop, 1 ) +#endif + +bool indexIsOldOrBad( string const & indexFile ) +{ + File::Class idx( indexFile, "rb" ); + + IdxHeader header; + + return idx.readRecords( &header, sizeof( header ), 1 ) != 1 || + header.signature != Signature || + header.formatVersion != CurrentFormatVersion; +} + +void readJSONValue( string const & source, string & str, uint32_t & pos) +{ + int level = 1; + char endChar; + str.push_back( source[pos] ); + if( source[pos] == '{') + endChar = '}'; + else if( source[pos] == '[' ) + endChar = ']'; + else if( source[pos] == '\"' ) + { + str.clear(); + endChar = '\"'; + } + else + endChar = ','; + + pos++; + char ch = 0; + char lastCh = 0; + while( !( ch == endChar && lastCh != '\\' && level == 0 ) + && pos < source.size() ) + { + lastCh = ch; + ch = source[ pos++ ]; + if( ( ch == '{' || ch == '[' ) && lastCh != '\\' ) + level++; + if( ( ch == '}' || ch == ']' ) && lastCh != '\\' ) + level--; + + if( ch == endChar && + ( ( ch == '\"' && lastCh != '\\' ) || ch == ',' ) + && level == 1) + break; + str.push_back( ch ); + } +} + +class AardDictionary: public BtreeIndexing::BtreeDictionary +{ + Mutex idxMutex; + File::Class idx; + IdxHeader idxHeader; + ChunkedStorage::Reader chunks; + string dictionaryName; + File::Class df; + QIcon dictionaryIcon, dictionaryNativeIcon; + bool dictionaryIconLoaded; + + public: + + AardDictionary( string const & id, string const & indexFile, + vector< string > const & dictionaryFiles ); + + ~AardDictionary(); + + virtual string getName() throw() + { return dictionaryName; } + + virtual map< Dictionary::Property, string > getProperties() throw() + { return map< Dictionary::Property, string >(); } + + virtual unsigned long getArticleCount() throw() + { return idxHeader.articleCount; } + + virtual unsigned long getWordCount() throw() + { return idxHeader.wordCount; } + + virtual QIcon getIcon() throw(); + + virtual QIcon getNativeIcon() throw(); + + inline virtual quint32 getLangFrom() const + { return idxHeader.langFrom; } + + inline virtual quint32 getLangTo() const + { return idxHeader.langTo; } + + virtual sptr< Dictionary::DataRequest > getArticle( wstring const &, + vector< wstring > const & alts, + wstring const & ) + throw( std::exception ); + +private: + + void loadIcon(); + + /// Loads the article. + void loadArticle( uint32_t address, + string & articleText ); + string convert( string const & in_data ); + + friend class AardArticleRequest; +}; + +AardDictionary::AardDictionary( string const & id, + string const & indexFile, + vector< string > const & dictionaryFiles ): + BtreeDictionary( id, dictionaryFiles ), + idx( indexFile, "rb" ), + idxHeader( idx.read< IdxHeader >() ), + chunks( idx, idxHeader.chunksOffset ), + df( dictionaryFiles[ 0 ], "rb" ), + dictionaryIconLoaded( false ) +{ + // Read dictionary name + + idx.seek( sizeof( idxHeader ) ); + vector< char > dName( idx.read< uint32_t >() ); + if( dName.size() ) + { + idx.read( &dName.front(), dName.size() ); + dictionaryName = string( &dName.front(), dName.size() ); + } + + // Initialize the index + + openIndex( IndexInfo( idxHeader.indexBtreeMaxElements, + idxHeader.indexRootOffset ), + idx, idxMutex ); +} + +AardDictionary::~AardDictionary() +{ + df.close(); +} + +QIcon AardDictionary::getNativeIcon() throw() +{ + loadIcon(); + return dictionaryNativeIcon; +} + +QIcon AardDictionary::getIcon() throw() +{ + loadIcon(); + return dictionaryIcon; +} + +void AardDictionary::loadIcon() +{ + if ( dictionaryIconLoaded ) + return; + + QString fileName = + QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) ); + + // Remove the extension + + fileName.chop( 3 ); + fileName += "bmp"; + QFileInfo info( fileName ); + + if ( !info.exists() ) + { + fileName.chop( 3 ); + fileName += "png"; + info = QFileInfo( fileName ); + } + + if ( info.exists() ) + { + QImage img( fileName ); + + if ( !img.isNull() ) + { + // Load successful + + // Apply the color key + + img.setAlphaChannel( img.createMaskFromColor( QColor( 192, 192, 192 ).rgb(), + Qt::MaskOutColor ) ); + + dictionaryNativeIcon = QIcon( QPixmap::fromImage( img ) ); + + // Transform it to be square + int max = img.width() > img.height() ? img.width() : img.height(); + + QImage result( max, max, QImage::Format_ARGB32 ); + result.fill( 0 ); // Black transparent + + QPainter painter( &result ); + + painter.drawImage( QPoint( img.width() == max ? 0 : ( max - img.width() ) / 2, + img.height() == max ? 0 : ( max - img.height() ) / 2 ), + img ); + + painter.end(); + + dictionaryIcon = QIcon( QPixmap::fromImage( result ) ); + } + } + + if ( dictionaryIcon.isNull() ) + { + // Load failed -- use default icons + dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/icon32_aard.png"); + } + + dictionaryIconLoaded = true; +} + +string AardDictionary::convert( const string & in ) +{ + string inConverted; + char inCh, lastCh = 0; + bool afterEol = false; + + for( string::const_iterator i = in.begin(), j = in.end(); i != j; ++i ) + { + inCh = *i; + if( lastCh == '\\' ) + { + inConverted.erase( inConverted.size() - 1 ); + lastCh = 0; + if( inCh == 'n' ) + { + inConverted.append( "
"); + afterEol = true; + continue; + } + else if( inCh == 'r') + continue; + } + else if( inCh == ' ' && afterEol ) + { + inConverted.append( " " ); + continue; + } else + lastCh = inCh; + afterEol = false; + inConverted.push_back( inCh ); + } + + QDomDocument dd; + QString errorStr; + int errorLine, errorColumn; + + if( !dd.setContent( QByteArray( inConverted.c_str() ), false, &errorStr, &errorLine, &errorColumn ) ) + { + FDPRINTF( stderr, "Aard article parse failed: %s at %d,%d\n", errorStr.toLocal8Bit().constData(), errorLine, errorColumn ); + FDPRINTF( stderr, "The input was: %s\n", in.c_str() ); + return inConverted; + } + + QDomNodeList nodes = dd.elementsByTagName( "a" ); // References + for( int i = 0; i < nodes.count(); i++ ) + { + QDomElement el = nodes.at( i ).toElement(); + QString ref = el.attribute( "href", "" ); + if( ref.size() == 0 || ref.indexOf( "http://") != -1 || ref[0] == '#' ) + continue; + if( ref.indexOf( "w:") == 0 || ref.indexOf( "s:") == 0 ) + ref.replace( 0, 2, "bword:" ); + else + ref.insert( 0, "bword:" ); + el.setAttribute( "href", ref ); + } + + return dd.toByteArray().data(); +} + +void AardDictionary::loadArticle( uint32_t address, + string & articleText ) +{ + uint32_t articleOffset = address; + uint32_t articleSize; + uint32_be size; + + vector< char > articleBody; + + articleText.clear(); + + df.seek( articleOffset ); + df.read( &size, sizeof(size) ); + articleSize = size; + articleBody.resize( articleSize ); + df.read( &articleBody.front(), articleSize ); + + if ( articleBody.empty() ) + throw exCantReadFile( getDictionaryFilenames()[ 0 ] ); + + string text = decompressBzip2( articleBody.data(), articleSize ); + if( text.empty() ) + text = decompressZlib( articleBody.data(), articleSize ); + if( text.empty() ) + text = string( articleBody.data(), articleSize ); + + uint32_t n = 0; + while( n < text.size() && text[n] != '\"' ) + n++; + + if( n >= text.size() ) + return; + + readJSONValue( text, articleText, n ); + + if( articleText.empty() ) + { + n = text.find( "\"r\"" ); + if( n != string::npos ) + { + n += 3; + while( n < text.size() && text[n] != '\"' ) + n++; + + if( n >= text.size() ) + return; + + string link; + readJSONValue( text, link, n ); + if( !link.empty() ) + articleText = "" + link + ""; + } + } + + if( !articleText.empty() ) + articleText = convert( articleText ); + + articleText = "
" + articleText + "
"; +} + +/// AardDictionary::getArticle() + +class AardArticleRequest; + +class AardArticleRequestRunnable: public QRunnable +{ + AardArticleRequest & r; + QSemaphore & hasExited; + +public: + + AardArticleRequestRunnable( AardArticleRequest & r_, + QSemaphore & hasExited_ ): r( r_ ), + hasExited( hasExited_ ) + {} + + ~AardArticleRequestRunnable() + { + hasExited.release(); + } + + virtual void run(); +}; + +class AardArticleRequest: public Dictionary::DataRequest +{ + friend class AardArticleRequestRunnable; + + wstring word; + vector< wstring > alts; + AardDictionary & dict; + + QAtomicInt isCancelled; + QSemaphore hasExited; + +public: + + AardArticleRequest( wstring const & word_, + vector< wstring > const & alts_, + AardDictionary & dict_ ): + word( word_ ), alts( alts_ ), dict( dict_ ) + { + QThreadPool::globalInstance()->start( + new AardArticleRequestRunnable( *this, hasExited ) ); + } + + void run(); // Run from another thread by DslArticleRequestRunnable + + virtual void cancel() + { + isCancelled.ref(); + } + + ~AardArticleRequest() + { + isCancelled.ref(); + hasExited.acquire(); + } +}; + +void AardArticleRequestRunnable::run() +{ + r.run(); +} + +void AardArticleRequest::run() +{ + if ( isCancelled ) + { + finish(); + return; + } + + vector< WordArticleLink > chain = dict.findArticles( word ); + + for( unsigned x = 0; x < alts.size(); ++x ) + { + /// Make an additional query for each alt + + vector< WordArticleLink > altChain = dict.findArticles( alts[ x ] ); + + chain.insert( chain.end(), altChain.begin(), altChain.end() ); + } + + multimap< wstring, pair< string, string > > mainArticles, alternateArticles; + + set< uint32_t > articlesIncluded; // Some synonims make it that the articles + // appear several times. We combat this + // by only allowing them to appear once. + + wstring wordCaseFolded = Folding::applySimpleCaseOnly( word ); + + for( unsigned x = 0; x < chain.size(); ++x ) + { + if ( isCancelled ) + { + finish(); + return; + } + + if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() ) + continue; // We already have this article in the body. + + // Now grab that article + + string headword, articleText; + + headword = chain[ x ].word; + dict.loadArticle( chain[ x ].articleOffset, articleText ); + + // Ok. Now, does it go to main articles, or to alternate ones? We list + // main ones first, and alternates after. + + // We do the case-folded comparison here. + + wstring headwordStripped = + Folding::applySimpleCaseOnly( Utf8::decode( headword ) ); + + multimap< wstring, pair< string, string > > & mapToUse = + ( wordCaseFolded == headwordStripped ) ? + mainArticles : alternateArticles; + + mapToUse.insert( pair< wstring, pair< string, string > >( + Folding::applySimpleCaseOnly( Utf8::decode( headword ) ), + pair< string, string >( headword, articleText ) ) ); + + articlesIncluded.insert( chain[ x ].articleOffset ); + } + + if ( mainArticles.empty() && alternateArticles.empty() ) + { + // No such word + finish(); + return; + } + + string result; + + multimap< wstring, pair< string, string > >::const_iterator i; + + for( i = mainArticles.begin(); i != mainArticles.end(); ++i ) + { + result += "

"; + result += i->second.first; + result += "

"; + result += i->second.second; + } + + for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i ) + { + result += "

"; + result += i->second.first; + result += "

"; + result += i->second.second; + } + + Mutex::Lock _( dataMutex ); + + data.resize( result.size() ); + + memcpy( &data.front(), result.data(), result.size() ); + + hasAnyData = true; + + finish(); +} + +map< string, string > parseMetaData( string const & metaData ) +{ +// Parsing JSON string + map< string, string > data; + string name, value; + uint32_t n = 0; + + while( metaData[n] != '{' && n < metaData.length() ) + n++; + while( n < metaData.length() ) + { + // Skip to '"' + while( metaData[n] != '\"' && n < metaData.length() ) + n++; + if( ++n >= metaData.length() ) + break; + + // Read name + while( !( ( metaData[n] == '\"' || metaData[n] == '{' ) && metaData[n-1] != '\\' ) + && n < metaData.length() ) + name.push_back( metaData[n++]); + + // Skip to ':' + if( ++n >= metaData.length() ) + break; + while( metaData[n] != ':' && n < metaData.length() ) + n++; + if( ++n >= metaData.length() ) + break; + + // Find value start after ':' + while( !( ( metaData[n] == '\"' + || metaData[n] == '{' + || metaData[n] == '[' + || ( metaData[n] >= '0' && metaData[n] <= '9' ) ) + && metaData[n-1] != '\\' ) + && n < metaData.length() ) + n++; + if( n >= metaData.length() ) + break; + + readJSONValue( metaData, value, n); + + data[name] = value; + + name.clear(); + value.clear(); + if( ++n >= metaData.length() ) + break; + } + return data; +} + +sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word, + vector< wstring > const & alts, + wstring const & ) + throw( std::exception ) +{ + return new AardArticleRequest( word, alts, *this ); +} + +} // anonymous namespace + +vector< sptr< Dictionary::Class > > makeDictionaries( + vector< string > const & fileNames, + string const & indicesDir, + Dictionary::Initializing & initializing ) + throw( std::exception ) +{ + vector< sptr< Dictionary::Class > > dictionaries; + + for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end(); + ++i ) + { + // Skip files with the extensions different to .aar to speed up the + // scanning + if ( i->size() < 4 || + strcasecmp( i->c_str() + ( i->size() - 4 ), ".aar" ) != 0 ) + continue; + + // Got the file -- check if we need to rebuid the index + + vector< string > dictFiles( 1, *i ); + + string dictId = Dictionary::makeDictionaryId( dictFiles ); + + string indexFile = indicesDir + dictId; + + if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) || + indexIsOldOrBad( indexFile ) ) + { + try + { + File::Class df( *i, "rb" ); + + AAR_header dictHeader; + + df.read( &dictHeader, sizeof(dictHeader) ); + if( strncmp( dictHeader.signature, "aard", 4 ) + || strncmp( dictHeader.indexItemFormat, ">LL", 4 ) + || strncmp( dictHeader.keyLengthFormat, ">H", 2 ) + || strncmp( dictHeader.articleLengthFormat, ">L", 2) ) + { + DPRINTF( "File %s is not in supported aard format", i->c_str() ); + continue; + } + + vector< char > data; + uint32_t size = dictHeader.metaLength; + + data.resize( size ); + df.read( &data.front(), size ); + string metaStr = decompressBzip2( data.data(), size ); + if( metaStr.empty() ) + metaStr = decompressZlib( data.data(), size ); + + map< string, string > meta = parseMetaData( metaStr ); + + if( meta.empty() ) + { + DPRINTF( "File %s has invalid metadata", i->c_str() ); + continue; + } + + string dictName; + map< string, string >::const_iterator iter = meta.find( "title" ); + if( iter != meta.end() ) + dictName = iter->second; + + uint16_t volumes = dictHeader.totalVolumes; + if( volumes > 1 ) + { + QString ss; + ss.sprintf( " (%i/%i)", (uint16_t)(dictHeader.volume), volumes ); + dictName += ss.toLocal8Bit().data(); + } + + string langFrom; + iter = meta.find( "index_language" ); + if( iter != meta.end() ) + langFrom = iter->second; + + string langTo; + iter = meta.find( "article_language" ); + if( iter != meta.end() ) + langTo = iter->second; + + initializing.indexingDictionary( dictName ); + + File::Class idx( indexFile, "wb" ); + IdxHeader idxHeader; + memset( &idxHeader, 0, sizeof( idxHeader ) ); + + // We write a dummy header first. At the end of the process the header + // will be rewritten with the right values. + + idx.write( idxHeader ); + + idx.write( (uint32_t) dictName.size() ); + if( !dictName.empty() ) + idx.write( dictName.data(), dictName.size() ); + + IndexedWords indexedWords; + + ChunkedStorage::Writer chunks( idx ); + + uint32_t wordCount = dictHeader.wordsCount; + set< uint32_t > articleOffsets; + uint32_t pos = df.tell(); + uint32_t wordsBase = pos + wordCount * sizeof( IndexElement ); + uint32_t articlesBase = dictHeader.articleOffset; + + for( uint32_t j = 0; j < wordCount; j++ ) + { + IndexElement el; + + df.seek( pos ); + df.read( &el, sizeof(el) ); + uint32_t articleOffset = articlesBase + el.articleOffset; + uint32_t wordOffset = wordsBase + el.wordOffset; + + df.seek( wordOffset ); + + uint16_be sizeBE; + df.read( &sizeBE, sizeof(sizeBE) ); + uint16_t wordSize = sizeBE; + data.resize( wordSize ); + df.read( &data.front(), wordSize ); + + if( articleOffsets.find( articleOffset ) == articleOffsets.end() ) + articleOffsets.insert( articleOffset ); + + // Insert new entry + indexedWords.addWord( Utf8::decode( string( data.data(), wordSize ) ), articleOffset); + + pos += sizeof(el); + } + // Finish with the chunks + + idxHeader.chunksOffset = chunks.finish(); + + // Build index + + IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx ); + + idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements; + idxHeader.indexRootOffset = idxInfo.rootOffset; + + indexedWords.clear(); // Release memory -- no need for this data + + // That concludes it. Update the header. + + idxHeader.signature = Signature; + idxHeader.formatVersion = CurrentFormatVersion; + + idxHeader.articleCount = articleOffsets.size(); + idxHeader.wordCount = wordCount; + + if( langFrom.size() == 3) + idxHeader.langFrom = LangCoder::code3toInt( langFrom.c_str() ); + else if( langFrom.size() == 2 ) + idxHeader.langFrom = LangCoder::code2toInt( langFrom.c_str() ); + + if( langTo.size() == 3) + idxHeader.langTo = LangCoder::code3toInt( langTo.c_str() ); + else if( langTo.size() == 2 ) + idxHeader.langTo = LangCoder::code2toInt( langTo.c_str() ); + + idx.rewind(); + + idx.write( &idxHeader, sizeof( idxHeader ) ); + } + catch( std::exception & e ) + { + FDPRINTF( stderr, "Aard dictionary indexing failed: %s, error: %s\n", + i->c_str(), e.what() ); + continue; + } + catch( ... ) + { + FDPRINTF( stderr, "Aard dictionary indexing failed\n" ); + continue; + } + } // if need to rebuild + dictionaries.push_back( new AardDictionary( dictId, + indexFile, + dictFiles ) ); + } + return dictionaries; +} + +} diff --git a/aard.hh b/aard.hh new file mode 100644 index 00000000..c9aabbcb --- /dev/null +++ b/aard.hh @@ -0,0 +1,23 @@ +/* This file is (c) 2008-2012 Konstantin Isakov + * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ + +#ifndef __AARD_HH_INCLUDED__ +#define __AARD_HH_INCLUDED__ + +#include "dictionary.hh" + +/// Support for the aard dictionaries. +namespace Aard { + +using std::vector; +using std::string; + +vector< sptr< Dictionary::Class > > makeDictionaries( + vector< string > const & fileNames, + string const & indicesDir, + Dictionary::Initializing & ) + throw( std::exception ); + +} + +#endif diff --git a/decompress.cc b/decompress.cc new file mode 100644 index 00000000..5bd3cc97 --- /dev/null +++ b/decompress.cc @@ -0,0 +1,72 @@ +#include + +#include "decompress.hh" +#include "zlib.h" +#include "bzlib.h" + +string decompressZlib( char * bufptr, unsigned length ) +{ +z_stream zs; +char buf[2048]; +string str; +int res; + memset( &zs, 0, sizeof(zs) ); + zs.next_in = (Bytef *)bufptr; + zs.avail_in = length; + while( 1 ) + { + res = inflateInit( &zs ); + if( res != Z_OK ) + break; + while( res != Z_STREAM_END ) + { + memset( buf, 0, sizeof(buf) ); + zs.next_out = (Bytef *)buf; + zs.avail_out = 2047; + res = inflate( &zs, Z_SYNC_FLUSH ); + str += buf; + if( res != Z_OK && res != Z_STREAM_END ) + break; + } + break; + } + inflateEnd( &zs ); + if( res != Z_STREAM_END ) + str.clear(); + return str; +} + +string decompressBzip2( char * bufptr, unsigned length ) +{ +bz_stream zs; +char buf[2048]; +string str; +int res; + memset( &zs, 0, sizeof(zs) ); + zs.next_in = bufptr; + zs.avail_in = length; + zs.total_in_lo32 = length; + while( 1 ) + { + res = BZ2_bzDecompressInit( &zs, 0, 0 ); + if( res != BZ_OK ) + break; + while( res != BZ_STREAM_END ) + { + memset( buf, 0, sizeof(buf) ); + zs.next_out = buf; + zs.avail_out = 2047; + zs.total_out_lo32 = length; + res = BZ2_bzDecompress( &zs ); + str += buf; + if( res != BZ_OK && res != BZ_STREAM_END ) + break; + } + break; + } + BZ2_bzDecompressEnd( &zs ); + if( res != BZ_STREAM_END ) + str.clear(); + return str; +} + diff --git a/decompress.hh b/decompress.hh new file mode 100644 index 00000000..a8fcdfe9 --- /dev/null +++ b/decompress.hh @@ -0,0 +1,12 @@ +#ifndef __DECOMPRESS_HH_INCLUDED__ +#define __DECOMPRESS_HH_INCLUDED__ + +#include + +using std::string; + +string decompressZlib( char * bufptr, unsigned length ); + +string decompressBzip2( char * bufptr, unsigned length ); + +#endif // DECOMPRESS_HH diff --git a/goldendict.pro b/goldendict.pro index 5b48c1b8..be5fafe2 100644 --- a/goldendict.pro +++ b/goldendict.pro @@ -192,7 +192,9 @@ HEADERS += folding.hh \ gdappstyle.hh \ ufile.hh \ xdxf.hh \ - sdict.hh + sdict.hh \ + decompress.hh \ + aard.hh FORMS += groups.ui \ dictgroupwidget.ui \ mainwindow.ui \ @@ -282,7 +284,9 @@ SOURCES += folding.cc \ gdappstyle.cc \ ufile.cc \ xdxf.cc \ - sdict.cc + sdict.cc \ + decompress.cc \ + aard.cc win32 { SOURCES += mouseover_win32/ThTypes.c \ wordbyauto.cc \ diff --git a/icons/icon32_aard.png b/icons/icon32_aard.png new file mode 100644 index 0000000000000000000000000000000000000000..1c6279eaf64bcd437e0d14fa8d2aa45a48e7263a GIT binary patch literal 3623 zcmV+?4%qRDP)KLZ*U+IBfRsybQWXdwQbLP>6pAqfylh#{fb6;Z(vMMVS~$e@S=j*ftg6;Uhf59&ghTmgWD0l;*T zI709Y^p6lP1rIRMx#05C~cW=H_Aw*bJ-5DT&Z2n+x)QHX^p z00esgV8|mQcmRZ%02D^@S3L16t`O%c004NIvOKvYIYoh62rY33S640`D9%Y2D-rV&neh&#Q1i z007~1e$oCcFS8neI|hJl{-P!B1ZZ9hpmq0)X0i`JwE&>$+E?>%_LC6RbVIkUx0b+_+BaR3cnT7Zv!AJxW zizFb)h!jyGOOZ85F;a?DAXP{m@;!0_IfqH8(HlgRxt7s3}k3K`kFu>>-2Q$QMFfPW!La{h336o>X zu_CMttHv6zR;&ZNiS=X8v3CR#fknUxHUxJ0uoBa_M6WNWeqIg~6QE69c9o#eyhGvpiOA@W-aonk<7r1(?fC{oI5N*U!4 zfg=2N-7=cNnjjOr{yriy6mMFgG#l znCF=fnQv8CDz++o6_Lscl}eQ+l^ZHARH>?_s@|##Rr6KLRFA1%Q+=*RRWnoLsR`7U zt5vFIcfW3@?wFpwUVxrVZ>QdQz32KIeJ}k~{cZZE^+ya? z2D1z#2HOnI7(B%_ac?{wFUQ;QQA1tBKtrWrm0_3Rgps+?Jfqb{jYbcQX~taRB;#$y zZN{S}1|}gUOHJxc?wV3fxuz+mJ4`!F$IZ;mqRrNsHJd##*D~ju=bP7?-?v~|cv>vB zsJ6IeNwVZxrdjT`yl#bBIa#GxRa#xMMy;K#CDyyGyQdMSxlWT#tDe?p!?5wT$+oGt z8L;Kp2HUQ-ZMJ=3XJQv;x5ci*?vuTfeY$;({XGW_huIFR9a(?@3)XSs8O^N5RyOM=TTmp(3=8^+zpz2r)C z^>JO{deZfso3oq3?Wo(Y?l$ge?uXo;%ru`Vo>?<<(8I_>;8Eq#KMS9gFl*neeosSB zfoHYnBQIkwkyowPu(zdms`p{<7e4kra-ZWq<2*OsGTvEV%s0Td$hXT+!*8Bnh2KMe zBmZRodjHV?r+_5^X9J0WL4jKW`}lf%A-|44I@@LTvf1rHjG(ze6+w@Jt%Bvjts!X0 z?2xS?_ve_-kiKB_KiJlZ$9G`c^=E@oNG)mWWaNo-3TIW8)$Hg0Ub-~8?KhvJ>$ z3*&nim@mj(aCxE5!t{lw7O5^0EIO7zOo&c6l<+|iDySBWCGrz@C5{St!X3hAA}`T4 z(TLbXTq+(;@<=L8dXnssyft|w#WSTW<++3>sgS%(4NTpeI-VAqb|7ssJvzNHgOZVu zaYCvgO_R1~>SyL=cFU|~g|hy|Zi}}s9+d~lYqOB71z9Z$wnC=pR9Yz4DhIM>Wmjgu z&56o6maCpC&F##y%G;1PobR9i?GnNg;gYtchD%p19a!eQtZF&3JaKv33gZ<8D~47E ztUS1iwkmDaPpj=$m#%)jCVEY4fnLGNg2A-`YwHVD3gv};>)hAvT~AmqS>Lr``i7kw zJ{5_It`yrBmlc25DBO7E8;5VoznR>Ww5hAaxn$2~(q`%A-YuS64wkBy=9dm`4cXeX z4c}I@?e+FW+b@^RDBHV(wnMq2zdX3SWv9u`%{xC-q*U}&`cyXV(%rRT*Z6MH?i+i& z_B8C(+grT%{XWUQ+f@NoP1R=AW&26{v-dx)iK^-Nmiuj8txj!m?Z*Ss1N{dh4z}01 z)YTo*JycSU)+_5r4#yw9{+;i4Ee$peRgIj+;v;ZGdF1K$3E%e~4LaI(jC-u%2h$&R z9cLXcYC@Xwnns&bn)_Q~Te?roKGD|d-g^8;+aC{{G(1^(O7m37Y1-+6)01cN&y1aw zoqc{T`P^XJqPBbIW6s}d4{z_f5Om?vMgNQEJG?v2T=KYd^0M3I6IZxbny)%vZR&LD zJpPl@Psh8QyPB@KTx+@RdcC!KX7}kEo;S|j^u2lU7XQ}Oo;f|;z4Ll+_r>@1-xl3| zawq-H%e&ckC+@AhPrP6BKT#_XdT7&;F71j}Joy zkC~6lh7E@6o;W@^IpRNZ{ptLtL(gQ-CY~4mqW;US7Zxvm_|@yz&e53Bp_lTPlfP|z zrTyx_>lv@x#=^!PzR7qqF<$gm`|ZJZ+;<)Cqu&ot2z=0000WV@Og>004R=004l4008;_004mL004C`008P>0026e000+nl3&F} z000A0Nkl7Kd7{`CpHr?*fCGFN%N+LcQvMAkk;D9DXQI5WnI?jw_p{r59J)I@JDqlCr|5gxXXl-n|MUMp z|M!{qRYZij97W9WC^5U(QoH5yQkUyeWIRDQo**ScN(4c&v1DSjShRc4V$sESdwUd1 z2>_P>j^DpdDm4K>ZlT09%wCi9)Z}k*Q88Bw)YU0so4x{wDr;4B>lTBdqpQFr4=hrV-ErB-T(J0}u zQ2^#ux(SCu8lXOjd6kvvxm;k4y%b==+-hg(3ZS~(Q&?w!Rd=OsZt}UY!BF9=!05LO zn4Z1SY~jJs6xO-7id_Xv%&vF2iQHzug4l0FIVKa~dn8P7=vQIjVzU#93F}>f0Jpk6 zFqDXEOVJ3NU6L7ay9%UPXI>T_PqKAi1B#-sy1w3&wGr4XN$Vyw)LLD=iq9RdaJpPn z)YK4q`%VKMh(-Asi;_$UZij;vs)|gnzQqz4I^X);yBX=}p}(t(oktEcl1ON*y(gNu z9Srj5TOWs7TXVo|n^14ENHWQ?_3L=t-p-4T4gi+;yqeqx{eB*tJ;%*Jfcm3HkTmd2 zCM>b406_WLHF&mf=V4#3HottUkM57303lH4<$&MMo>M0c?w4u%faU$bgb39>FVCAV zaMkPMYMqZ~%`NyE8W>K*iA|)q`1A>DH*MtQwQD;0rZ-?g3G}3rY~H(<_itWnFMj{J z6A`G|v4f(;i&*LLaHp*eN4X1tpRt(c1DTx(g*(9@D?J{Lo^RGz9cwwyq7}Cee5c0@K=TRHPsGvPF~b}n3y z0~{Itoww$;Td^7A$ndbK+%qa^HG%$-5getZl&R{JM+NJ0XMoF`&VmxSk~tmqg+hPr tU}C8gDvGk~Umr+C #include @@ -42,7 +43,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ): nameFilters << "*.bgl" << "*.ifo" << "*.lsa" << "*.dat" << "*.dsl" << "*.dsl.dz" << "*.index" << "*.xdxf" - << "*.xdxf.dz" << "*.dct"; + << "*.xdxf.dz" << "*.dct" << "*.aar"; } void LoadDictionaries::run() @@ -155,6 +156,13 @@ void LoadDictionaries::handlePath( Config::Path const & path ) dictionaries.insert( dictionaries.end(), sdictDictionaries.begin(), sdictDictionaries.end() ); } + { + vector< sptr< Dictionary::Class > > aardDictionaries = + Aard::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this ); + + dictionaries.insert( dictionaries.end(), aardDictionaries.begin(), + aardDictionaries.end() ); + } } void LoadDictionaries::indexingDictionary( string const & dictionaryName ) throw() diff --git a/resources.qrc b/resources.qrc index eb0bb603..103b2650 100644 --- a/resources.qrc +++ b/resources.qrc @@ -48,5 +48,6 @@ icons/error.png icons/macicon.png icons/icon32_sdict.png + icons/icon32_aard.png diff --git a/sdict.cc b/sdict.cc index 748ba3aa..44080db7 100644 --- a/sdict.cc +++ b/sdict.cc @@ -9,13 +9,11 @@ #include "langcoder.hh" #include "dprintf.hh" #include "fsencoding.hh" +#include "decompress.hh" -#include -#include #include #include #include -#include #ifdef _MSC_VER #include @@ -124,72 +122,6 @@ bool indexIsOldOrBad( string const & indexFile ) header.formatVersion != CurrentFormatVersion; } -string decompressZlib( char * bufptr, unsigned length ) -{ -z_stream zs; -char buf[2048]; -string str; -int res; - memset( &zs, 0, sizeof(zs) ); - zs.next_in = (Bytef *)bufptr; - zs.avail_in = length; - while( 1 ) - { - res = inflateInit( &zs ); - if( res != Z_OK ) - break; - while( res != Z_STREAM_END ) - { - memset( buf, 0, sizeof(buf) ); - zs.next_out = (Bytef *)buf; - zs.avail_out = 2047; - res = inflate( &zs, Z_SYNC_FLUSH ); - str += buf; - if( res != Z_OK && res != Z_STREAM_END ) - break; - } - break; - } - inflateEnd( &zs ); - if( res != Z_STREAM_END ) - str.clear(); - return str; -} - -string decompressBzip2( char * bufptr, unsigned length ) -{ -bz_stream zs; -char buf[2048]; -string str; -int res; - memset( &zs, 0, sizeof(zs) ); - zs.next_in = bufptr; - zs.avail_in = length; - zs.total_in_lo32 = length; - while( 1 ) - { - res = BZ2_bzDecompressInit( &zs, 0, 0 ); - if( res != BZ_OK ) - break; - while( res != BZ_STREAM_END ) - { - memset( buf, 0, sizeof(buf) ); - zs.next_out = buf; - zs.avail_out = 2047; - zs.total_out_lo32 = length; - res = BZ2_bzDecompress( &zs ); - str += buf; - if( res != BZ_OK && res != BZ_STREAM_END ) - break; - } - break; - } - BZ2_bzDecompressEnd( &zs ); - if( res != BZ_STREAM_END ) - str.clear(); - return str; -} - class SdictDictionary: public BtreeIndexing::BtreeDictionary { Mutex idxMutex;