From 81ee55aff7b6e33eb265d2f2645f1e860a8a341f Mon Sep 17 00:00:00 2001
From: Abs62 <ottomann@yandex.ru>
Date: Thu, 9 Feb 2012 16:50:38 +0400
Subject: [PATCH] Add support for Aard dictionaries (aar-html)

---
 aard.cc               | 910 ++++++++++++++++++++++++++++++++++++++++++
 aard.hh               |  23 ++
 decompress.cc         |  72 ++++
 decompress.hh         |  12 +
 goldendict.pro        |   8 +-
 icons/icon32_aard.png | Bin 0 -> 3623 bytes
 loaddictionaries.cc   |  10 +-
 resources.qrc         |   1 +
 sdict.cc              |  70 +---
 9 files changed, 1034 insertions(+), 72 deletions(-)
 create mode 100644 aard.cc
 create mode 100644 aard.hh
 create mode 100644 decompress.cc
 create mode 100644 decompress.hh
 create mode 100644 icons/icon32_aard.png
diff --git a/aard.cc b/aard.cc
new file mode 100644
index 00000000..10083a92
--- /dev/null
+++ b/aard.cc
@@ -0,0 +1,910 @@
+/* This file is (c) 2008-2011 Konstantin Isakov <ikm@goldendict.org>
+ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
+
+#include "aard.hh"
+#include "btreeidx.hh"
+#include "folding.hh"
+#include "utf8.hh"
+#include "chunkedstorage.hh"
+#include "langcoder.hh"
+#include "dprintf.hh"
+#include "fsencoding.hh"
+#include "decompress.hh"
+
+#include <map>
+#include <set>
+#include <string>
+
+#ifdef _MSC_VER
+#include <stub_msvc.h>
+#endif
+
+#include <QString>
+#include <QSemaphore>
+#include <QThreadPool>
+#include <QAtomicInt>
+#include <QDomDocument>
+
+#include "ufile.hh"
+#include "wstring_qt.hh"
+
+namespace Aard {
+
+using std::map;
+using std::multimap;
+using std::pair;
+using std::set;
+using std::string;
+using gd::wstring;
+
+using BtreeIndexing::WordArticleLink;
+using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
+
+namespace {
+
+DEF_EX_STR( exNotDctFile, "Not an Sdictionary file", Dictionary::Ex )
+DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
+DEF_EX_STR( exWordIsTooLarge, "Enountered a word that is too large:", Dictionary::Ex )
+DEF_EX_STR( exSuddenEndOfFile, "Sudden end of file", Dictionary::Ex )
+
+#ifdef _MSC_VER
+#pragma pack( push, 1 )
+#endif
+
+// Big-Endian template
+// http://habrahabr.ru/blogs/cpp/121811/
+
+template<typename T>
+struct BigEndian
+{
+    union
+    {
+        unsigned char bytes[sizeof(T)];
+        T raw_value;
+    };
+
+    BigEndian(T t = T())
+    {
+        operator =(t);
+    }
+
+    BigEndian(const BigEndian<T> & t)
+    {
+        raw_value = t.raw_value;
+    }
+
+    operator const T() const
+    {
+        T t = T();
+        for (unsigned i = 0; i < sizeof(T); i++)
+            t |= T(bytes[sizeof(T) - 1 - i]) << (i << 3);
+        return t;
+    }
+
+    const T operator = (const T t)
+    {
+        for (unsigned i = 0; i < sizeof(T); i++)
+            bytes[sizeof(T) - 1 - i] = (unsigned char)( t >> (i << 3) );
+        return t;
+    }
+
+}
+#ifndef _MSC_VER
+__attribute__((packed))
+#endif
+;
+
+typedef BigEndian< uint16_t > uint16_be;
+typedef BigEndian< uint32_t > uint32_be;
+
+/// AAR file header
+struct AAR_header
+{
+    char signature[4];
+    char checksum[40];
+    uint16_be version;
+    char uuid[16];
+    uint16_be volume;
+    uint16_be totalVolumes;
+    uint32_be metaLength;
+    uint32_be wordsCount;
+    uint32_be articleOffset;
+    char indexItemFormat[4];
+    char keyLengthFormat[2];
+    char articleLengthFormat[2];
+}
+#ifndef _MSC_VER
+__attribute__((packed))
+#endif
+;
+
+struct IndexElement
+{
+    uint32_be wordOffset;
+    uint32_be articleOffset;
+}
+#ifndef _MSC_VER
+__attribute__((packed))
+#endif
+;
+
+enum
+{
+  Signature = 0x58524141, // AARX on little-endian, XRAA on big-endian
+  CurrentFormatVersion = 1 + BtreeIndexing::FormatVersion + Folding::Version
+};
+
+struct IdxHeader
+{
+  uint32_t signature; // First comes the signature, AARX
+  uint32_t formatVersion; // File format version (CurrentFormatVersion)
+  uint32_t chunksOffset; // The offset to chunks' storage
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
+  uint32_t wordCount;
+  uint32_t articleCount;
+  uint32_t langFrom;  // Source language
+  uint32_t langTo;    // Target language
+}
+#ifndef _MSC_VER
+__attribute__((packed))
+#endif
+;
+
+#ifdef _MSC_VER
+#pragma pack( pop, 1 )
+#endif
+
+bool indexIsOldOrBad( string const & indexFile )
+{
+  File::Class idx( indexFile, "rb" );
+
+  IdxHeader header;
+
+  return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
+         header.signature != Signature ||
+         header.formatVersion != CurrentFormatVersion;
+}
+
+void readJSONValue( string const & source, string & str, uint32_t & pos)
+{
+    int level = 1;
+    char endChar;
+    str.push_back( source[pos] );
+    if( source[pos] == '{')
+        endChar = '}';
+    else if( source[pos] == '[' )
+        endChar = ']';
+    else if( source[pos] == '\"' )
+    {
+        str.clear();
+        endChar = '\"';
+    }
+    else
+        endChar = ',';
+
+    pos++;
+    char ch = 0;
+    char lastCh = 0;
+    while( !( ch == endChar && lastCh != '\\' && level == 0 )
+           && pos < source.size() )
+    {
+        lastCh = ch;
+        ch = source[ pos++ ];
+        if( ( ch == '{' || ch == '[' ) && lastCh != '\\' )
+          level++;
+        if( ( ch == '}' || ch == ']' ) && lastCh != '\\' )
+          level--;
+
+        if( ch == endChar &&
+            ( ( ch == '\"' && lastCh != '\\' ) || ch == ',' )
+            && level == 1)
+          break;
+        str.push_back( ch );
+    }
+}
+
+class AardDictionary: public BtreeIndexing::BtreeDictionary
+{
+    Mutex idxMutex;
+    File::Class idx;
+    IdxHeader idxHeader;
+    ChunkedStorage::Reader chunks;
+    string dictionaryName;
+    File::Class df;
+    QIcon dictionaryIcon, dictionaryNativeIcon;
+    bool dictionaryIconLoaded;
+
+  public:
+
+    AardDictionary( string const & id, string const & indexFile,
+                     vector< string > const & dictionaryFiles );
+
+    ~AardDictionary();
+
+    virtual string getName() throw()
+    { return dictionaryName; }
+
+    virtual map< Dictionary::Property, string > getProperties() throw()
+    { return map< Dictionary::Property, string >(); }
+
+    virtual unsigned long getArticleCount() throw()
+    { return idxHeader.articleCount; }
+
+    virtual unsigned long getWordCount() throw()
+    { return idxHeader.wordCount; }
+
+    virtual QIcon getIcon() throw();
+
+    virtual QIcon getNativeIcon() throw();
+
+    inline virtual quint32 getLangFrom() const
+    { return idxHeader.langFrom; }
+
+    inline virtual quint32 getLangTo() const
+    { return idxHeader.langTo; }
+
+    virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
+                                                        vector< wstring > const & alts,
+                                                        wstring const & )
+      throw( std::exception );
+
+private:
+
+    void loadIcon();
+
+    /// Loads the article.
+    void loadArticle( uint32_t address,
+                      string & articleText );
+    string convert( string const & in_data );
+
+    friend class AardArticleRequest;
+};
+
+AardDictionary::AardDictionary( string const & id,
+                                string const & indexFile,
+                                vector< string > const & dictionaryFiles ):
+    BtreeDictionary( id, dictionaryFiles ),
+    idx( indexFile, "rb" ),
+    idxHeader( idx.read< IdxHeader >() ),
+    chunks( idx, idxHeader.chunksOffset ),
+    df( dictionaryFiles[ 0 ], "rb" ),
+    dictionaryIconLoaded( false )
+{
+    // Read dictionary name
+
+    idx.seek( sizeof( idxHeader ) );
+    vector< char > dName( idx.read< uint32_t >() );
+    if( dName.size() )
+    {
+        idx.read( &dName.front(), dName.size() );
+        dictionaryName = string( &dName.front(), dName.size() );
+    }
+
+    // Initialize the index
+
+    openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                          idxHeader.indexRootOffset ),
+               idx, idxMutex );
+}
+
+AardDictionary::~AardDictionary()
+{
+    df.close();
+}
+
+QIcon AardDictionary::getNativeIcon() throw()
+{
+  loadIcon();
+  return dictionaryNativeIcon;
+}
+
+QIcon AardDictionary::getIcon() throw()
+{
+  loadIcon();
+  return dictionaryIcon;
+}
+
+void AardDictionary::loadIcon()
+{
+  if ( dictionaryIconLoaded )
+    return;
+
+  QString fileName =
+    QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) );
+
+  // Remove the extension
+
+  fileName.chop( 3 );
+  fileName += "bmp";
+  QFileInfo info( fileName );
+
+  if ( !info.exists() )
+  {
+      fileName.chop( 3 );
+      fileName += "png";
+      info = QFileInfo( fileName );
+  }
+
+  if ( info.exists() )
+  {
+    QImage img( fileName );
+
+    if ( !img.isNull() )
+    {
+      // Load successful
+
+      // Apply the color key
+
+      img.setAlphaChannel( img.createMaskFromColor( QColor( 192, 192, 192 ).rgb(),
+                                                    Qt::MaskOutColor ) );
+
+      dictionaryNativeIcon = QIcon( QPixmap::fromImage( img ) );
+
+      // Transform it to be square
+      int max = img.width() > img.height() ? img.width() : img.height();
+
+      QImage result( max, max, QImage::Format_ARGB32 );
+      result.fill( 0 ); // Black transparent
+
+      QPainter painter( &result );
+
+      painter.drawImage( QPoint( img.width() == max ? 0 : ( max - img.width() ) / 2,
+                                 img.height() == max ? 0 : ( max - img.height() ) / 2 ),
+                         img );
+
+      painter.end();
+
+      dictionaryIcon = QIcon( QPixmap::fromImage( result ) );
+    }
+  }
+
+  if ( dictionaryIcon.isNull() )
+  {
+    // Load failed -- use default icons
+    dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/icon32_aard.png");
+  }
+
+  dictionaryIconLoaded = true;
+}
+
+string AardDictionary::convert( const string & in )
+{
+    string inConverted;
+    char inCh, lastCh = 0;
+    bool afterEol = false;
+
+    for( string::const_iterator i = in.begin(), j = in.end(); i != j; ++i )
+    {
+        inCh = *i;
+        if( lastCh == '\\' )
+        {
+            inConverted.erase( inConverted.size() - 1 );
+            lastCh = 0;
+            if( inCh == 'n' )
+            {
+                inConverted.append( "<br/>");
+                afterEol = true;
+                continue;
+            }
+            else if( inCh == 'r')
+                continue;
+        }
+        else if( inCh == ' ' && afterEol )
+        {
+            inConverted.append( "&nbsp;" );
+            continue;
+        } else
+            lastCh = inCh;
+        afterEol = false;
+        inConverted.push_back( inCh );
+    }
+
+    QDomDocument dd;
+    QString errorStr;
+    int errorLine, errorColumn;
+
+    if( !dd.setContent( QByteArray( inConverted.c_str() ), false, &errorStr, &errorLine, &errorColumn ) )
+    {
+        FDPRINTF( stderr, "Aard article parse failed: %s at %d,%d\n", errorStr.toLocal8Bit().constData(),  errorLine,  errorColumn );
+        FDPRINTF( stderr, "The input was: %s\n", in.c_str() );
+        return inConverted;
+    }
+
+    QDomNodeList nodes = dd.elementsByTagName( "a" ); // References
+    for( int i = 0; i < nodes.count(); i++ )
+    {
+        QDomElement el = nodes.at( i ).toElement();
+        QString ref = el.attribute( "href", "" );
+        if( ref.size() == 0 || ref.indexOf( "http://") != -1 || ref[0] == '#' )
+            continue;
+        if( ref.indexOf( "w:") == 0 || ref.indexOf( "s:") == 0 )
+            ref.replace( 0, 2, "bword:" );
+        else
+            ref.insert( 0, "bword:" );
+        el.setAttribute( "href", ref );
+    }
+
+    return dd.toByteArray().data();
+}
+
+void AardDictionary::loadArticle( uint32_t address,
+                                   string & articleText )
+{
+    uint32_t articleOffset = address;
+    uint32_t articleSize;
+    uint32_be size;
+
+    vector< char > articleBody;
+
+    articleText.clear();
+
+    df.seek( articleOffset );
+    df.read( &size, sizeof(size) );
+    articleSize = size;
+    articleBody.resize( articleSize );
+    df.read( &articleBody.front(), articleSize );
+
+    if ( articleBody.empty() )
+      throw exCantReadFile( getDictionaryFilenames()[ 0 ] );
+
+    string text = decompressBzip2( articleBody.data(), articleSize );
+    if( text.empty() )
+        text = decompressZlib( articleBody.data(), articleSize );
+    if( text.empty() )
+        text = string( articleBody.data(), articleSize );
+
+    uint32_t n = 0;
+    while( n < text.size() && text[n] != '\"' )
+        n++;
+
+    if( n >= text.size() )
+        return;
+
+    readJSONValue( text, articleText, n );
+
+    if( articleText.empty() )
+    {
+        n = text.find( "\"r\"" );
+        if( n != string::npos )
+        {
+            n += 3;
+            while( n < text.size() && text[n] != '\"' )
+                n++;
+
+            if( n >= text.size() )
+                return;
+
+            string link;
+            readJSONValue( text, link, n );
+            if( !link.empty() )
+                articleText = "<a href=\"" + link + "\">" + link + "</a>";
+        }
+    }
+
+    if( !articleText.empty() )
+        articleText = convert( articleText );
+
+    articleText = "<div class=\"sdict\">" + articleText + "</div>";
+}
+
+/// AardDictionary::getArticle()
+
+class AardArticleRequest;
+
+class AardArticleRequestRunnable: public QRunnable
+{
+  AardArticleRequest & r;
+  QSemaphore & hasExited;
+
+public:
+
+  AardArticleRequestRunnable( AardArticleRequest & r_,
+                              QSemaphore & hasExited_ ): r( r_ ),
+                                                         hasExited( hasExited_ )
+  {}
+
+  ~AardArticleRequestRunnable()
+  {
+    hasExited.release();
+  }
+
+  virtual void run();
+};
+
+class AardArticleRequest: public Dictionary::DataRequest
+{
+  friend class AardArticleRequestRunnable;
+
+  wstring word;
+  vector< wstring > alts;
+  AardDictionary & dict;
+
+  QAtomicInt isCancelled;
+  QSemaphore hasExited;
+
+public:
+
+  AardArticleRequest( wstring const & word_,
+                      vector< wstring > const & alts_,
+                      AardDictionary & dict_ ):
+    word( word_ ), alts( alts_ ), dict( dict_ )
+  {
+    QThreadPool::globalInstance()->start(
+      new AardArticleRequestRunnable( *this, hasExited ) );
+  }
+
+  void run(); // Run from another thread by DslArticleRequestRunnable
+
+  virtual void cancel()
+  {
+    isCancelled.ref();
+  }
+
+  ~AardArticleRequest()
+  {
+    isCancelled.ref();
+    hasExited.acquire();
+  }
+};
+
+void AardArticleRequestRunnable::run()
+{
+  r.run();
+}
+
+void AardArticleRequest::run()
+{
+  if ( isCancelled )
+  {
+    finish();
+    return;
+  }
+
+  vector< WordArticleLink > chain = dict.findArticles( word );
+
+  for( unsigned x = 0; x < alts.size(); ++x )
+  {
+    /// Make an additional query for each alt
+
+    vector< WordArticleLink > altChain = dict.findArticles( alts[ x ] );
+
+    chain.insert( chain.end(), altChain.begin(), altChain.end() );
+  }
+
+  multimap< wstring, pair< string, string > > mainArticles, alternateArticles;
+
+  set< uint32_t > articlesIncluded; // Some synonims make it that the articles
+                                    // appear several times. We combat this
+                                    // by only allowing them to appear once.
+
+  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
+
+  for( unsigned x = 0; x < chain.size(); ++x )
+  {
+    if ( isCancelled )
+    {
+      finish();
+      return;
+    }
+
+    if ( articlesIncluded.find( chain[ x ].articleOffset ) != articlesIncluded.end() )
+      continue; // We already have this article in the body.
+
+    // Now grab that article
+
+    string headword, articleText;
+
+    headword = chain[ x ].word;
+    dict.loadArticle( chain[ x ].articleOffset, articleText );
+
+    // Ok. Now, does it go to main articles, or to alternate ones? We list
+    // main ones first, and alternates after.
+
+    // We do the case-folded comparison here.
+
+    wstring headwordStripped =
+      Folding::applySimpleCaseOnly( Utf8::decode( headword ) );
+
+    multimap< wstring, pair< string, string > > & mapToUse =
+      ( wordCaseFolded == headwordStripped ) ?
+        mainArticles : alternateArticles;
+
+    mapToUse.insert( pair< wstring, pair< string, string > >(
+      Folding::applySimpleCaseOnly( Utf8::decode( headword ) ),
+      pair< string, string >( headword, articleText ) ) );
+
+    articlesIncluded.insert( chain[ x ].articleOffset );
+  }
+
+  if ( mainArticles.empty() && alternateArticles.empty() )
+  {
+    // No such word
+    finish();
+    return;
+  }
+
+  string result;
+
+  multimap< wstring, pair< string, string > >::const_iterator i;
+
+  for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
+  {
+      result += "<h3>";
+      result += i->second.first;
+      result += "</h3>";
+      result += i->second.second;
+  }
+
+  for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
+  {
+      result += "<h3>";
+      result += i->second.first;
+      result += "</h3>";
+      result += i->second.second;
+  }
+
+  Mutex::Lock _( dataMutex );
+
+  data.resize( result.size() );
+
+  memcpy( &data.front(), result.data(), result.size() );
+
+  hasAnyData = true;
+
+  finish();
+}
+
+map< string, string > parseMetaData( string const & metaData )
+{
+// Parsing JSON string
+    map< string, string > data;
+    string name, value;
+    uint32_t n = 0;
+
+    while( metaData[n] != '{' && n < metaData.length() )
+        n++;
+    while( n < metaData.length() )
+    {
+        // Skip to '"'
+        while( metaData[n] != '\"' && n < metaData.length() )
+            n++;
+        if( ++n >= metaData.length() )
+            break;
+
+        // Read name
+        while( !( ( metaData[n] == '\"' || metaData[n] == '{' ) && metaData[n-1] != '\\' )
+               && n < metaData.length() )
+            name.push_back( metaData[n++]);
+
+        // Skip to ':'
+        if( ++n >= metaData.length() )
+            break;
+        while( metaData[n] != ':' && n < metaData.length() )
+            n++;
+        if( ++n >= metaData.length() )
+            break;
+
+        // Find value start after ':'
+        while( !( ( metaData[n] == '\"'
+                    || metaData[n] == '{'
+                    || metaData[n] == '['
+                    || ( metaData[n] >= '0' && metaData[n] <= '9' ) )
+               && metaData[n-1] != '\\' )
+               && n < metaData.length() )
+            n++;
+        if( n >= metaData.length() )
+            break;
+
+        readJSONValue( metaData, value, n);
+
+        data[name] = value;
+
+        name.clear();
+        value.clear();
+        if( ++n >= metaData.length() )
+            break;
+    }
+    return data;
+}
+
+sptr< Dictionary::DataRequest > AardDictionary::getArticle( wstring const & word,
+                                                            vector< wstring > const & alts,
+                                                            wstring const & )
+  throw( std::exception )
+{
+  return new AardArticleRequest( word, alts, *this );
+}
+
+} // anonymous namespace
+
+vector< sptr< Dictionary::Class > > makeDictionaries(
+                                      vector< string > const & fileNames,
+                                      string const & indicesDir,
+                                      Dictionary::Initializing & initializing )
+  throw( std::exception )
+{
+  vector< sptr< Dictionary::Class > > dictionaries;
+
+  for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
+       ++i )
+  {
+      // Skip files with the extensions different to .aar to speed up the
+      // scanning
+      if ( i->size() < 4 ||
+          strcasecmp( i->c_str() + ( i->size() - 4 ), ".aar" ) != 0 )
+        continue;
+
+      // Got the file -- check if we need to rebuid the index
+
+      vector< string > dictFiles( 1, *i );
+
+      string dictId = Dictionary::makeDictionaryId( dictFiles );
+
+      string indexFile = indicesDir + dictId;
+
+      if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
+           indexIsOldOrBad( indexFile ) )
+      {
+        try
+        {
+          File::Class df( *i, "rb" );
+
+          AAR_header dictHeader;
+
+          df.read( &dictHeader, sizeof(dictHeader) );
+          if( strncmp( dictHeader.signature, "aard", 4 )
+              || strncmp( dictHeader.indexItemFormat, ">LL", 4 )
+              || strncmp( dictHeader.keyLengthFormat, ">H", 2 )
+              || strncmp( dictHeader.articleLengthFormat, ">L", 2) )
+          {
+              DPRINTF( "File %s is not in supported aard format", i->c_str() );
+              continue;
+          }
+
+          vector< char > data;
+          uint32_t size = dictHeader.metaLength;
+
+          data.resize( size );
+          df.read( &data.front(), size );
+          string metaStr = decompressBzip2( data.data(), size );
+          if( metaStr.empty() )
+              metaStr = decompressZlib( data.data(), size );
+
+          map< string, string > meta = parseMetaData( metaStr );
+
+          if( meta.empty() )
+          {
+              DPRINTF( "File %s has invalid metadata", i->c_str() );
+              continue;
+          }
+
+          string dictName;
+          map< string, string >::const_iterator iter = meta.find( "title" );
+          if( iter != meta.end() )
+              dictName = iter->second;
+
+          uint16_t volumes = dictHeader.totalVolumes;
+          if( volumes > 1 )
+          {
+              QString ss;
+              ss.sprintf( " (%i/%i)", (uint16_t)(dictHeader.volume), volumes );
+              dictName += ss.toLocal8Bit().data();
+          }
+
+          string langFrom;
+          iter = meta.find( "index_language" );
+          if( iter != meta.end() )
+              langFrom = iter->second;
+
+          string langTo;
+          iter = meta.find( "article_language" );
+          if( iter != meta.end() )
+              langTo = iter->second;
+
+          initializing.indexingDictionary( dictName );
+
+          File::Class idx( indexFile, "wb" );
+          IdxHeader idxHeader;
+          memset( &idxHeader, 0, sizeof( idxHeader ) );
+
+          // We write a dummy header first. At the end of the process the header
+          // will be rewritten with the right values.
+
+          idx.write( idxHeader );
+
+          idx.write( (uint32_t) dictName.size() );
+          if( !dictName.empty() )
+              idx.write( dictName.data(), dictName.size() );
+
+          IndexedWords indexedWords;
+
+          ChunkedStorage::Writer chunks( idx );
+
+          uint32_t wordCount = dictHeader.wordsCount;
+          set< uint32_t > articleOffsets;
+          uint32_t pos = df.tell();
+          uint32_t wordsBase = pos + wordCount * sizeof( IndexElement );
+          uint32_t articlesBase = dictHeader.articleOffset;
+
+          for( uint32_t j = 0; j < wordCount; j++ )
+          {
+            IndexElement el;
+
+            df.seek( pos );
+            df.read( &el, sizeof(el) );
+            uint32_t articleOffset = articlesBase + el.articleOffset;
+            uint32_t wordOffset = wordsBase + el.wordOffset;
+
+            df.seek( wordOffset );
+
+            uint16_be sizeBE;
+            df.read( &sizeBE, sizeof(sizeBE) );
+            uint16_t wordSize = sizeBE;
+            data.resize( wordSize );
+            df.read( &data.front(), wordSize );
+
+            if( articleOffsets.find( articleOffset ) == articleOffsets.end() )
+                articleOffsets.insert( articleOffset );
+
+            // Insert new entry
+            indexedWords.addWord( Utf8::decode( string( data.data(), wordSize ) ), articleOffset);
+
+            pos += sizeof(el);
+          }
+          // Finish with the chunks
+
+          idxHeader.chunksOffset = chunks.finish();
+
+          // Build index
+
+          IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+          idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+          idxHeader.indexRootOffset = idxInfo.rootOffset;
+
+          indexedWords.clear(); // Release memory -- no need for this data
+
+          // That concludes it. Update the header.
+
+          idxHeader.signature = Signature;
+          idxHeader.formatVersion = CurrentFormatVersion;
+
+          idxHeader.articleCount = articleOffsets.size();
+          idxHeader.wordCount = wordCount;
+
+          if( langFrom.size() == 3)
+              idxHeader.langFrom = LangCoder::code3toInt( langFrom.c_str() );
+          else if( langFrom.size() == 2 )
+              idxHeader.langFrom = LangCoder::code2toInt( langFrom.c_str() );
+
+          if( langTo.size() == 3)
+              idxHeader.langTo = LangCoder::code3toInt( langTo.c_str() );
+          else if( langTo.size() == 2 )
+              idxHeader.langTo = LangCoder::code2toInt( langTo.c_str() );
+
+          idx.rewind();
+
+          idx.write( &idxHeader, sizeof( idxHeader ) );
+        }
+        catch( std::exception & e )
+        {
+          FDPRINTF( stderr, "Aard dictionary indexing failed: %s, error: %s\n",
+            i->c_str(), e.what() );
+          continue;
+        }
+        catch( ... )
+        {
+          FDPRINTF( stderr, "Aard dictionary indexing failed\n" );
+          continue;
+        }
+      } // if need to rebuild
+      dictionaries.push_back( new AardDictionary( dictId,
+                                                   indexFile,
+                                                   dictFiles ) );
+  }
+  return dictionaries;
+}
+
+}
diff --git a/aard.hh b/aard.hh
new file mode 100644
index 00000000..c9aabbcb
--- /dev/null
+++ b/aard.hh
@@ -0,0 +1,23 @@
+/* This file is (c) 2008-2012 Konstantin Isakov <ikm@goldendict.org>
+ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
+
+#ifndef __AARD_HH_INCLUDED__
+#define __AARD_HH_INCLUDED__
+
+#include "dictionary.hh"
+
+/// Support for the aard dictionaries.
+namespace Aard {
+
+using std::vector;
+using std::string;
+
+vector< sptr< Dictionary::Class > > makeDictionaries(
+                                      vector< string > const & fileNames,
+                                      string const & indicesDir,
+                                      Dictionary::Initializing & )
+  throw( std::exception );
+
+}
+
+#endif
diff --git a/decompress.cc b/decompress.cc
new file mode 100644
index 00000000..5bd3cc97
--- /dev/null
+++ b/decompress.cc
@@ -0,0 +1,72 @@
+#include <stdlib.h>
+
+#include "decompress.hh"
+#include "zlib.h"
+#include "bzlib.h"
+
+string decompressZlib( char * bufptr, unsigned length )
+{
+z_stream zs;
+char buf[2048];
+string str;
+int res;
+    memset( &zs, 0, sizeof(zs) );
+    zs.next_in = (Bytef *)bufptr;
+    zs.avail_in = length;
+    while( 1 )
+    {
+        res = inflateInit( &zs );
+        if( res != Z_OK )
+            break;
+        while( res != Z_STREAM_END )
+        {
+            memset( buf, 0, sizeof(buf) );
+            zs.next_out = (Bytef *)buf;
+            zs.avail_out = 2047;
+            res = inflate( &zs, Z_SYNC_FLUSH );
+            str += buf;
+            if( res != Z_OK && res != Z_STREAM_END )
+                 break;
+        }
+        break;
+    }
+    inflateEnd( &zs );
+    if( res != Z_STREAM_END )
+        str.clear();
+    return str;
+}
+
+string decompressBzip2( char * bufptr, unsigned length )
+{
+bz_stream zs;
+char buf[2048];
+string str;
+int res;
+    memset( &zs, 0, sizeof(zs) );
+    zs.next_in = bufptr;
+    zs.avail_in = length;
+    zs.total_in_lo32 = length;
+    while( 1 )
+    {
+        res = BZ2_bzDecompressInit( &zs, 0, 0 );
+        if( res != BZ_OK )
+            break;
+        while( res != BZ_STREAM_END )
+        {
+            memset( buf, 0, sizeof(buf) );
+            zs.next_out = buf;
+            zs.avail_out = 2047;
+            zs.total_out_lo32 = length;
+            res = BZ2_bzDecompress( &zs );
+            str += buf;
+            if( res != BZ_OK && res != BZ_STREAM_END )
+                break;
+        }
+        break;
+    }
+    BZ2_bzDecompressEnd( &zs );
+    if( res != BZ_STREAM_END )
+        str.clear();
+    return str;
+}
+
diff --git a/decompress.hh b/decompress.hh
new file mode 100644
index 00000000..a8fcdfe9
--- /dev/null
+++ b/decompress.hh
@@ -0,0 +1,12 @@
+#ifndef __DECOMPRESS_HH_INCLUDED__
+#define __DECOMPRESS_HH_INCLUDED__
+
+#include <string>
+
+using std::string;
+
+string decompressZlib( char * bufptr, unsigned length );
+
+string decompressBzip2( char * bufptr, unsigned length );
+
+#endif // DECOMPRESS_HH
diff --git a/goldendict.pro b/goldendict.pro
index 5b48c1b8..be5fafe2 100644
--- a/goldendict.pro
+++ b/goldendict.pro
@@ -192,7 +192,9 @@ HEADERS += folding.hh \
     gdappstyle.hh \
     ufile.hh \
     xdxf.hh \
-    sdict.hh
+    sdict.hh \
+    decompress.hh \
+    aard.hh
 FORMS += groups.ui \
     dictgroupwidget.ui \
     mainwindow.ui \
@@ -282,7 +284,9 @@ SOURCES += folding.cc \
     gdappstyle.cc \
     ufile.cc \
     xdxf.cc \
-    sdict.cc
+    sdict.cc \
+    decompress.cc \
+    aard.cc
 win32 { 
     SOURCES += mouseover_win32/ThTypes.c \
                wordbyauto.cc \
diff --git a/icons/icon32_aard.png b/icons/icon32_aard.png
new file mode 100644
index 0000000000000000000000000000000000000000..1c6279eaf64bcd437e0d14fa8d2aa45a48e7263a
GIT binary patch
literal 3623
zcmV+?4%qRDP)<h;3K|Lk000e1NJLTq001BW001Be1^@s6b9#F800009a7bBm000XU
z000XU0RWnu7ytkYPiaF#P*7-ZbZ>KLZ*U+<Lqi~Na&Km7Y-Iodc-oy)XH-+^7Crag
z^g>IBfRsybQWXdwQbLP>6p<z>Aqfylh#{fb6;Z(vMMVS~$e@S=j*ftg6;Uh<iVD~V
z<RPMtgQJLw%KPDaqifc@_vX$1wbwr9tn;0-&j-K=43<bUQ8j=JsX`tR;Dg7+#^K~H
zK!FM*Z~zbpvt%K2{UZSY_<lS*D<Z%Lz5oGu(+dayz)hRLFdT>f59&ghTmgWD0l;*T
zI7<kC6aYYajzXpYKt=(8otP$50H6c_V9R4-;{Z@C0AMG7=F<Rxo%or10RUT+Ar%3j
zkpLhQWr#!oXgdI`&sK^>09Y^p6lP1rIRMx#05C~cW=H_Aw*bJ-5DT&Z2n+x)QHX^p
z00esgV8|mQcmRZ%02D^@S3L16t`O%c004NIvOKvYIYoh62rY33S640`D9%Y2D-<?i
z0%4j!F2Z@488U%158(66005wo6%pWr^Zj_v4zAA5HjcIqUoGmt2LB>rV&neh&#Q1i
z007~1e$oCcFS8neI|hJl{-P!B1ZZ9hpmq0)X0i`JwE&>$+E?>%_<lS*MWK+n+1cgf
z<k(8YLR(?VSAG6x!e78w{cQPuJpA|d;J)G{fihizM+Erb!p!tcr5w+a34~(Y=8s4G
zw+sLL9n&JjNn*KJDiq^U5^;`1nvC-@r6P$!k}1U{(*I=Q-z@tBKHoI}uxdU5dyy@u
zU1J0GOD7Ombim^G008p4Z^6_k2m^p<gW=D2|L;HjN1!DDfM!XOaR2~bL?kX$%CkSm
z2mk;?pn)o|K^yeJ7%adB9Ki+L!3+FgHiSYX#KJ-lLJDMn9CBbOtb#%)hRv`YDqt_v
zKpix|QD}yfa1JiQRk#j4a1Z)n2%f<xynzV>LC6RbVIkUx0b+_+BaR3cnT7Zv!AJxW
zizFb)h!jyGOOZ85F;a?DAXP{m@;!0_Ifq<Ex{*7`05XF7hP+2Hl!3BQJ=6@fL%FCo
z8iYoo3(#bAF`ADSpqtQgv>H8(HlgRxt7s3}k3K`kFu>>-2Q$QMFfPW!La{h336o>X
zu_CMttHv6zR;&ZNiS=X8v3CR#fknUxHUxJ<AYmRsNLWl*PS{AOARHt#5!wki2?K;t
z!Y3k=s7tgax)J%r7-BLphge7~Bi0g+6E6^Zh(p9TBoc{3GAFr^0!gu?RMHaCM$&Fl
zBk3%un>0uoBa_M6WNWeqIg~6QE69c9o#eyhGvpiOA@W-aonk<7r1(?fC{oI5N*U!4
z<uv66WtcKSRim0x-Ke2d5jBrmLam{;Qm;{ms1r1GnmNsb7D-E`t)i9F8fX`2_i3-_
zbh;7Ul^#x)&{xvS=|||7=mYe33=M`AgU5(xC>fg=2N-7=cNnjjOr{yriy6mMFgG#l
znCF=fnQv8CDz++o6_Lscl}eQ+l^ZHARH>?_s@|##Rr6KLRFA1%Q+=*RRWnoLsR`7U
zt5vF<Q0r40Q)j6=sE4X&sBct1q<&fbi3VB2Ov6t@q*0);U*o*SAPZv|vv@2aYYnT0
zb%8a+Cb7-ge0D0knEf5Qi#@8Tp*ce{N;6lpQuCB%KL_KOarm5cP6_8Ir<e17iry6O
zDdH&`rZh~sF=bq9s+O0QSgS~@QL9Jmy*94xr=6y~MY~!1fet~(N+(<=M`w@D1)b+p
z*;C!83a1uLJv#NSE~;y#8=<>IcfW3@?wFpwUVxrVZ>QdQz32KIeJ}k~{cZZE^+ya?
z2D1z#2HOnI7(B%_ac?{wFUQ;QQA1tBKtrWrm0_3Rgps+?Jfqb{jYbcQX~taRB;#$y
zZN{S}1|}gUOHJxc?wV3fxuz+mJ4`!F$IZ;mqRrNsHJd##*D~ju=bP7?-?v~|cv>vB
zsJ6IeNwVZxrdjT`yl#bBIa#GxRa#xMMy;K#CDyyGyQdMSxlWT#tDe?p!?5wT$+oGt
z8L;Kp2HUQ-ZMJ=3XJQv;x5ci*?vuTfeY$;({XGW_huIFR9a<fJbF^|4I#xQ~n$Dc=
zKYhjYmgz5NSkDm8*fZm{6U!;YX`NG>(?@3)XSs8O^N5RyOM=TTmp(3=8^+zpz2r)C
z^>JO{deZfso3oq3?Wo(Y?l$ge?uXo;%ru`Vo>?<<(8I_>;8Eq#KMS9gFl*neeosSB
zfoHYnBQIkwkyowPu(zdms`p{<7e4kra-ZWq<2*OsGTvEV%s0Td$hXT+!*8Bnh2KMe
zBmZRodjHV?r+_5^X9J0WL4jKW`}lf%A-|44I@@LTvf1rHjG(ze6+w@Jt%Bvjts!X0
z?2xS?_ve_-k<Mujg;0Lz*3buG=3$G&ehepthlN*$KaOySSQ^nWmo<0M+(UEUMEXRQ
zMBbZcF;6+KElM>iKB_KiJlZ$9G`c^=E@oNG)mWWaNo-3TIW8)$Hg0Ub-~8?KhvJ>$
z3*&nim@mj(aCxE5!t{lw7O5^0EIO7zOo&c6l<+|iDySBWCGrz@C5{St!X3hAA}`T4
z(TLbXTq+(;@<=L8dXnssyft|w#WSTW<++3>sgS%(4NTpeI-VAqb|7ssJvzNHgOZVu
zaYCvgO_R1~>SyL=cFU|~g|hy|Zi}}s9+d~lYqOB71z9Z$wnC=pR9Yz4DhIM>Wmjgu
z&56o6maCpC&F##y%G;1PobR9i?GnNg;gYtchD%p19a!eQtZF&3JaKv33gZ<8D~47E
ztUS1iwkmDaPpj=$m#%)jCVEY4fnLGNg2A-`YwHVD3gv};>)hAvT~AmqS>Lr``i7kw
zJ{5_It`yrBmlc25DBO7E8;5VoznR>Ww5hAaxn$2~(q`%A-YuS64wkBy=9dm`4cXeX
z4c}I@?e+FW+b@^RDBHV(wnMq2zdX3SWv9u`%{xC-q*U}&`cyXV(%rRT*Z6MH?i+i&
z_B8C(+grT%{XWUQ+f@NoP1R=AW&26{v-dx)iK^-Nmiuj8txj!m?Z*Ss1N{dh4z}01
z)YTo*JycSU)+_5r4#yw9{+;i4Ee$peRgIj+;v;ZGdF1K$3E%e~4LaI(jC-u%2h$&R
z9cLXcYC@Xwnns&bn)_Q~Te?roKGD|d-g^8;+aC{{G(1^(O7m37Y1-+6)01cN&y1aw
zoqc{T`P^XJqPBbIW6s}d4{z_f5Om?vMgNQEJG?v2T=KYd^0M3I6IZxbny)%vZR&LD
zJpPl@Psh8QyPB@KTx+@RdcC!KX7}kEo;S|j^u2lU7XQ}Oo;f|;z4Ll+_r>@1-xl3|
zawq-H%e&ckC+@AhPrP6BK<z=<L*0kfKU@CX*zeqbYQT4(^U>T#_XdT7&;F71j}Joy
zkC~6lh7E@6o;W@^IpRNZ{ptLtL(gQ-CY~4mqW;US7Zxvm_|@yz&e53Bp_lTPlfP|z
zrTyx_>lv@x#=^!PzR7qqF<$gm`|ZJZ+;<)Cqu&ot<a{81DF0~rvGr5Xr~8u`lav1h
z1DNytV>2z=0000WV@Og>004R=004l4008;_004mL004C`008P>0026e000+nl3&F}
z000A0Nkl<Zc-q95O>7Kd7{`CpHr?*fCGFN%N+LcQvMAkk;D9DXQI5WnI<YPr35k|N
zap;jqkZ?hQNO~ZIOC3~595l;GLt7`&N>?jw_p{r59J)I@JDqlCr|5gxXXl-n|MUMp
z|M!{qRYZij97W9WC^5U(QoH5yQkUyeWIRDQo**ScN(4c&v1DSjShRc4V$sESdwUd1
z2>_P>j^DpdDm4K>ZlT09%wCi9)Z}k*Q88B<n@oWeGY_=XZjq%f7elcaiDZ&ka(t2r
zX_lt3fi(iUxlBU$3@Vi5tI>w)YU0so4x{wDr;4B>lTBdqpQFr4=hrV-ErB-T(J0}u
zQ2^#ux(SCu8lXOjd6kvvxm;k4y%b==+-hg(3ZS~(Q&?w!Rd=OsZt}UY!BF9=!05LO
zn4Z1SY~jJs6xO-7id_Xv%&vF2iQHzug4l0FIVKa~dn8P7=vQIjVzU#93F}>f0Jpk6
zFqDXEOVJ3NU6L7ay9%UPXI>T_PqKAi1B#-sy1w3&wGr4XN$Vyw)LLD=iq9RdaJpPn
z)YK4q`%VKMh(-Asi;_$UZij;vs)|gnzQqz4I^X);yBX=}p}(t(oktEcl1ON*y(gNu
z9Srj5TOWs7TXVo|n^14ENHWQ?_3L=t-p-4T4gi+;yqeqx{eB*tJ;%*Jfcm3HkTmd2
zCM>b406_WLHF&mf=V4#3HottUkM57303lH4<$&MMo>M0c?w4u%faU$bgb39>FVCAV
zaMkPMYMqZ~%`NyE8W>K*iA|)q`1A>DH*MtQwQD;0rZ-?g3G}3rY~H(<_itWnFMj{J
z6A`G|v4f(;i&*LLaHp*eN4X1tpRt(c1DTx(g*(9@D?J{Lo^RGz9cwwyq7}<IbMpq<
z_U~t)yPNBsom5p+0MK~aPreOb-u=K5)k$P@EbC=4wrDYx+qi*I=a=(wpuh0_fUXsq
zFRMJjqIE&B+lWNh(~aQyfOQMm{xX~mYyO4;_JH-^>Cee5c0@K=TRHPsGvPF~b}n3y
z0~{Itoww$;Td^7A$ndbK+%qa^HG%$-5getZl&R{JM+NJ0XMoF`&VmxSk~tmqg+hPr
tU}C8gDvGk~Umr+C<j0Jy_<xQ+0|1Y!v8yE^d@uk2002ovPDHLkV1ilP)wuuw

literal 0
HcmV?d00001

diff --git a/loaddictionaries.cc b/loaddictionaries.cc
index 862c3dea..fd6e1094 100644
--- a/loaddictionaries.cc
+++ b/loaddictionaries.cc
@@ -22,6 +22,7 @@
 #include "fsencoding.hh"
 #include "xdxf.hh"
 #include "sdict.hh"
+#include "aard.hh"
 
 #include <QMessageBox>
 #include <QDir>
@@ -42,7 +43,7 @@ LoadDictionaries::LoadDictionaries( Config::Class const & cfg ):
 
   nameFilters << "*.bgl" << "*.ifo" << "*.lsa" << "*.dat"
               << "*.dsl" << "*.dsl.dz"  << "*.index" << "*.xdxf"
-              << "*.xdxf.dz" << "*.dct";
+              << "*.xdxf.dz" << "*.dct" << "*.aar";
 }
 
 void LoadDictionaries::run()
@@ -155,6 +156,13 @@ void LoadDictionaries::handlePath( Config::Path const & path )
     dictionaries.insert( dictionaries.end(), sdictDictionaries.begin(),
                          sdictDictionaries.end() );
   }
+  {
+    vector< sptr< Dictionary::Class > > aardDictionaries =
+      Aard::makeDictionaries( allFiles, FsEncoding::encode( Config::getIndexDir() ), *this );
+
+    dictionaries.insert( dictionaries.end(), aardDictionaries.begin(),
+                         aardDictionaries.end() );
+  }
 }
 
 void LoadDictionaries::indexingDictionary( string const & dictionaryName ) throw()
diff --git a/resources.qrc b/resources.qrc
index eb0bb603..103b2650 100644
--- a/resources.qrc
+++ b/resources.qrc
@@ -48,5 +48,6 @@
         <file>icons/error.png</file>
         <file>icons/macicon.png</file>
         <file>icons/icon32_sdict.png</file>
+        <file>icons/icon32_aard.png</file>
     </qresource>
 </RCC>
diff --git a/sdict.cc b/sdict.cc
index 748ba3aa..44080db7 100644
--- a/sdict.cc
+++ b/sdict.cc
@@ -9,13 +9,11 @@
 #include "langcoder.hh"
 #include "dprintf.hh"
 #include "fsencoding.hh"
+#include "decompress.hh"
 
-#include <zlib.h>
-#include <bzlib.h>
 #include <map>
 #include <set>
 #include <string>
-#include <stdlib.h>
 
 #ifdef _MSC_VER
 #include <stub_msvc.h>
@@ -124,72 +122,6 @@ bool indexIsOldOrBad( string const & indexFile )
          header.formatVersion != CurrentFormatVersion;
 }
 
-string decompressZlib( char * bufptr, unsigned length )
-{
-z_stream zs;
-char buf[2048];
-string str;
-int res;
-    memset( &zs, 0, sizeof(zs) );
-    zs.next_in = (Bytef *)bufptr;
-    zs.avail_in = length;
-    while( 1 )
-    {
-        res = inflateInit( &zs );
-        if( res != Z_OK )
-            break;
-        while( res != Z_STREAM_END )
-        {
-            memset( buf, 0, sizeof(buf) );
-            zs.next_out = (Bytef *)buf;
-            zs.avail_out = 2047;
-            res = inflate( &zs, Z_SYNC_FLUSH );
-            str += buf;
-            if( res != Z_OK && res != Z_STREAM_END )
-                 break;
-        }
-        break;
-    }
-    inflateEnd( &zs );
-    if( res != Z_STREAM_END )
-        str.clear();
-    return str;
-}
-
-string decompressBzip2( char * bufptr, unsigned length )
-{
-bz_stream zs;
-char buf[2048];
-string str;
-int res;
-    memset( &zs, 0, sizeof(zs) );
-    zs.next_in = bufptr;
-    zs.avail_in = length;
-    zs.total_in_lo32 = length;
-    while( 1 )
-    {
-        res = BZ2_bzDecompressInit( &zs, 0, 0 );
-        if( res != BZ_OK )
-            break;
-        while( res != BZ_STREAM_END )
-        {
-            memset( buf, 0, sizeof(buf) );
-            zs.next_out = buf;
-            zs.avail_out = 2047;
-            zs.total_out_lo32 = length;
-            res = BZ2_bzDecompress( &zs );
-            str += buf;
-            if( res != BZ_OK && res != BZ_STREAM_END )
-                break;
-        }
-        break;
-    }
-    BZ2_bzDecompressEnd( &zs );
-    if( res != BZ_STREAM_END )
-        str.clear();
-    return str;
-}
-
 class SdictDictionary: public BtreeIndexing::BtreeDictionary
 {
     Mutex idxMutex;