goldendict-ng/slob.cc

/* This file is (c) 2015 Abs62
 * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */

#ifdef MAKE_ZIM_SUPPORT

#include "slob.hh"
#include "btreeidx.hh"
#include "fsencoding.hh"
#include "folding.hh"
#include "gddebug.hh"
#include "utf8.hh"
#include "decompress.hh"
#include "langcoder.hh"
#include "wstring.hh"
#include "wstring_qt.hh"
#include "ftshelpers.hh"
#include "htmlescape.hh"
#include "filetype.hh"
#include "tiff.hh"
#include "utils.hh"

#ifdef _MSC_VER
#include <stub_msvc.h>
#endif

#include <QString>
#include <QFile>
#include <QFileInfo>
#include <QDir>
#include <QTextCodec>
#include <QMap>
#include <QPair>
#include <QRegExp>
#if (QT_VERSION >= QT_VERSION_CHECK(6,0,0))
#include <QtCore5Compat>
#endif
#include <QProcess>
#include <QVector>
#include <QtAlgorithms>

#include <QRegularExpression>

#include <string>
#include <vector>
#include <map>
#include <set>
#include <algorithm>

namespace Slob {

using std::string;
using std::map;
using std::vector;
using std::multimap;
using std::pair;
using std::set;
using gd::wstring;

using BtreeIndexing::WordArticleLink;
using BtreeIndexing::IndexedWords;
using BtreeIndexing::IndexInfo;

DEF_EX_STR( exNotSlobFile, "Not an Slob file", Dictionary::Ex )
DEF_EX_STR( exCantReadFile, "Can't read file", Dictionary::Ex )
DEF_EX_STR( exCantDecodeFile, "Can't decode file", Dictionary::Ex )
DEF_EX_STR( exNoCodecFound, "No text codec found", Dictionary::Ex )
DEF_EX( exUserAbort, "User abort", Dictionary::Ex )
DEF_EX( exNoResource, "No resource found", Dictionary::Ex )

#pragma pack( push, 1 )

enum
{
  Signature = 0x58424C53, // SLBX on little-endian, XBLS on big-endian
  CurrentFormatVersion = 2 + BtreeIndexing::FormatVersion + Folding::Version
};

struct IdxHeader
{
  quint32 signature; // First comes the signature, SLBX
  quint32 formatVersion; // File format version (CurrentFormatVersion)
  quint32 indexBtreeMaxElements; // Two fields from IndexInfo
  quint32 indexRootOffset;
  quint32 resourceIndexBtreeMaxElements; // Two fields from IndexInfo
  quint32 resourceIndexRootOffset;
  quint32 wordCount;
  quint32 articleCount;
  quint32 langFrom;  // Source language
  quint32 langTo;    // Target language
}
#ifndef _MSC_VER
__attribute__((packed))
#endif
;

#pragma pack( pop )

const char SLOB_MAGIC[ 8 ] = { 0x21, 0x2d, 0x31, 0x53, 0x4c, 0x4f, 0x42, 0x1f };

struct RefEntry
{
  QString key;
  quint32 itemIndex;
  quint16 binIndex;
  QString fragment;
};

bool indexIsOldOrBad( string const & indexFile )
{
  File::Class idx( indexFile, "rb" );

  IdxHeader header;

  return idx.readRecords( &header, sizeof( header ), 1 ) != 1 ||
         header.signature != Signature ||
         header.formatVersion != CurrentFormatVersion;
}


class SlobFile
{
public:
  typedef QPair< quint64, quint32 > RefEntryOffsetItem;
  typedef QVector< RefEntryOffsetItem > RefOffsetsVector;

private:
  enum Compressions
  { UNKNOWN = 0, NONE, ZLIB, BZ2, LZMA2 };

  QFile file;
  QString fileName, dictionaryName;
  Compressions compression;
  QString encoding;
  unsigned char uuid[ 16 ];
  QTextCodec *codec;
  QMap< QString, QString > tags;
  QVector< QString > contentTypes;
  quint32 blobCount;
  quint64 storeOffset, fileSize, refsOffset;
  quint32 refsCount, itemsCount;
  quint64 itemsOffset, itemsDataOffset;
  quint32 currentItem;
  quint32 contentTypesCount;
  string currentItemData;
  RefOffsetsVector refsOffsetVector;

  QString readTinyText();
  QString readText();
  QString readLargeText();
  QString readString( unsigned length );

public:
  SlobFile() :
    compression( UNKNOWN )
  , codec( 0 )
  , blobCount( 0 )
  , storeOffset( 0 )
  , fileSize( 0 )
  , refsOffset( 0 )
  , refsCount( 0 )
  , itemsCount( 0 )
  , itemsOffset( 0 )
  , itemsDataOffset( 0 )
  , currentItem( 0xFFFFFFFF )
  , contentTypesCount( 0 )
  {}

  ~SlobFile();

  Compressions getCompression() const
  { return compression; }

  QString const & getEncoding() const
  { return encoding; }

  QString const & getDictionaryName() const
  { return dictionaryName; }

  quint32 blobsCount() const
  { return blobCount; }

  quint64 dataOffset() const
  { return storeOffset; }

  quint32 getRefsCount() const
  { return refsCount; }

  quint32 getContentTypesCount() const
  { return contentTypesCount; }

  QTextCodec * getCodec() const
  { return codec; }

  const RefOffsetsVector & getSortedRefOffsets();

  void clearRefOffsets()
  { refsOffsetVector.clear(); }

  QString getContentType( quint8 content_id ) const
  { return content_id < contentTypes.size() ? contentTypes[ content_id ] : QString(); }

  QMap< QString, QString > const & getTags() const
  { return tags; }

  void open( const QString & name );

  void getRefEntryAtOffset(quint64 offset, RefEntry & entry );

  void getRefEntry(quint32 ref_nom, RefEntry & entry );

  quint8 getItem( RefEntry const & entry, string * data );
};

SlobFile::~SlobFile()
{
  file.close();
}

QString SlobFile::readString( unsigned length )
{
  QByteArray data = file.read( length );
  QString str;

  if( codec != 0 && !data.isEmpty() )
    str = codec->toUnicode( data );
  else
    str = QString( data );

  char term = 0;
  int n = str.indexOf( term );
  if( n >= 0 )
    str.resize( n );

  return str;
}

QString SlobFile::readTinyText()
{
  unsigned char len;
  if( !file.getChar( ( char * )&len ) )
  {
    QString error = fileName + ": " + file.errorString();
    throw exCantReadFile( string( error.toUtf8().data() ) );
  }
  return readString( len );
}

QString SlobFile::readText()
{
  quint16 len;
  if( file.read( ( char * )&len, sizeof( len ) ) != sizeof( len ) )
  {
    QString error = fileName + ": " + file.errorString();
    throw exCantReadFile( string( error.toUtf8().data() ) );
  }
  return readString( qFromBigEndian( len ) );
}

QString SlobFile::readLargeText()
{
  quint32 len;
  if( file.read( ( char * )&len, sizeof( len ) ) != sizeof( len ) )
  {
    QString error = fileName + ": " + file.errorString();
    throw exCantReadFile( string( error.toUtf8().data() ) );
  }
  return readString( qFromBigEndian( len ) );
}

void SlobFile::open( const QString & name )
{
QString error( name + ": " );

  if( file.isOpen() )
    file.close();

  fileName = name;

  file.setFileName( name );

  {
    QFileInfo fi( name );
    dictionaryName = fi.fileName();
  }

  for( ; ; )
  {

    if( !file.open( QFile::ReadOnly ) )
      break;

    char magic[ 8 ];
    if( file.read( magic, sizeof( magic ) ) != sizeof( magic ) )
      break;

    if( memcmp( magic, SLOB_MAGIC, sizeof( magic ) ) != 0 )
      throw exNotSlobFile( string( name.toUtf8().data() ) );

    if( file.read( ( char * )uuid, sizeof( uuid ) ) != sizeof( uuid ) )
      break;

    // Read encoding

    encoding = readTinyText();

    codec = QTextCodec::codecForName( encoding.toLatin1() );
    if( codec == 0 )
    {
      error = QString( "for encoding \"") + encoding + "\"";
      throw exNoCodecFound( string( error.toUtf8().data() ) );
    }

    // Read compression type

    QString compr = readTinyText();

    if( compr.compare( "zlib", Qt::CaseInsensitive ) == 0 )
      compression = ZLIB;
    else
    if( compr.compare( "bz2", Qt::CaseInsensitive ) == 0 )
      compression = BZ2;
    else
    if( compr.compare( "lzma2", Qt::CaseInsensitive ) == 0 )
      compression = LZMA2;
    else
    if( compr.isEmpty() || compr.compare( "none", Qt::CaseInsensitive ) == 0 )
      compression = NONE;

    // Read tags

    unsigned char count;
    if( !file.getChar( ( char * )&count ) )
      break;

    for( unsigned i = 0; i < count; i++ )
    {
      QString key = readTinyText();
      QString value = readTinyText();
      tags[ key ] = value;

      if( key.compare( "label", Qt::CaseInsensitive ) == 0
          || key.compare( "name", Qt::CaseInsensitive ) == 0)
        dictionaryName = value;
    }

    // Read content types

    if( !file.getChar( ( char * )&count ) )
      break;

    for( unsigned i = 0; i < count; i++ )
    {
      QString type = readText();
      contentTypes.append( type );
    }
    contentTypesCount = count;

    // Read data parameters

    quint32 cnt;
    if( file.read( ( char * )&cnt, sizeof( cnt ) ) != sizeof( cnt ) )
      break;
    blobCount = qFromBigEndian( cnt );

    quint64 tmp;
    if( file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) )
      break;
    storeOffset = qFromBigEndian( tmp );

    if( file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) )
      break;
    fileSize = qFromBigEndian( tmp );

    if( file.read( ( char * )&cnt, sizeof( cnt ) ) != sizeof( cnt ) )
      break;
    refsCount = qFromBigEndian( cnt );

    refsOffset = file.pos();

    if( !file.seek( storeOffset ) )
      break;

    if( file.read( ( char * )&cnt, sizeof( cnt ) ) != sizeof( cnt ) )
      break;
    itemsCount = qFromBigEndian( cnt );

    itemsOffset = storeOffset + sizeof( itemsCount );
    itemsDataOffset = itemsOffset + itemsCount * sizeof( quint64 );

    return;
  }
  error += file.errorString();
  throw exCantReadFile( string( error.toUtf8().data() ) );
}

const SlobFile::RefOffsetsVector & SlobFile::getSortedRefOffsets()
{
  quint64 tmp;
  qint64 size = refsCount * sizeof( quint64 );
  quint64 base = refsOffset + size;

  refsOffsetVector.clear();
  refsOffsetVector.reserve( refsCount );

  for( ; ; )
  {
    QByteArray offsets;
    offsets.resize( size );

    if( !file.seek( refsOffset ) || file.read( offsets.data(), size ) != size )
      break;

    for( quint32 i = 0; i < refsCount; i++ )
    {
      memcpy( &tmp, offsets.data() + i * sizeof( quint64 ), sizeof( tmp ) );
      refsOffsetVector.append( RefEntryOffsetItem( base + qFromBigEndian( tmp ), i ) );
    }

    std::sort( refsOffsetVector.begin(), refsOffsetVector.end() );
    return refsOffsetVector;
  }
  QString error = fileName + ": " + file.errorString();
  throw exCantReadFile( string( error.toUtf8().data() ) );
}

void SlobFile::getRefEntryAtOffset( quint64 offset, RefEntry & entry )
{
  for( ; ; )
  {
    if( !file.seek( offset ) )
      break;

    entry.key = readText();

    quint32 index;
    if( file.read( ( char * )&index, sizeof( index ) ) != sizeof( index ) )
      break;
    entry.itemIndex = qFromBigEndian( index );

    quint16 binIndex;
    if( file.read( ( char * )&binIndex, sizeof( binIndex ) ) != sizeof( binIndex ) )
      break;
    entry.binIndex = qFromBigEndian( binIndex );

    entry.fragment = readTinyText();

    return;
  }
  QString error = fileName + ": " + file.errorString();
  throw exCantReadFile( string( error.toUtf8().data() ) );
}

void SlobFile::getRefEntry( quint32 ref_nom, RefEntry & entry )
{
  quint64 pos = refsOffset + ref_nom * sizeof( quint64 );
  quint64 offset, tmp;

  for( ; ; )
  {
    if( !file.seek( pos ) || file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) )
      break;

    offset = qFromBigEndian( tmp ) + refsOffset + refsCount * sizeof( quint64 );

    getRefEntryAtOffset( offset, entry );

    return;
  }
  QString error = fileName + ": " + file.errorString();
  throw exCantReadFile( string( error.toUtf8().data() ) );
}

quint8 SlobFile::getItem( RefEntry const & entry, string * data )
{
  quint64 pos = itemsOffset + entry.itemIndex * sizeof( quint64 );
  quint64 offset, tmp;

  for( ; ; )
  {
    // Read item data types

    if( !file.seek( pos ) || file.read( ( char * )&tmp, sizeof( tmp ) ) != sizeof( tmp ) )
      break;

    offset = qFromBigEndian( tmp ) + itemsDataOffset;

    if( !file.seek( offset ) )
      break;

    quint32 bins, bins_be;
    if( file.read( ( char * )&bins_be, sizeof( bins_be ) ) != sizeof( bins_be ) )
      break;
    bins = qFromBigEndian( bins_be );

    if( entry.binIndex >= bins )
      return 0xFF;

    QVector< quint8 > ids;
    ids.resize( bins );
    if( file.read( ( char * )ids.data(), bins ) != bins )
      break;

    quint8 id = ids[ entry.binIndex ];

    if( id >= (unsigned)contentTypes.size() )
      return 0xFF;

    if( data != 0 )
    {
      // Read item data
      if( currentItem != entry.itemIndex )
      {
        currentItemData.clear();
        quint32 length, length_be;
        if( file.read( ( char * )&length_be, sizeof( length_be ) ) != sizeof( length_be ) )
          break;
        length = qFromBigEndian( length_be );

        QByteArray compressedData = file.read( length );

        if( compression == NONE )
          currentItemData = string( compressedData.data(), compressedData.length() );
        else
        if( compression == ZLIB )
          currentItemData = decompressZlib( compressedData.data(), length );
        else
        if( compression == BZ2 )
          currentItemData = decompressBzip2( compressedData.data(), length );
        else
          currentItemData = decompressLzma2( compressedData.data(), length, true );

        if( currentItemData.empty() )
        {
          currentItem = 0xFFFFFFFF;
          return 0xFF;
        }
        currentItem = entry.itemIndex;
      }

      // Find bin data inside item

      const char * ptr = currentItemData.c_str();
      quint32 pos = entry.binIndex * sizeof( quint32 );

      if( pos >= currentItemData.length() - sizeof( quint32 ) )
        return 0xFF;

      quint32 offset, offset_be;
      memcpy( &offset_be, ptr + pos, sizeof( offset_be ) );
      offset = qFromBigEndian( offset_be );

      pos = bins * sizeof( quint32 ) + offset;

      if( pos >= currentItemData.length() - sizeof( quint32 ) )
        return 0xFF;

      quint32 length, len_be;
      memcpy( &len_be, ptr + pos, sizeof( len_be ) );
      length = qFromBigEndian( len_be );

      *data = currentItemData.substr( pos + sizeof( len_be ), length );
    }

    return ids[ entry.binIndex ];
  }
  QString error = fileName + ": " + file.errorString();
  throw exCantReadFile( string( error.toUtf8().data() ) );
}

// SlobDictionary

class SlobDictionary: public BtreeIndexing::BtreeDictionary
{
    Mutex idxMutex;
    Mutex slobMutex, idxResourceMutex;
    File::Class idx;
    BtreeIndex resourceIndex;
    IdxHeader idxHeader;
    string dictionaryName;
    SlobFile sf;
    QString texCgiPath, texCachePath;

  public:

    SlobDictionary( string const & id, string const & indexFile,
                    vector< string > const & dictionaryFiles );

    ~SlobDictionary();

    virtual string getName() throw()
    { return dictionaryName; }

    virtual map< Dictionary::Property, string > getProperties() throw()
    { return map< Dictionary::Property, string >(); }

    virtual unsigned long getArticleCount() throw()
    { return idxHeader.articleCount; }

    virtual unsigned long getWordCount() throw()
    { return idxHeader.wordCount; }

    inline virtual quint32 getLangFrom() const
    { return idxHeader.langFrom; }

    inline virtual quint32 getLangTo() const
    { return idxHeader.langTo; }

    virtual sptr< Dictionary::DataRequest > getArticle( wstring const &,
                                                        vector< wstring > const & alts,
                                                        wstring const &,
                                                        bool ignoreDiacritics )
      ;

    virtual sptr< Dictionary::DataRequest > getResource( string const & name )
      ;

    virtual QString const& getDescription();

    /// Loads the resource.
    void loadResource( std::string &resourceName, string & data );

    virtual sptr< Dictionary::DataRequest > getSearchResults( QString const & searchString,
                                                              int searchMode, bool matchCase,
                                                              int distanceBetweenWords,
                                                              int maxResults,
                                                              bool ignoreWordsOrder,
                                                              bool ignoreDiacritics );
    virtual void getArticleText( uint32_t articleAddress, QString & headword, QString & text );

    quint64 getArticlePos(uint32_t articleNumber );

    virtual void makeFTSIndex(QAtomicInt & isCancelled, bool firstIteration );

    virtual void setFTSParameters( Config::FullTextSearch const & fts )
    {
      can_FTS = fts.enabled
                && !fts.disabledTypes.contains( "SLOB", Qt::CaseInsensitive )
                && ( fts.maxDictionarySize == 0 || getArticleCount() <= fts.maxDictionarySize );
    }

    virtual uint32_t getFtsIndexVersion()
    { return 2; }

protected:

    virtual void loadIcon() throw();

private:

    /// Loads the article.
    void loadArticle( quint32 address,
                      string & articleText );

    quint32 readArticle( quint32 address,
                         string & articleText,
                         RefEntry & entry );

    string convert( string const & in_data, RefEntry const & entry );

    void removeDirectory( QString const & directory );

    friend class SlobArticleRequest;
    friend class SlobResourceRequest;
};

SlobDictionary::SlobDictionary( string const & id,
                                string const & indexFile,
                                vector< string > const & dictionaryFiles ):
    BtreeDictionary( id, dictionaryFiles ),
    idx( indexFile, "rb" ),
    idxHeader( idx.read< IdxHeader >() )
{
    // Open data file

    try
    {
      sf.open( FsEncoding::decode( dictionaryFiles[ 0 ].c_str() ) );
    }
    catch( std::exception & e )
    {
      gdWarning( "Slob dictionary initializing failed: %s, error: %s\n",
                 dictionaryFiles[ 0 ].c_str(), e.what() );
    }

    // Initialize the indexes

    openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
                          idxHeader.indexRootOffset ),
               idx, idxMutex );

    resourceIndex.openIndex( IndexInfo( idxHeader.resourceIndexBtreeMaxElements,
                                        idxHeader.resourceIndexRootOffset ),
                             idx, idxResourceMutex );

    // Read dictionary name

    dictionaryName = string( sf.getDictionaryName().toUtf8().constData() );
    if( dictionaryName.empty() )
    {
      QString name = QDir::fromNativeSeparators( FsEncoding::decode( dictionaryFiles[ 0 ].c_str() ) );
      int n = name.lastIndexOf( '/' );
      dictionaryName = string( name.mid( n + 1 ).toUtf8().constData() );
    }

    // Full-text search parameters

    can_FTS = true;

    ftsIdxName = indexFile + "_FTS";

    if( !Dictionary::needToRebuildIndex( dictionaryFiles, ftsIdxName )
        && !FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this ) )
      FTS_index_completed.ref();

    texCgiPath = Config::getProgramDataDir() + "/mimetex.cgi";
    if( QFileInfo( texCgiPath ).exists() )
    {
      QString dirName = QString::fromStdString( getId() );
      QDir( QDir::tempPath() ).mkdir( dirName );
      texCachePath = QDir::tempPath() + "/" + dirName;
    }
    else
      texCgiPath.clear();
}

SlobDictionary::~SlobDictionary()
{
  if( !texCachePath.isEmpty() )
    removeDirectory( texCachePath );
}

void SlobDictionary::removeDirectory( QString const & directory )
{
  QDir dir( directory );
  Q_FOREACH( QFileInfo info, dir.entryInfoList( QDir::NoDotAndDotDot
                                                | QDir::AllDirs
                                                | QDir::Files,
                                                QDir::DirsFirst))
  {
    if( info.isDir() )
      removeDirectory( info.absoluteFilePath() );
    else
      QFile::remove( info.absoluteFilePath() );
  }

  dir.rmdir( directory );
}

void SlobDictionary::loadIcon() throw()
{
  if ( dictionaryIconLoaded )
    return;

  QString fileName =
    QDir::fromNativeSeparators( FsEncoding::decode( getDictionaryFilenames()[ 0 ].c_str() ) );

  // Remove the extension
  fileName.chop( 4 );

  if( !loadIconFromFile( fileName ) )
  {
    // Load failed -- use default icons
    dictionaryNativeIcon = dictionaryIcon = QIcon(":/icons/icon32_slob.png");
  }

  dictionaryIconLoaded = true;
}

QString const& SlobDictionary::getDescription()
{
  if( !dictionaryDescription.isEmpty() )
    return dictionaryDescription;

  QMap< QString, QString > const & tags = sf.getTags();

  QMap< QString, QString >::const_iterator it;
  for( it = tags.begin(); it != tags.end(); ++it )
  {
    if( it != tags.begin() )
      dictionaryDescription += "\n\n";

    dictionaryDescription += it.key() + ": " +it.value();
  }

  return dictionaryDescription;
}

void SlobDictionary::loadArticle( quint32 address,
                                  string & articleText )
{
  articleText.clear();

  RefEntry entry;

  readArticle( address, articleText, entry );

  if( !articleText.empty() )
  {
    articleText = convert( articleText, entry );
  }
  else
    articleText = string( QObject::tr( "Article decoding error" ).toUtf8().constData() );

  // See Issue #271: A mechanism to clean-up invalid HTML cards.
  // leave the invalid tags at the mercy of modern browsers.(webengine chrome)
  // https://html.spec.whatwg.org/#an-introduction-to-error-handling-and-strange-cases-in-the-parser
  // https://en.wikipedia.org/wiki/Tag_soup#HTML5
  string cleaner = "";

  string prefix( "<div class=\"slobdict\"" );
  if( isToLanguageRTL() )
    prefix += " dir=\"rtl\"";
  prefix += ">";

  articleText = prefix + articleText + cleaner + "</div>";
}

string SlobDictionary::convert( const string & in, RefEntry const & entry )
{
  QString text = QString::fromUtf8( in.c_str() );

  // pattern of img and script
  text.replace( QRegularExpression( "<\\s*(img|script)\\s+([^>]*)src=\"(?!(?:data|https?|ftp):)(|/)([^\"]*)\"" ),
                QString( "<\\1 \\2src=\"bres://%1/\\4\"").arg( getId().c_str() ) );

  // pattern <link... href="..." ...>
  text.replace( QRegularExpression( "<\\s*link\\s+([^>]*)href=\"(?!(?:data|https?|ftp):)" ),
                QString( "<link \\1href=\"bres://%1/").arg( getId().c_str() ) );

  // pattern <a href="..." ...>, excluding any known protocols such as http://, mailto:, #(comment)
  // these links will be translated into local definitions
  QString anchor;
  QRegularExpression rxLink( "<\\s*a\\s+([^>]*)href=\"(?!(?:\\w+://|#|mailto:|tel:))(/|)([^\"]*)\"\\s*(title=\"[^\"]*\")?[^>]*>" );
  QRegularExpressionMatchIterator it = rxLink.globalMatch( text );
  int pos = 0;
  QString newText;
  while( it.hasNext() )
  {
    QRegularExpressionMatch match = it.next();

    newText += text.mid( pos, match.capturedStart() - pos );
    pos = match.capturedEnd();

    QStringList list = match.capturedTexts();
    // Add empty strings for compatibility with QRegExp behaviour
    for( int i = match.lastCapturedIndex() + 1; i < 5; i++ )
      list.append( QString() );

    QString tag = list[3];
    if ( !list[4].isEmpty() )
      tag = list[4].split("\"")[1];

    // Find anchor
    int n = list[ 3 ].indexOf( '#' );
    if( n > 0 )
      anchor = QString( "?gdanchor=" ) + list[ 3 ].mid( n + 1 );
    else
      anchor.clear();

    tag.remove( QRegularExpression(".*/") ).
        remove( QRegularExpression( "\\.(s|)htm(l|)$", QRegularExpression::PatternOption::CaseInsensitiveOption ) ).
        replace( "_", "%20" ).
        prepend( "<a href=\"gdlookup://localhost/" ).
        append( anchor + "\" " + list[4] + ">" );

    newText += tag;
  }
  if( pos )
  {
    newText += text.mid( pos );
    text = newText;
  }
  newText.clear();


  // Handle TeX formulas via mimetex.cgi

  if( !texCgiPath.isEmpty() )
  {
      QRegularExpression texImage( "<\\s*img\\s+class=\"([^\"]+)\"\\s*([^>]*)alt=\"([^\"]+)\"[^>]*>" );
      QRegularExpression regFrac( "\\\\[dt]frac" );
      QRegularExpression regSpaces( "\\s+([\\{\\(\\[\\}\\)\\]])" );

    QRegExp multReg( "\\*\\{(\\d+)\\}([^\\{]|\\{([^\\}]+)\\})", Qt::CaseSensitive, QRegExp::RegExp2 );

    QString arrayDesc( "\\begin{array}{" );
    pos = 0;
    unsigned texCount = 0;
    QString imgName;

    QRegularExpressionMatchIterator it = texImage.globalMatch( text );
    QString newText;
    while( it.hasNext() )
    {
      QRegularExpressionMatch match = it.next();

      newText += text.mid( pos, match.capturedStart() - pos );
      pos = match.capturedEnd();

      QStringList list = match.capturedTexts();


      if( list[ 1 ].compare( "tex" ) == 0
          || list[ 1 ].compare( "mwe-math-fallback-image-inline" ) == 0
          || list[ 1 ].endsWith( " tex" ) )
      {
        QString name;
        name.asprintf( "%04X%04X%04X.gif", entry.itemIndex, entry.binIndex, texCount );
        imgName = texCachePath + "/" + name;

        if( !QFileInfo( imgName ).exists() )
        {

          // Replace some TeX commands which don't support by mimetex.cgi

          QString tex = list[ 3 ];
          tex.replace( regSpaces, "\\1" );
          tex.replace( regFrac, "\\frac" );
          tex.replace( "\\leqslant", "\\leq" );
          tex.replace( "\\geqslant", "\\geq" );
          tex.replace( "\\infin", "\\infty" );
          tex.replace( "\\iff", "\\Longleftrightarrow" );
          tex.replace( "\\tbinom", "\\binom" );
          tex.replace( "\\implies", "\\Longrightarrow" );
          tex.replace( "{aligned}", "{align*}" );
          tex.replace( "\\Subset", "\\subset" );
          tex.replace( "\\xrightarrow", "\\longrightarrow^" );
          tex.remove( "\\scriptstyle" );
          tex.remove( "\\mathop" );
          tex.replace( "\\bigg|", "|" );

          // Format array descriptions (mimetex now don't support *{N}x constructions in it)

          int pos1 = 0;
          while( pos1 >= 0 )
          {
            pos1 = tex.indexOf( arrayDesc, pos1, Qt::CaseInsensitive );
            if( pos1 >= 0 )
            {
              // Retrieve array description
              QString desc, newDesc;
              int n = 0;
              int nstart = pos1 + arrayDesc.size();
              int i;
              for( i = 0; i + nstart < tex.size(); i++ )
              {
                if( tex[ i + nstart ] == '{' )
                  n += 1;
                if( tex[ i + nstart ] == '}' )
                  n -= 1;
                if( n < 0 )
                  break;
              }
              if( i > 0 && i + nstart + 1 < tex.size() )
                desc = tex.mid( nstart, i );

              if( !desc.isEmpty() )
              {
                // Expand multipliers: "*{5}x" -> "xxxxx"

                newDesc = desc;
                QString newStr;
                int pos2 = 0;
                while( pos2 >= 0 )
                {
                  pos2 = multReg.indexIn( newDesc, pos2 );
                  if( pos2 >= 0 )
                  {
                    QStringList list = multReg.capturedTexts();
                    int n = list[ 1 ].toInt();
                    for( int i = 0; i < n; i++ )
                      newStr += list[ 3 ].isEmpty() ? list[ 2 ] : list[ 3 ];
                    newDesc.replace( pos2, list[ 0 ].size(), newStr );
                    pos2 += newStr.size();
                  }
                  else
                    break;
                }
                tex.replace( pos1 + arrayDesc.size(), desc.size(), newDesc );
                pos1 += arrayDesc.size() + newDesc.size();
              }
              else
                pos1 += arrayDesc.size();
            }
            else
              break;
          }

          QString command = texCgiPath + " -e " +  imgName
                            + " \"" + tex + "\"";
          QProcess::execute( command,QStringList() );
        }

        QString tag = QString( "<img class=\"imgtex\" src=\"file://" )
#ifdef Q_OS_WIN32
                      + "/"
#endif
                      + imgName + "\" alt=\"" + list[ 3 ] + "\">";

        newText += tag;


        texCount += 1;
      }
      else
        newText += list[ 0 ];

    }
    if( pos )
    {
      newText += text.mid( pos );
      text = newText;
    }
    newText.clear();
  }
#ifdef Q_OS_WIN32
  else
  {
    // Increase equations scale
    text = QString::fromLatin1( "<script type=\"text/x-mathjax-config\">MathJax.Hub.Config({" )
           + " SVG: { scale: 170, linebreaks: { automatic:true } }"
           + ", \"HTML-CSS\": { scale: 210, linebreaks: { automatic:true } }"
           + ", CommonHTML: { scale: 210, linebreaks: { automatic:true } }"
           + " });</script>"
           + text;
  }
#endif

  // Fix outstanding elements
  text += "<br style=\"clear:both;\" />";

  return text.toUtf8().data();
}

void SlobDictionary::loadResource( std::string & resourceName, string & data )
{
  vector< WordArticleLink > link;
  RefEntry entry;

  link = resourceIndex.findArticles( Utf8::decode( resourceName ) );

  if( link.empty() )
    return;

  readArticle( link[ 0 ].articleOffset, data, entry );
}

quint32 SlobDictionary::readArticle( quint32 articleNumber, std::string & result,
                                     RefEntry & entry )
{
  string data;
  quint8 contentId;

  {
    Mutex::Lock _( slobMutex );
    if( entry.key.isEmpty() )
      sf.getRefEntry( articleNumber, entry );
    contentId = sf.getItem( entry, &data );
  }

  if( contentId == 0xFF )
    return 0xFFFFFFFF;

  QString contentType = sf.getContentType( contentId );

  if( contentType.contains( "text/html", Qt::CaseInsensitive )
      || contentType.contains( "text/plain", Qt::CaseInsensitive )
      || contentType.contains( "/css", Qt::CaseInsensitive )
      || contentType.contains( "/javascript", Qt::CaseInsensitive )
      || contentType.contains( "/json", Qt::CaseInsensitive ))
  {
    QTextCodec *codec = sf.getCodec();
    QString content = codec->toUnicode( data.c_str(), data.size() );
    result = string( content.toUtf8().data() );
  }
  else
    result = data;

  return contentId;
}

quint64 SlobDictionary::getArticlePos( uint32_t articleNumber )
{
  RefEntry entry;
  {
    Mutex::Lock _( slobMutex );
    sf.getRefEntry( articleNumber, entry );
  }
  return ( ( (quint64)( entry.binIndex ) ) << 32 ) | entry.itemIndex;
}

void SlobDictionary::makeFTSIndex( QAtomicInt & isCancelled, bool firstIteration )
{
  if( !( Dictionary::needToRebuildIndex( getDictionaryFilenames(), ftsIdxName )
         || FtsHelpers::ftsIndexIsOldOrBad( ftsIdxName, this ) ) )
    FTS_index_completed.ref();

  if( haveFTSIndex() )
    return;

  if( ensureInitDone().size() )
    return;

  if( firstIteration && getArticleCount() > FTS::MaxDictionarySizeForFastSearch )
    return;

  gdDebug( "Slob: Building the full-text index for dictionary: %s\n",
           getName().c_str() );

  try
  {
    Mutex::Lock _( getFtsMutex() );

    File::Class ftsIdx( ftsIndexName(), "wb" );

    FtsHelpers::FtsIdxHeader ftsIdxHeader;
    memset( &ftsIdxHeader, 0, sizeof( ftsIdxHeader ) );

    // We write a dummy header first. At the end of the process the header
    // will be rewritten with the right values.

    ftsIdx.write( ftsIdxHeader );

    ChunkedStorage::Writer chunks( ftsIdx );

    BtreeIndexing::IndexedWords indexedWords;

    QSet< uint32_t > setOfOffsets;
    setOfOffsets.reserve( getWordCount() );

    findArticleLinks( 0, &setOfOffsets, 0, &isCancelled );

    if( Utils::AtomicInt::loadAcquire( isCancelled ) )
      throw exUserAbort();

    QVector< uint32_t > offsets;
    offsets.reserve( setOfOffsets.size() );

    slobMutex.lock();
    SlobFile::RefOffsetsVector const & sortedOffsets = sf.getSortedRefOffsets();
    slobMutex.unlock();

    qint32 entries = sf.getRefsCount();
    for( qint32 i = 0; i < entries; i++ )
    {
      if( setOfOffsets.find( sortedOffsets[ i ].second ) != setOfOffsets.end() )
        offsets.append( sortedOffsets[ i ].second );
    }

    // Free memory
    sf.clearRefOffsets();
    setOfOffsets.clear();

    if( Utils::AtomicInt::loadAcquire( isCancelled ) )
      throw exUserAbort();

    if( Utils::AtomicInt::loadAcquire( isCancelled ) )
      throw exUserAbort();

    QMap< QString, QVector< uint32_t > > ftsWords;

    set< quint64 > indexedArticles;
    RefEntry entry;
    string articleText;
    quint32 htmlType = 0xFFFFFFFF;

    for( unsigned i = 0; i < sf.getContentTypesCount(); i++ )
    {
      if( sf.getContentType( i ).startsWith( "text/html", Qt::CaseInsensitive ) )
      {
        htmlType = i;
        break;
      }
    }

    // index articles for full-text search
    for( int i = 0; i < offsets.size(); i++ )
    {
      if( Utils::AtomicInt::loadAcquire( isCancelled ) )
        throw exUserAbort();

      QString articleStr;
      quint32 articleNom = offsets.at( i );

      {
        Mutex::Lock _( slobMutex );
        sf.getRefEntry( articleNom, entry );
      }

      quint64 articleID = ( ( (quint64)entry.itemIndex ) << 32 ) | entry.binIndex;

      set< quint64 >::iterator it = indexedArticles.find( articleID );
      if( it != indexedArticles.end() )
        continue;

      indexedArticles.insert( articleID );

      quint32 type = readArticle( 0, articleText, entry );

      articleStr = QString::fromUtf8( articleText.c_str(), articleText.length() );

      if( type == htmlType )
        articleStr = Html::unescape( articleStr );

      FtsHelpers::parseArticleForFts( articleNom, articleStr, ftsWords );
    }

    // Free memory
    offsets.clear();

    QMap< QString, QVector< uint32_t > >::iterator it = ftsWords.begin();
    while( it != ftsWords.end() )
    {
      if( Utils::AtomicInt::loadAcquire( isCancelled ) )
        throw exUserAbort();

      uint32_t offset = chunks.startNewBlock();
      uint32_t size = it.value().size();

      chunks.addToBlock( &size, sizeof(uint32_t) );
      chunks.addToBlock( it.value().data(), size * sizeof(uint32_t) );

      indexedWords.addSingleWord( gd::toWString( it.key() ), offset );

      it = ftsWords.erase( it );
    }

    // Free memory
    ftsWords.clear();

    if( Utils::AtomicInt::loadAcquire( isCancelled ) )
      throw exUserAbort();

    ftsIdxHeader.chunksOffset = chunks.finish();
    ftsIdxHeader.wordCount = indexedWords.size();

    if( Utils::AtomicInt::loadAcquire( isCancelled ) )
      throw exUserAbort();

    BtreeIndexing::IndexInfo ftsIdxInfo = BtreeIndexing::buildIndex( indexedWords, ftsIdx );

    // Free memory
    indexedWords.clear();

    ftsIdxHeader.indexBtreeMaxElements = ftsIdxInfo.btreeMaxElements;
    ftsIdxHeader.indexRootOffset = ftsIdxInfo.rootOffset;

    ftsIdxHeader.signature = FtsHelpers::FtsSignature;
    ftsIdxHeader.formatVersion = FtsHelpers::CurrentFtsFormatVersion + getFtsIndexVersion();

    ftsIdx.rewind();
    ftsIdx.writeRecords( &ftsIdxHeader, sizeof(ftsIdxHeader), 1 );

    FTS_index_completed.ref();
  }
  catch( std::exception &ex )
  {
    gdWarning( "Slob: Failed building full-text search index for \"%s\", reason: %s\n", getName().c_str(), ex.what() );
    QFile::remove( FsEncoding::decode( ftsIdxName.c_str() ) );
  }
}

void SlobDictionary::getArticleText( uint32_t articleAddress, QString & headword, QString & text )
{
  try
  {
    RefEntry entry;
    string articleText;

    quint32 htmlType = 0xFFFFFFFF;

    for( unsigned i = 0; i < sf.getContentTypesCount(); i++ )
    {
      if( sf.getContentType( i ).startsWith( "text/html", Qt::CaseInsensitive ) )
      {
        htmlType = i;
        break;
      }
    }

    quint32 type = readArticle( articleAddress, articleText, entry );
    headword = entry.key;

    text = QString::fromUtf8( articleText.data(), articleText.size() );

    if( type == htmlType )
      text = Html::unescape( text );
  }
  catch( std::exception &ex )
  {
    gdWarning( "Slob: Failed retrieving article from \"%s\", reason: %s\n", getName().c_str(), ex.what() );
  }
}


sptr< Dictionary::DataRequest > SlobDictionary::getSearchResults( QString const & searchString,
                                                                  int searchMode, bool matchCase,
                                                                  int distanceBetweenWords,
                                                                  int maxResults,
                                                                  bool ignoreWordsOrder,
                                                                  bool ignoreDiacritics )
{
  return new FtsHelpers::FTSResultsRequest( *this, searchString, searchMode, matchCase, distanceBetweenWords, maxResults, ignoreWordsOrder, ignoreDiacritics );
}


/// SlobDictionary::getArticle()

class SlobArticleRequest;

class SlobArticleRequestRunnable: public QRunnable
{
  SlobArticleRequest & r;
  QSemaphore & hasExited;

public:

  SlobArticleRequestRunnable( SlobArticleRequest & r_,
                              QSemaphore & hasExited_ ): r( r_ ),
                                                         hasExited( hasExited_ )
  {}

  ~SlobArticleRequestRunnable()
  {
    hasExited.release();
  }

  virtual void run();
};

class SlobArticleRequest: public Dictionary::DataRequest
{
  friend class SlobArticleRequestRunnable;

  wstring word;
  vector< wstring > alts;
  SlobDictionary & dict;
  bool ignoreDiacritics;

  QAtomicInt isCancelled;
  QSemaphore hasExited;

public:

  SlobArticleRequest( wstring const & word_,
                      vector< wstring > const & alts_,
                      SlobDictionary & dict_, bool ignoreDiacritics_ ):
    word( word_ ), alts( alts_ ), dict( dict_ ), ignoreDiacritics( ignoreDiacritics_ )
  {
    QThreadPool::globalInstance()->start(
      new SlobArticleRequestRunnable( *this, hasExited ) );
  }

  void run(); // Run from another thread by DslArticleRequestRunnable

  virtual void cancel()
  {
    isCancelled.ref();
  }

  ~SlobArticleRequest()
  {
    isCancelled.ref();
    hasExited.acquire();
  }
};

void SlobArticleRequestRunnable::run()
{
  r.run();
}

void SlobArticleRequest::run()
{
  if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
  {
    finish();
    return;
  }

  vector< WordArticleLink > chain = dict.findArticles( word, ignoreDiacritics );

  for( unsigned x = 0; x < alts.size(); ++x )
  {
    /// Make an additional query for each alt

    vector< WordArticleLink > altChain = dict.findArticles( alts[ x ], ignoreDiacritics );

    chain.insert( chain.end(), altChain.begin(), altChain.end() );
  }

  multimap< wstring, pair< string, string > > mainArticles, alternateArticles;

  set< quint64 > articlesIncluded; // Some synonims make it that the articles
                                    // appear several times. We combat this
                                    // by only allowing them to appear once.

  wstring wordCaseFolded = Folding::applySimpleCaseOnly( word );
  if( ignoreDiacritics )
    wordCaseFolded = Folding::applyDiacriticsOnly( wordCaseFolded );

  for( unsigned x = 0; x < chain.size(); ++x )
  {
    if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
    {
      finish();
      return;
    }

    quint64 pos = dict.getArticlePos( chain[ x ].articleOffset ); // Several "articleOffset" values may refer to one article

    if ( articlesIncluded.find( pos ) != articlesIncluded.end() )
      continue; // We already have this article in the body.

    // Now grab that article

    string headword, articleText;

    headword = chain[ x ].word;
    try
    {
      dict.loadArticle( chain[ x ].articleOffset, articleText );
    }
    catch(...)
    {
    }

    // Ok. Now, does it go to main articles, or to alternate ones? We list
    // main ones first, and alternates after.

    // We do the case-folded comparison here.

    wstring headwordStripped =
      Folding::applySimpleCaseOnly( Utf8::decode( headword ) );
    if( ignoreDiacritics )
      headwordStripped = Folding::applyDiacriticsOnly( headwordStripped );

    multimap< wstring, pair< string, string > > & mapToUse =
      ( wordCaseFolded == headwordStripped ) ?
        mainArticles : alternateArticles;

    mapToUse.insert( pair< wstring, pair< string, string > >(
      Folding::applySimpleCaseOnly( Utf8::decode( headword ) ),
      pair< string, string >( headword, articleText ) ) );

    articlesIncluded.insert( pos );
  }

  if ( mainArticles.empty() && alternateArticles.empty() )
  {
    // No such word
    finish();
    return;
  }

  string result;

  multimap< wstring, pair< string, string > >::const_iterator i;

  for( i = mainArticles.begin(); i != mainArticles.end(); ++i )
  {
      result += "<div class=\"slobdict\"><h3 class=\"slobdict_headword\">";
      result += i->second.first;
      result += "</h3></div>";
      result += i->second.second;
  }

  for( i = alternateArticles.begin(); i != alternateArticles.end(); ++i )
  {
      result += "<div class=\"slobdict\"><h3 class=\"slobdict_headword\">";
      result += i->second.first;
      result += "</h3></div>";
      result += i->second.second;
  }

  Mutex::Lock _( dataMutex );

  data.resize( result.size() );

  memcpy( &data.front(), result.data(), result.size() );

  hasAnyData = true;

  finish();
}

sptr< Dictionary::DataRequest > SlobDictionary::getArticle( wstring const & word,
                                                            vector< wstring > const & alts,
                                                            wstring const &,
                                                            bool ignoreDiacritics )

{
  return new SlobArticleRequest( word, alts, *this, ignoreDiacritics );
}

//// SlobDictionary::getResource()

class SlobResourceRequest;

class SlobResourceRequestRunnable: public QRunnable
{
  SlobResourceRequest & r;
  QSemaphore & hasExited;

public:

  SlobResourceRequestRunnable( SlobResourceRequest & r_,
                               QSemaphore & hasExited_ ): r( r_ ),
                                                          hasExited( hasExited_ )
  {}

  ~SlobResourceRequestRunnable()
  {
    hasExited.release();
  }

  virtual void run();
};

class SlobResourceRequest: public Dictionary::DataRequest
{
  friend class SlobResourceRequestRunnable;

  SlobDictionary & dict;

  string resourceName;

  QAtomicInt isCancelled;
  QSemaphore hasExited;

public:

  SlobResourceRequest( SlobDictionary & dict_,
                       string const & resourceName_ ):
    dict( dict_ ),
    resourceName( resourceName_ )
  {
    QThreadPool::globalInstance()->start(
      new SlobResourceRequestRunnable( *this, hasExited ) );
  }

  void run(); // Run from another thread by ZimResourceRequestRunnable

  virtual void cancel()
  {
    isCancelled.ref();
  }

  ~SlobResourceRequest()
  {
    isCancelled.ref();
    hasExited.acquire();
  }
};

void SlobResourceRequestRunnable::run()
{
  r.run();
}

void SlobResourceRequest::run()
{
  // Some runnables linger enough that they are cancelled before they start
  if ( Utils::AtomicInt::loadAcquire( isCancelled ) )
  {
    finish();
    return;
  }

  try
  {
    string resource;
    dict.loadResource( resourceName, resource );
    if( resource.empty() )
      throw exNoResource();

    if( Filetype::isNameOfCSS( resourceName ) )
    {
      QString css = QString::fromUtf8( resource.data(), resource.size() );
      dict.isolateCSS( css, ".slobdict" );
      QByteArray bytes = css.toUtf8();

      Mutex::Lock _( dataMutex );
      data.resize( bytes.size() );
      memcpy( &data.front(), bytes.constData(), bytes.size() );
    }
    else
    if ( Filetype::isNameOfTiff( resourceName ) )
    {
      // Convert it

      dataMutex.lock();

      QImage img = QImage::fromData( reinterpret_cast< const uchar * >( resource.data() ), resource.size() );

#ifdef MAKE_EXTRA_TIFF_HANDLER
      if( img.isNull() )
        GdTiff::tiffToQImage( &data.front(), data.size(), img );
#endif

      dataMutex.unlock();

      if ( !img.isNull() )
      {
        // Managed to load -- now store it back as BMP

        QByteArray ba;
        QBuffer buffer( &ba );
        buffer.open( QIODevice::WriteOnly );
        img.save( &buffer, "BMP" );

        Mutex::Lock _( dataMutex );

        data.resize( buffer.size() );

        memcpy( &data.front(), buffer.data(), data.size() );
      }
    }
    else
    {
      Mutex::Lock _( dataMutex );
      data.resize( resource.size() );
      memcpy( &data.front(), resource.data(), data.size() );
    }

    Mutex::Lock _( dataMutex );
    hasAnyData = true;
  }
  catch( std::exception &ex )
  {
    gdWarning( "SLOB: Failed loading resource \"%s\" from \"%s\", reason: %s\n",
               resourceName.c_str(), dict.getName().c_str(), ex.what() );
    // Resource not loaded -- we don't set the hasAnyData flag then
  }

  finish();
}

sptr< Dictionary::DataRequest > SlobDictionary::getResource( string const & name )

{
  return new SlobResourceRequest( *this, name );
}


vector< sptr< Dictionary::Class > > makeDictionaries(
                                      vector< string > const & fileNames,
                                      string const & indicesDir,
                                      Dictionary::Initializing & initializing,
                                      unsigned maxHeadwordsToExpand )

{
  vector< sptr< Dictionary::Class > > dictionaries;

  for( vector< string >::const_iterator i = fileNames.begin(); i != fileNames.end();
       ++i )
  {
      // Skip files with the extensions different to .slob to speed up the
      // scanning

      QString firstName = QDir::fromNativeSeparators( FsEncoding::decode( i->c_str() ) );
      if( !firstName.endsWith( ".slob") )
        continue;

      // Got the file -- check if we need to rebuid the index

      vector< string > dictFiles( 1, *i );

      string dictId = Dictionary::makeDictionaryId( dictFiles );

      string indexFile = indicesDir + dictId;

      try
      {
        if ( Dictionary::needToRebuildIndex( dictFiles, indexFile ) ||
             indexIsOldOrBad( indexFile ) )
        {
          SlobFile sf;

          gdDebug( "Slob: Building the index for dictionary: %s\n", i->c_str() );

          sf.open( firstName );

          initializing.indexingDictionary( sf.getDictionaryName().toUtf8().constData() );

          File::Class idx( indexFile, "wb" );
          IdxHeader idxHeader;
          memset( &idxHeader, 0, sizeof( idxHeader ) );

          // We write a dummy header first. At the end of the process the header
          // will be rewritten with the right values.

          idx.write( idxHeader );

          RefEntry refEntry;
          quint32 entries = sf.getRefsCount();

          IndexedWords indexedWords, indexedResources;

          set< quint64 > articlesPos;
          quint32 articleCount = 0, wordCount = 0;

          SlobFile::RefOffsetsVector const & offsets = sf.getSortedRefOffsets();

          for( quint32 i = 0; i < entries; i++ )
          {
            sf.getRefEntryAtOffset( offsets[ i ].first, refEntry );

            quint8 type = sf.getItem( refEntry, 0 );

            QString contentType = sf.getContentType( type );

            if( contentType.startsWith( "text/html", Qt::CaseInsensitive )
                || contentType.startsWith( "text/plain", Qt::CaseInsensitive ) )
            {
              //Article
              if( maxHeadwordsToExpand && entries > maxHeadwordsToExpand )
                indexedWords.addSingleWord( gd::toWString( refEntry.key ), offsets[ i ].second );
              else
                indexedWords.addWord( gd::toWString( refEntry.key ), offsets[ i ].second );

              wordCount += 1;

              quint64 pos = ( ( (quint64)refEntry.itemIndex ) << 32 ) + refEntry.binIndex;
              if( articlesPos.find( pos ) == articlesPos.end() )
              {
                articleCount += 1;
                articlesPos.insert( pos );
              }
            }
            else
            {
              indexedResources.addSingleWord( gd::toWString( refEntry.key ), offsets[ i ].second );
            }
          }
          sf.clearRefOffsets();

          // Build index

          {
            IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );

            idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
            idxHeader.indexRootOffset = idxInfo.rootOffset;

            indexedWords.clear(); // Release memory -- no need for this data
          }

          {
            IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedResources, idx );

            idxHeader.resourceIndexBtreeMaxElements = idxInfo.btreeMaxElements;
            idxHeader.resourceIndexRootOffset = idxInfo.rootOffset;

            indexedResources.clear(); // Release memory -- no need for this data
          }

          idxHeader.signature = Signature;
          idxHeader.formatVersion = CurrentFormatVersion;

          idxHeader.articleCount = articleCount;
          idxHeader.wordCount = wordCount;

          QPair<quint32,quint32> langs =
              LangCoder::findIdsForFilename( QString::fromStdString( dictFiles[ 0 ] ) );

          idxHeader.langFrom = langs.first;
          idxHeader.langTo = langs.second;

          idx.rewind();

          idx.write( &idxHeader, sizeof( idxHeader ) );

        }
        dictionaries.push_back( new SlobDictionary( dictId,
                                                    indexFile,
                                                    dictFiles ) );
      }
      catch( std::exception & e )
      {
        gdWarning( "Slob dictionary initializing failed: %s, error: %s\n",
                   i->c_str(), e.what() );
        continue;
      }
      catch( ... )
      {
        qWarning( "Slob dictionary initializing failed\n" );
        continue;
      }
  }
  return dictionaries;
}

} // namespace Slob

#endif