* A lot of changes aimed to make lookups faster and to reduce startup times.

2024-11-27 19:24:08 +00:00 · 2009-04-14 16:35:47 +00:00 · 2009-04-14 16:35:47 +00:00 · 32fe5dff9e
parent 68c5c73b37
commit 32fe5dff9e
10 changed files with 434 additions and 177 deletions
--- a/src/bgl.cc
+++ b/src/bgl.cc
@ -26,6 +26,7 @@ using std::pair;

 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;

 namespace
 {
@ -49,7 +50,8 @@ namespace
    uint32_t wordCount; // Total number of words, for informative purposes only
    /// Add more fields here, like name, description, author and such.
    uint32_t chunksOffset; // The offset to chunks' storage
-    uint32_t indexOffset; // The offset of the index in the file.
+    uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+    uint32_t indexRootOffset;
    uint32_t resourceListOffset; // The offset of the list of resources
    uint32_t resourcesCount; // Number of resources stored
  } __attribute__((packed));
@ -239,9 +241,9 @@ namespace

    // Initialize the index

-    idx.seek( idxHeader.indexOffset );
-
-    openIndex( idx, idxMutex );
+    openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+               idx, idxMutex );
  }


@ -739,7 +741,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

      // Good. Now build the index

-      idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+      IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+      idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+      idxHeader.indexRootOffset = idxInfo.rootOffset;

      // Save the resource's list.

--- a/src/btreeidx.cc
+++ b/src/btreeidx.cc
@ -42,19 +42,21 @@ enum

 BtreeDictionary::BtreeDictionary( string const & id,
                                  vector< string > const & dictionaryFiles ):
-  Dictionary::Class( id, dictionaryFiles ), idxFile( 0 )
+  Dictionary::Class( id, dictionaryFiles ), idxFile( 0 ), rootNodeLoaded( false )
 {
 }

-void BtreeDictionary::openIndex( File::Class & file, Mutex & mutex )
+void BtreeDictionary::openIndex( IndexInfo const & indexInfo,
+                                 File::Class & file, Mutex & mutex )
 {
-  Mutex::Lock _( mutex );
-  
-  indexNodeSize = file.read< uint32_t >();
-  rootOffset = file.read< uint32_t >();
+  indexNodeSize = indexInfo.btreeMaxElements;
+  rootOffset = indexInfo.rootOffset;

  idxFile = &file;
  idxFileMutex = &mutex;
+
+  rootNodeLoaded = false;
+  rootNode.clear();
 }

 vector< WordArticleLink > BtreeDictionary::findArticles( wstring const & str )
@ -68,8 +70,11 @@ vector< WordArticleLink > BtreeDictionary::findArticles( wstring const & str )
  vector< char > leaf;
  uint32_t nextLeaf;

+  char const * leafEnd;
+
  char const * chainOffset = findChainOffsetExactOrPrefix( folded, exactMatch,
-                                                           leaf, nextLeaf );
+                                                           leaf, nextLeaf,
+                                                           leafEnd );

  if ( chainOffset && exactMatch )
  {
@ -157,9 +162,11 @@ void BtreeWordSearchRequest::run()

  vector< char > leaf;
  uint32_t nextLeaf;
+  char const * leafEnd;

  char const * chainOffset = dict.findChainOffsetExactOrPrefix( folded, exactMatch,
-                                                                leaf, nextLeaf );
+                                                                leaf, nextLeaf,
+                                                                leafEnd );

  if ( chainOffset )
  for( ; ; )
@ -198,7 +205,7 @@ void BtreeWordSearchRequest::run()

    // Fetch new leaf if we're out of chains here

-    if ( chainOffset > &leaf.back() )
+    if ( chainOffset >= leafEnd )
    {
      // We're past the current leaf, fetch the next one

@ -209,6 +216,8 @@ void BtreeWordSearchRequest::run()
        Mutex::Lock _( *dict.idxFileMutex );
        
        dict.readNode( nextLeaf, leaf );
+        leafEnd = &leaf.front() + leaf.size();
+
        nextLeaf = dict.idxFile->read< uint32_t >();
        chainOffset = &leaf.front() + sizeof( uint32_t );

@ -274,8 +283,9 @@ void BtreeDictionary::readNode( uint32_t offset, vector< char > & out )

 char const * BtreeDictionary::findChainOffsetExactOrPrefix( wstring const & target,
                                                            bool & exactMatch,
-                                                            vector< char > & leaf,
-                                                            uint32_t & nextLeaf )
+                                                            vector< char > & extLeaf,
+                                                            uint32_t & nextLeaf,
+                                                            char const * & leafEnd )
 {
  if ( !idxFile )
    throw exIndexWasNotOpened();
@ -294,14 +304,21 @@ char const * BtreeDictionary::findChainOffsetExactOrPrefix( wstring const & targ

  uint32_t currentNodeOffset = rootOffset;

+  if ( !rootNodeLoaded )
+  {
+    // Time to load our root node. We do it only once, at the first request.
+    readNode( rootOffset, rootNode );
+    rootNodeLoaded = true;
+  }
+
+  char const * leaf = &rootNode.front();
+  leafEnd = leaf + rootNode.size();
+
  for( ; ; )
  {
-    //printf( "reading node at %x\n", currentNodeOffset );
-    readNode( currentNodeOffset, leaf );
-
    // Is it a leaf or a node?

-    uint32_t leafEntries = *(uint32_t *)&leaf.front();
+    uint32_t leafEntries = *(uint32_t *)leaf;

    if ( leafEntries == 0xffffFFFF )
    {
@ -309,124 +326,266 @@ char const * BtreeDictionary::findChainOffsetExactOrPrefix( wstring const & targ

      //printf( "=>a node\n" );

-      uint32_t const * offsets = (uint32_t *)&leaf.front() + 1;
+      uint32_t const * offsets = (uint32_t *)leaf + 1;

-      char const * ptr = &leaf.front() + sizeof( uint32_t ) +
+      char const * ptr = leaf + sizeof( uint32_t ) +
                         ( indexNodeSize + 1 ) * sizeof( uint32_t );

-      unsigned entry;
+      // ptr now points to a span of zero-separated strings, up to leafEnd.
+      // We find our match using a binary search.

-      for( entry = 0; entry < indexNodeSize; ++entry )
-      {
-        //printf( "checking node agaist word %s\n", ptr );
-        size_t wordSize = strlen( ptr );
+      char const * closestString;

+      int compareResult;
+
+      char const * window = ptr;
+      unsigned windowSize = leafEnd - ptr;
+
+      for( ; ; )
+      {  
+        // We boldly shoot in the middle of the whole mess, and then adjust
+        // to the beginning of the string that we've hit.
+        char const * testPoint = window + windowSize/2;
+  
+        closestString = testPoint;
+  
+        while( closestString > ptr && closestString[ -1 ] )
+          --closestString;
+  
+        size_t wordSize = strlen( closestString );
+  
        if ( wcharBuffer.size() <= wordSize )
          wcharBuffer.resize( wordSize + 1 );
-
-        long result = Utf8::decode( ptr, wordSize, &wcharBuffer.front() );
-
+  
+        long result = Utf8::decode( closestString, wordSize, &wcharBuffer.front() );
+  
        if ( result < 0 )
-          throw Utf8::exCantDecode( ptr );
-
+          throw Utf8::exCantDecode( closestString );
+  
        wcharBuffer[ result ] = 0;

-        int compareResult = target.compare( &wcharBuffer.front() );
+        //printf( "Checking against %s\n", closestString );

+        compareResult = target.compare( &wcharBuffer.front() );
+  
        if ( !compareResult )
        {
-          // The target string matches the current one.
-          // Go to the right, since it's there where we store such results.
-          currentNodeOffset = offsets[ entry + 1 ];
+          // The target string matches the current one. Finish the search.
          break;
        }
        if ( compareResult < 0 )
        {
          // The target string is smaller than the current one.
          // Go to the left.
-          currentNodeOffset = offsets[ entry ];
-          break;
-        }
+          windowSize = closestString - window;

-        ptr += wordSize + 1;
+          if ( !windowSize )
+            break;
+        }
+        else
+        {
+          // The target string is larger than the current one.
+          // Go to the right.
+          windowSize -= ( closestString - window )  + wordSize + 1;
+          window = closestString + wordSize + 1;
+
+          if ( !windowSize )
+            break;
+        }
      }

-      if ( entry == indexNodeSize )
+      #if 0
+      printf( "The winner is %s, compareResult = %d\n", closestString, compareResult );
+
+      if ( closestString != ptr )
      {
-        // We iterated through all entries, but our string is larger than
-        // all of them. Go the the rightmost node.
+        char const * left = closestString -1;
+
+        while( left != ptr && left[ -1 ] )
+          --left;
+
+        printf( "To the left: %s\n", left );
+      }
+      else
+        printf( "To the lest -- nothing\n" );
+
+      char const * right = closestString + strlen( closestString ) + 1;
+
+      if ( right != leafEnd )
+      {
+        printf( "To the right: %s\n", right );
+      }
+      else
+        printf( "To the right -- nothing\n" );
+      #endif
+
+      // Now, whatever the outcome (compareResult) is, we need to find
+      // the entry number of the closestString string.
+       
+      unsigned entry = 0;
+
+      for( char const * next = ptr; next != closestString;
+           next += strlen( next ) + 1, ++entry ) ;
+
+      // Ok, now check the outcome
+
+      if ( !compareResult )
+      {
+        // The target string matches the one found.
+        // Go to the right, since it's there where we store such results.
+        currentNodeOffset = offsets[ entry + 1 ];
+      }
+      if ( compareResult < 0 )
+      {
+        // The target string is smaller than the one found.
+        // Go to the left.
        currentNodeOffset = offsets[ entry ];
      }
+      else
+      {
+        // The target string is larger than the one found.
+        // Go to the right.
+        currentNodeOffset = offsets[ entry + 1 ];
+      }
+
+      //printf( "reading node at %x\n", currentNodeOffset );
+      readNode( currentNodeOffset, extLeaf );
+      leaf = &extLeaf.front();
+      leafEnd = leaf + extLeaf.size();
    }
    else
    {
      //printf( "=>a leaf\n" );
      // A leaf
-      nextLeaf = idxFile->read< uint32_t >();

-      // Iterate through chains until we find one that matches
+      // If this leaf is the root, there's no next leaf, it just can't be.
+      // We do this check because the file's position indicator just won't
+      // be in the right place for root node anyway, since we precache it.
+      nextLeaf = ( currentNodeOffset != rootOffset ? idxFile->read< uint32_t >() : 0 );

-      char const * ptr = &leaf.front() + sizeof( uint32_t );
+      if ( !leafEntries )
+      {
+        // Empty leaf? This is only possible for entirely empty trees.
+        if ( currentNodeOffset != rootOffset )
+          throw exCorruptedChainData();
+        else
+          return 0; // No match
+      }
+
+      // Build an array containing all chain pointers
+      char const * ptr = leaf + sizeof( uint32_t );

      uint32_t chainSize;

-      while( leafEntries-- )
+      vector< char const * > chainOffsets( leafEntries );
+
      {
-        memcpy( &chainSize, ptr, sizeof( uint32_t ) );
-        ptr += sizeof( uint32_t );
+        char const ** nextOffset = &chainOffsets.front();

-        if( chainSize )
+        while( leafEntries-- )
        {
-          size_t wordSize = strlen( ptr );
+          *nextOffset++ = ptr;

-          if ( wcharBuffer.size() <= wordSize )
-            wcharBuffer.resize( wordSize + 1 );
+          memcpy( &chainSize, ptr, sizeof( uint32_t ) );

-          //printf( "checking agaist word %s, left = %u\n", ptr, leafEntries );
+          //printf( "%s + %s\n", ptr + sizeof( uint32_t ), ptr + sizeof( uint32_t ) + strlen( ptr + sizeof( uint32_t ) ) + 1 );

-          long result = Utf8::decode( ptr, wordSize, &wcharBuffer.front() );
-
-          if ( result < 0 )
-            throw Utf8::exCantDecode( ptr );
-
-          wcharBuffer[ result ] = 0;
-
-          wstring foldedWord = Folding::apply( &wcharBuffer.front() );
-
-          int compareResult = target.compare( foldedWord );
-
-          if ( !compareResult )
-          {
-            // Exact match -- return and be done
-            exactMatch = true;
-
-            return ptr - sizeof( uint32_t );
-          }
-          else
-          if ( compareResult < 0 )
-          {
-            // The target string is smaller than the current one.
-            // No point in travering further, return this result.
-            
-            return ptr - sizeof( uint32_t );
-          }
-          ptr += chainSize;
+          ptr += sizeof( uint32_t ) + chainSize;
        }
      }

-      // Well, our target is larger than all the chains here. This would mean
-      // that the next leaf is the right one.
+      // Now do a binary search in it, aiming to find where our target
+      // string lands.

-      if ( nextLeaf )
+      char const ** window = &chainOffsets.front();
+      unsigned windowSize = chainOffsets.size();
+
+      for( ; ; )
      {
-        readNode( nextLeaf, leaf );
+        //printf( "window = %u, ws = %u\n", window - &chainOffsets.front(), windowSize );

-        nextLeaf = idxFile->read< uint32_t >();
+        char const ** chainToCheck = window + windowSize/2;
+        ptr = *chainToCheck;
+  
+        memcpy( &chainSize, ptr, sizeof( uint32_t ) );
+        ptr += sizeof( uint32_t );
+  
+        size_t wordSize = strlen( ptr );
+  
+        if ( wcharBuffer.size() <= wordSize )
+          wcharBuffer.resize( wordSize + 1 );
+  
+        //printf( "checking agaist word %s, left = %u\n", ptr, leafEntries );
+  
+        long result = Utf8::decode( ptr, wordSize, &wcharBuffer.front() );
+  
+        if ( result < 0 )
+          throw Utf8::exCantDecode( ptr );
+  
+        wcharBuffer[ result ] = 0;
+  
+        wstring foldedWord = Folding::apply( &wcharBuffer.front() );
+  
+        int compareResult = target.compare( foldedWord );
+  
+        if ( !compareResult )
+        {
+          // Exact match -- return and be done
+          exactMatch = true;
+  
+          return ptr - sizeof( uint32_t );
+        }
+        else
+        if ( compareResult < 0 )
+        {
+          // The target string is smaller than the current one.
+          // Go to the first half
+           
+          windowSize /= 2;

-        return &leaf.front() + sizeof( uint32_t );
+          if ( !windowSize )
+          {
+            // That finishes our search. Since our target string
+            // landed before the last tested chain, we return a possible
+            // prefix match against that chain.
+            return ptr - sizeof( uint32_t );
+          }
+        }
+        else
+        {
+          // The target string is larger than the current one.
+          // Go to the second half
+
+          windowSize -= windowSize/2 + 1;
+
+          if ( !windowSize )
+          {
+            // That finishes our search. Since our target string
+            // landed after the last tested chain, we return the next
+            // chain. If there's no next chain in this leaf, this
+            // would mean the first element in the next leaf.
+            if ( chainToCheck == &chainOffsets.back() )
+            {
+              if ( nextLeaf )
+              {
+                readNode( nextLeaf, extLeaf );
+  
+                leafEnd = &extLeaf.front() + extLeaf.size();
+  
+                nextLeaf = idxFile->read< uint32_t >();
+  
+                return &extLeaf.front() + sizeof( uint32_t );
+              }
+              else
+                return 0; // This was the last leaf
+            }
+            else
+              return chainToCheck[ 1 ];
+          }
+
+          window = chainToCheck + 1;
+        }
      }
-      else
-        return 0; // This was the last leaf
    }
  }
 }
@ -764,7 +923,7 @@ void IndexedWords::addWord( wstring const & word, uint32_t articleOffset )
  }
 }

-uint32_t buildIndex( IndexedWords const & indexedWords, File::Class & file )
+IndexInfo buildIndex( IndexedWords const & indexedWords, File::Class & file )
 {
  size_t indexSize = indexedWords.size();
  IndexedWords::const_iterator nextIndex = indexedWords.begin();
@ -798,17 +957,7 @@ uint32_t buildIndex( IndexedWords const & indexedWords, File::Class & file )
                                        file, btreeMaxElements,
                                        lastLeafOffset );

-  // We need to save btreeMaxElements. For simplicity, we just save it here
-  // along with root offset, and then return that record's offset as the
-  // offset of the index itself.
-
-  uint32_t indexOffset = file.tell();
-
-  file.write( (uint32_t) btreeMaxElements );
-  file.write( (uint32_t) rootOffset );
-
-  return indexOffset;
+  return IndexInfo( btreeMaxElements, rootOffset );
 }

-
 }
--- a/src/btreeidx.hh
+++ b/src/btreeidx.hh
@ -25,7 +25,7 @@ enum
  /// This is to be bumped up each time the internal format changes.
  /// The value isn't used here by itself, it is supposed to be added
  /// to each dictionary's internal format version.
-  FormatVersion = 2
+  FormatVersion = 3
 };

 // These exceptions which might be thrown during the index traversal
@ -49,6 +49,16 @@ struct WordArticleLink
  {}
 };

+/// Information needed to open the index
+struct IndexInfo
+{
+  uint32_t btreeMaxElements, rootOffset;
+
+  IndexInfo( uint32_t btreeMaxElements_, uint32_t rootOffset_ ):
+    btreeMaxElements( btreeMaxElements_ ), rootOffset( rootOffset_ )
+  {}
+};
+
 class BtreeWordSearchRequest;

 /// A base for the dictionary that utilizes a btree index build using
@ -67,11 +77,10 @@ public:

 protected:

-  /// Opens the index. The file must be positioned at the offset previously
-  /// returned by buildIndex(). The file reference is saved to be used for
+  /// Opens the index. The file reference is saved to be used for
  /// subsequent lookups.
  /// The mutex is the one to be locked when working with the file.
-  void openIndex( File::Class &, Mutex & );
+  void openIndex( IndexInfo const &, File::Class &, Mutex & );

  /// Finds articles that match the given string. A case-insensitive search
  /// is performed.
@ -83,6 +92,9 @@ private:
  File::Class * idxFile;
  uint32_t indexNodeSize;
  uint32_t rootOffset;
+  bool rootNodeLoaded;
+  vector< char > rootNode; // We load the root node here and keep it at all
+                           // times, since all searches always start with it.

  /// Finds the offset in the btree leaf for the given word, either matching
  /// by an exact match, or by finding the smallest entry that might match
@ -91,10 +103,16 @@ private:
  /// to true when an exact match is located, and to false otherwise.
  /// The located leaf is loaded to 'leaf', and the pointer to the next
  /// leaf is saved to 'nextLeaf'.
+  /// However, due to root node being permanently cached, the 'leaf' passed
+  /// might not get used at all if the root node was the terminal one. In that
+  /// case, the returned pointer wouldn't belong to 'leaf' at all. To that end,
+  /// the leafEnd pointer always holds the pointer to the first byte outside
+  /// the node data.
  char const * findChainOffsetExactOrPrefix( wstring const & target,
                                             bool & exactMatch,
                                             vector< char > & leaf,
-                                             uint32_t & nextLeaf );
+                                             uint32_t & nextLeaf,
+                                             char const * & leafEnd );

  /// Reads a node or leaf at the given offset. Just uncompresses its data
  /// to the given vector and does nothing more.
@ -128,10 +146,10 @@ struct IndexedWords: public map< wstring, vector< WordArticleLink > >
  void addWord( wstring const & word, uint32_t articleOffset );
 };

-/// Builds the index, as a compressed btree. Returns offset to its root.
+/// Builds the index, as a compressed btree. Returns IndexInfo.
 /// All the data is stored to the given file, beginning from its current
 /// position.
-uint32_t buildIndex( IndexedWords const &, File::Class & file );
+IndexInfo buildIndex( IndexedWords const &, File::Class & file );

 }

--- a/src/chunkedstorage.cc
+++ b/src/chunkedstorage.cc
@ -15,6 +15,17 @@ enum
 Writer::Writer( File::Class & f ):
  file( f ), chunkStarted( false ), bufferUsed( 0 )
 {
+  // Create a scratchpad at the beginning of the file. We use it to write the
+  // chunk table if it fits, in order to save some seek time.
+
+  char zero[ 4096 ];
+
+  memset( zero, 0, sizeof( zero ) );
+
+  scratchPadOffset = file.tell();
+  scratchPadSize = sizeof( zero );
+
+  file.write( zero, sizeof( zero ) );
 }

 uint32_t Writer::startNewBlock()
@ -77,10 +88,25 @@ uint32_t Writer::finish()
  if ( bufferUsed || chunkStarted )
    saveCurrentChunk();

+  bool useScratchPad = false;
+  uint32_t savedOffset = 0;
+
+  if ( scratchPadSize >= offsets.size() * sizeof( uint32_t ) + sizeof( uint32_t ) )
+  {
+    useScratchPad = true;
+    savedOffset = file.tell();
+    file.seek( scratchPadOffset );
+  }
+
  uint32_t offset = file.tell();

  file.write( (uint32_t) offsets.size() );
-  file.write( &offsets.front(), offsets.size() * sizeof( uint32_t ) );
+
+  if ( offsets.size() )
+    file.write( &offsets.front(), offsets.size() * sizeof( uint32_t ) );
+
+  if ( useScratchPad )
+    file.seek( savedOffset );

  offsets.clear();
  chunkStarted = false;
--- a/src/chunkedstorage.hh
+++ b/src/chunkedstorage.hh
@ -29,6 +29,7 @@ class Writer
 {
  vector< uint32_t > offsets;
  File::Class & file;
+  size_t scratchPadOffset, scratchPadSize;

 public:

--- a/src/dictdfiles.cc
+++ b/src/dictdfiles.cc
@ -29,6 +29,7 @@ using std::list;

 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;

 namespace {

@ -48,7 +49,8 @@ struct IdxHeader
  uint32_t signature; // First comes the signature, DCDX
  uint32_t formatVersion; // File format version (CurrentFormatVersion)
  uint32_t wordCount; // Total number of words
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));

 bool indexIsOldOrBad( string const & indexFile )
@ -109,9 +111,9 @@ DictdDictionary::DictdDictionary( string const & id,

  // Initialize the index

-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }

 DictdDictionary::~DictdDictionary()
@ -380,7 +382,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

        // Build index

-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;

        // That concludes it. Update the header.

--- a/src/dsl.cc
+++ b/src/dsl.cc
@ -47,6 +47,7 @@ using std::list;

 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;

 namespace {

@ -66,7 +67,8 @@ struct IdxHeader
  uint32_t chunksOffset; // The offset to chunks' storage
  uint32_t hasAbrv; // Non-zero means file has abrvs at abrvAddress
  uint32_t abrvAddress; // Address of abrv map in the chunked storage
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));

 bool indexIsOldOrBad( string const & indexFile )
@ -201,9 +203,9 @@ DslDictionary::DslDictionary( string const & id,

  // Initialize the index

-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );

  // Open a resource zip file, if there's one
  resourceZip = zip_open( ( getDictionaryFilenames()[ 0 ] + ".files.zip" ).c_str(), 0, 0 );
@ -1184,7 +1186,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

        // Build index

-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;

        // That concludes it. Update the header.

--- a/src/lsa.cc
+++ b/src/lsa.cc
@ -23,6 +23,7 @@ using std::multimap;
 using std::set;
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;

 namespace {

@ -43,7 +44,8 @@ struct IdxHeader
  uint32_t formatVersion; // File format version, currently 1.
  uint32_t soundsCount; // Total number of sounds, for informative purposes only
  uint32_t vorbisOffset; // Offset of the vorbis file which contains all snds
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));

 bool indexIsOldOrBad( string const & indexFile )
@ -174,9 +176,9 @@ LsaDictionary::LsaDictionary( string const & id,
 {
  // Initialize the index

-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }

 sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
@ -546,7 +548,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

        // Build the index

-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;

         // That concludes it. Update the header.

--- a/src/sounddir.cc
+++ b/src/sounddir.cc
@ -23,6 +23,7 @@ using std::multimap;
 using std::set;
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;

 namespace {

@ -38,7 +39,8 @@ struct IdxHeader
  uint32_t formatVersion; // File format version, is to be CurrentFormatVersion
  uint32_t soundsCount; // Total number of sounds, for informative purposes only
  uint32_t chunksOffset; // The offset to chunks' storage
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));

 bool indexIsOldOrBad( string const & indexFile )
@ -98,9 +100,9 @@ SoundDirDictionary::SoundDirDictionary( string const & id,
 {
  // Initialize the index

-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }

 sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
@ -365,7 +367,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const &
      
      // Build the index

-      idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+      IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+      idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+      idxHeader.indexRootOffset = idxInfo.rootOffset;

       // That concludes it. Update the header.

--- a/src/stardict.cc
+++ b/src/stardict.cc
@ -33,6 +33,7 @@ using std::wstring;

 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;

 namespace {

@ -65,7 +66,7 @@ struct Ifo
 enum
 {
  Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
-  CurrentFormatVersion = 4 + BtreeIndexing::FormatVersion + Folding::Version
+  CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
 };

 struct IdxHeader
@ -73,7 +74,12 @@ struct IdxHeader
  uint32_t signature; // First comes the signature, SIDX
  uint32_t formatVersion; // File format version (CurrentFormatVersion)
  uint32_t chunksOffset; // The offset to chunks' storage
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
+  uint32_t wordCount; // Saved from Ifo::wordcount
+  uint32_t synWordCount; // Saved from Ifo::synwordcount
+  uint32_t bookNameSize; // Book name's length. Used to read it back later.
+  uint32_t sameTypeSequenceSize; // That string's size. Used to read it back later.
 } __attribute__((packed));

 bool indexIsOldOrBad( string const & indexFile )
@ -90,32 +96,32 @@ bool indexIsOldOrBad( string const & indexFile )

 class StardictDictionary: public BtreeIndexing::BtreeDictionary
 {
-  Ifo ifo;
  Mutex idxMutex;
  File::Class idx;
  IdxHeader idxHeader;
+  string bookName;
+  string sameTypeSequence;
  ChunkedStorage::Reader chunks;
  dictData * dz;

 public:

  StardictDictionary( string const & id, string const & indexFile,
-                      vector< string > const & dictionaryFiles,
-                      Ifo const & );
+                      vector< string > const & dictionaryFiles );

  ~StardictDictionary();

  virtual string getName() throw()
-  { return ifo.bookname; }
+  { return bookName; }

  virtual map< Dictionary::Property, string > getProperties() throw()
  { return map< Dictionary::Property, string >(); }

  virtual unsigned long getArticleCount() throw()
-  { return ifo.wordcount; }
+  { return idxHeader.wordCount; }

  virtual unsigned long getWordCount() throw()
-  { return ifo.wordcount + ifo.synwordcount; }
+  { return idxHeader.wordCount + idxHeader.synWordCount; }

  virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
    throw( std::exception );
@ -136,16 +142,18 @@ private:
  void loadArticle(  uint32_t address,
                     string & headword,
                     string & articleText );
+
+  string loadString( size_t size );
 };

 StardictDictionary::StardictDictionary( string const & id,
                                        string const & indexFile,
-                                        vector< string > const & dictionaryFiles,
-                                        Ifo const & ifo_ ):
+                                        vector< string > const & dictionaryFiles ):
  BtreeDictionary( id, dictionaryFiles ),
-  ifo( ifo_ ),
  idx( indexFile, "rb" ),
  idxHeader( idx.read< IdxHeader >() ),
+  bookName( loadString( idxHeader.bookNameSize ) ),
+  sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
  chunks( idx, idxHeader.chunksOffset )
 {
  // Open the .dict file
@ -157,9 +165,9 @@ StardictDictionary::StardictDictionary( string const & id,

  // Initialize the index

-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }

 StardictDictionary::~StardictDictionary()
@ -168,6 +176,15 @@ StardictDictionary::~StardictDictionary()
    dict_data_close( dz );
 }

+/// Reads the next `size` bytes from the index file (via idx.read, no seek is
+/// performed here) and returns them as a string. Returns an empty string
+/// without touching the file when `size` is zero.
+string StardictDictionary::loadString( size_t size )
+{
+  // A zero-length field is a legitimate input (callers pass header-stored
+  // sizes, and an empty sameTypeSequence is handled elsewhere). Calling
+  // front() on an empty vector is undefined behaviour, so bail out early.
+  if ( !size )
+    return string();
+
+  vector< char > data( size );
+
+  idx.read( &data.front(), data.size() );
+
+  return string( &data.front(), data.size() );
+}
+
 void StardictDictionary::getArticleProps( uint32_t articleAddress,
                                          string & headword,
                                          uint32_t & offset, uint32_t & size )
@ -252,14 +269,14 @@ void StardictDictionary::loadArticle( uint32_t address,

  char * ptr = articleBody;

-  if ( ifo.sametypesequence.size() )
+  if ( sameTypeSequence.size() )
  {
    /// The sequence is known, it's not stored in the article itself
-    for( unsigned seq = 0; seq < ifo.sametypesequence.size(); ++seq )
+    for( unsigned seq = 0; seq < sameTypeSequence.size(); ++seq )
    {
      // Last entry doesn't have size info -- it is inferred from
      // the bytes left
-      bool entrySizeKnown = ( seq == ifo.sametypesequence.size() - 1 );
+      bool entrySizeKnown = ( seq == sameTypeSequence.size() - 1 );

      uint32_t entrySize;

@ -272,7 +289,7 @@ void StardictDictionary::loadArticle( uint32_t address,
        break;
      }

-      char type = ifo.sametypesequence[ seq ];
+      char type = sameTypeSequence[ seq ];

      if ( islower( type ) )
      {
@ -610,8 +627,7 @@ static bool tryPossibleName( string const & name, string & copyTo )
 }

 static void findCorrespondingFiles( string const & ifo,
-                                    string & idx, string & dict, string & syn,
-                                    bool needSyn )
+                                    string & idx, string & dict, string & syn )
 {
  string base( ifo, 0, ifo.size() - 3 );

@ -633,15 +649,15 @@ static void findCorrespondingFiles( string const & ifo,
      ) )
    throw exNoDictFile( ifo );

-  if ( needSyn && !(
-                     tryPossibleName( base + "syn", syn ) ||
-                     tryPossibleName( base + "syn.gz", syn ) ||
-                     tryPossibleName( base + "syn.dz", syn ) ||
-                     tryPossibleName( base + "SYN", syn ) ||
-                     tryPossibleName( base + "SYN.GZ", syn ) ||
-                     tryPossibleName( base + "SYN.DZ", syn )
+  if ( !(
+         tryPossibleName( base + "syn", syn ) ||
+         tryPossibleName( base + "syn.gz", syn ) ||
+         tryPossibleName( base + "syn.dz", syn ) ||
+         tryPossibleName( base + "SYN", syn ) ||
+         tryPossibleName( base + "SYN.GZ", syn ) ||
+         tryPossibleName( base + "SYN.DZ", syn )
     ) )
-    throw exNoSynFile( ifo );
+    syn.clear();
 }

 static void handleIdxSynFile( string const & fileName,
@ -764,30 +780,16 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

    try
    {
-      File::Class ifoFile( *i, "r" );
-
-      Ifo ifo( ifoFile );
-
-      if ( ifo.idxoffsetbits == 64 )
-        throw ex64BitsNotSupported();
-
-      if ( ifo.dicttype.size() )
-        throw exDicttypeNotSupported();
-
-      printf( "bookname = %s\n", ifo.bookname.c_str() );
-      printf( "wordcount = %u\n", ifo.wordcount );
-
      vector< string > dictFiles( 1, *i );

      string idxFileName, dictFileName, synFileName;

-      findCorrespondingFiles( *i, idxFileName, dictFileName, synFileName,
-                              ifo.synwordcount );
+      findCorrespondingFiles( *i, idxFileName, dictFileName, synFileName );

      dictFiles.push_back( idxFileName );
      dictFiles.push_back( dictFileName );

-      if ( ifo.synwordcount )
+      if ( synFileName.size() )
        dictFiles.push_back( synFileName );

      string dictId = Dictionary::makeDictionaryId( dictFiles );
@ -798,6 +800,33 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
           indexIsOldOrBad( indexFile ) )
      {
        // Building the index
+
+        File::Class ifoFile( *i, "r" );
+
+        Ifo ifo( ifoFile );
+
+        if ( ifo.idxoffsetbits == 64 )
+          throw ex64BitsNotSupported();
+
+        if ( ifo.dicttype.size() )
+          throw exDicttypeNotSupported();
+
+        if( synFileName.empty() )
+        {
+          if ( ifo.synwordcount )
+            throw exNoSynFile( *i );
+        }
+        else
+        if ( !ifo.synwordcount )
+        {
+          printf( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified\n",
+                  synFileName.c_str() );
+        }
+
+
+        printf( "bookname = %s\n", ifo.bookname.c_str() );
+        printf( "wordcount = %u\n", ifo.wordcount );
+
        initializing.indexingDictionary( ifo.bookname );

        File::Class idx( indexFile, "wb" );
@ -811,6 +840,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

        idx.write( idxHeader );

+        idx.write( ifo.bookname.data(), ifo.bookname.size() );
+        idx.write( ifo.sametypesequence.data(), ifo.sametypesequence.size() );
+
        IndexedWords indexedWords;

        ChunkedStorage::Writer chunks( idx );
@ -837,13 +869,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

        // Build index

-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;

        // That concludes it. Update the header.

        idxHeader.signature = Signature;
        idxHeader.formatVersion = CurrentFormatVersion;

+        idxHeader.wordCount = ifo.wordcount;
+        idxHeader.synWordCount = ifo.synwordcount;
+        idxHeader.bookNameSize = ifo.bookname.size();
+        idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
+
        idx.rewind();

        idx.write( &idxHeader, sizeof( idxHeader ) );
@ -851,9 +891,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(

      dictionaries.push_back( new StardictDictionary( dictId,
                                                      indexFile,
-                                                      dictFiles,
-                                                      ifo ) );
-      
+                                                      dictFiles ) );
    }
    catch( std::exception & e )
    {