From 32fe5dff9ee0c09476d79a21266f74fa231d3db0 Mon Sep 17 00:00:00 2001
From: Konstantin Isakov <ikm@users.berlios.de>
Date: Tue, 14 Apr 2009 16:35:47 +0000
Subject: [PATCH] * A lot of changes aimed to make lookups faster and to reduce
 startup times.

---
 src/bgl.cc            |  15 +-
 src/btreeidx.cc       | 341 ++++++++++++++++++++++++++++++------------
 src/btreeidx.hh       |  32 +++-
 src/chunkedstorage.cc |  28 +++-
 src/chunkedstorage.hh |   1 +
 src/dictdfiles.cc     |  15 +-
 src/dsl.cc            |  15 +-
 src/lsa.cc            |  15 +-
 src/sounddir.cc       |  15 +-
 src/stardict.cc       | 134 +++++++++++------
 10 files changed, 434 insertions(+), 177 deletions(-)

diff --git a/src/bgl.cc b/src/bgl.cc
index 8d0c838a..b56dc1eb 100644
--- a/src/bgl.cc
+++ b/src/bgl.cc
@@ -26,6 +26,7 @@ using std::pair;
 
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
 
 namespace
 {
@@ -49,7 +50,8 @@ namespace
     uint32_t wordCount; // Total number of words, for informative purposes only
     /// Add more fields here, like name, description, author and such.
     uint32_t chunksOffset; // The offset to chunks' storage
-    uint32_t indexOffset; // The offset of the index in the file.
+    uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+    uint32_t indexRootOffset;
     uint32_t resourceListOffset; // The offset of the list of resources
     uint32_t resourcesCount; // Number of resources stored
   } __attribute__((packed));
@@ -239,9 +241,9 @@ namespace
 
     // Initialize the index
 
-    idx.seek( idxHeader.indexOffset );
-
-    openIndex( idx, idxMutex );
+    openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+               idx, idxMutex );
   }
 
 
@@ -739,7 +741,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
       // Good. Now build the index
 
-      idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+      IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+      idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+      idxHeader.indexRootOffset = idxInfo.rootOffset;
 
       // Save the resource's list.
 
diff --git a/src/btreeidx.cc b/src/btreeidx.cc
index 83025e12..5c988230 100644
--- a/src/btreeidx.cc
+++ b/src/btreeidx.cc
@@ -42,19 +42,21 @@ enum
 
 BtreeDictionary::BtreeDictionary( string const & id,
                                   vector< string > const & dictionaryFiles ):
-  Dictionary::Class( id, dictionaryFiles ), idxFile( 0 )
+  Dictionary::Class( id, dictionaryFiles ), idxFile( 0 ), rootNodeLoaded( false )
 {
 }
 
-void BtreeDictionary::openIndex( File::Class & file, Mutex & mutex )
+void BtreeDictionary::openIndex( IndexInfo const & indexInfo,
+                                 File::Class & file, Mutex & mutex )
 {
-  Mutex::Lock _( mutex );
-  
-  indexNodeSize = file.read< uint32_t >();
-  rootOffset = file.read< uint32_t >();
+  indexNodeSize = indexInfo.btreeMaxElements;
+  rootOffset = indexInfo.rootOffset;
 
   idxFile = &file;
   idxFileMutex = &mutex;
+
+  rootNodeLoaded = false;
+  rootNode.clear();
 }
 
 vector< WordArticleLink > BtreeDictionary::findArticles( wstring const & str )
@@ -68,8 +70,11 @@ vector< WordArticleLink > BtreeDictionary::findArticles( wstring const & str )
   vector< char > leaf;
   uint32_t nextLeaf;
 
+  char const * leafEnd;
+
   char const * chainOffset = findChainOffsetExactOrPrefix( folded, exactMatch,
-                                                           leaf, nextLeaf );
+                                                           leaf, nextLeaf,
+                                                           leafEnd );
 
   if ( chainOffset && exactMatch )
   {
@@ -157,9 +162,11 @@ void BtreeWordSearchRequest::run()
 
   vector< char > leaf;
   uint32_t nextLeaf;
+  char const * leafEnd;
 
   char const * chainOffset = dict.findChainOffsetExactOrPrefix( folded, exactMatch,
-                                                                leaf, nextLeaf );
+                                                                leaf, nextLeaf,
+                                                                leafEnd );
 
   if ( chainOffset )
   for( ; ; )
@@ -198,7 +205,7 @@ void BtreeWordSearchRequest::run()
 
     // Fetch new leaf if we're out of chains here
 
-    if ( chainOffset > &leaf.back() )
+    if ( chainOffset >= leafEnd )
     {
       // We're past the current leaf, fetch the next one
 
@@ -209,6 +216,8 @@ void BtreeWordSearchRequest::run()
         Mutex::Lock _( *dict.idxFileMutex );
         
         dict.readNode( nextLeaf, leaf );
+        leafEnd = &leaf.front() + leaf.size();
+
         nextLeaf = dict.idxFile->read< uint32_t >();
         chainOffset = &leaf.front() + sizeof( uint32_t );
 
@@ -274,8 +283,9 @@ void BtreeDictionary::readNode( uint32_t offset, vector< char > & out )
 
 char const * BtreeDictionary::findChainOffsetExactOrPrefix( wstring const & target,
                                                             bool & exactMatch,
-                                                            vector< char > & leaf,
-                                                            uint32_t & nextLeaf )
+                                                            vector< char > & extLeaf,
+                                                            uint32_t & nextLeaf,
+                                                            char const * & leafEnd )
 {
   if ( !idxFile )
     throw exIndexWasNotOpened();
@@ -294,14 +304,21 @@ char const * BtreeDictionary::findChainOffsetExactOrPrefix( wstring const & targ
 
   uint32_t currentNodeOffset = rootOffset;
 
+  if ( !rootNodeLoaded )
+  {
+    // Time to load our root node. We do it only once, at the first request.
+    readNode( rootOffset, rootNode );
+    rootNodeLoaded = true;
+  }
+
+  char const * leaf = &rootNode.front();
+  leafEnd = leaf + rootNode.size();
+
   for( ; ; )
   {
-    //printf( "reading node at %x\n", currentNodeOffset );
-    readNode( currentNodeOffset, leaf );
-
     // Is it a leaf or a node?
 
-    uint32_t leafEntries = *(uint32_t *)&leaf.front();
+    uint32_t leafEntries = *(uint32_t *)leaf;
 
     if ( leafEntries == 0xffffFFFF )
     {
@@ -309,124 +326,266 @@ char const * BtreeDictionary::findChainOffsetExactOrPrefix( wstring const & targ
 
       //printf( "=>a node\n" );
 
-      uint32_t const * offsets = (uint32_t *)&leaf.front() + 1;
+      uint32_t const * offsets = (uint32_t *)leaf + 1;
 
-      char const * ptr = &leaf.front() + sizeof( uint32_t ) +
+      char const * ptr = leaf + sizeof( uint32_t ) +
                          ( indexNodeSize + 1 ) * sizeof( uint32_t );
 
-      unsigned entry;
+      // ptr now points to a span of zero-separated strings, up to leafEnd.
+      // We find our match using a binary search.
 
-      for( entry = 0; entry < indexNodeSize; ++entry )
-      {
-        //printf( "checking node agaist word %s\n", ptr );
-        size_t wordSize = strlen( ptr );
+      char const * closestString;
 
+      int compareResult;
+
+      char const * window = ptr;
+      unsigned windowSize = leafEnd - ptr;
+
+      for( ; ; )
+      {  
+        // We boldly shoot in the middle of the whole mess, and then adjust
+        // to the beginning of the string that we've hit.
+        char const * testPoint = window + windowSize/2;
+  
+        closestString = testPoint;
+  
+        while( closestString > ptr && closestString[ -1 ] )
+          --closestString;
+  
+        size_t wordSize = strlen( closestString );
+  
         if ( wcharBuffer.size() <= wordSize )
           wcharBuffer.resize( wordSize + 1 );
-
-        long result = Utf8::decode( ptr, wordSize, &wcharBuffer.front() );
-
+  
+        long result = Utf8::decode( closestString, wordSize, &wcharBuffer.front() );
+  
         if ( result < 0 )
-          throw Utf8::exCantDecode( ptr );
-
+          throw Utf8::exCantDecode( closestString );
+  
         wcharBuffer[ result ] = 0;
 
-        int compareResult = target.compare( &wcharBuffer.front() );
+        //printf( "Checking against %s\n", closestString );
 
+        compareResult = target.compare( &wcharBuffer.front() );
+  
         if ( !compareResult )
         {
-          // The target string matches the current one.
-          // Go to the right, since it's there where we store such results.
-          currentNodeOffset = offsets[ entry + 1 ];
+          // The target string matches the current one. Finish the search.
           break;
         }
         if ( compareResult < 0 )
         {
           // The target string is smaller than the current one.
           // Go to the left.
-          currentNodeOffset = offsets[ entry ];
-          break;
-        }
+          windowSize = closestString - window;
 
-        ptr += wordSize + 1;
+          if ( !windowSize )
+            break;
+        }
+        else
+        {
+          // The target string is larger than the current one.
+          // Go to the right.
+          windowSize -= ( closestString - window )  + wordSize + 1;
+          window = closestString + wordSize + 1;
+
+          if ( !windowSize )
+            break;
+        }
       }
 
-      if ( entry == indexNodeSize )
+      #if 0
+      printf( "The winner is %s, compareResult = %d\n", closestString, compareResult );
+
+      if ( closestString != ptr )
       {
-        // We iterated through all entries, but our string is larger than
-        // all of them. Go the the rightmost node.
+        char const * left = closestString -1;
+
+        while( left != ptr && left[ -1 ] )
+          --left;
+
+        printf( "To the left: %s\n", left );
+      }
+      else
+        printf( "To the lest -- nothing\n" );
+
+      char const * right = closestString + strlen( closestString ) + 1;
+
+      if ( right != leafEnd )
+      {
+        printf( "To the right: %s\n", right );
+      }
+      else
+        printf( "To the right -- nothing\n" );
+      #endif
+
+      // Now, whatever the outcome (compareResult) is, we need to find
+      // the entry number of the closestString string.
+       
+      unsigned entry = 0;
+
+      for( char const * next = ptr; next != closestString;
+           next += strlen( next ) + 1, ++entry ) ;
+
+      // Ok, now check the outcome
+
+      if ( !compareResult )
+      {
+        // The target string matches the one found.
+        // Go to the right, since it's there where we store such results.
+        currentNodeOffset = offsets[ entry + 1 ];
+      }
+      if ( compareResult < 0 )
+      {
+        // The target string is smaller than the one found.
+        // Go to the left.
         currentNodeOffset = offsets[ entry ];
       }
+      else
+      {
+        // The target string is larger than the one found.
+        // Go to the right.
+        currentNodeOffset = offsets[ entry + 1 ];
+      }
+
+      //printf( "reading node at %x\n", currentNodeOffset );
+      readNode( currentNodeOffset, extLeaf );
+      leaf = &extLeaf.front();
+      leafEnd = leaf + extLeaf.size();
     }
     else
     {
       //printf( "=>a leaf\n" );
       // A leaf
-      nextLeaf = idxFile->read< uint32_t >();
 
-      // Iterate through chains until we find one that matches
+      // If this leaf is the root, there's no next leaf, it just can't be.
+      // We do this check because the file's position indicator just won't
+      // be in the right place for root node anyway, since we precache it.
+      nextLeaf = ( currentNodeOffset != rootOffset ? idxFile->read< uint32_t >() : 0 );
 
-      char const * ptr = &leaf.front() + sizeof( uint32_t );
+      if ( !leafEntries )
+      {
+        // Empty leaf? This is only possible for entirely empty trees.
+        if ( currentNodeOffset != rootOffset )
+          throw exCorruptedChainData();
+        else
+          return 0; // No match
+      }
+
+      // Build an array containing all chain pointers
+      char const * ptr = leaf + sizeof( uint32_t );
 
       uint32_t chainSize;
 
-      while( leafEntries-- )
+      vector< char const * > chainOffsets( leafEntries );
+
       {
-        memcpy( &chainSize, ptr, sizeof( uint32_t ) );
-        ptr += sizeof( uint32_t );
+        char const ** nextOffset = &chainOffsets.front();
 
-        if( chainSize )
+        while( leafEntries-- )
         {
-          size_t wordSize = strlen( ptr );
+          *nextOffset++ = ptr;
 
-          if ( wcharBuffer.size() <= wordSize )
-            wcharBuffer.resize( wordSize + 1 );
+          memcpy( &chainSize, ptr, sizeof( uint32_t ) );
 
-          //printf( "checking agaist word %s, left = %u\n", ptr, leafEntries );
+          //printf( "%s + %s\n", ptr + sizeof( uint32_t ), ptr + sizeof( uint32_t ) + strlen( ptr + sizeof( uint32_t ) ) + 1 );
 
-          long result = Utf8::decode( ptr, wordSize, &wcharBuffer.front() );
-
-          if ( result < 0 )
-            throw Utf8::exCantDecode( ptr );
-
-          wcharBuffer[ result ] = 0;
-
-          wstring foldedWord = Folding::apply( &wcharBuffer.front() );
-
-          int compareResult = target.compare( foldedWord );
-
-          if ( !compareResult )
-          {
-            // Exact match -- return and be done
-            exactMatch = true;
-
-            return ptr - sizeof( uint32_t );
-          }
-          else
-          if ( compareResult < 0 )
-          {
-            // The target string is smaller than the current one.
-            // No point in travering further, return this result.
-            
-            return ptr - sizeof( uint32_t );
-          }
-          ptr += chainSize;
+          ptr += sizeof( uint32_t ) + chainSize;
         }
       }
 
-      // Well, our target is larger than all the chains here. This would mean
-      // that the next leaf is the right one.
+      // Now do a binary search in it, aiming to find where our target
+      // string lands.
 
-      if ( nextLeaf )
+      char const ** window = &chainOffsets.front();
+      unsigned windowSize = chainOffsets.size();
+
+      for( ; ; )
       {
-        readNode( nextLeaf, leaf );
+        //printf( "window = %u, ws = %u\n", window - &chainOffsets.front(), windowSize );
 
-        nextLeaf = idxFile->read< uint32_t >();
+        char const ** chainToCheck = window + windowSize/2;
+        ptr = *chainToCheck;
+  
+        memcpy( &chainSize, ptr, sizeof( uint32_t ) );
+        ptr += sizeof( uint32_t );
+  
+        size_t wordSize = strlen( ptr );
+  
+        if ( wcharBuffer.size() <= wordSize )
+          wcharBuffer.resize( wordSize + 1 );
+  
+        //printf( "checking agaist word %s, left = %u\n", ptr, leafEntries );
+  
+        long result = Utf8::decode( ptr, wordSize, &wcharBuffer.front() );
+  
+        if ( result < 0 )
+          throw Utf8::exCantDecode( ptr );
+  
+        wcharBuffer[ result ] = 0;
+  
+        wstring foldedWord = Folding::apply( &wcharBuffer.front() );
+  
+        int compareResult = target.compare( foldedWord );
+  
+        if ( !compareResult )
+        {
+          // Exact match -- return and be done
+          exactMatch = true;
+  
+          return ptr - sizeof( uint32_t );
+        }
+        else
+        if ( compareResult < 0 )
+        {
+          // The target string is smaller than the current one.
+          // Go to the first half
+           
+          windowSize /= 2;
 
-        return &leaf.front() + sizeof( uint32_t );
+          if ( !windowSize )
+          {
+            // That finishes our search. Since our target string
+            // landed before the last tested chain, we return a possible
+            // prefix match against that chain.
+            return ptr - sizeof( uint32_t );
+          }
+        }
+        else
+        {
+          // The target string is larger than the current one.
+          // Go to the second half
+
+          windowSize -= windowSize/2 + 1;
+
+          if ( !windowSize )
+          {
+            // That finishes our search. Since our target string
+            // landed after the last tested chain, we return the next
+            // chain. If there's no next chain in this leaf, this
+            // would mean the first element in the next leaf.
+            if ( chainToCheck == &chainOffsets.back() )
+            {
+              if ( nextLeaf )
+              {
+                readNode( nextLeaf, extLeaf );
+  
+                leafEnd = &extLeaf.front() + extLeaf.size();
+  
+                nextLeaf = idxFile->read< uint32_t >();
+  
+                return &extLeaf.front() + sizeof( uint32_t );
+              }
+              else
+                return 0; // This was the last leaf
+            }
+            else
+              return chainToCheck[ 1 ];
+          }
+
+          window = chainToCheck + 1;
+        }
       }
-      else
-        return 0; // This was the last leaf
     }
   }
 }
@@ -764,7 +923,7 @@ void IndexedWords::addWord( wstring const & word, uint32_t articleOffset )
   }
 }
 
-uint32_t buildIndex( IndexedWords const & indexedWords, File::Class & file )
+IndexInfo buildIndex( IndexedWords const & indexedWords, File::Class & file )
 {
   size_t indexSize = indexedWords.size();
   IndexedWords::const_iterator nextIndex = indexedWords.begin();
@@ -798,17 +957,7 @@ uint32_t buildIndex( IndexedWords const & indexedWords, File::Class & file )
                                         file, btreeMaxElements,
                                         lastLeafOffset );
 
-  // We need to save btreeMaxElements. For simplicity, we just save it here
-  // along with root offset, and then return that record's offset as the
-  // offset of the index itself.
-
-  uint32_t indexOffset = file.tell();
-
-  file.write( (uint32_t) btreeMaxElements );
-  file.write( (uint32_t) rootOffset );
-
-  return indexOffset;
+  return IndexInfo( btreeMaxElements, rootOffset );
 }
 
-
 }
diff --git a/src/btreeidx.hh b/src/btreeidx.hh
index c00c04e2..cf67440a 100644
--- a/src/btreeidx.hh
+++ b/src/btreeidx.hh
@@ -25,7 +25,7 @@ enum
   /// This is to be bumped up each time the internal format changes.
   /// The value isn't used here by itself, it is supposed to be added
   /// to each dictionary's internal format version.
-  FormatVersion = 2
+  FormatVersion = 3
 };
 
 // These exceptions which might be thrown during the index traversal
@@ -49,6 +49,16 @@ struct WordArticleLink
   {}
 };
 
+/// Information needed to open the index, as returned by buildIndex()
+struct IndexInfo
+{
+  uint32_t btreeMaxElements, rootOffset;
+  // openIndex() keeps these as the index node size and the root node offset
+  IndexInfo( uint32_t btreeMaxElements_, uint32_t rootOffset_ ):
+    btreeMaxElements( btreeMaxElements_ ), rootOffset( rootOffset_ )
+  {}
+};
+
 class BtreeWordSearchRequest;
 
 /// A base for the dictionary that utilizes a btree index build using
@@ -67,11 +77,10 @@ public:
 
 protected:
 
-  /// Opens the index. The file must be positioned at the offset previously
-  /// returned by buildIndex(). The file reference is saved to be used for
+  /// Opens the index. The file reference is saved to be used for
   /// subsequent lookups.
   /// The mutex is the one to be locked when working with the file.
-  void openIndex( File::Class &, Mutex & );
+  void openIndex( IndexInfo const &, File::Class &, Mutex & );
 
   /// Finds articles that match the given string. A case-insensitive search
   /// is performed.
@@ -83,6 +92,9 @@ private:
   File::Class * idxFile;
   uint32_t indexNodeSize;
   uint32_t rootOffset;
+  bool rootNodeLoaded;
+  vector< char > rootNode; // We load root node here and keep it at all times,
+                           // since all searches always start with it.
 
   /// Finds the offset in the btree leaf for the given word, either matching
   /// by an exact match, or by finding the smallest entry that might match
@@ -91,10 +103,16 @@ private:
   /// to true when an exact match is located, and to false otherwise.
   /// The located leaf is loaded to 'leaf', and the pointer to the next
   /// leaf is saved to 'nextLeaf'.
+  /// However, since the root node is permanently cached, the 'leaf' passed
+  /// might not get used at all if the root node is itself a leaf. In that
+  /// case the returned pointer points into the cached root node rather than
+  /// into 'leaf'. For this reason 'leafEnd' is always set to point to the
+  /// first byte past the data of the node that was examined last.
   char const * findChainOffsetExactOrPrefix( wstring const & target,
                                              bool & exactMatch,
                                              vector< char > & leaf,
-                                             uint32_t & nextLeaf );
+                                             uint32_t & nextLeaf,
+                                             char const * & leafEnd );
 
   /// Reads a node or leaf at the given offset. Just uncompresses its data
   /// to the given vector and does nothing more.
@@ -128,10 +146,10 @@ struct IndexedWords: public map< wstring, vector< WordArticleLink > >
   void addWord( wstring const & word, uint32_t articleOffset );
 };
 
-/// Builds the index, as a compressed btree. Returns offset to its root.
+/// Builds the index, as a compressed btree. Returns IndexInfo.
 /// All the data is stored to the given file, beginning from its current
 /// position.
-uint32_t buildIndex( IndexedWords const &, File::Class & file );
+IndexInfo buildIndex( IndexedWords const &, File::Class & file );
 
 }
 
diff --git a/src/chunkedstorage.cc b/src/chunkedstorage.cc
index d97d6097..91658c4a 100644
--- a/src/chunkedstorage.cc
+++ b/src/chunkedstorage.cc
@@ -15,6 +15,17 @@ enum
 Writer::Writer( File::Class & f ):
   file( f ), chunkStarted( false ), bufferUsed( 0 )
 {
+  // Create a scratchpad at the current file position. finish() writes the
+  // chunk table into it if the table fits, in order to save some seek times.
+
+  char zero[ 4096 ];
+
+  memset( zero, 0, sizeof( zero ) );
+
+  scratchPadOffset = file.tell();
+  scratchPadSize = sizeof( zero );
+
+  file.write( zero, sizeof( zero ) );
 }
 
 uint32_t Writer::startNewBlock()
@@ -77,10 +88,25 @@ uint32_t Writer::finish()
   if ( bufferUsed || chunkStarted )
     saveCurrentChunk();
 
+  bool useScratchPad = false;
+  uint32_t savedOffset = 0;
+
+  if ( scratchPadSize >= offsets.size() * sizeof( uint32_t ) + sizeof( uint32_t ) )
+  {
+    useScratchPad = true;
+    savedOffset = file.tell();
+    file.seek( scratchPadOffset );
+  }
+
   uint32_t offset = file.tell();
 
   file.write( (uint32_t) offsets.size() );
-  file.write( &offsets.front(), offsets.size() * sizeof( uint32_t ) );
+
+  if ( offsets.size() )
+    file.write( &offsets.front(), offsets.size() * sizeof( uint32_t ) );
+
+  if ( useScratchPad )
+    file.seek( savedOffset );
 
   offsets.clear();
   chunkStarted = false;
diff --git a/src/chunkedstorage.hh b/src/chunkedstorage.hh
index 367fba67..c47e878d 100644
--- a/src/chunkedstorage.hh
+++ b/src/chunkedstorage.hh
@@ -29,6 +29,7 @@ class Writer
 {
   vector< uint32_t > offsets;
   File::Class & file;
+  size_t scratchPadOffset, scratchPadSize;
 
 public:
 
diff --git a/src/dictdfiles.cc b/src/dictdfiles.cc
index d112ab1d..dbdfc3a7 100644
--- a/src/dictdfiles.cc
+++ b/src/dictdfiles.cc
@@ -29,6 +29,7 @@ using std::list;
 
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
 
 namespace {
 
@@ -48,7 +49,8 @@ struct IdxHeader
   uint32_t signature; // First comes the signature, DCDX
   uint32_t formatVersion; // File format version (CurrentFormatVersion)
   uint32_t wordCount; // Total number of words
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));
 
 bool indexIsOldOrBad( string const & indexFile )
@@ -109,9 +111,9 @@ DictdDictionary::DictdDictionary( string const & id,
 
   // Initialize the index
 
-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }
 
 DictdDictionary::~DictdDictionary()
@@ -380,7 +382,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
         // Build index
 
-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;
 
         // That concludes it. Update the header.
 
diff --git a/src/dsl.cc b/src/dsl.cc
index 145631d3..58b32177 100644
--- a/src/dsl.cc
+++ b/src/dsl.cc
@@ -47,6 +47,7 @@ using std::list;
 
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
 
 namespace {
 
@@ -66,7 +67,8 @@ struct IdxHeader
   uint32_t chunksOffset; // The offset to chunks' storage
   uint32_t hasAbrv; // Non-zero means file has abrvs at abrvAddress
   uint32_t abrvAddress; // Address of abrv map in the chunked storage
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));
 
 bool indexIsOldOrBad( string const & indexFile )
@@ -201,9 +203,9 @@ DslDictionary::DslDictionary( string const & id,
 
   // Initialize the index
 
-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 
   // Open a resource zip file, if there's one
   resourceZip = zip_open( ( getDictionaryFilenames()[ 0 ] + ".files.zip" ).c_str(), 0, 0 );
@@ -1184,7 +1186,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
         // Build index
 
-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;
 
         // That concludes it. Update the header.
 
diff --git a/src/lsa.cc b/src/lsa.cc
index 8f205a9a..9a0e2023 100644
--- a/src/lsa.cc
+++ b/src/lsa.cc
@@ -23,6 +23,7 @@ using std::multimap;
 using std::set;
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
 
 namespace {
 
@@ -43,7 +44,8 @@ struct IdxHeader
   uint32_t formatVersion; // File format version, currently 1.
   uint32_t soundsCount; // Total number of sounds, for informative purposes only
   uint32_t vorbisOffset; // Offset of the vorbis file which contains all snds
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));
 
 bool indexIsOldOrBad( string const & indexFile )
@@ -174,9 +176,9 @@ LsaDictionary::LsaDictionary( string const & id,
 {
   // Initialize the index
 
-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }
 
 sptr< Dictionary::DataRequest > LsaDictionary::getArticle( wstring const & word,
@@ -546,7 +548,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
         // Build the index
 
-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;
 
          // That concludes it. Update the header.
 
diff --git a/src/sounddir.cc b/src/sounddir.cc
index 3d61c13c..e45b2edb 100644
--- a/src/sounddir.cc
+++ b/src/sounddir.cc
@@ -23,6 +23,7 @@ using std::multimap;
 using std::set;
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
 
 namespace {
 
@@ -38,7 +39,8 @@ struct IdxHeader
   uint32_t formatVersion; // File format version, is to be CurrentFormatVersion
   uint32_t soundsCount; // Total number of sounds, for informative purposes only
   uint32_t chunksOffset; // The offset to chunks' storage
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
 } __attribute__((packed));
 
 bool indexIsOldOrBad( string const & indexFile )
@@ -98,9 +100,9 @@ SoundDirDictionary::SoundDirDictionary( string const & id,
 {
   // Initialize the index
 
-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }
 
 sptr< Dictionary::DataRequest > SoundDirDictionary::getArticle( wstring const & word,
@@ -365,7 +367,10 @@ vector< sptr< Dictionary::Class > > makeDictionaries( Config::SoundDirs const &
       
       // Build the index
 
-      idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+      IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+      idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+      idxHeader.indexRootOffset = idxInfo.rootOffset;
 
        // That concludes it. Update the header.
 
diff --git a/src/stardict.cc b/src/stardict.cc
index ef490ffa..bc038e69 100644
--- a/src/stardict.cc
+++ b/src/stardict.cc
@@ -33,6 +33,7 @@ using std::wstring;
 
 using BtreeIndexing::WordArticleLink;
 using BtreeIndexing::IndexedWords;
+using BtreeIndexing::IndexInfo;
 
 namespace {
 
@@ -65,7 +66,7 @@ struct Ifo
 enum
 {
   Signature = 0x58444953, // SIDX on little-endian, XDIS on big-endian
-  CurrentFormatVersion = 4 + BtreeIndexing::FormatVersion + Folding::Version
+  CurrentFormatVersion = 5 + BtreeIndexing::FormatVersion + Folding::Version
 };
 
 struct IdxHeader
@@ -73,7 +74,12 @@ struct IdxHeader
   uint32_t signature; // First comes the signature, SIDX
   uint32_t formatVersion; // File format version (CurrentFormatVersion)
   uint32_t chunksOffset; // The offset to chunks' storage
-  uint32_t indexOffset; // The offset of the index in the file
+  uint32_t indexBtreeMaxElements; // Two fields from IndexInfo
+  uint32_t indexRootOffset;
+  uint32_t wordCount; // Saved from Ifo::wordcount
+  uint32_t synWordCount; // Saved from Ifo::synwordcount
+  uint32_t bookNameSize; // Book name's length. Used to read it then.
+  uint32_t sameTypeSequenceSize; // That string's size. Used to read it then.
 } __attribute__((packed));
 
 bool indexIsOldOrBad( string const & indexFile )
@@ -90,32 +96,32 @@ bool indexIsOldOrBad( string const & indexFile )
 
 class StardictDictionary: public BtreeIndexing::BtreeDictionary
 {
-  Ifo ifo;
   Mutex idxMutex;
   File::Class idx;
   IdxHeader idxHeader;
+  string bookName;
+  string sameTypeSequence;
   ChunkedStorage::Reader chunks;
   dictData * dz;
 
 public:
 
   StardictDictionary( string const & id, string const & indexFile,
-                      vector< string > const & dictionaryFiles,
-                      Ifo const & );
+                      vector< string > const & dictionaryFiles );
 
   ~StardictDictionary();
 
   virtual string getName() throw()
-  { return ifo.bookname; }
+  { return bookName; }
 
   virtual map< Dictionary::Property, string > getProperties() throw()
   { return map< Dictionary::Property, string >(); }
 
   virtual unsigned long getArticleCount() throw()
-  { return ifo.wordcount; }
+  { return idxHeader.wordCount; }
 
   virtual unsigned long getWordCount() throw()
-  { return ifo.wordcount + ifo.synwordcount; }
+  { return idxHeader.wordCount + idxHeader.synWordCount; }
 
   virtual sptr< Dictionary::WordSearchRequest > findHeadwordsForSynonym( wstring const & )
     throw( std::exception );
@@ -136,16 +142,18 @@ private:
   void loadArticle(  uint32_t address,
                      string & headword,
                      string & articleText );
+
+  string loadString( size_t size );
 };
 
 StardictDictionary::StardictDictionary( string const & id,
                                         string const & indexFile,
-                                        vector< string > const & dictionaryFiles,
-                                        Ifo const & ifo_ ):
+                                        vector< string > const & dictionaryFiles ):
   BtreeDictionary( id, dictionaryFiles ),
-  ifo( ifo_ ),
   idx( indexFile, "rb" ),
   idxHeader( idx.read< IdxHeader >() ),
+  bookName( loadString( idxHeader.bookNameSize ) ),
+  sameTypeSequence( loadString( idxHeader.sameTypeSequenceSize ) ),
   chunks( idx, idxHeader.chunksOffset )
 {
   // Open the .dict file
@@ -157,9 +165,9 @@ StardictDictionary::StardictDictionary( string const & id,
 
   // Initialize the index
 
-  idx.seek( idxHeader.indexOffset );
-
-  openIndex( idx, idxMutex );
+  openIndex( IndexInfo( idxHeader.indexBtreeMaxElements,
+                        idxHeader.indexRootOffset ),
+             idx, idxMutex );
 }
 
 StardictDictionary::~StardictDictionary()
@@ -168,6 +176,15 @@ StardictDictionary::~StardictDictionary()
     dict_data_close( dz );
 }
 
+// Reads size bytes from idx and returns them as a string.
+string StardictDictionary::loadString( size_t size )
+{
+  string data( size, '\0' );
+  if ( size ) // Empty is legal (no sametypesequence); nothing to read then
+    idx.read( &data[ 0 ], size );
+  return data;
+}
+
 void StardictDictionary::getArticleProps( uint32_t articleAddress,
                                           string & headword,
                                           uint32_t & offset, uint32_t & size )
@@ -252,14 +269,14 @@ void StardictDictionary::loadArticle( uint32_t address,
 
   char * ptr = articleBody;
 
-  if ( ifo.sametypesequence.size() )
+  if ( sameTypeSequence.size() )
   {
     /// The sequence is known, it's not stored in the article itself
-    for( unsigned seq = 0; seq < ifo.sametypesequence.size(); ++seq )
+    for( unsigned seq = 0; seq < sameTypeSequence.size(); ++seq )
     {
       // Last entry doesn't have size info -- it is inferred from
       // the bytes left
-      bool entrySizeKnown = ( seq == ifo.sametypesequence.size() - 1 );
+      bool entrySizeKnown = ( seq == sameTypeSequence.size() - 1 );
 
       uint32_t entrySize;
 
@@ -272,7 +289,7 @@ void StardictDictionary::loadArticle( uint32_t address,
         break;
       }
 
-      char type = ifo.sametypesequence[ seq ];
+      char type = sameTypeSequence[ seq ];
 
       if ( islower( type ) )
       {
@@ -610,8 +627,7 @@ static bool tryPossibleName( string const & name, string & copyTo )
 }
 
 static void findCorrespondingFiles( string const & ifo,
-                                    string & idx, string & dict, string & syn,
-                                    bool needSyn )
+                                    string & idx, string & dict, string & syn )
 {
   string base( ifo, 0, ifo.size() - 3 );
 
@@ -633,15 +649,15 @@ static void findCorrespondingFiles( string const & ifo,
       ) )
     throw exNoDictFile( ifo );
 
-  if ( needSyn && !(
-                     tryPossibleName( base + "syn", syn ) ||
-                     tryPossibleName( base + "syn.gz", syn ) ||
-                     tryPossibleName( base + "syn.dz", syn ) ||
-                     tryPossibleName( base + "SYN", syn ) ||
-                     tryPossibleName( base + "SYN.GZ", syn ) ||
-                     tryPossibleName( base + "SYN.DZ", syn )
+  if ( !(
+         tryPossibleName( base + "syn", syn ) ||
+         tryPossibleName( base + "syn.gz", syn ) ||
+         tryPossibleName( base + "syn.dz", syn ) ||
+         tryPossibleName( base + "SYN", syn ) ||
+         tryPossibleName( base + "SYN.GZ", syn ) ||
+         tryPossibleName( base + "SYN.DZ", syn )
      ) )
-    throw exNoSynFile( ifo );
+    syn.clear();
 }
 
 static void handleIdxSynFile( string const & fileName,
@@ -764,30 +780,16 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
     try
     {
-      File::Class ifoFile( *i, "r" );
-
-      Ifo ifo( ifoFile );
-
-      if ( ifo.idxoffsetbits == 64 )
-        throw ex64BitsNotSupported();
-
-      if ( ifo.dicttype.size() )
-        throw exDicttypeNotSupported();
-
-      printf( "bookname = %s\n", ifo.bookname.c_str() );
-      printf( "wordcount = %u\n", ifo.wordcount );
-
       vector< string > dictFiles( 1, *i );
 
       string idxFileName, dictFileName, synFileName;
 
-      findCorrespondingFiles( *i, idxFileName, dictFileName, synFileName,
-                              ifo.synwordcount );
+      findCorrespondingFiles( *i, idxFileName, dictFileName, synFileName );
 
       dictFiles.push_back( idxFileName );
       dictFiles.push_back( dictFileName );
 
-      if ( ifo.synwordcount )
+      if ( synFileName.size() )
         dictFiles.push_back( synFileName );
 
       string dictId = Dictionary::makeDictionaryId( dictFiles );
@@ -798,6 +800,33 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
            indexIsOldOrBad( indexFile ) )
       {
         // Building the index
+
+        File::Class ifoFile( *i, "r" );
+
+        Ifo ifo( ifoFile );
+
+        if ( ifo.idxoffsetbits == 64 )
+          throw ex64BitsNotSupported();
+
+        if ( ifo.dicttype.size() )
+          throw exDicttypeNotSupported();
+
+        if( synFileName.empty() )
+        {
+          if ( ifo.synwordcount )
+            throw exNoSynFile( *i );
+        }
+        else
+        if ( !ifo.synwordcount )
+        {
+          printf( "Warning: ignoring .syn file %s, since there's no synwordcount in .ifo specified\n",
+                  synFileName.c_str() );
+        }
+
+
+        printf( "bookname = %s\n", ifo.bookname.c_str() );
+        printf( "wordcount = %u\n", ifo.wordcount );
+
         initializing.indexingDictionary( ifo.bookname );
 
         File::Class idx( indexFile, "wb" );
@@ -811,6 +840,9 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
         idx.write( idxHeader );
 
+        idx.write( ifo.bookname.data(), ifo.bookname.size() );
+        idx.write( ifo.sametypesequence.data(), ifo.sametypesequence.size() );
+
         IndexedWords indexedWords;
 
         ChunkedStorage::Writer chunks( idx );
@@ -837,13 +869,21 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
         // Build index
 
-        idxHeader.indexOffset = BtreeIndexing::buildIndex( indexedWords, idx );
+        IndexInfo idxInfo = BtreeIndexing::buildIndex( indexedWords, idx );
+
+        idxHeader.indexBtreeMaxElements = idxInfo.btreeMaxElements;
+        idxHeader.indexRootOffset = idxInfo.rootOffset;
 
         // That concludes it. Update the header.
 
         idxHeader.signature = Signature;
         idxHeader.formatVersion = CurrentFormatVersion;
 
+        idxHeader.wordCount = ifo.wordcount;
+        idxHeader.synWordCount = ifo.synwordcount;
+        idxHeader.bookNameSize = ifo.bookname.size();
+        idxHeader.sameTypeSequenceSize = ifo.sametypesequence.size();
+
         idx.rewind();
 
         idx.write( &idxHeader, sizeof( idxHeader ) );
@@ -851,9 +891,7 @@ vector< sptr< Dictionary::Class > > makeDictionaries(
 
       dictionaries.push_back( new StardictDictionary( dictId,
                                                       indexFile,
-                                                      dictFiles,
-                                                      ifo ) );
-      
+                                                      dictFiles ) );
     }
     catch( std::exception & e )
     {