diff --git a/src/btreeidx.cc b/src/btreeidx.cc
index 98f00ec0..f9da0ffe 100644
--- a/src/btreeidx.cc
+++ b/src/btreeidx.cc
@@ -780,8 +780,6 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex,
 
     unsigned prevEntry = 0;
 
-    vector< char > charBuffer;
-
     for( unsigned x = 0; x < maxElements; ++x )
     {
       unsigned curEntry = (uint64_t) indexSize * ( x + 1 ) / ( maxElements + 1 );
@@ -793,18 +791,13 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex,
 
       memcpy( &uncompressedData.front() + sizeof( uint32_t ) + x * sizeof( uint32_t ), &offset, sizeof( uint32_t ) );
 
-      if ( charBuffer.size() < nextIndex->first.size() * 4 )
-        charBuffer.resize( nextIndex->first.size() * 4 );
-
-      size_t sz = Utf8::encode( nextIndex->first.data(), nextIndex->first.size(),
-                                &charBuffer.front() );
+      size_t sz = nextIndex->first.size() + 1;
 
       size_t prevSize = uncompressedData.size();
-      uncompressedData.resize( prevSize + sz + 1 );
+      uncompressedData.resize( prevSize + sz );
 
-      memcpy( &uncompressedData.front() + prevSize, &charBuffer.front(), sz );
-
-      uncompressedData.back() = 0;
+      memcpy( &uncompressedData.front() + prevSize, nextIndex->first.c_str(),
+              sz );
 
       prevEntry = curEntry;
     }
@@ -914,9 +907,12 @@ void IndexedWords::addWord( wstring const & word, uint32_t articleOffset )
     }
 
     // Insert this word
+    wstring folded = Folding::apply( nextChar );
+    
     iterator i = insert(
       IndexedWords::value_type(
-        Folding::apply( nextChar ),
+        string( &utfBuffer.front(),
+                Utf8::encode( folded.data(), folded.size(), &utfBuffer.front() ) ),
         vector< WordArticleLink >() ) ).first;
 
     if ( ( i->second.size() < 1024 ) || ( nextChar == wordBegin ) ) // Don't overpopulate chains with middle matches
diff --git a/src/btreeidx.hh b/src/btreeidx.hh
index 403a18c3..89a1bede 100644
--- a/src/btreeidx.hh
+++ b/src/btreeidx.hh
@@ -139,8 +139,9 @@ private:
 
 /// This represents the index in its source form, as a map which binds folded
 /// words to sequences of their unfolded source forms and the corresponding
-/// article offsets.
-struct IndexedWords: public map< wstring, vector< WordArticleLink > >
+/// article offsets. The words are stored UTF-8 encoded: bytewise comparison
+/// of UTF-8 keeps Unicode code point order, and the encoding conserves space.
+struct IndexedWords: public map< string, vector< WordArticleLink > >
 {
   /// Instead of adding to the map directly, use this function. It does folding
   /// itself, and for phrases/sentences it adds additional entries beginning with