diff --git a/.gitignore b/.gitignore
index 7636d73..84ce414 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,4 @@ build/
 cmake-build-debug/
 .idea/
 .DS_Store
+src/xcdat_config.hpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f939135..42e5fcd 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,22 +1,84 @@
 cmake_minimum_required(VERSION 2.8)
 project(XCDAT)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -g -std=c++11")
+# Enable warnings and C++11 for every target defined below.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")
 
-if(NOT CMAKE_BUILD_TYPE)
+# Fall back to a Release build when no build type was requested.
+if (NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Release)
-endif()
+endif ()
 
-if(NOT XCDAT64)
-  set(XCDAT64 OFF)
-endif()
+# User-selectable features. Declared before configure_file() so that the
+# option variables are already defined when the header template is expanded.
+option(XCDAT_X64
+  "Use 64-bit integers to represent nodes."
+  OFF)
+
+option(XCDAT_USE_POPCNT
+  "Use popcount intrinsic. Available on x86-64 since SSE4.2."
+  OFF)
 
-if(XCDAT64)
-  add_definitions(-DXCDAT64)
-endif()
+# Turn on SSE4.2 code generation on UNIX when the intrinsic is requested.
+if (XCDAT_USE_POPCNT)
+  if (UNIX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
+  endif ()
+endif ()
+
+# Generate src/xcdat_config.hpp from its template.
+configure_file(
+  ${XCDAT_SOURCE_DIR}/xcdat_config.hpp.in
+  ${XCDAT_SOURCE_DIR}/src/xcdat_config.hpp
+)
 
 message(STATUS "BUILD_TYPE is ${CMAKE_BUILD_TYPE}")
-message(STATUS "XCDAT64 is ${XCDAT64}")
+message(STATUS "CXX_FLAGS are ${CMAKE_CXX_FLAGS}")
+message(STATUS "CXX_FLAGS_DEBUG are ${CMAKE_CXX_FLAGS_DEBUG}")
+message(STATUS "CXX_FLAGS_RELEASE are ${CMAKE_CXX_FLAGS_RELEASE}")
+message(STATUS "XCDAT_X64 is ${XCDAT_X64}")
+message(STATUS "XCDAT_USE_POPCNT is ${XCDAT_USE_POPCNT}")
+
+# Headers attached to the library target and installed under include/xcdat.
+set(HEADER_FILES
+  src/BitVector.hpp
+  src/BitVectorBuilder.hpp
+  src/DacBc.hpp
+  src/FastDacBc.hpp
+  src/FitVector.hpp
+  src/Trie.hpp
+  src/TrieBuilder.hpp
+  src/Vector.hpp
+  src/xcdat_basics.hpp
+  src/xcdat_config.hpp
+  )
+
+set(SOURCE_FILES
+  src/BitVector.cpp
+  src/DacBc.cpp
+  src/FitVector.cpp
+  src/TrieBuilder.cpp
+  src/FastDacBc.cpp
+  )
+
+# Static library built from the sources listed above.
+add_library(xcdat STATIC ${HEADER_FILES} ${SOURCE_FILES})
+
+# Command line tool: the target is called xcdat-exe, the binary is "xcdat".
+add_executable(xcdat-exe src/xcdat.cpp)
+set_target_properties(xcdat-exe PROPERTIES OUTPUT_NAME xcdat)
+target_link_libraries(xcdat-exe xcdat)
 
 enable_testing()
-add_subdirectory(src)
+# Every src/test*.cpp file becomes its own executable and CTest entry.
+file(GLOB TEST_SOURCES src/test*.cpp)
+foreach(TEST_SOURCE ${TEST_SOURCES})
+  get_filename_component(TEST_SOURCE_NAME ${TEST_SOURCE} NAME_WE)
+  add_executable(${TEST_SOURCE_NAME} ${TEST_SOURCE})
+  target_link_libraries(${TEST_SOURCE_NAME} xcdat)
+  add_test(NAME ${TEST_SOURCE_NAME} COMMAND ${TEST_SOURCE_NAME})
+endforeach()
+
+install(FILES ${HEADER_FILES} DESTINATION include/xcdat)
+install(TARGETS xcdat ARCHIVE DESTINATION lib)
+install(TARGETS xcdat-exe RUNTIME DESTINATION bin)
diff --git a/LICENSE b/LICENSE
index 6eac522..d750e37 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2016 Shunsuke Kanda
+Copyright (c) 2017 Shunsuke Kanda
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 2873d39..d59ffa1 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Xcdat is a C++ library that implements static compressed dictionaries based on a
 The double array (Aoe, 1989) is known as the fastest trie representation and has been used in many trie libraries. On the other hand, it has a space efficiency problem because of a pointer-based data structure.
 Xcdat solves the problem using the XOR-compressed double array (XCDA) methods described in
 
-- S. Kanda, K. Morita, and M. Fuketa. Compressed double-array tries for string dictionaries supporting fast lookup. _Knowledge and Information Systems_, Online first. [[doi](http://dx.doi.org/10.1007/s10115-016-0999-8)] [[pdf](https://kamp78.github.io/pdf/KAIS16_preprint.pdf)]
+- S. Kanda, K. Morita, and M. Fuketa. Compressed double-array tries for string dictionaries supporting fast lookup. _Knowledge and Information Systems_, 51(3): 1023–1042, 2017. [[doi](http://dx.doi.org/10.1007/s10115-016-0999-8)] [[pdf](https://sites.google.com/site/shnskknd/KAIS2016.pdf)]
 
 Therefore, Xcdat can implement trie dictionaries in smaller space compared to the other double-array libraries.
 In addition, the lookup speed is relatively fast in compressed data structures from the double-array advantage.
@@ -16,9 +16,9 @@ In addition, the lookup speed is relatively fast in compressed data structures f
 
 - **Two compression versions.** There are two versions for compressing elements: using byte-oriented DACs (Brisaboa et al., 2013) and using pointer-based ones (Kanda et al., 2016). For characterless strings such as natural language keywords, the former will be slightly smaller and the latter will be slightly faster. For long strings such as URLs, the latter will outperform the former. Xcdat implements the two versions by using a static polymorphism with C++ template to avoid an overhead of virtual functions. 
 
-- **64-bit version.** Although Xcdat represents array addresses using 32-bit integers in default configuration, we can allow for 64-bit integers by defining `XCDAT64`; therefore, the dictionary can be constructed from a very large dataset. The construction space becomes large, but the output dictionary size is nearly equal.
+- **64-bit version.** Although Xcdat represents node addresses using 32-bit integers in default configuration, we can allow for 64-bit integers by defining `XCDAT_X64`; therefore, the dictionary can be constructed from a very large dataset. The construction space becomes large, but the output dictionary size is nearly equal.
 
-- **NULL character.** The dictionary can be constructed from keys including the NULL character by setting the second parameter of the [xcdat::Trie](https://github.com/kamp78/xcdat/blob/master/src/Trie.hpp) constructer to `true`. However, we can generally construct high-performance dictionaries without this setting.
+- **NULL character.** The dictionary can be constructed from keys including the NULL character by setting the second parameter of `xcdat::TrieBuilder::build()` to `true`.
 
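-- **Invertible dictionary coding.** Xcdat supports mapping N different strings to unique IDs in [0,N). That is to say, it supports two basic dictionary operations: Lookup returns the ID corresponding to a given string and Access (also called ReverseLookup) returns the string corresponding to a given ID. Therefore, Xcdat is very useful in many applications for string precessing and indexing, such as described in (Martínez-Prieto et al., 2016).
+- **Invertible dictionary coding.** Xcdat supports mapping N different strings to unique IDs in [0,N). That is to say, it supports two basic dictionary operations: Lookup returns the ID corresponding to a given string and Access (also called ReverseLookup) returns the string corresponding to a given ID. Therefore, Xcdat is very useful in many applications for string processing and indexing, such as those described in (Martínez-Prieto et al., 2016).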
 
@@ -35,13 +35,44 @@ $ git clone https://github.com/kamp78/xcdat.git
 $ cd xcdat
 $ mkdir build
 $ cd build
-$ cmake .. -DCMAKE_BUILD_TYPE=Release -DXCDAT64=OFF
+$ cmake ..
 $ make
+$ make install
 ```
 
+To use the 64-bit setting, add `-DXCDAT_X64=ON` to the CMake options. In addition, adding `-DXCDAT_USE_POPCNT=ON` enables the SSE4.2 POPCNT instruction for Rank/Select operations.
+
+The code has been tested only on Mac OS X and Linux; that is, the library targets only UNIX-compatible operating systems.
+
+
+## Command Line Tool
+
+`xcdat` is a general-purpose command line tool that provides the following three modes:
+
+```
+$ xcdat 
+xcdat build <type> <key> <dict>
+	<type>	'1' for DACs; '2' for FDACs.
+	<key> 	input file of a set of keys.
+	<dict>	output file of the dictionary.
+xcdat query <type> <dict> <limit>
+	<type> 	'1' for DACs; '2' for FDACs.
+	<dict> 	input file of the dictionary.
+	<limit>	limit at lookup (default=10).
+xcdat bench <type> <dict> <key>
+	<type>	'1' for DACs; '2' for FDACs.
+	<dict>	input file of the dictionary.
+	<key> 	input file of keys for benchmark.
+```
+
+
 ## API
 
-Refer to the header comments of [xcdat::Trie](https://github.com/kamp78/xcdat/blob/master/src/Trie.hpp).
+You can build a dictionary using `xcdat::TrieBuilder::build()`.
+This static function receives a set of keywords and returns the resulting `xcdat::Trie` object.
+For its usage, refer to the header comments of [xcdat::TrieBuilder](https://github.com/kamp78/xcdat/blob/master/src/TrieBuilder.hpp).
+For the usage of `xcdat::Trie`, likewise refer to the header comments of [xcdat::Trie](https://github.com/kamp78/xcdat/blob/master/src/Trie.hpp).
+For concrete usage examples, refer to the source code of [xcdat.cpp](https://github.com/kamp78/xcdat/blob/master/src/xcdat.cpp).
 
 ## Benchmark
 
@@ -50,14 +81,14 @@ WIP
 ## Future work
 
 - Show benchmarks
-- Implement faster operations
+- Support faster operations
 - Clear up source codes
-- Set install opetions
+- Extend results returned from prefix operations
 
 ## References
 
 - J. Aoe. An efficient digital search algorithm by using a double-array structure. _IEEE Transactions on Software Engineering_, 15(9):1066–1077, 1989.
 - N. R. Brisaboa, S. Ladra, and G. Navarro. DACs: Bringing direct access to variable-length codes. _Information Processing & Management_, 49(1):392–404, 2013.
-- S. Kanda, K. Morita, and M. Fuketa. Compressed double-array tries for string dictionaries supporting fast lookup. _Knowledge and Information Systems_, Online first.
+- S. Kanda, K. Morita, and M. Fuketa. Compressed double-array tries for string dictionaries supporting fast lookup. _Knowledge and Information Systems_, 51(3): 1023–1042, 2017.
 - M. A. Martínez-Prieto, N. Brisaboa, R. Cánovas, F. Claude, and G. Navarro. Practical compressed string dictionaries. _Information Systems_, 56:73–108, 2016.
 - S. Yata, M. Oono, K. Morita, M. Fuketa, T. Sumitomo, and J. Aoe. A compact static double-array keeping character codes. _Information Processing & Management_, 43(1):237–247, 2007.
diff --git a/src/BitVector.cpp b/src/BitVector.cpp
index 8efae81..4c3b751 100644
--- a/src/BitVector.cpp
+++ b/src/BitVector.cpp
@@ -1,3 +1,9 @@
 #include "BitVector.hpp"
 
+// The intrinsic header is x86-only, so it is pulled in only when the POPCNT path is selected.
+// XCDAT_USE_POPCNT is expected to reach this file through BitVector.hpp's includes.
+#ifdef XCDAT_USE_POPCNT
+#include <popcntintrin.h>
+#endif
+
 namespace xcdat {
@@ -169,12 +175,28 @@ constexpr uint8_t kSelectTable[9][256] = {
 };
 
+// Returns the number of 1 bits in `bits`: via the POPCNT intrinsic when XCDAT_USE_POPCNT is
+// defined, otherwise via a portable bit-parallel summation.
 uint32_t pop_count(uint32_t bits) {
+#ifdef XCDAT_USE_POPCNT
+  return static_cast<uint32_t>(_mm_popcnt_u32(bits));
+#else
   bits = ((bits & 0xAAAAAAAA) >> 1) + (bits & 0x55555555);
   bits = ((bits & 0xCCCCCCCC) >> 2) + (bits & 0x33333333);
   bits = ((bits >> 4) + bits) & 0x0F0F0F0F;
   bits += bits >> 8;
   bits += bits >> 16;
   return bits & 0x3F;
+#endif
+}
+
+// Deserializes a bit vector from `is`, reading bits_, rank_tips_, select_tips_, size_ and
+// num_1s_ in that order.
+BitVector::BitVector(std::istream& is) {
+  bits_ = Vector<uint32_t>(is);
+  rank_tips_ = Vector<RankTip>(is);
+  select_tips_ = Vector<id_type>(is);
+  size_ = read_value<size_t>(is);
+  num_1s_ = read_value<size_t>(is);
 }
 
 BitVector::BitVector(BitVectorBuilder& builder, bool rank_flag, bool select_flag) {
@@ -182,7 +204,7 @@ BitVector::BitVector(BitVectorBuilder& builder, bool rank_flag, bool select_flag
     return;
   }
 
-  bits_.steal(builder.bits_);
+  bits_ = Vector<uint32_t>(builder.bits_);
   size_ = builder.size_;
   num_1s_ = builder.num_1s_;
 
@@ -201,7 +223,7 @@ BitVector::BitVector(BitVectorBuilder& builder, bool rank_flag, bool select_flag
         }
       }
     }
-    rank_tips_.steal(rank_tips);
+    rank_tips_ = Vector<RankTip>(rank_tips);
   }
 
   // builds select_tips_
@@ -215,7 +237,7 @@ BitVector::BitVector(BitVectorBuilder& builder, bool rank_flag, bool select_flag
       }
     }
     select_tips.push_back(static_cast<id_type>(rank_tips_.size() - 1));
-    select_tips_.steal(select_tips);
+    select_tips_ = Vector<id_type>(select_tips);
   }
 }
 
@@ -229,7 +251,7 @@ id_type BitVector::select(size_t i) const {
   id_type left = 0, right = static_cast<id_type>(rank_tips_.size());
 
   if (!select_tips_.is_empty()) {
-    id_type select_tip_id = i / kNum1sPerTip;
+    auto select_tip_id = static_cast<id_type>(i / kNum1sPerTip);
     left = select_tips_[select_tip_id];
     right = select_tips_[select_tip_id + 1] + 1;
   }
@@ -296,20 +318,4 @@ void BitVector::write(std::ostream& os) const {
   write_value(num_1s_, os);
 }
 
-void BitVector::read(std::istream& is) {
-  bits_.read(is);
-  rank_tips_.read(is);
-  select_tips_.read(is);
-  read_value(size_, is);
-  read_value(num_1s_, is);
-}
-
-void BitVector::swap(BitVector& rhs) {
-  bits_.swap(rhs.bits_);
-  rank_tips_.swap(rhs.rank_tips_);
-  select_tips_.swap(rhs.select_tips_);
-  std::swap(size_, rhs.size_);
-  std::swap(num_1s_, rhs.num_1s_);
-}
-
 } //namespace - xcdat
diff --git a/src/BitVector.hpp b/src/BitVector.hpp
index 6702b26..c0ac054 100644
--- a/src/BitVector.hpp
+++ b/src/BitVector.hpp
@@ -6,13 +6,11 @@
 
 namespace xcdat {
 
-/*
- * Bit vector supporting Rank/Select operations.
- * */
+// Bit vector supporting Rank/Select operations.
 class BitVector {
 public:
   BitVector() {}
-  // builder.width_ is stolen.
+  BitVector(std::istream &is);
   BitVector(BitVectorBuilder& builder, bool rank_flag, bool select_flag);
 
   ~BitVector() {}
@@ -21,8 +19,10 @@ public:
     return (bits_[i / 32] & (1U << (i % 32))) != 0;
   }
 
-  id_type rank(size_t i) const; // the number of 1s in B[0,i).
-  id_type select(size_t i) const; // the position of the i+1 th occurrence.
+  // the number of 1s in B[0,i).
+  id_type rank(size_t i) const;
+  // the position of the i+1 th occurrence.
+  id_type select(size_t i) const;
 
   size_t num_1s() const {
     return num_1s_;
@@ -31,19 +31,20 @@ public:
     return size_ - num_1s_;
   }
 
-  size_t size() const { // the number of bits
+  // the number of bits
+  size_t size() const {
     return size_;
   }
   size_t size_in_bytes() const;
 
   void write(std::ostream &os) const;
-  void read(std::istream &is);
-
-  void swap(BitVector& rhs);
 
   BitVector(const BitVector&) = delete;
   BitVector& operator=(const BitVector&) = delete;
 
+  BitVector(BitVector&&) = default;
+  BitVector& operator=(BitVector&&) = default;
+
 private:
   static constexpr id_type kBitsInR1 = 256;
   static constexpr id_type kBitsInR2 = 32;
@@ -52,7 +53,7 @@ private:
 
   struct RankTip {
     id_type L1;
-    std::array<uint8_t, kR1PerR2> L2;
+    uint8_t L2[kR1PerR2];
   };
 
   Vector<uint32_t> bits_;
diff --git a/src/BitVectorBuilder.hpp b/src/BitVectorBuilder.hpp
index 85545b3..74ff448 100644
--- a/src/BitVectorBuilder.hpp
+++ b/src/BitVectorBuilder.hpp
@@ -1,27 +1,23 @@
 #ifndef XCDAT_BIT_VECTOR_BUILDER_HPP_
 #define XCDAT_BIT_VECTOR_BUILDER_HPP_
 
-#include "xcdatBasics.hpp"
+#include "xcdat_basics.hpp"
 
 namespace xcdat {
 
-/*
- *  Bit pool for building BitVector.
- * */
+// Bit pool for building BitVector.
 class BitVectorBuilder {
 public:
   friend class BitVector;
 
   BitVectorBuilder() {}
-  BitVectorBuilder(size_t size) {
-    resize(size);
-  }
+  BitVectorBuilder(size_t size) { resize(size); }
 
   ~BitVectorBuilder() {}
 
   void push_back(bool bit) {
     if (size_ % 32 == 0) {
-      bits_.emplace_back(0);
+      bits_.push_back(0);
     }
     if (bit) {
       set_bit(size_, true);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
deleted file mode 100644
index b0c9366..0000000
--- a/src/CMakeLists.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-set(HEADER_FILES
-  BitVector.hpp
-  BitVectorBuilder.hpp
-  DacBc.hpp
-  FastDacBc.hpp
-  FitVector.hpp
-  Trie.hpp
-  TrieBuilder.hpp
-  Vector.hpp
-  xcdatBasics.hpp
-  )
-
-set(SOURCE_FILES
-  ${HEADER_FILES}
-  BitVector.cpp
-  DacBc.cpp
-  FitVector.cpp
-  TrieBuilder.cpp
-  FastDacBc.cpp
-  )
-
-add_library(xcdat STATIC ${SOURCE_FILES})
-
-add_executable(xcdat-exe xcdat.cpp)
-set_target_properties(xcdat-exe PROPERTIES OUTPUT_NAME xcdat)
-target_link_libraries(xcdat-exe xcdat)
-
-enable_testing()
-file(GLOB TEST_SOURCES test*.cpp)
-foreach(TEST_SOURCE ${TEST_SOURCES})
-  get_filename_component(TEST_SOURCE_NAME ${TEST_SOURCE} NAME_WE)
-  add_executable(${TEST_SOURCE_NAME} ${TEST_SOURCE})
-  target_link_libraries(${TEST_SOURCE_NAME} xcdat)
-  add_test(${TEST_SOURCE_NAME} ${TEST_SOURCE_NAME})
-endforeach()
diff --git a/src/DacBc.cpp b/src/DacBc.cpp
index 86b0498..be81ab9 100644
--- a/src/DacBc.cpp
+++ b/src/DacBc.cpp
@@ -4,16 +4,29 @@
 
 namespace xcdat {
 
+DacBc::DacBc(std::istream& is) {
+  for (auto& values : values_) {
+    values = Vector<uint8_t>(is);
+  }
+  for (auto& flags : flags_) {
+    flags = BitVector(is);
+  }
+  leaf_flags_ = BitVector(is);
+  links_ = FitVector(is);
+  max_level_ = read_value<uint8_t>(is);
+  num_free_nodes_ = read_value<size_t>(is);
+}
+
 DacBc::DacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags) {
   if (bc.empty()) {
     return;
   }
 
-  std::array<std::vector<uint8_t>, sizeof(id_type)> values;
-  std::array<BitVectorBuilder, sizeof(id_type)> flags;
+  std::vector<uint8_t> values[sizeof(id_type)];
+  BitVectorBuilder flags[sizeof(id_type)];
   std::vector<id_type> links;
 
-  BitVector(leaf_flags, true, false).swap(leaf_flags_);
+  leaf_flags_ = BitVector(leaf_flags, true, false);
 
   values[0].reserve(bc.size() * 2);
   flags[0].reserve(bc.size() * 2);
@@ -56,11 +69,11 @@ DacBc::DacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags) {
 
   // release
   for (uint8_t i = 0; i < max_level_; ++i) {
-    values_[i].steal(values[i]);
-    BitVector(flags[i], true, false).swap(flags_[i]);
+    values_[i] = Vector<uint8_t>(values[i]);
+    flags_[i] = BitVector(flags[i], true, false);
   }
-  values_[max_level_].steal(values[max_level_]);
-  FitVector(links).swap(links_);
+  values_[max_level_] = Vector<uint8_t>(values[max_level_]);
+  links_ = FitVector(links);
 }
 
 size_t DacBc::size_in_bytes() const {
@@ -111,32 +124,6 @@ void DacBc::write(std::ostream& os) const {
   write_value(num_free_nodes_, os);
 }
 
-void DacBc::read(std::istream& is) {
-  for (auto& values : values_) {
-    values.read(is);
-   }
-  for (auto& flags : flags_) {
-    flags.read(is);
-  }
-  leaf_flags_.read(is);
-  links_.read(is);
-  read_value(max_level_, is);
-  read_value(num_free_nodes_, is);
-}
-
-void DacBc::swap(DacBc& rhs) {
-  for (uint32_t i = 0; i < values_.size(); ++i) {
-    values_[i].swap(rhs.values_[i]);
-  }
-  for (uint32_t i = 0; i < flags_.size(); ++i) {
-    flags_[i].swap(rhs.flags_[i]);
-  }
-  leaf_flags_.swap(rhs.leaf_flags_);
-  links_.swap(rhs.links_);
-  std::swap(max_level_, rhs.max_level_);
-  std::swap(num_free_nodes_, rhs.num_free_nodes_);
-}
-
 id_type DacBc::access_(id_type i) const {
   uint8_t level = 0;
   id_type value = values_[level][i];
diff --git a/src/DacBc.hpp b/src/DacBc.hpp
index 548f789..465b62f 100644
--- a/src/DacBc.hpp
+++ b/src/DacBc.hpp
@@ -6,14 +6,13 @@
 
 namespace xcdat {
 
-/*
- * BASE/CHECK representation using byte-oriented DACs.
- * */
+// BASE/CHECK representation using byte-oriented DACs.
 class DacBc {
 public:
   static constexpr id_type kWidthL1 = 8;
 
   DacBc() {}
+  DacBc(std::istream &is);
   DacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags);
 
   ~DacBc() {}
@@ -49,16 +48,16 @@ public:
   void show_stat(std::ostream &os) const;
 
   void write(std::ostream &os) const;
-  void read(std::istream &is);
-
-  void swap(DacBc& rhs);
 
   DacBc(const DacBc&) = delete;
   DacBc& operator=(const DacBc&) = delete;
 
+  DacBc(DacBc&&) = default;
+  DacBc& operator=(DacBc&&) = default;
+
 private:
-  std::array<Vector<uint8_t>, sizeof(id_type)> values_;
-  std::array<BitVector, sizeof(id_type) - 1> flags_;
+  Vector<uint8_t> values_[sizeof(id_type)];
+  BitVector flags_[sizeof(id_type) - 1];
   BitVector leaf_flags_;
   FitVector links_;
   uint8_t max_level_ = 0;
diff --git a/src/FastDacBc.cpp b/src/FastDacBc.cpp
index 8562b71..c77e24a 100644
--- a/src/FastDacBc.cpp
+++ b/src/FastDacBc.cpp
@@ -2,6 +2,21 @@
 
 namespace xcdat {
 
+FastDacBc::FastDacBc(std::istream& is) {
+  values_L1_ = Vector<uint8_t>(is);
+  values_L2_ = Vector<uint16_t>(is);
+  values_L3_ = Vector<uint32_t>(is);
+#ifdef XCDAT_X64
+  values_L4_ = Vector<uint64_t>(is);
+#endif
+  for (auto& ranks : ranks_) {
+    ranks = Vector<id_type>(is);
+  }
+  leaf_flags_ = BitVector(is);
+  links_ = FitVector(is);
+  num_free_nodes_ = read_value<size_t>(is);
+}
+
 FastDacBc::FastDacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags) {
   if (bc.empty()) {
     return;
@@ -10,13 +25,12 @@ FastDacBc::FastDacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags
   std::vector<uint8_t> values_L1;
   std::vector<uint16_t> values_L2;
   std::vector<uint32_t> values_L3;
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   std::vector<uint64_t> values_L4;
 #endif
-  std::array<std::vector<id_type>, kLayers - 1> ranks;
-
+  std::vector<id_type> ranks[kLayers - 1];
   std::vector<id_type> links;
-  BitVector(leaf_flags, true, false).swap(leaf_flags_);
+  leaf_flags_ = BitVector(leaf_flags, true, false);
 
   ranks[0].reserve((bc.size() * 2) / 128);
 
@@ -43,7 +57,7 @@ FastDacBc::FastDacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags
       values_L2.push_back(static_cast<uint16_t>(1 | (pos << 1)));
     }
 
-#ifdef XCDAT64
+#ifdef XCDAT_X64
     if ((values_L3.size() % kBlockLenL3) == 0) {
       ranks[1].push_back(static_cast<id_type>(values_L4.size()));
     }
@@ -82,16 +96,16 @@ FastDacBc::FastDacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags
   }
 
   // release
-  values_L1_.steal(values_L1);
-  values_L2_.steal(values_L2);
-  values_L3_.steal(values_L3);
-#ifdef XCDAT64
-  values_L4_.steal(values_L4);
+  values_L1_ = Vector<uint8_t>(values_L1);
+  values_L2_ = Vector<uint16_t>(values_L2);
+  values_L3_ = Vector<uint32_t>(values_L3);
+#ifdef XCDAT_X64
+  values_L4_ = Vector<uint64_t>(values_L4);
 #endif
-  for (uint8_t j = 0; j < ranks.size(); ++j) {
-    ranks_[j].steal(ranks[j]);
+  for (uint8_t j = 0; j < kLayers - 1; ++j) {
+    ranks_[j] = Vector<id_type>(ranks[j]);
   }
-  FitVector(links).swap(links_);
+  links_ = FitVector(links);
 }
 
 size_t FastDacBc::size_in_bytes() const {
@@ -99,7 +113,7 @@ size_t FastDacBc::size_in_bytes() const {
   ret += values_L1_.size_in_bytes();
   ret += values_L2_.size_in_bytes();
   ret += values_L3_.size_in_bytes();
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   ret += values_L4_.size_in_bytes();
 #endif
   for (auto& ranks : ranks_) {
@@ -120,12 +134,12 @@ void FastDacBc::show_stat(std::ostream& os) const {
   show_size_ratio("\tvalues_L1:", values_L1_.size_in_bytes(), total_size, os);
   show_size_ratio("\tvalues_L2:", values_L2_.size_in_bytes(), total_size, os);
   show_size_ratio("\tvalues_L3:", values_L3_.size_in_bytes(), total_size, os);
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   show_size_ratio("\tvalues_L4:", values_L4_.size_in_bytes(), total_size, os);
 #endif
   show_size_ratio("\tranks_L1: ", ranks_[0].size_in_bytes(), total_size, os);
   show_size_ratio("\tranks_L2: ", ranks_[1].size_in_bytes(), total_size, os);
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   show_size_ratio("\tranks_L3: ", ranks_[2].size_in_bytes(), total_size, os);
 #endif
   show_size_ratio("\tleaves:   ", leaf_flags_.size_in_bytes(), total_size, os);
@@ -136,7 +150,7 @@ void FastDacBc::write(std::ostream& os) const {
   values_L1_.write(os);
   values_L2_.write(os);
   values_L3_.write(os);
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   values_L4_.write(os);
 #endif
   for (auto& ranks : ranks_) {
@@ -147,36 +161,6 @@ void FastDacBc::write(std::ostream& os) const {
   write_value(num_free_nodes_, os);
 }
 
-void FastDacBc::read(std::istream& is) {
-  values_L1_.read(is);
-  values_L2_.read(is);
-  values_L3_.read(is);
-#ifdef XCDAT64
-  values_L4_.read(is);
-#endif
-  for (auto& ranks : ranks_) {
-    ranks.read(is);
-  }
-  leaf_flags_.read(is);
-  links_.read(is);
-  read_value(num_free_nodes_, is);
-}
-
-void FastDacBc::swap(FastDacBc& rhs) {
-  values_L1_.swap(rhs.values_L1_);
-  values_L2_.swap(rhs.values_L2_);
-  values_L3_.swap(rhs.values_L3_);
-#ifdef XCDAT64
-  values_L4_.swap(rhs.values_L4_);
-#endif
-  for (uint32_t j = 0; j < ranks_.size(); ++j) {
-    ranks_[j].swap(rhs.ranks_[j]);
-  }
-  leaf_flags_.swap(rhs.leaf_flags_);
-  links_.swap(rhs.links_);
-  std::swap(num_free_nodes_, rhs.num_free_nodes_);
-}
-
 id_type FastDacBc::access_(id_type i) const {
   uint32_t value = values_L1_[i] >> 1;
   if ((values_L1_[i] & 1U) == 0) {
@@ -188,7 +172,7 @@ id_type FastDacBc::access_(id_type i) const {
     return value;
   }
   i = ranks_[1][i / kBlockLenL2] + value;
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   value = values_L3_[i] >> 1;
   if ((values_L3_[i] & 1U) == 0) {
     return value;
diff --git a/src/FastDacBc.hpp b/src/FastDacBc.hpp
index 6336355..ab17837 100644
--- a/src/FastDacBc.hpp
+++ b/src/FastDacBc.hpp
@@ -9,13 +9,11 @@
 
 namespace xcdat {
 
-/*
- * BASE/CHECK representation using pointer-based byte-oriented DACs.
- * */
+// BASE/CHECK representation using pointer-based byte-oriented DACs.
 class FastDacBc {
 public:
   static constexpr id_type kWidthL1 = 7;
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   static constexpr uint8_t kLayers = 4;
 #else
   static constexpr uint8_t kLayers = 3;
@@ -23,11 +21,12 @@ public:
 
   static constexpr id_type kBlockLenL1 = 1U << 7;
   static constexpr id_type kBlockLenL2 = 1U << 15;
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   static constexpr id_type kBlockLenL3 = 1U << 31;
 #endif
 
   FastDacBc() {}
+  FastDacBc(std::istream& is);
   FastDacBc(const std::vector<BcPair>& bc, BitVectorBuilder& leaf_flags);
 
   ~FastDacBc() {}
@@ -60,26 +59,23 @@ public:
   }
 
   size_t size_in_bytes() const;
-
   void show_stat(std::ostream& os) const;
-
   void write(std::ostream& os) const;
-  void read(std::istream& is);
-
-  void swap(FastDacBc& rhs);
 
   FastDacBc(const FastDacBc&) = delete;
   FastDacBc& operator=(const FastDacBc&) = delete;
 
+  FastDacBc(FastDacBc&&) = default;
+  FastDacBc& operator=(FastDacBc&&) = default;
+
 private:
   Vector<uint8_t> values_L1_;
   Vector<uint16_t> values_L2_;
   Vector<uint32_t> values_L3_;
-#ifdef XCDAT64
+#ifdef XCDAT_X64
   Vector<uint64_t> values_L4_;
 #endif
-  std::array<Vector<id_type>, kLayers - 1> ranks_;
-
+  Vector<id_type> ranks_[kLayers - 1];
   BitVector leaf_flags_;
   FitVector links_;
   size_t num_free_nodes_ = 0;
diff --git a/src/FitVector.cpp b/src/FitVector.cpp
index f0516f4..484af17 100644
--- a/src/FitVector.cpp
+++ b/src/FitVector.cpp
@@ -2,36 +2,50 @@
 
 namespace xcdat {
 
-FitVector::FitVector(const std::vector<id_type>& integers) {
-  if (integers.empty()) {
+// Deserializes the vector from `is`: chunks_, size_, width_ and mask_, in that order.
+FitVector::FitVector(std::istream& is) {
+  chunks_ = Vector<id_type>(is);
+  size_ = read_value<size_t>(is);
+  width_ = read_value<id_type>(is);
+  mask_ = read_value<id_type>(is);
+}
+
+// Packs `values` into fixed-width fields, using just enough bits per element to hold the
+// largest value. An empty input leaves the vector in its default (empty) state.
+FitVector::FitVector(const std::vector<id_type>& values) {
+  if (values.empty()) {
     return;
   }
 
   width_ = 0;
-  auto max_value = *std::max_element(std::begin(integers), std::end(integers));
+  auto max_value = *std::max_element(std::begin(values), std::end(values));
   do {
     ++width_;
     max_value >>= 1;
   } while (max_value);
 
-  size_ = integers.size();
+  size_ = values.size();
-  mask_ = (1U << width_) - 1;
+  // Build the mask in id_type; shifting by the full type width would be undefined behaviour.
+  mask_ = width_ < kChunkWidth ? (id_type{1} << width_) - 1 : ~id_type{0};
   std::vector<id_type> chunks(size_ * width_ / kChunkWidth + 1, 0);
 
   for (id_type i = 0; i < size_; ++i) {
-    const auto chunk_pos = i * width_ / kChunkWidth;
-    const auto offset = i * width_ % kChunkWidth;
+    // Widen before multiplying (as operator[] does) so the bit position cannot wrap in id_type.
+    const auto bit_pos = static_cast<size_t>(i) * width_;
+    const auto chunk_pos = static_cast<id_type>(bit_pos / kChunkWidth);
+    const auto offset = static_cast<id_type>(bit_pos % kChunkWidth);
 
     chunks[chunk_pos] &= ~(mask_ << offset);
-    chunks[chunk_pos] |= (integers[i] & mask_) << offset;
+    chunks[chunk_pos] |= (values[i] & mask_) << offset;
 
     if (kChunkWidth < offset + width_) {
+      // The value straddles two chunks: its high bits go to the start of the next chunk.
       chunks[chunk_pos + 1] &= ~(mask_ >> (kChunkWidth - offset));
-      chunks[chunk_pos + 1] |= (integers[i] & mask_) >> (kChunkWidth - offset);
+      chunks[chunk_pos + 1] |= (values[i] & mask_) >> (kChunkWidth - offset);
     }
   }
 
-  chunks_.steal(chunks);
+  chunks_ = Vector<id_type>(chunks);
 }
 
 size_t FitVector::size_in_bytes() const {
@@ -50,18 +64,4 @@ void FitVector::write(std::ostream& os) const {
   write_value(mask_, os);
 }
 
-void FitVector::read(std::istream& is) {
-  chunks_.read(is);
-  read_value(size_, is);
-  read_value(width_, is);
-  read_value(mask_, is);
-}
-
-void FitVector::swap(FitVector& rhs) {
-  chunks_.swap(rhs.chunks_);
-  std::swap(size_, rhs.size_);
-  std::swap(width_, rhs.width_);
-  std::swap(mask_, rhs.mask_);
-}
-
 } //namespace - xcdat
diff --git a/src/FitVector.hpp b/src/FitVector.hpp
index 0f8369e..2370751 100644
--- a/src/FitVector.hpp
+++ b/src/FitVector.hpp
@@ -5,21 +5,20 @@
 
 namespace xcdat {
 
-/*
- * Compressed integer vector.
- * */
+// Compacted integer vector.
 class FitVector {
 public:
   static constexpr id_type kChunkWidth = sizeof(id_type) * 8;
 
   FitVector() {}
-  FitVector(const std::vector<id_type>& integers);
+  FitVector(std::istream &is);
+  FitVector(const std::vector<id_type>& values);
 
   ~FitVector() {}
 
   id_type operator[](size_t i) const {
-    id_type chunk_pos = i * width_ / kChunkWidth;
-    id_type offset = i * width_ % kChunkWidth;
+    auto chunk_pos = static_cast<id_type>(i * width_ / kChunkWidth);
+    auto offset = static_cast<id_type>(i * width_ % kChunkWidth);
     if (offset + width_ <= kChunkWidth) {
       return (chunks_[chunk_pos] >> offset) & mask_;
     } else {
@@ -32,13 +31,13 @@ public:
   size_t size_in_bytes() const;
 
   void write(std::ostream &os) const;
-  void read(std::istream &is);
-
-  void swap(FitVector& rhs);
 
   FitVector(const FitVector&) = delete;
   FitVector& operator=(const FitVector&) = delete;
 
+  FitVector(FitVector&&) = default;
+  FitVector& operator=(FitVector&&) = default;
+
 private:
   Vector<id_type> chunks_;
   size_t size_ = 0;
diff --git a/src/Trie.hpp b/src/Trie.hpp
index c662f0a..a295502 100644
--- a/src/Trie.hpp
+++ b/src/Trie.hpp
@@ -3,250 +3,242 @@
 
 #include "DacBc.hpp"
 #include "FastDacBc.hpp"
-#include "TrieBuilder.hpp"
 
 namespace xcdat {
 
-constexpr auto kNotFound = static_cast<id_type>(-1);
-constexpr auto kDefaultLimit = static_cast<size_t>(-1);
+constexpr auto kNotFound = kIdMax;
 
-/*
- * Compressed string dictionary using an improved double-array trie. There are two versions for
- * representing BASE/CHECK arrays.
- *  @param Fast: the version of DACs representing BASE/CHECK arrays.
- * */
+// Compressed string dictionary using an improved double-array trie. There are two versions of DACs
+// representing BASE/CHECK arrays, selected with the Fast parameter.
 template<bool Fast>
 class Trie {
 public:
   using Type = Trie<Fast>;
-  using BcType = Conditional<Fast, FastDacBc, DacBc>;
+  using BcType = typename std::conditional<Fast, FastDacBc, DacBc>::type;
 
-  /*
-   * Generic constructor.
-   * */
+  // Default constructor.
   Trie() {}
 
-  /*
-   * Builds the dictionary from given string keys. The keys must be sorted in lexicographical order
-   * without duplication. Any error in construction is reported by TrieBuilder::Exception.
-   *  @param keys: the pairs of key pointers and lengths
-   *  @param binary_mode: whether the keys include the ASCII zero code or not
-   * */
-  Trie(const std::vector<std::pair<const uint8_t*, size_t>>& keys, bool binary_mode = false) {
-    TrieBuilder builder(keys, BcType::kWidthL1, false);
-
-    BcType(builder.bc_, builder.leaf_flags_).swap(bc_);
-    BitVector(builder.term_flags_, true, true).swap(terminal_flags_);
-    tail_.steal(builder.tail_);
-    BitVector(builder.boundary_flags_, false, false).swap(boundary_flags_);
-    alphabet_.steal(builder.alphabet_);
-    table_ = builder.table_;
-
-    num_keys_ = keys.size();
-    max_length_ = builder.max_length_;
-    binary_mode_ = builder.binary_mode_;
+  // Reads the dictionary from an istream.
+  Trie(std::istream& is) {
+    bc_ = BcType(is);
+    terminal_flags_ = BitVector(is);
+    tail_ = Vector<uint8_t>(is);
+    boundary_flags_ = BitVector(is);
+    alphabet_ = Vector<uint8_t>(is);
+    is.read(reinterpret_cast<char*>(table_), 512);
+    num_keys_ = read_value<size_t>(is);
+    max_length_ = read_value<size_t>(is);
+    binary_mode_ = read_value<bool>(is);
   }
 
-  /*
-   * Generic destructor.
-   * */
+  // Destructor.
   ~Trie() {}
 
-  /*
-   * Lookups the ID of a given key.
-   *  @param key: key pointer.
-   *  @param length: key length.
-   *  @returns the ID if the query is registered; otherwise returns kNotFound.
-   * */
+  // Looks up the ID of a given key. If the key is not registered, returns kNotFound.
   id_type lookup(const uint8_t* key, size_t length) const {
+    size_t pos = 0;
     id_type node_id = 0;
-    size_t i = 0;
 
     while (!bc_.is_leaf(node_id)) {
-      if (i == length) {
-        return terminal_flags_[node_id] ? to_string_id_(node_id) : kNotFound;
+      if (pos == length) {
+        return terminal_flags_[node_id] ? to_key_id_(node_id) : kNotFound;
       }
-      const auto child_id = bc_.base(node_id) ^table_[key[i++]];
+
+      const auto child_id = bc_.base(node_id) ^table_[key[pos++]];
       if (bc_.check(child_id) != node_id) {
         return kNotFound;
       }
+
       node_id = child_id;
     }
 
-    if (match_(key + i, length - i, bc_.link(node_id))) {
-      return to_string_id_(node_id);
+    size_t tail_pos = bc_.link(node_id);
+    if (!match_(key, length, pos, tail_pos)) {
+      return kNotFound;
     }
-    return kNotFound;
+
+    return to_key_id_(node_id);
   }
 
-  /*
-   * Decodes the key associated with a given ID.
-   *  @param id: ID.
-   *  @param[out] ret: the decoded key.
-   *  @returns whether the given ID is within the range or not.
-   * */
-  bool access(id_type id, std::vector<uint8_t>& ret) const {
+  // Decodes the key associated with a given ID. The decoded key is appended to 'ret' and its
+  // length is returned. If the ID is out of range, 'ret' is left untouched and 0 is returned.
+  size_t access(id_type id, std::vector<uint8_t>& ret) const {
     if (num_keys_ <= id) {
-      return false;
+      return 0;
     }
 
-    ret.reserve(ret.size() + max_length_);
+    auto orig_size = ret.size();
+    ret.reserve(orig_size + max_length_);
 
     auto node_id = to_node_id_(id);
-    auto link = bc_.is_leaf(node_id) ? bc_.link(node_id) : kNotFound;
+    auto tail_pos = bc_.is_leaf(node_id) ? bc_.link(node_id) : kNotFound;
 
     while (node_id) {
       const auto parent_id = bc_.check(node_id);
-      ret.emplace_back(edge_(parent_id, node_id));
+      ret.push_back(edge_(parent_id, node_id));
       node_id = parent_id;
     }
 
-    std::reverse(std::begin(ret), std::end(ret));
+    std::reverse(std::begin(ret) + orig_size, std::end(ret));
 
-    if (link != 0 && link != kNotFound) {
+    if (tail_pos != 0 && tail_pos != kNotFound) {
       if (binary_mode_) {
         do {
-          ret.push_back(tail_[link]);
-        } while (!boundary_flags_[link++]);
+          ret.push_back(tail_[tail_pos]);
+        } while (!boundary_flags_[tail_pos++]);
       } else {
         do {
-          ret.push_back(tail_[link++]);
-        } while (tail_[link]);
+          ret.push_back(tail_[tail_pos++]);
+        } while (tail_[tail_pos]);
       }
     }
 
-    return true;
+    return ret.size() - orig_size;
   }
 
-  /*
-   * Enumerates the IDs of keys included as prefixes of a given key.
-   *  @param key: key pointer.
-   *  @param length: key length.
-   *  @param[out] ids: IDs of matched keys.
-   *  @param limit: the maximum number of matched keys (optional).
-   *  @returns the number of matched keys.
-   * */
+  // Returns the IDs of keys that are prefixes of a given key. The IDs are appended to 'ids' and
+  // the number is returned. By using 'limit', you can restrict the maximum number of returned IDs.
   size_t common_prefix_lookup(const uint8_t* key, size_t length, std::vector<id_type>& ids,
-                              size_t limit = kDefaultLimit) const {
+                              size_t limit = std::numeric_limits<size_t>::max()) const {
     if (limit == 0) {
       return 0;
     }
 
+    size_t pos = 0, count = 0;
     id_type node_id = 0;
-    size_t i = 0, num_ids = 0;
 
     while (!bc_.is_leaf(node_id)) {
       if (terminal_flags_[node_id]) {
-        ids.push_back(to_string_id_(node_id));
-        ++num_ids;
-        if (num_ids == limit) {
-          return num_ids;
+        ids.push_back(to_key_id_(node_id));
+        if (limit <= ++count) {
+          return count;
         }
       }
-      if (i == length) {
-        return num_ids;
+
+      if (pos == length) {
+        return count;
       }
 
-      const auto child_id = bc_.base(node_id) ^table_[key[i++]];
-
+      const auto child_id = bc_.base(node_id) ^table_[key[pos++]];
       if (bc_.check(child_id) != node_id) {
-        return num_ids;
+        return count;
       }
 
       node_id = child_id;
     }
 
-    if (match_(key + i, length - i, bc_.link(node_id))) {
-      ids.push_back(to_string_id_(node_id));
-      ++num_ids;
+    size_t tail_pos = bc_.link(node_id);
+    if (match_(key, length, pos, tail_pos)) {
+      ids.push_back(to_key_id_(node_id));
+      ++count;
     }
 
-    return num_ids;
+    return count;
   }
 
-  /*
-   * Enumerates the IDs of keys starting with a given key.
-   *  @param key: key pointer.
-   *  @param length: key length.
-   *  @param[out] ids: IDs of matched keys.
-   *  @param limit: the maximum number of matched keys (optional).
-   *  @returns the number of matched keys.
-   * */
+  // Returns the IDs of keys starting with a given key. The IDs are appended to 'ids' and the
+  // number is returned. By using 'limit', you can restrict the maximum number of returned IDs.
   size_t predictive_lookup(const uint8_t* key, size_t length, std::vector<id_type>& ids,
-                           size_t limit = kDefaultLimit) const {
+                           size_t limit = std::numeric_limits<size_t>::max()) const {
     if (limit == 0) {
       return 0;
     }
 
+    size_t pos = 0;
     id_type node_id = 0;
-    size_t i = 0;
 
-    for (; i < length; ++i) {
+    for (; pos < length; ++pos) {
       if (bc_.is_leaf(node_id)) {
-        if (prefix_match_(key + i, length - i, bc_.link(node_id))) {
-          ids.push_back(to_string_id_(node_id));
-          return 1;
+        size_t tail_pos = bc_.link(node_id);
+        if (!prefix_match_(key, length, pos, tail_pos)) {
+          return 0;
         }
-        return 0;
+
+        ids.push_back(to_key_id_(node_id));
+        return 1;
       }
 
-      const auto child_id = bc_.base(node_id) ^table_[key[i]];
+      const auto child_id = bc_.base(node_id) ^table_[key[pos]];
       if (bc_.check(child_id) != node_id) {
         return 0;
       }
+
       node_id = child_id;
     }
 
-    size_t num_ids = 0;
-    enumerate_ids_(node_id, ids, num_ids, limit);
-    return num_ids;
+    size_t count = 0;
+
+    std::vector<std::pair<id_type, size_t>> stack;
+    stack.push_back({node_id, pos});
+
+    while (!stack.empty()) {
+      node_id = stack.back().first;
+      pos = stack.back().second;
+      stack.pop_back();
+
+      if (bc_.is_leaf(node_id)) {
+        ids.push_back(to_key_id_(node_id));
+        if (limit <= ++count) {
+          break;
+        }
+      } else {
+        if (terminal_flags_[node_id]) {
+          ids.push_back(to_key_id_(node_id));
+          if (limit <= ++count) {
+            break;
+          }
+        }
+
+        const auto base = bc_.base(node_id);
+        for (const auto label : alphabet_) {
+          const auto child_id = base ^table_[label];
+          if (bc_.check(child_id) == node_id) {
+            stack.push_back({child_id, pos + 1});
+          }
+        }
+      }
+    }
+
+    return count;
   }
 
-  /*
-   * Gets the number of keys in the dictionary.
-   *  @returns the number of keys in the dictionary.
-   * */
+  // Gets the number of registered keys in the dictionary.
   size_t num_keys() const {
     return num_keys_;
   }
 
-  /*
-   * Gets the size of alphabet drawing keys in the dictionary.
-   *  @returns the alphabet size.
-   * */
+  // Gets the maximum length of registered keys.
+  size_t max_length() const {
+    return max_length_;
+  }
+
+  // Gets the binary mode.
+  bool is_binary_mode() const {
+    return binary_mode_;
+  }
+
+  // Gets the size of the alphabet, i.e. the number of distinct symbols stored in alphabet_.
   size_t alphabet_size() const {
     return alphabet_.size();
   }
 
-  /*
-   * Gets the number of nodes assigned by arranging nodes.
-   * The result is the same as num_used_nodes() + num_free_nodes().
-   *  @returns the number of the nodes.
-   * */
+  // Gets the number of nodes including free nodes.
   size_t num_nodes() const {
     return bc_.num_nodes();
   }
 
-  /*
-   * Gets the number of nodes in the original trie.
-   *  @returns the number of the nodes.
-   * */
+  // Gets the number of nodes in the original trie.
   size_t num_used_nodes() const {
     return bc_.num_used_nodes();
   }
 
-  /*
-   * Gets the number of nodes corresponding to empty elements.
-   *  @returns the number of the nodes.
-   * */
+  // Gets the number of free nodes corresponding to empty elements.
   size_t num_free_nodes() const {
     return bc_.num_free_nodes();
   }
 
-  /*
-   * Computes the size of the structure in bytes.
-   *  @returns the dictionary size in bytes.
-   * */
+  // Computes the output dictionary size in bytes.
   size_t size_in_bytes() const {
     size_t ret = 0;
     ret += bc_.size_in_bytes();
@@ -257,21 +249,11 @@ public:
     ret += sizeof(table_);
     ret += sizeof(num_keys_);
     ret += sizeof(max_length_);
+    ret += sizeof(binary_mode_);
     return ret;
   }
 
-  /*
-   * Gets the binary mode.
-   *  @returns the binary mode.
-   * */
-  bool is_binary_mode() const {
-    return binary_mode_;
-  }
-
-  /*
-   * Reports the dictionary statistics into an ostream.
-   *  @param os: the ostream.
-   * */
+  // Reports the dictionary statistics into an ostream.
   void show_stat(std::ostream& os) const {
     const auto total_size = size_in_bytes();
     os << "basic statistics of xcdat::Trie" << std::endl;
@@ -289,70 +271,39 @@ public:
     bc_.show_stat(os);
   }
 
-  /*
-   * Writes the dictionary into an ostream.
-   *  @param os: the ostream.
-   * */
+  // Writes the dictionary into an ostream.
   void write(std::ostream& os) const {
     bc_.write(os);
     terminal_flags_.write(os);
     tail_.write(os);
     boundary_flags_.write(os);
     alphabet_.write(os);
-    write_value(table_, os);
+    os.write(reinterpret_cast<const char*>(table_), 512);
     write_value(num_keys_, os);
     write_value(max_length_, os);
+    write_value(binary_mode_, os);
   }
 
-  /*
-   * Reads the dictionary from an istream.
-   *  @param is: the istream.
-   * */
-  void read(std::istream& is) {
-    bc_.read(is);
-    terminal_flags_.read(is);
-    tail_.read(is);
-    boundary_flags_.read(is);
-    alphabet_.read(is);
-    read_value(table_, is);
-    read_value(num_keys_, is);
-    read_value(max_length_, is);
-  }
-
-  /*
-   * Swaps the dictionary.
-   *  @param rhs: the dictionary to be swapped.
-   * */
-  void swap(Type& rhs) {
-    bc_.swap(rhs.bc_);
-    terminal_flags_.swap(rhs.terminal_flags_);
-    tail_.swap(rhs.tail_);
-    boundary_flags_.swap(rhs.boundary_flags_);
-    alphabet_.swap(rhs.alphabet_);
-    table_.swap(rhs.table_);
-    std::swap(num_keys_, rhs.num_keys_);
-    std::swap(max_length_, rhs.max_length_);
-  }
-
-  /*
-   * Disallows copy and assignment.
-   * */
+  // Disallows copy construction and copy assignment; the type is move-only.
   Trie(const Trie&) = delete;
   Trie& operator=(const Trie&) = delete;
 
+  Trie(Trie&&) = default;
+  Trie& operator=(Trie&&) = default;
+
 private:
   BcType bc_;
   BitVector terminal_flags_;
   Vector<uint8_t> tail_;
-  BitVector boundary_flags_; // if binary_mode_
+  BitVector boundary_flags_; // used if binary_mode_ == true
   Vector<uint8_t> alphabet_;
-  std::array<uint8_t, 512> table_; // table[table[c] + 256] = c
+  uint8_t table_[512]; // table[table[c] + 256] = c
 
   size_t num_keys_ = 0;
   size_t max_length_ = 0;
   bool binary_mode_ = false;
 
-  id_type to_string_id_(id_type node_id) const {
+  id_type to_key_id_(id_type node_id) const {
     return terminal_flags_.rank(node_id);
   };
 
@@ -364,78 +315,68 @@ private:
     return table_[static_cast<uint8_t>(bc_.base(node_id) ^ child_id) + 256];
   }
 
-  bool match_(const uint8_t* key, size_t length, id_type link) const {
-    if (link == 0) {
-      return length == 0;
+  bool match_(const uint8_t* key, size_t length, size_t pos, size_t tail_pos) const {
+    assert(pos <= length);
+
+    if (pos == length || tail_pos == 0) { // tail_pos 0 is the empty suffix
+      return pos == length && tail_pos == 0;
     }
 
     if (binary_mode_) {
-      for (size_t i = 0; i < length;) {
-        if (tail_[link] != key[i++]) {
+      do {
+        if (key[pos] != tail_[tail_pos]) {
           return false;
         }
-        if (boundary_flags_[link++]) {
-          return i == length;
+        ++pos;
+        if (boundary_flags_[tail_pos]) {
+          return pos == length;
         }
-      }
+        ++tail_pos;
+      } while (pos < length);
       return false;
     } else {
-      auto tail = tail_.data() + link;
-      for (size_t i = 0; i < length; ++i) {
-        if (tail[i] == '\0' || key[i] != tail[i]) {
+      do {
+        if (!tail_[tail_pos] || key[pos] != tail_[tail_pos]) {
           return false;
         }
-      }
-      return tail[length] == '\0';
+        ++pos;
+        ++tail_pos;
+      } while (pos < length);
+      return !tail_[tail_pos];
     }
   }
 
-  bool prefix_match_(const uint8_t* key, size_t length, id_type link) const {
-    if (link == 0) {
-      return length == 0;
+  bool prefix_match_(const uint8_t* key, size_t length, size_t pos, size_t tail_pos) const {
+    assert(pos < length);
+
+    if (tail_pos == 0) {
+      return false;
     }
 
     if (binary_mode_) {
-      for (size_t i = 0; i < length;) {
-        if (tail_[link] != key[i++]) {
+      do {
+        if (key[pos] != tail_[tail_pos]) {
           return false;
         }
-        if (boundary_flags_[link++]) {
-          return i == length;
+        ++pos;
+        if (boundary_flags_[tail_pos]) {
+          return pos == length;
         }
-      }
+        ++tail_pos;
+      } while (pos < length);
     } else {
-      auto tail = tail_.data() + link;
-      for (size_t i = 0; i < length; ++i) {
-        if (tail[i] == '\0' || key[i] != tail[i]) {
+      do {
+        if (key[pos] != tail_[tail_pos] || !tail_[tail_pos]) {
           return false;
         }
-      }
+        ++pos;
+        ++tail_pos;
+      } while (pos < length);
     }
-
     return true;
   }
 
-  void enumerate_ids_(id_type node_id, std::vector<id_type>& ids,
-                      size_t& num_ids, size_t limit) const {
-    if (terminal_flags_[node_id]) {
-      ids.push_back(to_string_id_(node_id));
-      ++num_ids;
-      if (bc_.is_leaf(node_id)) {
-        return;
-      }
-    }
-    const auto base = bc_.base(node_id);
-    for (const auto label : alphabet_) {
-      if (num_ids == limit) {
-        break;
-      }
-      const auto child_id = base ^table_[label];
-      if (bc_.check(child_id) == node_id) {
-        enumerate_ids_(child_id, ids, num_ids, limit);
-      }
-    }
-  }
+  friend class TrieBuilder;
 };
 
 } //namespace - xcdat
diff --git a/src/TrieBuilder.cpp b/src/TrieBuilder.cpp
index d09cb93..22a4097 100644
--- a/src/TrieBuilder.cpp
+++ b/src/TrieBuilder.cpp
@@ -3,13 +3,12 @@
 
 namespace xcdat {
 
-TrieBuilder::TrieBuilder(const std::vector<std::pair<const uint8_t*, size_t>>& keys,
-                         id_type width_L1, bool binary_mode)
+TrieBuilder::TrieBuilder(const std::vector<Key>& keys, id_type width_L1, bool binary_mode)
   : keys_(keys), block_size_(1U << width_L1), width_L1_(width_L1), binary_mode_(binary_mode) {
   if (keys_.empty()) {
     throw TrieBuilder::Exception("The input data is empty.");
   }
-  if (kIdUpper < keys_.size()) {
+  if (kIdMax < keys_.size()) {
     throw TrieBuilder::Exception("Key ID range error.");
   }
 
@@ -18,6 +17,7 @@ TrieBuilder::TrieBuilder(const std::vector<std::pair<const uint8_t*, size_t>>& k
     while (init_capacity < keys_.size()) {
       init_capacity <<= 1;
     }
+
     bc_.reserve(init_capacity);
     leaf_flags_.reserve(init_capacity);
     term_flags_.reserve(init_capacity);
@@ -40,7 +40,7 @@ TrieBuilder::TrieBuilder(const std::vector<std::pair<const uint8_t*, size_t>>& k
   bc_[0].check = 255;
 
   for (id_type i = 0; i < 256; i += block_size_) {
-    heads_.emplace_back(i);
+    heads_.push_back(i);
   }
 
   use_(0);
@@ -55,24 +55,18 @@ TrieBuilder::TrieBuilder(const std::vector<std::pair<const uint8_t*, size_t>>& k
 
 void TrieBuilder::build_table_() {
   using tb_type = std::pair<uint8_t, size_t>;
-  std::array<tb_type, 256> table_builder;
+  tb_type table_builder[256];
 
   for (uint32_t i = 0; i < 256; ++i) {
     table_builder[i] = {static_cast<uint8_t>(i), 0};
   }
 
-  auto char_count = [&](const std::pair<const uint8_t*, size_t>& key) {
-    for (size_t i = 0; i < key.second; ++i) {
-      ++table_builder[key.first[i]].second;
+  max_length_ = 0;
+  for (size_t i = 0; i < keys_.size(); ++i) {
+    for (size_t j = 0; j < keys_[i].length; ++j) {
+      ++table_builder[keys_[i].ptr[j]].second;
     }
-  };
-
-  char_count(keys_[0]);
-  max_length_ = keys_[0].second;
-
-  for (size_t i = 1; i < keys_.size(); ++i) {
-    char_count(keys_[i]);
-    max_length_ = std::max(max_length_, keys_[i].second);
+    max_length_ = std::max(max_length_, keys_[i].length);
   }
 
   if (table_builder[0].second) { // including '\0'
@@ -87,7 +81,9 @@ void TrieBuilder::build_table_() {
   alphabet_.shrink_to_fit();
 
   std::sort(std::begin(table_builder), std::end(table_builder),
-            [](const tb_type& lhs, const tb_type& rhs) { return lhs.second > rhs.second; });
+            [](const tb_type& lhs, const tb_type& rhs) {
+              return lhs.second > rhs.second;
+            });
 
   for (uint32_t i = 0; i < 256; ++i) {
     table_[table_builder[i].first] = static_cast<uint8_t>(i);
@@ -99,7 +95,7 @@ void TrieBuilder::build_table_() {
 }
 
 void TrieBuilder::build_bc_(size_t begin, size_t end, size_t depth, id_type node_id) {
-  if (keys_[begin].second == depth) {
+  if (keys_[begin].length == depth) {
     term_flags_.set_bit(node_id, true);
     if (++begin == end) { // without link?
       bc_[node_id].base = 0; // with an empty suffix
@@ -110,15 +106,15 @@ void TrieBuilder::build_bc_(size_t begin, size_t end, size_t depth, id_type node
     term_flags_.set_bit(node_id, true);
     leaf_flags_.set_bit(node_id, true);
     auto& key = keys_[begin];
-    suffixes_.push_back({{key.first + depth, key.second - depth}, node_id});
+    suffixes_.push_back({{key.ptr + depth, key.length - depth}, node_id});
     return;
   }
 
   { // fetching edges
     edges_.clear();
-    auto label = keys_[begin].first[depth];
+    auto label = keys_[begin].ptr[depth];
     for (auto str_id = begin + 1; str_id < end; ++str_id) {
-      const auto _label = keys_[str_id].first[depth];
+      const auto _label = keys_[str_id].ptr[depth];
       if (label != _label) {
         if (_label < label) {
           throw TrieBuilder::Exception("The input data is not in lexicographical order.");
@@ -145,9 +141,9 @@ void TrieBuilder::build_bc_(size_t begin, size_t end, size_t depth, id_type node
 
   // following the children
   auto _begin = begin;
-  auto label = keys_[begin].first[depth];
+  auto label = keys_[begin].ptr[depth];
   for (auto _end = begin + 1; _end < end; ++_end) {
-    const auto _label = keys_[_end].first[depth];
+    const auto _label = keys_[_end].ptr[depth];
     if (label != _label) {
       build_bc_(_begin, _end, depth + 1, base ^ table_[label]);
       label = _label;
@@ -159,10 +155,11 @@ void TrieBuilder::build_bc_(size_t begin, size_t end, size_t depth, id_type node
 
 // The algorithm is inspired by marisa-trie
 void TrieBuilder::build_tail_() {
-  auto cmp = [](const Suffix& lhs, const Suffix& rhs) {
-    return std::lexicographical_compare(lhs.rbegin(), lhs.rend(), rhs.rbegin(), rhs.rend());
-  };
-  std::sort(std::begin(suffixes_), std::end(suffixes_), cmp);
+  std::sort(std::begin(suffixes_), std::end(suffixes_),
+            [](const Suffix& lhs, const Suffix& rhs) {
+              return std::lexicographical_compare(lhs.rbegin(), lhs.rend(),
+                                                  rhs.rbegin(), rhs.rend());
+            });
 
   // For empty suffixes
   tail_.emplace_back('\0');
@@ -190,7 +187,7 @@ void TrieBuilder::build_tail_() {
     } else { // append
       bc_[cur.node_id].base = static_cast<id_type>(tail_.size());
       for (size_t j = 0; j < cur.length(); ++j) {
-        tail_.push_back(cur.string.first[j]);
+        tail_.push_back(cur.str.ptr[j]);
       }
       if (binary_mode_) {
         for (size_t j = 1; j < cur.length(); ++j) {
@@ -200,18 +197,16 @@ void TrieBuilder::build_tail_() {
       } else {
         tail_.emplace_back('\0');
       }
-      if (kIdUpper < tail_.size()) {
+      if (kIdMax < tail_.size()) {
         throw TrieBuilder::Exception("TAIL address range error.");
       }
     }
     prev = &cur;
   }
-
-  tail_.shrink_to_fit();
 }
 
 void TrieBuilder::expand_() {
-  if (kIdUpper < bc_.size() + 256) {
+  if (kIdMax < bc_.size() + 256) {
     throw TrieBuilder::Exception("Node ID range error.");
   }
 
@@ -234,7 +229,7 @@ void TrieBuilder::expand_() {
   }
 
   for (auto i = old_size; i < new_size; i += block_size_) {
-    heads_.emplace_back(i);
+    heads_.push_back(i);
   }
 
   const auto block_id = old_size / 256;
diff --git a/src/TrieBuilder.hpp b/src/TrieBuilder.hpp
index 6c471c0..5860a92 100644
--- a/src/TrieBuilder.hpp
+++ b/src/TrieBuilder.hpp
@@ -1,36 +1,50 @@
 #ifndef XCDAT_TRIE_BUILDER_HPP_
 #define XCDAT_TRIE_BUILDER_HPP_
 
-#include "BitVectorBuilder.hpp"
+#include "Trie.hpp"
 
 namespace xcdat {
 
-// prototype declaration for friend
-template<bool> class Trie;
-
-/*
- * Double-array trie builder.
- * */
+// Double-array trie builder.
 class TrieBuilder {
 public:
-  friend class Trie<true>;
-  friend class Trie<false>;
-
   // for avoiding undefined traversal
   static constexpr id_type kTabooId = 1;
   // inspired by darts-clone
   static constexpr id_type kFreeBlocks = 16;
 
-  TrieBuilder(const std::vector<std::pair<const uint8_t*, size_t>>& keys,
-              id_type width_L1, bool binary_mode);
-  ~TrieBuilder() {}
+  struct Key {
+    const uint8_t* ptr;
+    size_t length;
+  };
 
-  /*
-   * Exception class for xcdat::TrieBuilder
-   * */
+  // Builds the dictionary from given string keys. The keys must be sorted in lexicographical order
+  // without duplication. Any error in construction is reported by TrieBuilder::Exception. If the
+  // keys include the ASCII zero code, pass binary_mode = true.
+  template<bool Fast>
+  static Trie<Fast> build(const std::vector<Key>& keys, bool binary_mode = false) {
+    TrieBuilder builder(keys, Trie<Fast>::BcType::kWidthL1, binary_mode);
+
+    Trie<Fast> trie;
+
+    trie.bc_ = typename Trie<Fast>::BcType(builder.bc_, builder.leaf_flags_);
+    trie.terminal_flags_ = BitVector(builder.term_flags_, true, true);
+    trie.tail_ = Vector<uint8_t>(builder.tail_);
+    trie.boundary_flags_ = BitVector(builder.boundary_flags_, false, false);
+    trie.alphabet_ = builder.alphabet_;
+    std::swap(trie.table_, builder.table_);
+
+    trie.num_keys_ = keys.size();
+    trie.max_length_ = builder.max_length_;
+    trie.binary_mode_ = builder.binary_mode_;
+
+    return trie;
+  }
+
+  // Exception class for xcdat::TrieBuilder
   class Exception : public std::exception {
   public:
-    explicit Exception(const std::string& message) : message_(message) {}
+    explicit Exception(std::string message) : message_(message) {}
     virtual ~Exception() throw() {}
 
     // overrides what() of std::exception.
@@ -47,25 +61,25 @@ public:
 
 private:
   struct Suffix {
-    std::pair<const uint8_t*, size_t> string;
+    Key str;
     id_type node_id;
 
     size_t length() const {
-      return string.second;
+      return str.length;
     }
     uint8_t operator[](size_t i) const {
-      return string.first[length() - i - 1];
+      return str.ptr[length() - i - 1];
     }
 
     std::reverse_iterator<const uint8_t*> rbegin() const {
-      return std::reverse_iterator<const uint8_t*>(string.first + string.second);
+      return std::reverse_iterator<const uint8_t*>(str.ptr + str.length);
     }
     std::reverse_iterator<const uint8_t*> rend() const {
-      return std::reverse_iterator<const uint8_t*>(string.first);
+      return std::reverse_iterator<const uint8_t*>(str.ptr);
     }
   };
 
-  const std::vector<std::pair<const uint8_t*, size_t>>& keys_;
+  const std::vector<Key>& keys_;
   const id_type block_size_;
   const id_type width_L1_;
 
@@ -74,12 +88,10 @@ private:
   std::vector<BcPair> bc_;
   BitVectorBuilder leaf_flags_;
   BitVectorBuilder term_flags_;
-
   std::vector<uint8_t> tail_;
   BitVectorBuilder boundary_flags_;
-
   std::vector<uint8_t> alphabet_;
-  std::array<uint8_t, 512> table_;
+  uint8_t table_[512];
 
   std::vector<bool> used_flags_;
   std::vector<uint8_t> edges_;
@@ -88,6 +100,9 @@ private:
 
   size_t max_length_ = 0;
 
+  TrieBuilder(const std::vector<Key>& keys, id_type width_L1, bool binary_mode);
+  ~TrieBuilder() {}
+
   void build_table_();
   void build_bc_(size_t begin, size_t end, size_t depth, id_type node_id);
   void build_tail_();
diff --git a/src/Vector.hpp b/src/Vector.hpp
index a11c369..835ba86 100644
--- a/src/Vector.hpp
+++ b/src/Vector.hpp
@@ -5,33 +5,37 @@
 #ifndef XCDAT_VECTOR_HPP
 #define XCDAT_VECTOR_HPP
 
-#include "xcdatBasics.hpp"
+#include "xcdat_basics.hpp"
 
 namespace xcdat {
 
-/*
- * Simple vector
- * */
+// Simple vector
 template<typename T>
 class Vector {
 public:
   Vector() {
-    static_assert(!Is_same<T, bool>(), "Type bool is not supported.");
-    static_assert(Is_pod<T>(), "T is not POD.");
+    static_assert(!std::is_same<T, bool>::value, "Type bool is not supported.");
+    static_assert(std::is_pod<T>::value, "T is not POD.");
   }
 
-  ~Vector() {}
+  Vector(std::istream& is) { // Reads the element count, then the raw element bytes.
+    size_ = read_value<size_t>(is);
+    vec_.resize(size_);
+    is.read(reinterpret_cast<char*>(vec_.data()), sizeof(T) * size_); // data() is valid if empty
+    data_ = vec_.data();
+  }
 
-  void steal(std::vector<T>& vec) {
-    Vector().swap(*this);
+  Vector(std::vector<T>& vec) {
     if (vec.size() != vec.capacity()) {
       vec.shrink_to_fit();
     }
-    buf_.swap(vec);
-    data_ = buf_.data();
-    size_ = buf_.size();
+    vec_ = std::move(vec);
+    data_ = vec_.data();
+    size_ = vec_.size();
   }
 
+  ~Vector() {}
+
   const T& operator[](size_t i) const {
     return data_[i];
   }
@@ -63,27 +67,16 @@ public:
     os.write(reinterpret_cast<const char*>(data_), sizeof(T) * size_);
   }
 
-  void read(std::istream& is) {
-    Vector().swap(*this);
-    read_value(size_, is);
-    buf_.resize(size_);
-    is.read(reinterpret_cast<char*>(&buf_[0]), sizeof(T) * size_);
-    data_ = buf_.data();
-  }
-
-  void swap(Vector<T>& rhs) {
-    std::swap(data_, rhs.data_);
-    std::swap(size_, rhs.size_);
-    buf_.swap(rhs.buf_);
-  }
-
   Vector(const Vector&) = delete;
   Vector& operator=(const Vector&) = delete;
 
+  Vector(Vector&&) = default;
+  Vector& operator=(Vector&&) = default;
+
 private:
   const T* data_ = nullptr;
   size_t size_ = 0;
-  std::vector<T> buf_;
+  std::vector<T> vec_;
 };
 
 }
diff --git a/src/testTrie.cpp b/src/testTrie.cpp
index 32aeffd..9eea59c 100644
--- a/src/testTrie.cpp
+++ b/src/testTrie.cpp
@@ -6,7 +6,7 @@
 #include <random>
 #include <cstring>
 
-#include "Trie.hpp"
+#include "TrieBuilder.hpp"
 
 using namespace xcdat;
 
@@ -15,6 +15,8 @@ namespace {
 constexpr size_t kNumStrings = 1U << 10;
 constexpr size_t kMaxLength = 20;
 
+using Key = TrieBuilder::Key;
+
 void to_set(std::vector<std::string>& keys) {
   std::sort(std::begin(keys), std::end(keys));
   keys.erase(std::unique(std::begin(keys), std::end(keys)), std::end(keys));
@@ -32,19 +34,20 @@ std::string make_key() {
   return key;
 }
 
-void make_keys(std::vector<std::string>& keys) {
-  keys.clear();
+std::vector<std::string> make_keys() {
+  std::vector<std::string> keys;
   keys.reserve(kNumStrings);
 
   for (size_t i = 0; i < kNumStrings; ++i) {
-    keys.emplace_back(make_key());
+    keys.push_back(make_key());
   }
 
   to_set(keys);
+  return keys;
 }
 
-void make_other_keys(const std::vector<std::string>& keys, std::vector<std::string>& others) {
-  others.clear();
+std::vector<std::string> make_other_keys(const std::vector<std::string>& keys) {
+  std::vector<std::string> others;
 
   for (size_t i = 0; i < kNumStrings; ++i) {
     auto string = make_key();
@@ -54,47 +57,47 @@ void make_other_keys(const std::vector<std::string>& keys, std::vector<std::stri
   }
 
   to_set(others);
+  return others;
 }
 
-template <bool Fast>
-void test_build(Trie<Fast>& trie, const std::vector<std::pair<const uint8_t*, size_t>>& keys,
-                bool binary_mode) {
+template<bool Fast>
+void test_build(Trie<Fast>& trie, const std::vector<Key>& keys, bool binary_mode) {
   std::cerr << "Construction -> build()" << std::endl;
 
-  Trie<Fast>(keys, binary_mode).swap(trie);
+  trie = TrieBuilder::build<Fast>(keys, binary_mode);
   assert(trie.num_keys() == keys.size());
 }
 
-template <bool Fast>
-void test_basic_operations(const Trie<Fast>& trie,
-                           const std::vector<std::pair<const uint8_t*, size_t>>& keys,
-                           const std::vector<std::pair<const uint8_t*, size_t>>& others) {
+template<bool Fast>
+void test_basic_operations(const Trie<Fast>& trie, const std::vector<Key>& keys,
+                           const std::vector<Key>& others) {
   std::cerr << "Basic operations -> lookup() and access()" << std::endl;
 
   for (auto& key : keys) {
-    const auto id = trie.lookup(key.first, key.second);
+    const auto id = trie.lookup(key.ptr, key.length);
     assert(id != kNotFound);
+
     std::vector<uint8_t> ret;
-    assert(trie.access(id, ret));
-    assert(ret.size() == key.second);
-    assert(std::memcmp(ret.data(), key.first, key.second) == 0);
+    trie.access(id, ret);
+
+    assert(ret.size() == key.length);
+    assert(std::memcmp(ret.data(), key.ptr, key.length) == 0);
   }
 
   for (auto& other : others) {
-    const auto id = trie.lookup(other.first, other.second);
+    const auto id = trie.lookup(other.ptr, other.length);
     assert(id == kNotFound);
   }
 }
 
-template <bool Fast>
-void test_prefix_operations(const Trie<Fast>& trie,
-                            const std::vector<std::pair<const uint8_t*, size_t>>& keys,
-                            const std::vector<std::pair<const uint8_t*, size_t>>& others) {
+template<bool Fast>
+void test_prefix_operations(const Trie<Fast>& trie, const std::vector<Key>& keys,
+                            const std::vector<Key>& others) {
   std::cerr << "Prefix operations -> common_prefix_lookup()" << std::endl;
 
   for (auto& key : keys) {
     std::vector<id_type> ids;
-    auto num_ids = trie.common_prefix_lookup(key.first, key.second, ids);
+    auto num_ids = trie.common_prefix_lookup(key.ptr, key.length, ids);
 
     assert(1 <= num_ids);
     assert(num_ids <= kMaxLength);
@@ -102,65 +105,88 @@ void test_prefix_operations(const Trie<Fast>& trie,
 
     for (auto id : ids) {
       std::vector<uint8_t> ret;
-      assert(trie.access(id, ret));
-      assert(ret.size() <= key.second);
+      trie.access(id, ret);
+      assert(ret.size() <= key.length);
     }
+
+    auto limit = num_ids / 2;
+    auto new_num_ids = trie.common_prefix_lookup(key.ptr, key.length, ids, limit);
+
+    assert(new_num_ids == limit);
+    assert(num_ids + new_num_ids == ids.size());
   }
 
   for (auto& other : others) {
     std::vector<id_type> ids;
-    auto num_ids = trie.common_prefix_lookup(other.first, other.second, ids);
+    auto num_ids = trie.common_prefix_lookup(other.ptr, other.length, ids);
 
     assert(num_ids <= kMaxLength);
     assert(num_ids == ids.size());
 
     for (auto id : ids) {
       std::vector<uint8_t> ret;
-      assert(trie.access(id, ret));
-      assert(ret.size() < other.second);
+      trie.access(id, ret);
+      assert(ret.size() < other.length);
     }
+
+    auto limit = num_ids / 2;
+    auto new_num_ids = trie.common_prefix_lookup(other.ptr, other.length, ids, limit);
+
+    assert(new_num_ids == limit);
+    assert(num_ids + new_num_ids == ids.size());
   }
 }
 
-template <bool Fast>
-void test_predictive_operations(const Trie<Fast>& trie,
-                                const std::vector<std::pair<const uint8_t*, size_t>>& keys,
-                                const std::vector<std::pair<const uint8_t*, size_t>>& others) {
+template<bool Fast>
+void test_predictive_operations(const Trie<Fast>& trie, const std::vector<Key>& keys,
+                                const std::vector<Key>& others) {
   std::cerr << "Predictive operations -> predictive_lookup()" << std::endl;
 
   for (auto& key : keys) {
     std::vector<id_type> ids;
-    auto num_ids = trie.predictive_lookup(key.first, key.second, ids);
+    auto num_ids = trie.predictive_lookup(key.ptr, key.length, ids);
 
     assert(1 <= num_ids);
     assert(num_ids == ids.size());
 
     for (auto id : ids) {
       std::vector<uint8_t> ret;
-      assert(trie.access(id, ret));
-      assert(key.second <= ret.size());
+      trie.access(id, ret);
+      assert(key.length <= ret.size());
     }
+
+    auto limit = num_ids / 2;
+    auto new_num_ids = trie.predictive_lookup(key.ptr, key.length, ids, limit);
+
+    assert(new_num_ids == limit);
+    assert(num_ids + new_num_ids == ids.size());
   }
 
   for (auto& other : others) {
     std::vector<id_type> ids;
-    auto num_ids = trie.predictive_lookup(other.first, other.second, ids);
+    auto num_ids = trie.predictive_lookup(other.ptr, other.length, ids);
 
     assert(num_ids == ids.size());
 
     for (auto id : ids) {
       std::vector<uint8_t> ret;
-      assert(trie.access(id, ret));
-      assert(other.second < ret.size());
+      trie.access(id, ret);
+      assert(other.length < ret.size());
     }
+
+    auto limit = num_ids / 2;
+    auto new_num_ids = trie.predictive_lookup(other.ptr, other.length, ids, limit);
+
+    assert(new_num_ids == limit);
+    assert(num_ids + new_num_ids == ids.size());
   }
 }
 
-template <bool Fast>
+template<bool Fast>
 void test_io(const Trie<Fast>& trie) {
   std::cerr << "File I/O -> write() and read()" << std::endl;
 
-  const char* file_name = "test.trie";
+  const char* file_name = "index";
   {
     std::ofstream ofs{file_name};
     trie.write(ofs);
@@ -174,10 +200,12 @@ void test_io(const Trie<Fast>& trie) {
   Trie<Fast> _trie;
   {
     std::ifstream ifs{file_name};
-    _trie.read(ifs);
+    _trie = Trie<Fast>(ifs);
   }
 
   assert(trie.num_keys() == _trie.num_keys());
+  assert(trie.max_length() == _trie.max_length());
+  assert(trie.is_binary_mode() == _trie.is_binary_mode());
   assert(trie.alphabet_size() == _trie.alphabet_size());
   assert(trie.num_nodes() == _trie.num_nodes());
   assert(trie.num_used_nodes() == _trie.num_used_nodes());
@@ -185,14 +213,13 @@ void test_io(const Trie<Fast>& trie) {
   assert(trie.size_in_bytes() == _trie.size_in_bytes());
 }
 
-template <bool Fast>
-void test_trie(const std::vector<std::pair<const uint8_t*, size_t>>& strings,
-               const std::vector<std::pair<const uint8_t*, size_t>>& others) {
+template<bool Fast>
+void test_trie(const std::vector<Key>& strings, const std::vector<Key>& others) {
   for (int i = 0; i < 2; ++i) {
     std::cerr << "** " << (i % 2 ? "Binary" : "Text") << " Mode **" << std::endl;
     std::cerr << "Testing xcdat::Trie<" << (Fast ? "true" : "false") << ">" << std::endl;
     Trie<Fast> trie;
-    test_build(trie, strings, i % 2 == 0);
+    test_build(trie, strings, i % 2 != 0);
     test_basic_operations(trie, strings, others);
     test_prefix_operations(trie, strings, others);
     test_predictive_operations(trie, strings, others);
@@ -204,19 +231,16 @@ void test_trie(const std::vector<std::pair<const uint8_t*, size_t>>& strings,
 } // namespace
 
 int main() {
-  std::vector<std::string> keys_buffer;
-  make_keys(keys_buffer);
+  auto keys_buffer = make_keys();
+  auto others_buffer = make_other_keys(keys_buffer);
 
-  std::vector<std::string> others_buffer;
-  make_other_keys(keys_buffer, others_buffer);
-
-  std::vector<std::pair<const uint8_t*, size_t>> keys(keys_buffer.size());
+  std::vector<Key> keys(keys_buffer.size());
   for (size_t i = 0; i < keys.size(); ++i) {
     keys[i] = {reinterpret_cast<const uint8_t*>(keys_buffer[i].c_str()),
                keys_buffer[i].length()};
   }
 
-  std::vector<std::pair<const uint8_t*, size_t>> others(others_buffer.size());
+  std::vector<Key> others(others_buffer.size());
   for (size_t i = 0; i < others.size(); ++i) {
     others[i] = {reinterpret_cast<const uint8_t*>(others_buffer[i].c_str()),
                  others_buffer[i].length()};
diff --git a/src/testVector.cpp b/src/testVector.cpp
index 22fb57b..9750ea1 100644
--- a/src/testVector.cpp
+++ b/src/testVector.cpp
@@ -28,7 +28,7 @@ void test_bit_vector() {
     for (size_t i = 0; i < kSize; ++i) {
       builder.push_back(orig_bit_vector[i]);
     }
-    BitVector(builder, true, true).swap(bit_vector);
+    bit_vector = BitVector(builder, true, true);
   }
 
   assert(bit_vector.size() == kSize);
diff --git a/src/xcdat.cpp b/src/xcdat.cpp
index 2a74d32..48d2d21 100644
--- a/src/xcdat.cpp
+++ b/src/xcdat.cpp
@@ -2,7 +2,7 @@
 #include <iostream>
 #include <random>
 
-#include "Trie.hpp"
+#include "TrieBuilder.hpp"
 
 using namespace xcdat;
 
@@ -10,12 +10,14 @@ namespace {
 
 constexpr uint32_t kRuns = 10;
 
-enum class Times {
-  SEC, MILLI, MICRO
-};
+using Key = TrieBuilder::Key;
 
 class StopWatch {
 public:
+  enum Times {
+    SEC, MILLI, MICRO
+  };
+
   StopWatch() : tp_(std::chrono::high_resolution_clock::now()) {}
   ~StopWatch() {}
 
@@ -57,8 +59,7 @@ size_t read_keys(const char* file_name, std::vector<std::string>& keys) {
   return size;
 }
 
-void extract_pairs(const std::vector<std::string>& keys,
-                   std::vector<std::pair<const uint8_t*, size_t>>& pairs) {
+void extract_pairs(const std::vector<std::string>& keys, std::vector<Key>& pairs) {
   pairs.clear();
   pairs.resize(keys.size());
   for (size_t i = 0; i < keys.size(); ++i) {
@@ -70,7 +71,7 @@ void show_usage(std::ostream& os) {
   os << "xcdat build <type> <key> <dict>" << std::endl;
   os << "\t<type>\t'1' for DACs; '2' for FDACs." << std::endl;
   os << "\t<key> \tinput file of a set of keys." << std::endl;
-  os << "\t<dict>\toutput file for storing the dictionary." << std::endl;
+  os << "\t<dict>\toutput file of the dictionary." << std::endl;
   os << "xcdat query <type> <dict> <limit>" << std::endl;
   os << "\t<type> \t'1' for DACs; '2' for FDACs." << std::endl;
   os << "\t<dict> \tinput file of the dictionary." << std::endl;
@@ -88,22 +89,22 @@ int build(std::vector<std::string>& args) {
     return 1;
   }
 
-  std::vector<std::string> keys;
-  auto raw_size = read_keys(args[2].c_str(), keys);
+  std::vector<std::string> strs;
+  auto raw_size = read_keys(args[2].c_str(), strs);
 
   if (raw_size == 0) {
     std::cerr << "open error : " << args[2] << std::endl;
     return 1;
   }
 
-  std::vector<std::pair<const uint8_t*, size_t>> pairs;
-  extract_pairs(keys, pairs);
+  std::vector<Key> keys;
+  extract_pairs(strs, keys);
 
   Trie<Fast> trie;
   try {
     StopWatch sw;
-    Trie<Fast>(pairs).swap(trie);
-    std::cout << "constr. time: " << sw(Times::SEC) << " sec" << std::endl;
+    trie = TrieBuilder::build<Fast>(keys);
+    std::cout << "constr. time: " << sw(StopWatch::SEC) << " sec" << std::endl;
   } catch (const xcdat::TrieBuilder::Exception& ex) {
     std::cerr << ex.what() << std::endl;
     return 1;
@@ -138,7 +139,7 @@ int query(std::vector<std::string>& args) {
       std::cerr << "open error : " << args[2] << std::endl;
       return 1;
     }
-    trie.read(ifs);
+    trie = Trie<Fast>(ifs);
   }
 
   size_t limit = 10;
@@ -172,20 +173,26 @@ int query(std::vector<std::string>& args) {
     ids.clear();
     trie.common_prefix_lookup(key, length, ids);
     std::cout << ids.size() << " found" << std::endl;
+
     for (size_t i = 0; i < std::min(ids.size(), limit); ++i) {
       buf.clear();
       trie.access(ids[i], buf);
-      std::cout << ids[i] << '\t' << buf.data() << std::endl;
+      std::cout << ids[i] << '\t';
+      std::cout.write(reinterpret_cast<const char*>(buf.data()), buf.size());
+      std::cout << std::endl;
     }
 
     std::cout << "predictive_lookup()" << std::endl;
     ids.clear();
     trie.predictive_lookup(key, length, ids);
     std::cout << ids.size() << " found" << std::endl;
+
     for (size_t i = 0; i < std::min(ids.size(), limit); ++i) {
       buf.clear();
       trie.access(ids[i], buf);
-      std::cout << ids[i] << '\t' << buf.data() << std::endl;
+      std::cout << ids[i] << '\t';
+      std::cout.write(reinterpret_cast<const char*>(buf.data()), buf.size());
+      std::cout << std::endl;
     }
   }
 
@@ -206,21 +213,21 @@ int bench(std::vector<std::string>& args) {
       std::cerr << "open error : " << args[2] << std::endl;
       return 1;
     }
-    trie.read(ifs);
+    trie = Trie<Fast>(ifs);
   }
 
-  std::vector<std::string> keys;
-  if (read_keys(args[3].c_str(), keys) == 0) {
+  std::vector<std::string> strs;
+  if (read_keys(args[3].c_str(), strs) == 0) {
     std::cerr << "open error : " << args[3] << std::endl;
     return 1;
   }
 
-  std::vector<std::pair<const uint8_t*, size_t>> pairs;
-  extract_pairs(keys, pairs);
+  std::vector<Key> keys;
+  extract_pairs(strs, keys);
 
-  std::vector<id_type> ids(pairs.size());
-  for (size_t i = 0; i < pairs.size(); ++i) {
-    ids[i] = trie.lookup(pairs[i].first, pairs[i].second);
+  std::vector<id_type> ids(keys.size());
+  for (size_t i = 0; i < keys.size(); ++i) {
+    ids[i] = trie.lookup(keys[i].ptr, keys[i].length);
   }
 
   {
@@ -228,15 +235,15 @@ int bench(std::vector<std::string>& args) {
 
     StopWatch sw;
     for (uint32_t r = 0; r < kRuns; ++r) {
-      for (size_t i = 0; i < pairs.size(); ++i) {
-        if (trie.lookup(pairs[i].first, pairs[i].second) == kNotFound) {
-          std::cerr << "Failed to lookup " << keys[i] << std::endl;
+      for (size_t i = 0; i < keys.size(); ++i) {
+        if (trie.lookup(keys[i].ptr, keys[i].length) == kNotFound) {
+          std::cerr << "Failed to lookup " << strs[i] << std::endl;
           return 1;
         }
       }
     }
 
-    std::cout << sw(Times::MICRO) / kRuns / pairs.size() << " us per str" << std::endl;
+    std::cout << sw(StopWatch::MICRO) / kRuns / keys.size() << " us per str" << std::endl;
   }
 
   {
@@ -253,7 +260,7 @@ int bench(std::vector<std::string>& args) {
       }
     }
 
-    std::cout << sw(Times::MICRO) / kRuns / ids.size() << " us per ID" << std::endl;
+    std::cout << sw(StopWatch::MICRO) / kRuns / ids.size() << " us per ID" << std::endl;
   }
 
   return 0;
diff --git a/src/xcdatBasics.hpp b/src/xcdat_basics.hpp
similarity index 65%
rename from src/xcdatBasics.hpp
rename to src/xcdat_basics.hpp
index 9032168..89405a6 100644
--- a/src/xcdatBasics.hpp
+++ b/src/xcdat_basics.hpp
@@ -11,24 +11,17 @@
 #include <utility>
 #include <vector>
 
+#include "xcdat_config.hpp"
+
 namespace xcdat {
 
-#ifdef XCDAT64
+#ifdef XCDAT_X64
 using id_type = uint64_t;
 #else
 using id_type = uint32_t;
 #endif
 
-constexpr id_type kIdUpper = std::numeric_limits<id_type>::max();
-
-template<bool B, typename T, typename F>
-using Conditional = typename std::conditional<B, T, F>::type;
-
-template<typename T, typename U>
-inline constexpr bool Is_same() { return std::is_same<T, U>::value; }
-
-template<typename T>
-inline constexpr bool Is_pod() { return std::is_pod<T>::value; }
+constexpr id_type kIdMax = std::numeric_limits<id_type>::max();
 
 struct BcPair {
   id_type base;
@@ -44,7 +37,7 @@ inline void show_size(const char* str, size_t size, std::ostream& os) {
 }
 
 inline void show_size_ratio(const char* str, size_t size, size_t denom, std::ostream& os) {
-  os << str << "\t" << size << "\t" << (double) size / denom << std::endl;
+  os << str << "\t" << size << "\t" << static_cast<double>(size) / denom << std::endl;
 }
 
 template<typename T>
@@ -53,8 +46,12 @@ inline void write_value(const T val, std::ostream& os) {
 }
 
 template<typename T>
-inline void read_value(T& val, std::istream& is) {
+inline T read_value(std::istream& is) {
+  // Value-initialize so that a failed or short read yields zeroed bytes
+  // instead of returning an indeterminate value to the caller.
+  T val{};
   is.read(reinterpret_cast<char*>(&val), sizeof(val));
+  return val;
 }
 
 } //namespace - xcdat
diff --git a/xcdat_config.hpp.in b/xcdat_config.hpp.in
new file mode 100644
index 0000000..f084264
--- /dev/null
+++ b/xcdat_config.hpp.in
@@ -0,0 +1,13 @@
+#ifndef XCDAT_CONFIG_HPP
+#define XCDAT_CONFIG_HPP
+
+// Template for src/xcdat_config.hpp; CMake's configure_file() fills in the
+// definitions below from the build options of the same names.
+
+// Defined when XCDAT_X64 is ON: id_type becomes uint64_t instead of uint32_t.
+#cmakedefine XCDAT_X64
+
+// Defined when XCDAT_USE_POPCNT is ON (the build then adds -msse4.2 on UNIX).
+#cmakedefine XCDAT_USE_POPCNT
+
+#endif // XCDAT_CONFIG_HPP