Merge branch 'development'

# Conflicts: # CMakeLists.txt # LICENSE # README.md # include/xcdat.hpp # sample/CMakeLists.txt # sample/sample.cpp
2021-07-02 22:00:25 +09:00 · 2021-07-02 22:00:25 +09:00 · 0522882198
parent 2aeee082d4 db944cff22
commit 0522882198
43 changed files with 22033 additions and 1 deletions
--- a/.clang-format
+++ b/.clang-format
@ -0,0 +1,113 @@
+---
+Language:        Cpp
+# BasedOnStyle:  Google
+AccessModifierOffset: -2
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Left
+AlignOperands:   true
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: true
+AllowShortLoopsOnASingleLine: true
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  AfterExternBlock: false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
+  SplitEmptyFunction: true
+  SplitEmptyRecord: true
+  SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit:     120
+CommentPragmas:  '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: false
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat:   false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+  - foreach
+  - Q_FOREACH
+  - BOOST_FOREACH
+IncludeBlocks:   Preserve
+IncludeCategories:
+  - Regex:           '^<ext/.*\.h>'
+    Priority:        2
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IndentCaseLabels: true
+IndentPPDirectives: None
+IndentWidth:     4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd:   ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Left
+ReflowComments:  true
+SortIncludes:    true
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles:  false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard:        Latest
+TabWidth:        8
+UseTab:          Never
+...
+
--- a/.gitignore
+++ b/.gitignore
@ -29,8 +29,9 @@
 *.app

 # My Definition
-build/
+build*/
 cmake-build-debug/
 .idea/
 .DS_Store
 include/xcdat/xcdat_config.hpp
+.vscode/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -0,0 +1,48 @@
+cmake_minimum_required(VERSION 3.0)
+project(xcdat VERSION 1.0.0 LANGUAGES CXX)
+
+set(CMAKE_CXX_STANDARD 17)
+
+# Default to an optimized build when no build type was requested.
+if (NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+endif ()
+
+# Normalize the compiler identification into two simple flags.
+if ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang"))
+    set(CMAKE_COMPILER_IS_CLANGXX 1)
+endif ()
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+    set(CMAKE_COMPILER_IS_GNUCXX 1)
+endif ()
+
+# C++17 compiler check
+if ((CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 7.0) OR (CMAKE_COMPILER_IS_CLANGXX AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.0))
+    message(FATAL_ERROR "Your C++ compiler does not support C++17. Please install g++ 7.0 (or greater) or clang 4.0 (or greater)")
+else ()
+    message(STATUS "Compiler is recent enough to support C++17.")
+endif ()
+
+# Flags shared by all build types, followed by the per-configuration flags.
+# Debug builds are instrumented with AddressSanitizer.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++1z -pthread -Wall")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DNDEBUG -march=native -O3")
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=address -fno-omit-frame-pointer -O0 -g -DDEBUG")
+
+message(STATUS "BUILD_TYPE is ${CMAKE_BUILD_TYPE}")
+message(STATUS "CXX_FLAGS are ${CMAKE_CXX_FLAGS}")
+message(STATUS "CXX_FLAGS_DEBUG are ${CMAKE_CXX_FLAGS_DEBUG}")
+message(STATUS "CXX_FLAGS_RELEASE are ${CMAKE_CXX_FLAGS_RELEASE}")
+
+include_directories(include)
+
+add_subdirectory(sample)
+add_subdirectory(tools)
+
+enable_testing()
+add_subdirectory(tests)
+
+# Copy the test dataset next to the test binaries.
+# CMAKE_CURRENT_SOURCE_DIR (the directory of this CMakeLists.txt) is used
+# instead of CMAKE_SOURCE_DIR (the top of the whole source tree) so that the
+# path stays valid when xcdat is added to another project via add_subdirectory().
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/tests/keys.txt DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tests)
+
+# Install the library
+file(GLOB XCDAT_HEADER_FILES include/xcdat/*.hpp)
+file(GLOB MM_HEADER_FILES include/mm_file/*.hpp)
+install(FILES include/xcdat.hpp DESTINATION include)
+install(FILES ${XCDAT_HEADER_FILES} DESTINATION include/xcdat)
+install(FILES ${MM_HEADER_FILES} DESTINATION include/mm_file)
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Shunsuke Kanda
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,520 @@
+# Xcdat: Fast compressed trie dictionary library
+
+**Xcdat** is a C++17 header-only library of a fast compressed string dictionary based on an improved double-array trie structure described in the paper: [Compressed double-array tries for string dictionaries supporting fast lookup](https://doi.org/10.1007/s10115-016-0999-8), *Knowledge and Information Systems*, 2017, available at [here](https://kampersanda.github.io/pdf/KAIS2017.pdf).
+
+## Table of contents
+
+- [Features](#features)
+- [Build instructions](#build-instructions)
+- [Command line tools](#command-line-tools)
+- [Sample usage](#sample-usage)
+- [API](#api)
+- [Performance](#performance)
+- [Licensing](#licensing)
+- [Todo](#todo)
+- [References](#references)
+
+## Features
+
+- **Compressed string dictionary.** Xcdat implements a (static) *compressed string dictioanry* that stores a set of strings (or keywords) in a compressed space while supporting several search operations [1,2]. For example, Xcdat can store an entire set of English Wikipedia titles at half the size of the raw data.
+- **Fast and compact data structure.** Xcdat employs the *double-array trie* [3] known as the fastest trie implementation. However, the double-array trie resorts to many pointers and consumes a large amount of memory. To address this, Xcdat applies the *XCDA* method [2] that represents the double-array trie in a compressed format while maintaining the fast searches.
+- **Cache efficiency.** Xcdat employs a *minimal-prefix trie* [4] that replaces redundant trie nodes into strings to reduce random access and to improve locality of references.
+- **Dictionary encoding.** Xcdat maps `N` distinct keywords into unique IDs from `[0,N-1]`, and supports the two symmetric operations: `lookup` returns the ID corresponding to a given keyword; `decode` returns the keyword associated with a given ID. The mapping is so-called *dictionary encoding* (or *domain encoding*) and is fundamental in many DB applications as described by Martínez-Prieto et al [1] or Müller et al. [5].
+- **Prefix search operations.** Xcdat supports prefix search operations realized by trie search algorithms: `prefix_search` returns all the keywords contained as prefixes of a given string; `predictive search` returns all the keywords starting with a given string. These will be useful in many NLP applications such as auto completions [6], stemmed searches [7], or input method editors [8].
+- **64-bit support.** As mentioned before, since the double array is a pointer-based data structure, most double-array libraries use 32-bit pointers to reduce memory consumption, resulting in limiting the scale of the input dataset. On the other hand, the XCDA method allows Xcdat to represent 64-bit pointers without sacrificing memory efficiency.
+- **Binary key support.** In normal mode, Xcdat will use the `\0` character as an end marker for each keyword. However, if the dataset include `\0` characters, it will use bit flags instead of end markers, allowing the dataset to consist of binary keywords.
+- **Memory mapping.** Xcdat supports *memory mapping*, allowing data to be deserialized quickly without loading it into memory. Of course, deserialization by the loading is also supported.
+- **Header only.** The library consists only of header files, and you can easily install it.
+
+## Build instructions
+
+You can download, compile, and install Xcdat with the following commands.
+
+```
+$ git clone https://github.com/kampersanda/xcdat.git
+$ cd xcdat
+$ mkdir build
+$ cd build
+$ cmake ..
+$ make -j
+$ make install
+```
+
+Or, since this library consists only of header files, you can easily install it by passing through the path to the directory `include`.
+
+### Requirements
+
+You need to install a modern C++17 ready compiler such as `g++ >= 7.0` or `clang >= 4.0`. For the build system, you need to install `CMake >= 3.0` to compile the library.
+
+The library considers a 64-bit operating system. The code has been tested only on Mac OS X and Linux. That is, this library considers only UNIX-compatible OS.
+
+## Command line tools
+
+ Xcdat provides command line tools to build the dictionary and perform searches, which are inspired by [marisa-trie](https://github.com/s-yata/marisa-trie). All the tools will print the command line options by specifying the parameter `-h`.
+
+### `xcdat_build`
+
+It builds the trie dictionary from a given dataset consisting of keywords separated by newlines. The following command builds the trie dictionary from dataset `enwiki-titles.txt` and writes the dictionary into file `idx.bin`.
+
+```
+$ xcdat_build enwiki-titles.txt idx.bin
+Number of keys: 15955763
+Number of trie nodes: 36441058
+Number of DA units: 36520704
+Memory usage in bytes: 1.70618e+08
+Memory usage in MiB: 162.714
+```
+
+### `xcdat_lookup`
+
+It tests the `lookup` operation for a given dictionary. Given a query string via `stdin`, it prints the associated ID if found, or `-1` otherwise.
+
+```
+$ xcdat_lookup idx.bin
+Algorithm
+1255938	Algorithm
+Double_Array
+-1	Double_Array
+```
+
+### `xcdat_decode`
+
+It tests the `decode` operation for a given dictionary. Given a query ID via `stdin`, it prints the corresponding keyword if the ID is in the range `[0,N-1]`, where `N` is the number of stored keywords.
+
+```
+$ xcdat_decode idx.bin
+1255938
+1255938	Algorithm
+```
+
+### `xcdat_prefix_search`
+
+It tests the `prefix_search` operation for a given dictionary. Given a query string via `stdin`, it prints all the keywords contained as prefixes of a given string.
+
+```
+$ xcdat_prefix_search idx.bin
+Algorithmic
+6 found
+57	A
+798460	Al
+1138004	Alg
+1253024	Algo
+1255938	Algorithm
+1255931	Algorithmic
+```
+
+### `xcdat_predictive_search`
+
+It tests the `predictive_search` operation for a given dictionary. Given a query string via `stdin`, it prints the first `n` keywords starting with a given string, where `n` is one of the parameters.
+
+```
+$ xcdat_predictive_search idx.bin -n 3
+Algorithm
+263 found
+1255938	Algorithm
+1255944	Algorithm's_optimality
+1255972	Algorithm_(C++)
+```
+
+### `xcdat_enumerate`
+
+It prints all the keywords stored in a given dictionary.
+
+```
+$ xcdat_enumerate idx.bin | head -3
+0	!
+107	!!
+138	!!!
+```
+
+### `xcdat_benchmark`
+
+It measures the performances of possible tries for a given dataset. To perform search operations, it randomly samples `n` queires from the dataset, where `n` is one of the parameters.
+
+```
+$ xcdat_benchmark enwiki-titles.txt
+** xcdat::trie_7_type **
+Number of keys: 15955763
+Memory usage in bytes: 1.70618e+08
+Memory usage in MiB: 162.714
+Construction time in seconds: 12.907
+Lookup time in microsec/query: 0.4674
+Decode time in microsec/query: 0.8722
+** xcdat::trie_8_type **
+Number of keys: 15955763
+Memory usage in bytes: 1.64104e+08
+Memory usage in MiB: 156.502
+Construction time in seconds: 13.442
+Lookup time in microsec/query: 0.7593
+Decode time in microsec/query: 1.2341
+```
+
+## Sample usage
+
+`sample/sample.cpp` provides a sample usage. It employs the external library [mm_file](https://github.com/jermp/mm_file) to implement a memory-mapped file, which will be installed by `make install` together.
+
+```c++
+#include <iostream>
+#include <string>
+
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+int main() {
+    // Dataset
+    std::vector<std::string> keys = {
+        "AirPods",  "AirTag",  "Mac",  "MacBook", "MacBook_Air", "MacBook_Pro",
+        "Mac_Mini", "Mac_Pro", "iMac", "iPad",    "iPhone",      "iPhone_SE",
+    };
+
+    // The input keys must be sorted and unique (although they have already satisfied in this case).
+    std::sort(keys.begin(), keys.end());
+    keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
+
+    // The trie dictionary type
+    using trie_type = xcdat::trie_8_type;
+
+    // The dictionary filename
+    const char* tmp_filename = "dic.bin";
+
+    // Build and save the trie dictionary.
+    {
+        const trie_type trie(keys);
+        xcdat::save(trie, tmp_filename);
+    }
+
+    // Memory-map the trie dictionary.
+    const mm::file_source<char> fin(tmp_filename, mm::advice::sequential);
+    const auto trie = xcdat::mmap<trie_type>(fin.data());
+
+    // Or, load the trie dictionary on memory.
+    // const auto trie = xcdat::load<trie_type>(tmp_filename);
+
+    // Basic statistics
+    std::cout << "Number of keys: " << trie.num_keys() << std::endl;
+    std::cout << "Number of trie nodes: " << trie.num_nodes() << std::endl;
+    std::cout << "Number of DA units: " << trie.num_units() << std::endl;
+    std::cout << "Memory usage in bytes: " << xcdat::memory_in_bytes(trie) << std::endl;
+
+    // Lookup the ID for a query key.
+    {
+        const auto id = trie.lookup("Mac_Pro");
+        std::cout << "Lookup(Mac_Pro) = " << id.value_or(UINT64_MAX) << std::endl;
+    }
+    {
+        const auto id = trie.lookup("Google_Pixel");
+        std::cout << "Lookup(Google_Pixel) = " << id.value_or(UINT64_MAX) << std::endl;
+    }
+
+    // Decode the key for a query ID.
+    {
+        const auto dec = trie.decode(4);
+        std::cout << "Decode(4) = " << dec << std::endl;
+    }
+
+    // Common prefix search
+    {
+        std::cout << "CommonPrefixSearch(MacBook_Air) = {" << std::endl;
+        auto itr = trie.make_prefix_iterator("MacBook_Air");
+        while (itr.next()) {
+            std::cout << "   (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    // Predictive search
+    {
+        std::cout << "PredictiveSearch(Mac) = {" << std::endl;
+        auto itr = trie.make_predictive_iterator("Mac");
+        while (itr.next()) {
+            std::cout << "   (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    // Enumerate all the keys (in lex order).
+    {
+        std::cout << "Enumerate() = {" << std::endl;
+        auto itr = trie.make_enumerative_iterator();
+        while (itr.next()) {
+            std::cout << "   (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    std::remove(tmp_filename);
+
+    return 0;
+}
+```
+
+The output will be
+
+```
+Number of keys: 12
+Number of trie nodes: 28
+Number of DA units: 256
+Memory usage in bytes: 1766
+Lookup(Mac_Pro) = 7
+Lookup(Google_Pixel) = 18446744073709551615
+Decode(4) = MacBook_Air
+CommonPrefixSearch(MacBook_Air) = {
+   (Mac, 1),
+   (MacBook, 2),
+   (MacBook_Air, 4),
+}
+PredictiveSearch(Mac) = {
+   (Mac, 1),
+   (MacBook, 2),
+   (MacBook_Air, 4),
+   (MacBook_Pro, 11),
+   (Mac_Mini, 5),
+   (Mac_Pro, 7),
+}
+Enumerate() = {
+   (AirPods, 0),
+   (AirTag, 3),
+   (Mac, 1),
+   (MacBook, 2),
+   (MacBook_Air, 4),
+   (MacBook_Pro, 11),
+   (Mac_Mini, 5),
+   (Mac_Pro, 7),
+   (iMac, 10),
+   (iPad, 6),
+   (iPhone, 8),
+   (iPhone_SE, 9),
+}
+```
+
+## API
+
+Xcdat can be used by including `xcdat.hpp`.
+
+### Trie dictionary types
+
+The two dictionary types are difined.
+
+- `xcdat::trie_8_type` is the trie dictionary using standard DACs [9] using 8-bit integers for elements.
+- `xcdat::trie_7_type` is the trie dictionary using pointer-based DACs [2] using 7-bit integers for elements.
+
+### Trie dictionary class
+
+The trie dictionary has the following members.
+
+```c++
+//! A compressed string dictionary based on an improved double-array trie.
+//! 'BcVector' is the data type of Base and Check vectors.
+template <class BcVector>
+class trie {
+  public:
+    //! Default constructor
+    trie() = default;
+
+    //! Default destructor
+    virtual ~trie() = default;
+
+    //! Copy constructor (deleted)
+    trie(const trie&) = delete;
+
+    //! Copy constructor (deleted)
+    trie& operator=(const trie&) = delete;
+
+    //! Move constructor
+    trie(trie&&) noexcept = default;
+
+    //! Move constructor
+    trie& operator=(trie&&) noexcept = default;
+
+    //! Build the trie from the input keywords, which are lexicographically sorted and unique.
+    //!
+    //! If bin_mode = false, the NULL character is used for the termination of a keyword.
+    //! If bin_mode = true, bit flags are used istead, and the keywords can contain NULL characters.
+    //! If the input keywords contain NULL characters, bin_mode will be forced to be set to true.
+    //!
+    //! The type 'Strings' and 'Strings::value_type' should be a random iterable container such as std::vector.
+    //! Precisely, they should support the following operations:
+    //!  - size() returns the container size.
+    //!  - operator[](i) accesses the i-th element.
+    //!  - begin() returns the iterator to the beginning.
+    //!  - end() returns the iterator to the end.
+    //! The type 'Strings::value_type::value_type' should be one-byte integer type such as 'char'.
+    template <class Strings>
+    trie(const Strings& keys, bool bin_mode = false);
+
+    //! Check if the binary mode.
+    bool bin_mode() const;
+
+    //! Get the number of stored keywords.
+    std::uint64_t num_keys() const;
+
+    //! Get the alphabet size.
+    std::uint64_t alphabet_size() const;
+
+    //! Get the maximum length of keywords.
+    std::uint64_t max_length() const;
+
+    //! Get the number of trie nodes.
+    std::uint64_t num_nodes() const;
+
+    //! Get the number of DA units.
+    std::uint64_t num_units() const;
+
+    //! Get the number of unused DA units.
+    std::uint64_t num_free_units() const;
+
+    //! Get the length of TAIL vector.
+    std::uint64_t tail_length() const;
+
+    //! Lookup the ID of the keyword.
+    std::optional<std::uint64_t> lookup(std::string_view key) const;
+
+    //! Decode the keyword associated with the ID.
+    std::string decode(std::uint64_t id) const;
+
+    //! Decode the keyword associated with the ID and store it in 'decoded'.
+    //! It can avoid reallocation of memory to store the result.
+    void decode(std::uint64_t id, std::string& decoded) const;
+
+    //! An iterator class for common prefix search.
+    //! It enumerates all the keywords contained as prefixes of a given string.
+    //! It should be instantiated via the function 'make_prefix_iterator'.
+    class prefix_iterator {
+      public:
+        prefix_iterator() = default;
+
+        //! Increment the iterator.
+        //! Return false if the iteration is terminated.
+        bool next();
+
+        //! Get the result ID.
+        std::uint64_t id() const;
+
+        //! Get the result keyword.
+        std::string decoded() const;
+
+        //! Get the reference to the result keyword.
+        //! Note that the referenced data will be changed in the next iteration.
+        std::string_view decoded_view() const;
+    };
+
+    //! Make the common prefix searcher for the given keyword.
+    prefix_iterator make_prefix_iterator(std::string_view key) const;
+
+    //! Preform common prefix search for the keyword.
+    void prefix_search(std::string_view key, const std::function<void(std::uint64_t, std::string_view)>& fn) const;
+
+    //! An iterator class for predictive search.
+    //! It enumerates all the keywords starting with a given string.
+    //! It should be instantiated via the function 'make_predictive_iterator'.
+    class predictive_iterator {
+      public:
+        predictive_iterator() = default;
+
+        //! Increment the iterator.
+        //! Return false if the iteration is terminated.
+        bool next();
+
+        //! Get the result ID.
+        std::uint64_t id() const;
+
+        //! Get the result keyword.
+        std::string decoded() const;
+
+        //! Get the reference to the result keyword.
+        //! Note that the referenced data will be changed in the next iteration.
+        std::string_view decoded_view() const;
+    };
+
+    //! Make the predictive searcher for the keyword.
+    predictive_iterator make_predictive_iterator(std::string_view key) const;
+
+    //! Preform predictive search for the keyword.
+    void predictive_search(std::string_view key, const std::function<void(std::uint64_t, std::string_view)>& fn) const;
+
+    //! An iterator class for enumeration.
+    //! It enumerates all the keywords stored in the trie.
+    //! It should be instantiated via the function 'make_enumerative_iterator'.
+    using enumerative_iterator = predictive_iterator;
+
+    //! An iterator class for enumeration.
+    enumerative_iterator make_enumerative_iterator() const;
+
+    //! Enumerate all the keywords and their IDs stored in the trie.
+    void enumerate(const std::function<void(std::uint64_t, std::string_view)>& fn) const;
+
+    //! Visit the members (commonly used for I/O).
+    template <class Visitor>
+    void visit(Visitor& visitor);
+};
+```
+
+### I/O handlers
+
+`xcdat.hpp` provides some functions for handling I/O operations.
+
+```c++
+//! Set the continuous memory block to a new trie instance.
+template <class Trie>
+Trie mmap(const char* address);
+
+//! Load the trie dictionary from the file.
+template <class Trie>
+Trie load(std::string_view filepath);
+
+//! Save the trie dictionary to the file and returns the file size in bytes.
+template <class Trie>
+std::uint64_t save(const Trie& idx, std::string_view filepath);
+
+//! Get the dictionary size in bytes.
+template <class Trie>
+std::uint64_t memory_in_bytes(const Trie& idx);
+
+//! Get the flag indicating the trie type, embedded by the function 'save'.
+//! The flag corresponds to trie::l1_bits and will be used to detect the trie type from the file.
+std::uint32_t get_flag(std::string_view filepath);
+
+//! Load the keywords from the file.
+std::vector<std::string> load_strings(std::string_view filepath, char delim = '\n');
+```
+
+## Performance
+
+To be added...
+
+## Licensing
+
+This library is free software provided under the MIT License.
+
+If you use the library in academic settings, please cite the following paper.
+
+```
+@article{kanda2017compressed,
+    title={Compressed double-array tries for string dictionaries supporting fast lookup},
+    author={Kanda, Shunsuke and Morita, Kazuhiro and Fuketa, Masao},
+    journal={Knowledge and Information Systems (KAIS)},
+    volume={51},
+    number={3},
+    pages={1023--1042},
+    year={2017},
+    publisher={Springer}
+}
+```
+
+## Todo
+
+- Support other language bindings.
+- Add SIMD-ization.
+
+## References
+
+1. M. A. Martínez-Prieto, N. Brisaboa, R. Cánovas, F. Claude, and G. Navarro. Practical compressed string dictionaries. Information Systems, 56:73–108, 2016
+2. S. Kanda, K. Morita, and M. Fuketa. Compressed double-array tries for string dictionaries supporting fast lookup. Knowledge and Information Systems, 51(3): 1023–1042, 2017.
+3. J. Aoe. An efficient digital search algorithm by using a double-array structure. IEEE Transactions on Software Engineering, 15(9):1066–1077, 1989.
+4. S. Yata, M. Oono, K. Morita, M. Fuketa, T. Sumitomo, and J. Aoe. A compact static double-array keeping character codes. Information Processing & Management, 43(1):237–247, 2007.
+5. Müller, Ingo, Cornelius Ratsch, and Franz Faerber. Adaptive string dictionary compression in in-memory column-store database systems. In EDBT, pp. 283–294, 2014.
+6. Gog, Simon, Giulio Ermanno Pibiri, and Rossano Venturini. Efficient and effective query auto-completion. In SIGIR, pp. 2271–2280, 2020.
+7. Ricardo Baeza-Yates, and Berthier Ribeiro-Neto. Modern Information Retrieval. 2nd ed. Addison Wesley, Boston, MA, USA, 2011.
+8. Kudo, Taku, et al. Efficient dictionary and language model compression for input method editors. In WTIM, pp. 19–25, 2011.
+9. N. R. Brisaboa, S. Ladra, and G. Navarro. DACs: Bringing direct access to variable-length codes. Information Processing & Management, 49(1):392–404, 2013.
+
--- a/include/mm_file/mm_file.hpp
+++ b/include/mm_file/mm_file.hpp
@ -0,0 +1,177 @@
+#pragma once
+
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <type_traits>
+#include <fcntl.h>
+#include <unistd.h>  // close(fd)
+#include <string>
+
+namespace mm {
+
+// Named aliases for the POSIX_MADV_* constants. file_source::open() accepts
+// one of these and forwards it unchanged to posix_madvise().
+namespace advice {
+static const int normal = POSIX_MADV_NORMAL;
+static const int random = POSIX_MADV_RANDOM;
+static const int sequential = POSIX_MADV_SEQUENTIAL;
+}  // namespace advice
+
+// Owns a file descriptor together with the memory mapping created over it.
+// This base class never opens anything itself: derived classes assign m_fd,
+// m_size and m_data, and close() (or the destructor) releases them.
+template <typename T>
+struct file {
+    file() {
+        init();
+    }
+
+    // Best-effort cleanup. Destructors are implicitly noexcept, so an
+    // exception escaping from close() would call std::terminate; call close()
+    // explicitly beforehand if unmapping errors must be observed.
+    ~file() {
+        try {
+            close();
+        } catch (...) {
+        }
+    }
+
+    file(file const&) = delete;             // not copy-constructible
+    file& operator=(file const&) = delete;  // not copy-assignable
+
+    // True while a descriptor is held.
+    bool is_open() const {
+        return m_fd != -1;
+    }
+
+    // Unmap the data and close the descriptor; a no-op when nothing is open.
+    // The object is reset to the closed state even when unmapping fails, so
+    // the descriptor is never leaked.
+    // Throws std::runtime_error if munmap reports an error.
+    void close() {
+        if (!is_open()) return;
+
+        // m_data is still null when a derived open() threw between opening
+        // the descriptor and establishing the mapping; nothing to unmap then.
+        bool unmap_failed = false;
+        if (m_data != nullptr) {
+            void* const addr = const_cast<void*>(static_cast<void const*>(m_data));
+            unmap_failed = (munmap(addr, m_size) == -1);
+        }
+
+        ::close(m_fd);
+        init();
+
+        if (unmap_failed) {
+            throw std::runtime_error("munmap failed when closing file");
+        }
+    }
+
+    // Size of the mapping in bytes.
+    size_t bytes() const {
+        return m_size;
+    }
+
+    // Number of whole elements of type T that fit in the mapping.
+    size_t size() const {
+        return m_size / sizeof(T);
+    }
+
+    // Pointer to the first mapped element (nullptr when closed).
+    T* data() const {
+        return m_data;
+    }
+
+    // Minimal forward iterator over the mapped elements.
+    struct iterator {
+        iterator(T* addr, size_t offset = 0) : m_ptr(addr + offset) {}
+
+        // Returns the current element by value.
+        T operator*() {
+            return *m_ptr;
+        }
+
+        void operator++() {
+            ++m_ptr;
+        }
+
+        bool operator==(iterator const& rhs) const {
+            return m_ptr == rhs.m_ptr;
+        }
+
+        bool operator!=(iterator const& rhs) const {
+            return !((*this) == rhs);
+        }
+
+    private:
+        T* m_ptr;
+    };
+
+    iterator begin() const {
+        return iterator(m_data);
+    }
+
+    iterator end() const {
+        return iterator(m_data, size());
+    }
+
+protected:
+    int m_fd;       // descriptor of the mapped file, -1 when closed
+    size_t m_size;  // length of the mapping in bytes
+    T* m_data;      // start of the mapping, nullptr when not mapped
+
+    // Reset all members to the closed state (releases nothing).
+    void init() {
+        m_fd = -1;
+        m_size = 0;
+        m_data = nullptr;
+    }
+
+    // Throws std::runtime_error if the preceding ::open() call failed.
+    void check_fd() {
+        if (m_fd == -1) throw std::runtime_error("cannot open file");
+    }
+};
+
+// Create a shared mapping of the first `size` bytes of descriptor `fd` with
+// protection `prot` and return its address converted to `Pointer`.
+// Throws std::runtime_error when ::mmap reports MAP_FAILED.
+template <typename Pointer>
+Pointer mmap(int fd, size_t size, int prot) {
+    void* const addr = ::mmap(nullptr, size, prot, MAP_SHARED, fd, 0);
+    if (addr == MAP_FAILED) {
+        throw std::runtime_error("mmap failed");
+    }
+    return static_cast<Pointer>(addr);
+}
+
+// Read-only memory-mapped view of an existing file.
+template <typename T>
+struct file_source : public file<T const> {
+    typedef file<T const> base;
+
+    file_source() {}
+
+    file_source(std::string const& path, int adv = advice::normal) {
+        open(path, adv);
+    }
+
+    // Map the whole file at `path` read-only and pass `adv` (one of the
+    // advice:: constants) to posix_madvise.
+    // A file already held by this object is closed first.
+    // Throws std::runtime_error if the file cannot be opened, stat'ed or
+    // mapped, or if posix_madvise fails. When opening, stat'ing or mapping
+    // fails the object is left in the closed state.
+    void open(std::string const& path, int adv = advice::normal) {
+        base::close();  // do not leak a previously opened descriptor/mapping
+
+        base::m_fd = ::open(path.c_str(), O_RDONLY);
+        base::check_fd();
+
+        struct stat fs;
+        if (fstat(base::m_fd, &fs) == -1) {
+            abandon();
+            throw std::runtime_error("cannot stat file");
+        }
+        base::m_size = fs.st_size;
+
+        try {
+            base::m_data = mmap<T const*>(base::m_fd, base::m_size, PROT_READ);
+        } catch (...) {
+            // Without a mapping there is nothing for close() to unmap, so
+            // release the descriptor here before propagating the error.
+            abandon();
+            throw;
+        }
+
+        // A non-zero result signals failure; the established mapping is then
+        // released by close() or the destructor.
+        void* const addr = const_cast<void*>(static_cast<void const*>(base::m_data));
+        if (posix_madvise(addr, base::m_size, adv)) {
+            throw std::runtime_error("madvise failed");
+        }
+    }
+
+private:
+    // Close the descriptor of a file that was opened but not mapped and
+    // return to the closed state.
+    void abandon() {
+        ::close(base::m_fd);
+        base::init();
+    }
+};
+
+// Read/write memory-mapped view of a file.
+template <typename T>
+struct file_sink : public file<T> {
+    typedef file<T> base;
+
+    file_sink() {}
+
+    file_sink(std::string const& path) {
+        open(path);
+    }
+
+    file_sink(std::string const& path, size_t n) {
+        open(path, n);
+    }
+
+    // Map the existing file at `path` for reading and writing, using its
+    // current size. A file already held by this object is closed first.
+    // Throws std::runtime_error if the file cannot be opened, stat'ed or
+    // mapped; the object is then left in the closed state.
+    void open(std::string const& path) {
+        base::close();  // do not leak a previously opened descriptor/mapping
+
+        static const mode_t mode = 0600;  // read/write
+        base::m_fd = ::open(path.c_str(), O_RDWR, mode);
+        base::check_fd();
+
+        struct stat fs;
+        if (fstat(base::m_fd, &fs) == -1) {
+            abandon();
+            throw std::runtime_error("cannot stat file");
+        }
+        base::m_size = fs.st_size;
+        map_or_abandon();
+    }
+
+    // Create (or truncate) the file at `path`, resize it to hold `n` elements
+    // of type T and map it for reading and writing. A file already held by
+    // this object is closed first.
+    // Throws std::runtime_error if the file cannot be opened, resized or
+    // mapped; the object is then left in the closed state.
+    void open(std::string const& path, size_t n) {
+        base::close();  // do not leak a previously opened descriptor/mapping
+
+        static const mode_t mode = 0600;  // read/write
+        base::m_fd = ::open(path.c_str(), O_RDWR | O_CREAT | O_TRUNC, mode);
+        base::check_fd();
+
+        base::m_size = n * sizeof(T);
+        // The file must really have the new size before it is mapped:
+        // touching mapped pages beyond the end of the file is an error.
+        if (ftruncate(base::m_fd, base::m_size) == -1) {
+            abandon();
+            throw std::runtime_error("cannot resize file");
+        }
+        map_or_abandon();
+    }
+
+private:
+    // Close the descriptor of a file that was opened but not mapped and
+    // return to the closed state.
+    void abandon() {
+        ::close(base::m_fd);
+        base::init();
+    }
+
+    // Map m_size bytes of m_fd read/write; on failure release the descriptor
+    // and propagate the error.
+    void map_or_abandon() {
+        try {
+            base::m_data =
+                mmap<T*>(base::m_fd, base::m_size, PROT_READ | PROT_WRITE);
+        } catch (...) {
+            abandon();
+            throw;
+        }
+    }
+};
+
+}  // namespace mm
--- a/include/xcdat.hpp
+++ b/include/xcdat.hpp
@ -0,0 +1,85 @@
+#pragma once
+
+#include "xcdat/bc_vector_7.hpp"
+#include "xcdat/bc_vector_8.hpp"
+#include "xcdat/load_visitor.hpp"
+#include "xcdat/mmap_visitor.hpp"
+#include "xcdat/save_visitor.hpp"
+#include "xcdat/size_visitor.hpp"
+#include "xcdat/trie.hpp"
+
+namespace xcdat {
+
+//! Trie type instantiated with bc_vector_8, whose l1_bits is 8.
+using trie_8_type = trie<bc_vector_8>;
+//! Trie type instantiated with bc_vector_7, whose l1_bits is 7.
+using trie_7_type = trie<bc_vector_7>;
+
+//! Build a trie instance from a continuous memory block starting at 'address'.
+//! The block is read through mmap_visitor: first a 32-bit flag that must equal
+//! Trie::l1_bits, then the trie itself. A mismatching flag is reported through
+//! XCDAT_THROW_IF.
+template <class Trie>
+[[maybe_unused]] Trie mmap(const char* address) {
+    mmap_visitor reader(address);
+
+    // The leading flag tells which trie type the block was built for.
+    std::uint32_t flag;
+    reader.visit(flag);
+    XCDAT_THROW_IF(flag != Trie::l1_bits, "The input dictionary type is different.");
+
+    Trie dict;
+    reader.visit(dict);
+    return dict;
+}
+
+//! Read a trie dictionary from the file at 'filepath'.
+//! The file is read through load_visitor: first a 32-bit flag that must equal
+//! Trie::l1_bits, then the trie itself. A mismatching flag is reported through
+//! XCDAT_THROW_IF.
+template <class Trie>
+[[maybe_unused]] Trie load(std::string_view filepath) {
+    load_visitor reader(filepath);
+
+    // The leading flag tells which trie type the file was saved from.
+    std::uint32_t flag;
+    reader.visit(flag);
+    XCDAT_THROW_IF(flag != Trie::l1_bits, "The input dictionary type is different.");
+
+    Trie dict;
+    reader.visit(dict);
+    return dict;
+}
+
+//! Write the trie dictionary to the file at 'filepath'.
+//! The 32-bit type flag (Trie::l1_bits) is written first, followed by the trie.
+//! Returns the file size in bytes as reported by the visitor.
+template <class Trie>
+[[maybe_unused]] std::uint64_t save(const Trie& idx, std::string_view filepath) {
+    save_visitor writer(filepath);
+
+    writer.visit(static_cast<std::uint32_t>(Trie::l1_bits));  // flag
+
+    // Trie::visit is not a const member, hence the cast on the read-only input.
+    Trie& target = const_cast<Trie&>(idx);
+    writer.visit(target);
+
+    return writer.bytes();
+}
+
+//! Compute the dictionary size in bytes.
+//! The same sequence as in 'save' (type flag, then trie) is fed to a
+//! size_visitor, and its byte count is returned.
+template <class Trie>
+[[maybe_unused]] std::uint64_t memory_in_bytes(const Trie& idx) {
+    size_visitor counter;
+
+    counter.visit(static_cast<std::uint32_t>(Trie::l1_bits));  // flag
+
+    // Trie::visit is not a const member, hence the cast on the read-only input.
+    Trie& target = const_cast<Trie&>(idx);
+    counter.visit(target);
+
+    return counter.bytes();
+}
+
+//! Get the flag indicating the trie dictionary type, embedded by the function 'save'.
+//! The flag corresponds to trie::l1_bits and will be used to detect the trie type from the file.
+//! An error is raised through XCDAT_THROW_IF when the file cannot be opened or
+//! when it holds fewer than sizeof(std::uint32_t) bytes.
+//!
+//! The function is 'inline' because it is defined in a header: without it,
+//! including this header from several translation units breaks the link.
+[[maybe_unused]] inline std::uint32_t get_flag(std::string_view filepath) {
+    // std::ifstream has no portable std::string_view constructor, so an owning
+    // std::string is built first (braces avoid the most vexing parse).
+    // The flag is read as raw bytes, hence binary mode.
+    std::ifstream ifs{std::string(filepath), std::ios::binary};
+    XCDAT_THROW_IF(!ifs.good(), "Cannot open the input file");
+
+    std::uint32_t flag = 0;
+    ifs.read(reinterpret_cast<char*>(&flag), sizeof(flag));
+    // A short file sets failbit; do not hand back a partially filled value.
+    XCDAT_THROW_IF(ifs.fail(), "Cannot read the flag from the input file");
+    return flag;
+}
+
+//! Load the keywords from the file.
+//! The content is split at every 'delim' character (newline by default) and
+//! the pieces are returned in file order, without the delimiter.
+//! An error is raised through XCDAT_THROW_IF when the file cannot be opened.
+//!
+//! The function is 'inline' because it is defined in a header: without it,
+//! including this header from several translation units breaks the link.
+[[maybe_unused]] inline std::vector<std::string> load_strings(std::string_view filepath, char delim = '\n') {
+    // std::ifstream has no portable std::string_view constructor, so an owning
+    // std::string is built first (braces avoid the most vexing parse).
+    std::ifstream ifs{std::string(filepath)};
+    XCDAT_THROW_IF(!ifs.good(), "Cannot open the input file");
+
+    std::vector<std::string> strs;
+    for (std::string str; std::getline(ifs, str, delim);) {
+        strs.push_back(str);
+    }
+    return strs;
+}
+
+}  // namespace xcdat
--- a/include/xcdat/bc_vector_7.hpp
+++ b/include/xcdat/bc_vector_7.hpp
@ -0,0 +1,194 @@
+#pragma once
+
+#include <array>
+
+#include "bit_vector.hpp"
+#include "compact_vector.hpp"
+
+namespace xcdat {
+
+// Packed storage for the base/check values of a sequence of units.
+//
+// Each unit i contributes two consecutive elements: base ^ i at position 2*i
+// and check ^ i at position 2*i + 1. Elements are kept in up to four arrays of
+// growing width (8, 16, 32 and 64 bits). In the first three arrays the lowest
+// bit of an entry is a flag:
+//   0 -> the remaining bits hold the value itself;
+//   1 -> the remaining bits hold the offset of the entry in the next array,
+//        relative to the next-array size sampled at the start of the block.
+// For leaf units the base is stored differently: its low 8 bits go into the
+// first array verbatim and the remaining bits go into m_links.
+class bc_vector_7 {
+  public:
+    // Value bits available in a first-level entry; also written as the type flag by 'save'.
+    static constexpr std::uint32_t l1_bits = 7;
+    static constexpr std::uint32_t max_levels = 4;
+
+    // Entries per rank sample at each level; equally the exclusive upper bound
+    // of a value that fits the level (7, 15 and 31 value bits).
+    static constexpr std::uint64_t block_size_l1 = 1ULL << 7;
+    static constexpr std::uint64_t block_size_l2 = 1ULL << 15;
+    static constexpr std::uint64_t block_size_l3 = 1ULL << 31;
+
+  private:
+    // Number of units whose check equals their own index.
+    std::uint64_t m_num_frees = 0;
+    immutable_vector<std::uint8_t> m_ints_l1;
+    immutable_vector<std::uint16_t> m_ints_l2;
+    immutable_vector<std::uint32_t> m_ints_l3;
+    immutable_vector<std::uint64_t> m_ints_l4;
+    // m_ranks[k][b]: size of the level k+2 array when block b of level k+1 started.
+    std::array<immutable_vector<std::uint64_t>, max_levels - 1> m_ranks;
+    // High bits (base >> 8) of leaf units, in leaf order.
+    compact_vector m_links;
+    // Marks which units are leaves; built with rank support.
+    bit_vector m_leaves;
+
+  public:
+    bc_vector_7() = default;
+    virtual ~bc_vector_7() = default;
+
+    bc_vector_7(const bc_vector_7&) = delete;
+    bc_vector_7& operator=(const bc_vector_7&) = delete;
+
+    bc_vector_7(bc_vector_7&&) noexcept = default;
+    bc_vector_7& operator=(bc_vector_7&&) noexcept = default;
+
+    // Builds the packed representation.
+    // bc_units: indexable sequence whose elements expose 'base' and 'check'.
+    // leaves:   builder whose i-th bit tells whether unit i is a leaf.
+    template <class BcUnits>
+    explicit bc_vector_7(const BcUnits& bc_units, bit_vector::builder&& leaves) {
+        std::vector<std::uint8_t> ints_l1;
+        std::vector<std::uint16_t> ints_l2;
+        std::vector<std::uint32_t> ints_l3;
+        std::vector<std::uint64_t> ints_l4;
+        std::array<std::vector<std::uint64_t>, max_levels - 1> ranks;
+        std::vector<std::uint64_t> links;
+
+        ints_l1.reserve(bc_units.size() * 2);
+        ranks[0].reserve((bc_units.size() * 2) >> l1_bits);
+        links.reserve(bc_units.size());
+
+        // Appends one value, descending through the levels until it fits.
+        // At every level the rank sample is taken BEFORE the entry is pushed,
+        // so the sample is the next-level size at the start of the block.
+        auto append_unit = [&](std::uint64_t x) {
+            if ((ints_l1.size() % block_size_l1) == 0) {
+                ranks[0].push_back(static_cast<std::uint64_t>(ints_l2.size()));
+            }
+            if ((x / block_size_l1) == 0) {
+                // Fits in 7 bits: store it directly with flag 0.
+                ints_l1.push_back(static_cast<std::uint8_t>(0 | (x << 1)));
+                return;
+            } else {
+                // Store the block-relative offset of the level-2 entry, flag 1.
+                const auto i = ints_l2.size() - ranks[0].back();
+                ints_l1.push_back(static_cast<std::uint8_t>(1 | (i << 1)));
+            }
+
+            if ((ints_l2.size() % block_size_l2) == 0) {
+                ranks[1].push_back(static_cast<std::uint64_t>(ints_l3.size()));
+            }
+            if ((x / block_size_l2) == 0) {
+                // Fits in 15 bits.
+                ints_l2.push_back(static_cast<std::uint16_t>(0 | (x << 1)));
+                return;
+            } else {
+                const auto i = ints_l3.size() - ranks[1].back();
+                ints_l2.push_back(static_cast<std::uint16_t>(1 | (i << 1)));
+            }
+
+            if ((ints_l3.size() % block_size_l3) == 0) {
+                ranks[2].push_back(static_cast<std::uint64_t>(ints_l4.size()));
+            }
+            if ((x / block_size_l3) == 0) {
+                // Fits in 31 bits.
+                ints_l3.push_back(static_cast<std::uint32_t>(0 | (x << 1)));
+                return;
+            } else {
+                const auto i = ints_l4.size() - ranks[2].back();
+                ints_l3.push_back(static_cast<std::uint32_t>(1 | (i << 1)));
+            }
+            // Last level: the value is stored in full, without a flag bit.
+            ints_l4.push_back(x);
+        };
+
+        // Appends the base of a leaf: low 8 bits in level 1 (no flag bit),
+        // the rest in 'links'. The rank sample is still taken so that level-1
+        // blocks stay aligned with 'ranks[0]'.
+        auto append_leaf = [&](std::uint64_t x) {
+            if ((ints_l1.size() % block_size_l1) == 0) {
+                ranks[0].push_back(static_cast<std::uint64_t>(ints_l2.size()));
+            }
+            ints_l1.push_back(static_cast<std::uint8_t>(x & 0xFF));
+            links.push_back(x >> 8);
+        };
+
+        for (std::uint64_t i = 0; i < bc_units.size(); ++i) {
+            if (leaves[i]) {
+                append_leaf(bc_units[i].base);
+            } else {
+                append_unit(bc_units[i].base ^ i);
+            }
+            append_unit(bc_units[i].check ^ i);
+            if (bc_units[i].check == i) {
+                m_num_frees += 1;
+            }
+        }
+
+        // release
+        m_ints_l1.build(ints_l1);
+        m_ints_l2.build(ints_l2);
+        m_ints_l3.build(ints_l3);
+        m_ints_l4.build(ints_l4);
+        for (std::uint32_t j = 0; j < m_ranks.size(); ++j) {
+            m_ranks[j].build(ranks[j]);
+        }
+        m_links = compact_vector(links);
+        // Rank support enabled, select support disabled.
+        m_leaves = bit_vector(leaves, true, false);
+    }
+
+    // Base value of unit i (undoes the XOR with i applied at build time).
+    // Not meaningful for leaf units, whose base is stored via link().
+    inline std::uint64_t base(std::uint64_t i) const {
+        return access(i * 2) ^ i;
+    }
+
+    // Check value of unit i (undoes the XOR with i applied at build time).
+    inline std::uint64_t check(std::uint64_t i) const {
+        return access(i * 2 + 1) ^ i;
+    }
+
+    // Value stored for leaf unit i: low 8 bits from level 1, high bits from m_links.
+    inline std::uint64_t link(std::uint64_t i) const {
+        return m_ints_l1[i * 2] | (m_links[m_leaves.rank(i)] << 8);
+    }
+
+    inline bool is_leaf(std::uint64_t i) const {
+        return m_leaves[i];
+    }
+
+    // A unit is in use when its check differs from its own index.
+    inline bool is_used(std::uint64_t i) const {
+        return check(i) != i;
+    }
+
+    // Two level-1 entries are stored per unit.
+    inline std::uint64_t num_units() const {
+        return m_ints_l1.size() / 2;
+    }
+
+    inline std::uint64_t num_free_units() const {
+        return m_num_frees;
+    }
+
+    inline std::uint64_t num_nodes() const {
+        return num_units() - num_free_units();
+    }
+
+    inline std::uint64_t num_leaves() const {
+        return m_leaves.num_ones();
+    }
+
+    // Passes every member to the visitor in a fixed order.
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_num_frees);
+        visitor.visit(m_ints_l1);
+        visitor.visit(m_ints_l2);
+        visitor.visit(m_ints_l3);
+        visitor.visit(m_ints_l4);
+        for (std::uint32_t j = 0; j < m_ranks.size(); j++) {
+            visitor.visit(m_ranks[j]);
+        }
+        visitor.visit(m_links);
+        visitor.visit(m_leaves);
+    }
+
+  private:
+    // Decodes the i-th stored element: follows flagged entries to the next
+    // level (block sample + stored offset) until an unflagged entry or the
+    // last level is reached.
+    inline std::uint64_t access(std::uint64_t i) const {
+        std::uint64_t x = m_ints_l1[i] >> 1;
+        if ((m_ints_l1[i] & 1U) == 0) {
+            return x;
+        }
+        i = m_ranks[0][i / block_size_l1] + x;
+
+        x = m_ints_l2[i] >> 1;
+        if ((m_ints_l2[i] & 1U) == 0) {
+            return x;
+        }
+        i = m_ranks[1][i / block_size_l2] + x;
+
+        x = m_ints_l3[i] >> 1;
+        if ((m_ints_l3[i] & 1U) == 0) {
+            return x;
+        }
+        i = m_ranks[2][i / block_size_l3] + x;
+
+        return m_ints_l4[i];
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/bc_vector_8.hpp
+++ b/include/xcdat/bc_vector_8.hpp
@ -0,0 +1,150 @@
+#pragma once
+
+#include <array>
+
+#include "bit_vector.hpp"
+#include "compact_vector.hpp"
+
+namespace xcdat {
+
+// Byte-oriented packed storage for the base/check values of a sequence of units.
+//
+// Each unit i contributes two consecutive elements: base ^ i at position 2*i
+// and check ^ i at position 2*i + 1. An element is split into bytes, the k-th
+// byte going to level k; a flag per byte tells whether the element continues
+// on the next level. For leaf units the low 8 bits of base are stored on
+// level 0 and the remaining bits in m_links.
+class bc_vector_8 {
+  public:
+    static constexpr std::uint32_t l1_bits = 8;
+    static constexpr std::uint32_t max_levels = sizeof(std::uint64_t);
+
+  private:
+    // Index of the highest level that received a byte.
+    std::uint32_t m_num_levels = 0;
+    // Number of units whose check equals their own index.
+    std::uint64_t m_num_frees = 0;
+    std::array<immutable_vector<std::uint8_t>, max_levels> m_bytes;
+    std::array<bit_vector, max_levels - 1> m_nexts;
+    compact_vector m_links;
+    bit_vector m_leaves;
+
+  public:
+    bc_vector_8() = default;
+    virtual ~bc_vector_8() = default;
+
+    bc_vector_8(const bc_vector_8&) = delete;
+    bc_vector_8& operator=(const bc_vector_8&) = delete;
+
+    bc_vector_8(bc_vector_8&&) noexcept = default;
+    bc_vector_8& operator=(bc_vector_8&&) noexcept = default;
+
+    // Builds the packed representation.
+    // bc_units: indexable sequence whose elements expose 'base' and 'check'.
+    // leaves:   builder whose i-th bit tells whether unit i is a leaf.
+    template <class BcUnits>
+    explicit bc_vector_8(const BcUnits& bc_units, bit_vector::builder&& leaves) {
+        std::array<std::vector<std::uint8_t>, max_levels> level_bytes;
+        std::array<bit_vector::builder, max_levels> level_flags;  // The last will not be released
+        std::vector<std::uint64_t> leaf_links;
+
+        level_bytes[0].reserve(bc_units.size() * 2);
+        level_flags[0].reserve(bc_units.size() * 2);
+        leaf_links.reserve(bc_units.size());
+
+        // Spreads 'value' over the levels, one byte per level, lowest byte
+        // first. Every level but the last one used gets a 'continues' flag.
+        const auto push_unit = [&](std::uint64_t value) {
+            std::uint32_t level = 0;
+            for (;; ++level) {
+                level_bytes[level].push_back(static_cast<std::uint8_t>(value & 0xFF));
+                value >>= 8;
+                if (value == 0) {
+                    break;
+                }
+                level_flags[level].push_back(true);
+            }
+            level_flags[level].push_back(false);
+            m_num_levels = std::max(m_num_levels, level);
+        };
+
+        // A leaf keeps only its low byte on level 0 (never continued); the
+        // remaining bits are recorded as a link.
+        const auto push_leaf = [&](std::uint64_t value) {
+            level_bytes[0].push_back(static_cast<std::uint8_t>(value & 0xFF));
+            level_flags[0].push_back(false);
+            leaf_links.push_back(value >> 8);
+        };
+
+        for (std::uint64_t i = 0; i < bc_units.size(); ++i) {
+            if (leaves[i]) {
+                push_leaf(bc_units[i].base);
+            } else {
+                push_unit(bc_units[i].base ^ i);
+            }
+            push_unit(bc_units[i].check ^ i);
+            if (bc_units[i].check == i) {
+                m_num_frees += 1;
+            }
+        }
+
+        // Release: every used level gets its bytes; every level except the
+        // highest one also gets its continuation flags (rank enabled).
+        for (std::uint32_t level = 0; level <= m_num_levels; ++level) {
+            m_bytes[level].build(level_bytes[level]);
+            if (level < m_num_levels) {
+                m_nexts[level] = bit_vector(level_flags[level], true, false);
+            }
+        }
+        m_links = compact_vector(leaf_links);
+        m_leaves = bit_vector(leaves, true, false);
+    }
+
+    // Base value of unit i (undoes the XOR with i applied at build time).
+    inline std::uint64_t base(std::uint64_t i) const {
+        return access(2 * i) ^ i;
+    }
+
+    // Check value of unit i (undoes the XOR with i applied at build time).
+    inline std::uint64_t check(std::uint64_t i) const {
+        return access(2 * i + 1) ^ i;
+    }
+
+    // Value stored for leaf unit i: low byte from level 0, high bits from m_links.
+    inline std::uint64_t link(std::uint64_t i) const {
+        return m_bytes[0][i * 2] | (m_links[m_leaves.rank(i)] << 8);
+    }
+
+    // Whether unit i is marked as a leaf.
+    inline bool is_leaf(std::uint64_t i) const {
+        return m_leaves[i];
+    }
+
+    // A unit is in use when its check differs from its own index.
+    inline bool is_used(std::uint64_t i) const {
+        return check(i) != i;
+    }
+
+    // Two level-0 bytes are stored per unit.
+    inline std::uint64_t num_units() const {
+        return m_bytes[0].size() / 2;
+    }
+
+    // Number of units counted as free at build time.
+    inline std::uint64_t num_free_units() const {
+        return m_num_frees;
+    }
+
+    // Units that are not free.
+    inline std::uint64_t num_nodes() const {
+        return num_units() - num_free_units();
+    }
+
+    // Number of set bits in the leaf marks.
+    inline std::uint64_t num_leaves() const {
+        return m_leaves.num_ones();
+    }
+
+    // Passes every member to the visitor in a fixed order.
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_num_levels);
+        visitor.visit(m_num_frees);
+        for (auto& bytes : m_bytes) {
+            visitor.visit(bytes);
+        }
+        for (auto& nexts : m_nexts) {
+            visitor.visit(nexts);
+        }
+        visitor.visit(m_links);
+        visitor.visit(m_leaves);
+    }
+
+  private:
+    // Reassembles the i-th stored element: starts from the level-0 byte and,
+    // while the continuation flag is set, moves to the next level at the rank
+    // of the current position and ORs in the next byte.
+    inline std::uint64_t access(std::uint64_t i) const {
+        std::uint64_t value = m_bytes[0][i];
+        for (std::uint32_t level = 0; level < m_num_levels;) {
+            if (!m_nexts[level][i]) {
+                break;
+            }
+            i = m_nexts[level].rank(i);
+            ++level;
+            value |= static_cast<std::uint64_t>(m_bytes[level][i]) << (level * 8);
+        }
+        return value;
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/bit_tools.hpp
+++ b/include/xcdat/bit_tools.hpp
@ -0,0 +1,148 @@
+#pragma once
+
+#include <cstdint>
+#include <cstdlib>
+
+#ifdef __SSE4_2__
+#include <nmmintrin.h>
+#endif
+
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif
+
+// The implementations are from https://github.com/ot/succinct.
+namespace xcdat::bit_tools {
+
+// Lowest bit of every 4-bit group set.
+static constexpr std::uint64_t ones_step_4 = 0x1111111111111111ULL;
+// Lowest bit of every byte set.
+static constexpr std::uint64_t ones_step_8 = 0x0101010101010101ULL;
+// Lowest bit of each of the seven 9-bit fields (bits 0, 9, ..., 54) set.
+static constexpr std::uint64_t ones_step_9 = 1ULL << 0 | 1ULL << 9 | 1ULL << 18 | 1ULL << 27 |  //
+                                             1ULL << 36 | 1ULL << 45 | 1ULL << 54;
+// Highest bit of every byte set (0x80 in each byte).
+static constexpr std::uint64_t msbs_step_8 = 0x80ULL * ones_step_8;
+// Highest bit of each 9-bit field (bits 8, 17, ..., 62) set.
+static constexpr std::uint64_t msbs_step_9 = 0x100ULL * ones_step_9;
+
+// Returns the number of set bits in x.
+inline std::uint64_t popcount(std::uint64_t x) {
+#ifdef __SSE4_2__
+    const auto num_ones = __builtin_popcountll(x);
+    return static_cast<std::uint64_t>(num_ones);
+#else
+    // Software fallback: sum bits in pairs, then nibbles, then bytes, and
+    // finally add all byte counts with one multiplication.
+    constexpr std::uint64_t pair_mask = 0x5555555555555555ULL;
+    constexpr std::uint64_t nibble_mask = 0x3333333333333333ULL;
+    constexpr std::uint64_t byte_mask = 0x0F0F0F0F0F0F0F0FULL;
+    constexpr std::uint64_t byte_ones = 0x0101010101010101ULL;
+
+    const std::uint64_t pair_sums = x - ((x >> 1) & pair_mask);
+    const std::uint64_t nibble_sums = (pair_sums & nibble_mask) + ((pair_sums >> 2) & nibble_mask);
+    const std::uint64_t byte_sums = (nibble_sums + (nibble_sums >> 4)) & byte_mask;
+    return (byte_ones * byte_sums) >> 56;
+#endif
+}
+
+// Lookup table used by bit_position: indexed by the top 6 bits of
+// (x * debruijn64), it yields the stored bit position.
+static constexpr std::uint8_t debruijn64_mapping[64] = {
+    63, 0,  58, 1,  59, 47, 53, 2,  60, 39, 48, 27, 54, 33, 42, 3,  61, 51, 37, 40, 49, 18,
+    28, 20, 55, 30, 34, 11, 43, 14, 22, 4,  62, 57, 46, 52, 38, 26, 32, 41, 50, 36, 17, 19,
+    29, 10, 13, 21, 56, 45, 25, 31, 35, 16, 9,  12, 44, 24, 15, 8,  23, 7,  6,  5,
+};
+
+// Multiplier paired with debruijn64_mapping.
+static constexpr std::uint64_t debruijn64 = 0x07EDD5E59A4E28C2ULL;
+
+// Returns the position of the single set bit of x.
+// The product with debruijn64 is reduced to its top 6 bits, which select the
+// answer in debruijn64_mapping.
+inline std::uint8_t bit_position(std::uint64_t x) {
+    const std::uint64_t slot = (x * debruijn64) >> 58;
+    return debruijn64_mapping[slot];
+}
+
+// Returns the position of the most significant set bit of x; 0 when x == 0.
+inline std::uint64_t msb(std::uint64_t x) {
+    if (x == 0) {
+        return 0;
+    }
+#ifdef __SSE4_2__
+    return 63 - __builtin_clzll(x);
+#else
+    // Right-saturate the word: copy the top set bit into every lower position.
+    for (unsigned shift = 1; shift <= 32; shift <<= 1) {
+        x |= x >> shift;
+    }
+    // Keep only the top set bit, then look its position up.
+    x ^= x >> 1;
+    return bit_position(x);
+#endif
+}
+
+// Field-wise comparison of x and y seen as packed 9-bit fields.
+// The result is masked with msbs_step_9 and shifted right by 8, so it can only
+// have bits at positions 0, 9, ..., 54 (one per field).
+// NOTE(review): in the upstream succinct library this marks the fields where
+// x_i <= y_i (unsigned) - confirm before relying on the exact predicate.
+inline std::uint64_t uleq_step_9(std::uint64_t x, std::uint64_t y) {
+    const std::uint64_t diff = (y | msbs_step_9) - (x & ~msbs_step_9);
+    const std::uint64_t merged = diff | (x ^ y);
+    const std::uint64_t adjusted = merged ^ (x & ~y);
+    const std::uint64_t field_tops = adjusted & msbs_step_9;
+    return field_tops >> 8;
+}
+
+// Returns a word whose every byte holds the number of set bits in the
+// corresponding byte of x.
+inline std::uint64_t byte_counts(std::uint64_t x) {
+    const std::uint64_t odd_bits = 0xa * ones_step_4;
+    const std::uint64_t low_pairs = 3 * ones_step_4;
+    const std::uint64_t low_nibbles = 0x0f * ones_step_8;
+
+    const std::uint64_t pair_sums = x - ((x & odd_bits) >> 1);
+    const std::uint64_t nibble_sums = (pair_sums & low_pairs) + ((pair_sums >> 2) & low_pairs);
+    return (nibble_sums + (nibble_sums >> 4)) & low_nibbles;
+}
+
+static constexpr std::uint8_t select_in_byte[2048] = {
+    8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1,
+    0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0,
+    1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,
+    0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0,
+    2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1,
+    0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0,
+    1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1, 8,
+    4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1,
+    4, 3, 3, 1, 3, 2, 2, 1, 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2,
+    1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 7, 7, 1, 7, 2,
+    2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3,
+    2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1,
+    4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3,
+    1, 3, 2, 2, 1, 8, 8, 8, 8, 8, 8, 8, 2, 8, 8, 8, 3, 8, 3, 3, 2, 8, 8, 8, 4, 8, 4, 4, 2, 8, 4, 4, 3, 4, 3, 3, 2, 8, 8,
+    8, 5, 8, 5, 5, 2, 8, 5, 5, 3, 5, 3, 3, 2, 8, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 6, 8, 6, 6, 2, 8,
+    6, 6, 3, 6, 3, 3, 2, 8, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 8, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2,
+    6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 7, 8, 7, 7, 2, 8, 7, 7, 3, 7, 3, 3, 2, 8, 7, 7, 4, 7, 4, 4,
+    2, 7, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3,
+    3, 2, 8, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6,
+    5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 4, 8, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 3, 8, 8, 8,
+    5, 8, 5, 5, 4, 8, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 3, 8, 8, 8, 6, 8, 6, 6, 4, 8, 6,
+    6, 4, 6, 4, 4, 3, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 3, 8, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8,
+    8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 3, 8, 8, 8, 7, 8, 7, 7, 4, 8, 7, 7, 4, 7, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 5,
+    8, 7, 7, 5, 7, 5, 5, 3, 8, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6,
+    3, 8, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, 4, 3, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5,
+    5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 4, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 4, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8,
+    6, 8, 6, 6, 5, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8,
+    8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 7, 8, 7, 7, 5, 8,
+    7, 7, 5, 7, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 4,
+    8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6,
+    8, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8,
+    8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7};
+
+// Returns the position of the k-th (0-based) set bit of x.
+// NOTE(review): presumably requires k < popcount(x) - confirm against callers.
+inline std::uint64_t select_in_word(const std::uint64_t x, const std::uint64_t k) {
+#ifdef __BMI2__
+    // Deposit the single bit 1 << k onto the set positions of x, then count
+    // the trailing zeros of the result.
+    return _tzcnt_u64(_pdep_u64(1ULL << k, x));
+#else
+    // Running totals of the per-byte popcounts: byte i holds the number of set
+    // bits in bytes 0..i of x.
+    const std::uint64_t byte_sums = byte_counts(x) * ones_step_8;
+    // k replicated into every byte.
+    const std::uint64_t k_step_8 = k * ones_step_8;
+    // The top bit of byte i survives the subtraction when the running total of
+    // byte i does not exceed k.
+    const std::uint64_t geq_k_step_8 = (((k_step_8 | msbs_step_8) - byte_sums) & msbs_step_8);
+    // Bit offset of the byte that contains the wanted bit.
+    const std::uint64_t place = popcount(geq_k_step_8) * 8;
+    // Rank of the wanted bit inside that byte: k minus the ones in lower bytes.
+    const std::uint64_t byte_rank = k - (((byte_sums << 8) >> place) & 0xFFULL);
+    // select_in_byte is indexed by (byte value | rank << 8).
+    return place + select_in_byte[((x >> place) & 0xFF) | (byte_rank << 8)];
+#endif
+}
+
+}  // namespace xcdat::bit_tools
--- a/include/xcdat/bit_vector.hpp
+++ b/include/xcdat/bit_vector.hpp
@ -0,0 +1,272 @@
+#pragma once
+
+#include <numeric>
+#include <vector>
+
+#include "bit_tools.hpp"
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+// Vigna's Rank9 implementation from https://github.com/ot/succinct.
+//
+// Immutable bit sequence with optional rank and select support. Bits are
+// packed into 64-bit words; eight words form a block. The rank hints hold, for
+// every block, a pair (absolute rank before the block, seven 9-bit relative
+// ranks of words 1..7 inside the block), followed by one sentinel pair whose
+// absolute rank is the total number of set bits.
+class bit_vector {
+  public:
+    // Growable bit sequence used to prepare the content of a bit_vector.
+    class builder {
+      private:
+        std::uint64_t m_size = 0;           // number of bits
+        std::vector<std::uint64_t> m_bits;  // packed words, bit i at word i / 64, position i % 64
+
+      public:
+        builder() = default;
+        virtual ~builder() = default;
+
+        builder(const builder&) = delete;
+        builder& operator=(const builder&) = delete;
+
+        builder(builder&&) noexcept = default;
+        builder& operator=(builder&&) noexcept = default;
+
+        // Creates a builder holding 'size' zero bits.
+        builder(std::uint64_t size) {
+            resize(size);
+        }
+
+        // Appends one bit, adding a fresh zero word whenever a word boundary is crossed.
+        inline void push_back(bool x) {
+            if (m_size % 64 == 0) {
+                m_bits.push_back(0);
+            }
+            if (x) {
+                set_bit(m_size, true);
+            }
+            m_size += 1;
+        }
+
+        // Reads the i-th bit; i is not range-checked.
+        inline bool operator[](std::uint64_t i) const {
+            return m_bits[i / 64] & (1ULL << (i % 64));
+        }
+
+        // Sets or clears the i-th bit; i is not range-checked.
+        inline void set_bit(std::uint64_t i, bool x = true) {
+            if (x) {
+                m_bits[i / 64] |= (1ULL << (i % 64));
+            } else {
+                m_bits[i / 64] &= (~(1ULL << (i % 64)));
+            }
+        }
+
+        // Changes the number of bits; words added by the resize are zero.
+        inline void resize(std::uint64_t size) {
+            m_bits.resize(words_for(size), 0ULL);
+            m_size = size;
+        }
+
+        // Reserves storage for 'capacity' bits.
+        inline void reserve(std::uint64_t capacity) {
+            m_bits.reserve(words_for(capacity));
+        }
+
+        inline std::uint64_t size() const {
+            return m_size;
+        }
+
+        friend class bit_vector;
+    };
+
+    static constexpr std::uint64_t block_size = 8;  // i.e., 64 * 8 bits
+    static constexpr std::uint64_t selects_per_hint = 64 * block_size * 2;
+
+  private:
+    std::uint64_t m_size = 0;
+    std::uint64_t m_num_ones = 0;
+    immutable_vector<std::uint64_t> m_bits;
+    immutable_vector<std::uint64_t> m_rank_hints;    // empty unless rank is enabled
+    immutable_vector<std::uint64_t> m_select_hints;  // empty unless select is enabled
+
+  public:
+    bit_vector() = default;
+    virtual ~bit_vector() = default;
+
+    bit_vector(const bit_vector&) = delete;
+    bit_vector& operator=(const bit_vector&) = delete;
+
+    bit_vector(bit_vector&&) noexcept = default;
+    bit_vector& operator=(bit_vector&&) noexcept = default;
+
+    // Builds the vector from the words of 'b'.
+    // enable_rank:   build the hints needed by rank().
+    // enable_select: build the hints needed by select(); only honoured when
+    //                enable_rank is also set, because select uses the rank hints.
+    explicit bit_vector(builder& b, bool enable_rank = false, bool enable_select = false) {
+        m_bits.build(b.m_bits);
+        m_size = b.m_size;
+        m_num_ones = std::accumulate(m_bits.begin(), m_bits.end(), 0ULL,
+                                     [](std::uint64_t acc, std::uint64_t x) { return acc + bit_tools::popcount(x); });
+        if (enable_rank) {
+            build_rank_hints();
+        }
+        if (enable_rank and enable_select) {
+            build_select_hints();
+        }
+    }
+
+    inline std::uint64_t size() const {
+        return m_size;
+    }
+
+    inline std::uint64_t num_ones() const {
+        return m_num_ones;
+    }
+
+    // Reads the i-th bit; i is not range-checked.
+    inline bool operator[](std::uint64_t i) const {
+        return m_bits[i / 64] & (1ULL << (i % 64));
+    }
+
+    // The number of 1s in B[0..i)
+    // Requires i <= size() and rank support enabled at construction.
+    inline std::uint64_t rank(std::uint64_t i) const {
+        assert(i <= size());
+        assert(m_rank_hints.size() != 0);
+
+        if (i == size()) {
+            return num_ones();
+        }
+        const auto [wi, wj] = decompose<64>(i);
+        // The shift by (64 - wj) keeps only the wj low bits of the word; the
+        // wj == 0 case is excluded because a shift by 64 is undefined.
+        return rank_for_word(wi) + (wj != 0 ? bit_tools::popcount(m_bits[wi] << (64 - wj)) : 0);
+    }
+
+    // The position of the n-th (0-based) set bit.
+    // Requires n < num_ones() and select support enabled at construction.
+    inline std::uint64_t select(std::uint64_t n) const {
+        assert(n < num_ones());
+        assert(m_select_hints.size() != 0);
+
+        // Block that contains the wanted bit.
+        const std::uint64_t bi = select_for_block(n);
+        assert(bi < num_blocks());
+
+        std::uint64_t curr_rank = rank_for_block(bi);
+        assert(curr_rank <= n);
+
+        // Compare the remaining rank with all packed sub-ranks at once to find
+        // the word inside the block.
+        std::uint64_t rank_in_block_parallel = (n - curr_rank) * bit_tools::ones_step_9;
+        std::uint64_t sub_ranks = ranks_in_block(bi);
+        std::uint64_t sub_block_offset =
+            bit_tools::uleq_step_9(sub_ranks, rank_in_block_parallel) * bit_tools::ones_step_9 >> 54 & 0x7;
+        curr_rank += sub_ranks >> (7 - sub_block_offset) * 9 & 0x1FF;
+        assert(curr_rank <= n);
+
+        std::uint64_t word_offset = (bi * block_size) + sub_block_offset;
+        return word_offset * 64 + bit_tools::select_in_word(m_bits[word_offset], n - curr_rank);
+    }
+
+    // Passes every member to the visitor in a fixed order.
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_size);
+        visitor.visit(m_num_ones);
+        visitor.visit(m_bits);
+        visitor.visit(m_rank_hints);
+        visitor.visit(m_select_hints);
+    }
+
+  private:
+    // Splits x into (x / N, x % N).
+    template <std::uint64_t N>
+    static std::tuple<std::uint64_t, std::uint64_t> decompose(std::uint64_t x) {
+        return {x / N, x % N};
+    }
+
+    // Number of 64-bit words needed for nbits bits.
+    static std::uint64_t words_for(std::uint64_t nbits) {
+        return (nbits + 63) / 64;
+    }
+
+    // Number of blocks, not counting the sentinel pair.
+    inline std::uint64_t num_blocks() const {
+        return m_rank_hints.size() / 2 - 1;
+    }
+
+    // Absolute rank until the bi-th block
+    inline std::uint64_t rank_for_block(std::uint64_t bi) const {
+        return m_rank_hints[bi * 2];
+    }
+
+    // Packed ranks in the bi-th block
+    inline std::uint64_t ranks_in_block(std::uint64_t bi) const {
+        return m_rank_hints[bi * 2 + 1];
+    }
+
+    // Absolute rank until the wi-th word
+    inline std::uint64_t rank_for_word(std::uint64_t wi) const {
+        const auto [bi, bj] = decompose<block_size>(wi);
+        return rank_for_block(bi) + rank_in_block(bi, bj);
+    }
+
+    // Relative rank in the bi-th block
+    // For bj == 0 the shift is 63, which moves every stored field out and yields 0.
+    inline std::uint64_t rank_in_block(std::uint64_t bi, std::uint64_t bj) const {
+        return ranks_in_block(bi) >> ((7 - bj) * 9) & 0x1FF;
+    }
+
+    // Finds the block containing the n-th set bit: binary search over the
+    // absolute block ranks, restricted to the range given by the select hints.
+    inline std::uint64_t select_for_block(std::uint64_t n) const {
+        auto [a, b] = select_with_hint(n);
+        while (b - a > 1) {
+            const std::uint64_t lb = a + (b - a) / 2;
+            if (rank_for_block(lb) <= n) {
+                a = lb;
+            } else {
+                b = lb;
+            }
+        }
+        return a;
+    }
+
+    // Returns the half-open block range [a, b) to search for the n-th set bit.
+    inline std::tuple<std::uint64_t, std::uint64_t> select_with_hint(std::uint64_t n) const {
+        const std::uint64_t i = n / selects_per_hint;
+        return {i != 0 ? m_select_hints[i - 1] : 0, m_select_hints[i] + 1};
+    }
+
+    // Builds m_rank_hints as pairs (absolute rank, packed relative ranks), one
+    // pair per block plus one sentinel pair.
+    void build_rank_hints() {
+        std::uint64_t curr_num_ones = 0;
+        std::uint64_t curr_num_ones_in_block = 0;
+        std::uint64_t curr_ranks_in_block = 0;
+
+        const std::uint64_t num_words = m_bits.size();
+        std::vector<std::uint64_t> rank_hints = {curr_num_ones};
+
+        for (std::uint64_t wi = 0; wi < num_words; wi++) {
+            const std::uint64_t bi = wi % block_size;  // Relative position in the block
+            const std::uint64_t num_ones_in_word = bit_tools::popcount(m_bits[wi]);
+
+            if (bi != 0) {
+                curr_ranks_in_block <<= 9;
+                curr_ranks_in_block |= curr_num_ones_in_block;
+            }
+
+            curr_num_ones += num_ones_in_word;
+            curr_num_ones_in_block += num_ones_in_word;
+
+            if (bi == block_size - 1) {
+                // Close the block: its packed ranks, then the absolute rank of
+                // the next block (which is the sentinel if no words remain).
+                rank_hints.push_back(curr_ranks_in_block);
+                rank_hints.push_back(curr_num_ones);
+                curr_num_ones_in_block = 0;
+                curr_ranks_in_block = 0;
+            }
+        }
+
+        // Padding the remaining hints
+        const std::uint64_t remain = block_size - (num_words % block_size);
+        for (std::uint64_t wi = 0; wi < remain; wi++) {
+            curr_ranks_in_block <<= 9;
+            curr_ranks_in_block |= curr_num_ones_in_block;
+        }
+        rank_hints.push_back(curr_ranks_in_block);
+
+        // Sentinel
+        // Only needed after a partial last block; a full last block already
+        // pushed its closing absolute rank in the loop above. The sentinel's
+        // absolute rank must be the total number of set bits, because
+        // rank_for_block(num_blocks()) is read by build_select_hints() and by
+        // the binary search in select_for_block().
+        if (num_words % block_size != 0) {
+            rank_hints.push_back(curr_num_ones);
+            rank_hints.push_back(0);
+        }
+
+        // Release
+        m_rank_hints.build(rank_hints);
+    }
+
+    // Builds m_select_hints: records the block index each time the absolute
+    // rank passes the next multiple of selects_per_hint, and ends with
+    // num_blocks(). Requires the rank hints to be built.
+    void build_select_hints() {
+        std::vector<std::uint64_t> select_hints;
+        std::uint64_t threshold = selects_per_hint;
+        for (std::uint64_t bi = 0; bi < num_blocks(); ++bi) {
+            if (rank_for_block(bi + 1) > threshold) {
+                select_hints.push_back(bi);
+                threshold += selects_per_hint;
+            }
+        }
+        select_hints.push_back(num_blocks());
+        m_select_hints.build(select_hints);
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/code_table.hpp
+++ b/include/xcdat/code_table.hpp
@ -0,0 +1,111 @@
+#pragma once
+
+#include <array>
+#include <string_view>
+
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Table that maps bytes to frequency-ranked codes and back.
+//!
+//! m_table[0..255] maps a byte to its code, i.e. the byte's rank when all 256 byte values
+//! are sorted by descending frequency in the keys; m_table[256..511] is the inverse
+//! mapping. m_alphabet holds the bytes that occur at least once, in ascending byte order.
+class code_table {
+  private:
+    std::uint64_t m_max_length = 0;
+    // Zero-filled so that a default-constructed table never exposes indeterminate bytes.
+    std::array<std::uint8_t, 512> m_table = {};
+    immutable_vector<std::uint8_t> m_alphabet;
+
+    struct counter_type {
+        std::uint8_t ch;
+        std::uint64_t freq;
+    };
+
+  public:
+    code_table() = default;
+    virtual ~code_table() = default;
+
+    code_table(const code_table&) = delete;
+    code_table& operator=(const code_table&) = delete;
+
+    code_table(code_table&&) noexcept = default;
+    code_table& operator=(code_table&&) noexcept = default;
+
+    //! Builds the table from 'keys', a range of byte strings (each needs begin/end and length()).
+    template <class Strings>
+    code_table(const Strings& keys) {
+        std::array<counter_type, 256> counter;
+        for (std::uint32_t ch = 0; ch < 256; ++ch) {
+            counter[ch] = {static_cast<std::uint8_t>(ch), 0};
+        }
+
+        // Count byte occurrences and track the longest key.
+        m_max_length = 0;
+        for (const auto& key : keys) {
+            for (std::uint8_t ch : key) {
+                counter[ch].freq += 1;
+            }
+            m_max_length = std::max<std::uint64_t>(m_max_length, key.length());
+        }
+
+        // Collect the alphabet before sorting, while 'counter' is still in byte order.
+        {
+            std::vector<std::uint8_t> alphabet;
+            for (const auto& cf : counter) {
+                if (cf.freq != 0) {
+                    alphabet.push_back(cf.ch);
+                }
+            }
+            m_alphabet.build(alphabet);
+        }
+
+        // More frequent bytes get smaller codes.
+        std::sort(counter.begin(), counter.end(),
+                  [](const counter_type& a, const counter_type& b) { return a.freq > b.freq; });
+
+        for (std::uint32_t ch = 0; ch < 256; ++ch) {
+            m_table[counter[ch].ch] = static_cast<std::uint8_t>(ch);
+        }
+        // Inverse mapping (code -> byte) lives in the upper half of the table.
+        for (std::uint32_t ch = 0; ch < 256; ++ch) {
+            m_table[m_table[ch] + 256] = static_cast<std::uint8_t>(ch);
+        }
+    }
+
+    //! Number of distinct bytes that occur in the keys.
+    inline std::uint64_t alphabet_size() const {
+        return m_alphabet.size();
+    }
+
+    //! Length of the longest key seen at construction.
+    inline std::uint64_t max_length() const {
+        return m_max_length;
+    }
+
+    //! Code assigned to the byte 'ch'.
+    inline std::uint8_t get_code(char ch) const {
+        return m_table[static_cast<std::uint8_t>(ch)];
+    }
+
+    //! Byte that the code 'cd' stands for.
+    inline char get_char(std::uint8_t cd) const {
+        return static_cast<char>(m_table[cd + 256]);
+    }
+
+    //! True if the NUL byte occurs in the keys. The alphabet is in ascending byte order,
+    //! so NUL can only be its first entry; an empty alphabet has no NUL.
+    inline bool has_null() const {
+        return m_alphabet.size() != 0 && *m_alphabet.begin() == '\0';
+    }
+
+    //! Iterators over the alphabet (ascending byte order).
+    inline auto begin() const {
+        return m_alphabet.begin();
+    }
+
+    inline auto end() const {
+        return m_alphabet.end();
+    }
+
+    //! Reverse iterators over the alphabet (descending byte order).
+    inline auto rbegin() const {
+        return m_alphabet.rbegin();
+    }
+
+    inline auto rend() const {
+        return m_alphabet.rend();
+    }
+
+    //! Visit the members (used by the load/save/mmap/size visitors).
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_max_length);
+        visitor.visit(m_table);
+        visitor.visit(m_alphabet);
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/compact_vector.hpp
+++ b/include/xcdat/compact_vector.hpp
@ -0,0 +1,89 @@
+#pragma once
+
+#include "bit_tools.hpp"
+#include "exception.hpp"
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Immutable vector of unsigned integers, each stored with the same number of bits.
+//! Elements are packed back to back into 64-bit chunks, so an element may straddle two
+//! neighbouring chunks.
+class compact_vector {
+  private:
+    std::uint64_t m_size = 0;  // number of elements
+    std::uint64_t m_bits = 0;  // bits per element
+    std::uint64_t m_mask = 0;  // low m_bits bits set
+    immutable_vector<std::uint64_t> m_chunks;
+
+  public:
+    compact_vector() = default;
+    virtual ~compact_vector() = default;
+
+    compact_vector(const compact_vector&) = delete;
+    compact_vector& operator=(const compact_vector&) = delete;
+
+    compact_vector(compact_vector&&) noexcept = default;
+    compact_vector& operator=(compact_vector&&) noexcept = default;
+
+    //! Packs 'vec' using the width needed by its largest element.
+    //! Throws xcdat::exception (via XCDAT_THROW_IF) if 'vec' is empty.
+    template <class Vec>
+    compact_vector(const Vec& vec) {
+        XCDAT_THROW_IF(vec.size() == 0, "The input vector is empty.");
+
+        m_size = vec.size();
+        m_bits = needed_bits(*std::max_element(vec.begin(), vec.end()));
+        // Shifting a 64-bit value by 64 is undefined, so the full-width mask is special-cased.
+        m_mask = (m_bits < 64) ? ((1ULL << m_bits) - 1) : ~std::uint64_t(0);
+
+        // Value-initialised to zero, so each element only needs to be OR-ed into place.
+        std::vector<std::uint64_t> chunks(words_for(m_size * m_bits));
+
+        for (std::uint64_t i = 0; i < m_size; i++) {
+            const auto [quo, mod] = decompose(i * m_bits);
+            const std::uint64_t value = vec[i] & m_mask;
+            chunks[quo] |= value << mod;
+            if (64 < mod + m_bits) {
+                // The element spills over: its high bits go to the start of the next chunk.
+                // mod is non-zero here, so the shift amount stays below 64.
+                chunks[quo + 1] |= value >> (64ULL - mod);
+            }
+        }
+        m_chunks.build(chunks);
+    }
+
+    //! Returns the i-th element; i must be less than size().
+    inline std::uint64_t operator[](std::uint64_t i) const {
+        assert(i < m_size);
+        const auto [quo, mod] = decompose(i * m_bits);
+        if (mod + m_bits <= 64) {
+            return (m_chunks[quo] >> mod) & m_mask;
+        } else {
+            // Stitch the low part from this chunk with the high part from the next one.
+            return ((m_chunks[quo] >> mod) | (m_chunks[quo + 1] << (64 - mod))) & m_mask;
+        }
+    }
+
+    //! Number of stored elements.
+    inline std::uint64_t size() const {
+        return m_size;
+    }
+
+    //! Number of bits used per element.
+    inline std::uint64_t bits() const {
+        return m_bits;
+    }
+
+    //! Visit the members (used by the load/save/mmap/size visitors).
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_size);
+        visitor.visit(m_bits);
+        visitor.visit(m_mask);
+        visitor.visit(m_chunks);
+    }
+
+  private:
+    // Width of x in bits, derived from the position of its most significant set bit.
+    static std::uint64_t needed_bits(std::uint64_t x) {
+        return bit_tools::msb(x) + 1;
+    }
+
+    // Splits a bit position into (chunk index, bit offset inside the chunk).
+    static std::tuple<std::uint64_t, std::uint64_t> decompose(std::uint64_t x) {
+        return {x / 64, x % 64};
+    }
+
+    // Number of 64-bit words needed to hold nbits bits.
+    static std::uint64_t words_for(std::uint64_t nbits) {
+        return (nbits + 63) / 64;
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/exception.hpp
+++ b/include/xcdat/exception.hpp
@ -0,0 +1,25 @@
+#pragma once
+
+#include <exception>
+
+namespace xcdat {
+
+//! Exception type thrown by the XCDAT_THROW macros.
+//! Only the message pointer is stored (no copy is made), so the pointed-to string must
+//! outlive the exception object; the macros pass string literals.
+class exception : public std::exception {
+  public:
+    explicit exception(const char* msg) : msg_{msg} {}
+    // 'noexcept' replaces the deprecated dynamic exception specification 'throw()'.
+    ~exception() noexcept override = default;
+
+    //! Returns the message pointer given at construction.
+    const char* what() const noexcept override {
+        return msg_;
+    }
+
+  private:
+    const char* msg_;
+};
+
+// Stringification helpers: the two-level expansion lets an argument such as __LINE__ be
+// macro-expanded before '#' turns it into a string literal.
+#define XCDAT_TO_STR_(n) #n
+#define XCDAT_TO_STR(n) XCDAT_TO_STR_(n)
+// Throws xcdat::exception with the message "<file>:<line>:<msg>". The parts are joined by
+// adjacent string-literal concatenation, so 'msg' has to be a string literal.
+#define XCDAT_THROW(msg) throw xcdat::exception(__FILE__ ":" XCDAT_TO_STR(__LINE__) ":" msg)
+// Throws via XCDAT_THROW when 'cond' is true. Written as a single void expression
+// (short-circuit '||') so it can be used wherever an expression statement is allowed.
+#define XCDAT_THROW_IF(cond, msg) (void)((!(cond)) || (XCDAT_THROW(msg), 0))
+
+}  // namespace xcdat
--- a/include/xcdat/immutable_vector.hpp
+++ b/include/xcdat/immutable_vector.hpp
@ -0,0 +1,107 @@
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <fstream>
+#include <iterator>
+#include <memory>
+
+namespace xcdat {
+
+//! Read-only array of T. The elements are either owned (after build()/load()) or refer
+//! to external memory supplied to mmap(), in which case m_allocator stays empty.
+template <class T>
+class immutable_vector {
+  private:
+    std::unique_ptr<T[]> m_allocator;  // owned storage, if any
+    std::uint64_t m_size = 0;
+    const T* m_data = nullptr;
+
+  public:
+    immutable_vector() = default;
+    virtual ~immutable_vector() = default;
+
+    immutable_vector(const immutable_vector&) = delete;
+    immutable_vector& operator=(const immutable_vector&) = delete;
+
+    //! Move constructor. The source is left empty; a defaulted move would leave its
+    //! size and data pointer referring to storage it no longer owns.
+    immutable_vector(immutable_vector&& other) noexcept
+        : m_allocator(other.m_allocator.release()), m_size(other.m_size), m_data(other.m_data) {
+        other.m_size = 0;
+        other.m_data = nullptr;
+    }
+
+    //! Move assignment. Releases the current storage and leaves the source empty.
+    immutable_vector& operator=(immutable_vector&& other) noexcept {
+        if (this != &other) {
+            m_allocator.reset(other.m_allocator.release());
+            m_size = other.m_size;
+            m_data = other.m_data;
+            other.m_size = 0;
+            other.m_data = nullptr;
+        }
+        return *this;
+    }
+
+    //! Releases owned storage (if any) and resets to the empty state.
+    void clear() {
+        m_allocator.reset();
+        m_size = 0;
+        m_data = nullptr;
+    }
+
+    //! Builds from a contiguous container (see build()).
+    template <class Vector>
+    immutable_vector(const Vector& vec) {
+        build(vec);
+    }
+
+    //! Replaces the contents with a copy of 'vec', which must provide size() and data().
+    template <class Vector>
+    void build(const Vector& vec) {
+        clear();
+        if (vec.size() != 0) {
+            m_allocator = std::make_unique<T[]>(vec.size());
+            std::copy_n(vec.data(), vec.size(), m_allocator.get());
+            m_size = vec.size();
+            m_data = m_allocator.get();
+        }
+    }
+
+    //! Points this vector at serialized data located at 'address' (a 64-bit length
+    //! followed by the elements) without copying the elements.
+    //! Returns the number of bytes the serialized vector occupies.
+    std::uint64_t mmap(const char* address) {
+        clear();
+        // The length is copied out byte-wise so that no aligned 64-bit load is required.
+        std::uint64_t size = 0;
+        std::copy_n(address, sizeof(size), reinterpret_cast<char*>(&size));
+        m_size = size;
+        // NOTE(review): element access through m_data still assumes that the payload is
+        // suitably aligned for T - confirm against the layout produced by save().
+        m_data = reinterpret_cast<const T*>(address + sizeof(std::uint64_t));
+        return sizeof(std::uint64_t) + m_size * sizeof(T);
+    }
+
+    //! Reads a serialized vector (64-bit length followed by the elements) from 'ifs'.
+    //! If either read fails the vector is left empty; the stream keeps its error state
+    //! for the caller to inspect.
+    void load(std::ifstream& ifs) {
+        clear();
+        std::uint64_t size = 0;
+        if (!ifs.read(reinterpret_cast<char*>(&size), sizeof(size)) || size == 0) {
+            return;
+        }
+        auto buffer = std::make_unique<T[]>(size);
+        if (!ifs.read(reinterpret_cast<char*>(buffer.get()), sizeof(T) * size)) {
+            return;  // truncated payload: stay empty rather than expose partial data
+        }
+        m_allocator.swap(buffer);
+        m_size = size;
+        m_data = m_allocator.get();
+    }
+
+    //! Writes the 64-bit length followed by the raw elements to 'ofs'.
+    void save(std::ofstream& ofs) const {
+        ofs.write(reinterpret_cast<const char*>(&m_size), sizeof(m_size));
+        ofs.write(reinterpret_cast<const char*>(m_data), sizeof(T) * m_size);
+    }
+
+    //! Size of the serialized form in bytes (length field plus elements).
+    inline std::uint64_t memory_in_bytes() const {
+        return sizeof(m_size) + sizeof(T) * m_size;
+    }
+
+    //! Number of elements.
+    inline std::uint64_t size() const {
+        return m_size;
+    }
+
+    inline const T* begin() const {
+        return m_data;
+    }
+
+    inline const T* end() const {
+        return m_data + m_size;
+    }
+
+    inline auto rbegin() const {
+        return std::make_reverse_iterator(end());
+    }
+
+    inline auto rend() const {
+        return std::make_reverse_iterator(begin());
+    }
+
+    //! Returns the i-th element; i must be less than size().
+    inline const T& operator[](std::uint64_t i) const {
+        assert(i < m_size);
+        return m_data[i];
+    }
+
+    //! Pointer to the first element (nullptr when empty).
+    inline const T* data() const {
+        return m_data;
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/load_visitor.hpp
+++ b/include/xcdat/load_visitor.hpp
@ -0,0 +1,43 @@
+#pragma once
+
+#include <string_view>
+#include <type_traits>
+
+#include "exception.hpp"
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Visitor that fills an object's members by reading them from a binary file.
+class load_visitor {
+  private:
+    std::ifstream m_ifs;
+
+  public:
+    //! Opens 'filepath' for binary reading.
+    //! Throws xcdat::exception (via XCDAT_THROW_IF) if the file cannot be opened.
+    // The path is converted to std::string explicitly: std::ifstream has no string_view
+    // constructor, and the implicit string_view -> filesystem::path conversion is not
+    // accepted by every standard library.
+    load_visitor(std::string_view filepath) : m_ifs(std::string(filepath), std::ios::binary) {
+        XCDAT_THROW_IF(!m_ifs.good(), "Cannot open the input file");
+    }
+
+    virtual ~load_visitor() {
+        m_ifs.close();
+    }
+
+    //! Loads a serialized vector from the stream.
+    template <class T>
+    void visit(immutable_vector<T>& vec) {
+        vec.load(m_ifs);
+    }
+
+    //! POD objects are read as raw bytes; any other object is asked to visit its members.
+    template <class T>
+    void visit(T& obj) {
+        if constexpr (std::is_pod_v<T>) {
+            m_ifs.read(reinterpret_cast<char*>(&obj), sizeof(T));
+        } else {
+            obj.visit(*this);
+        }
+    }
+
+    //! Current read position of the stream, as reported by tellg().
+    std::uint64_t bytes() {
+        return m_ifs.tellg();
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/mmap_visitor.hpp
+++ b/include/xcdat/mmap_visitor.hpp
@ -0,0 +1,39 @@
+#pragma once
+
+#include <type_traits>
+
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Visitor that binds an object's members to serialized data held in memory,
+//! advancing a cursor through the buffer as members are visited.
+class mmap_visitor {
+  private:
+    const char* m_base = nullptr;  // start of the buffer
+    const char* m_cur = nullptr;   // next unread byte
+
+  public:
+    mmap_visitor(const char* base) : m_base(base), m_cur(base) {}
+
+    virtual ~mmap_visitor() = default;
+
+    //! Lets the vector refer to the data at the cursor and skips past it.
+    template <typename T>
+    void visit(immutable_vector<T>& vec) {
+        m_cur += vec.mmap(m_cur);
+    }
+
+    //! POD objects are copied out of the buffer; any other object is asked to visit its members.
+    template <typename T>
+    void visit(T& obj) {
+        if constexpr (std::is_pod_v<T>) {
+            // Byte-wise copy: the cursor may sit at any offset (e.g. after a byte vector),
+            // so a typed load through reinterpret_cast could be misaligned.
+            std::copy_n(m_cur, sizeof(T), reinterpret_cast<char*>(&obj));
+            m_cur += sizeof(T);
+        } else {
+            obj.visit(*this);
+        }
+    }
+
+    //! Number of bytes consumed so far.
+    std::uint64_t bytes() {
+        return std::distance(m_base, m_cur);
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/save_visitor.hpp
+++ b/include/xcdat/save_visitor.hpp
@ -0,0 +1,43 @@
+#pragma once
+
+#include <string_view>
+#include <type_traits>
+
+#include "exception.hpp"
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Visitor that writes an object's members to a binary file.
+class save_visitor {
+  private:
+    std::ofstream m_ofs;
+
+  public:
+    //! Opens 'filepath' for binary writing.
+    //! Throws xcdat::exception (via XCDAT_THROW_IF) if the file cannot be opened.
+    // The path is converted to std::string explicitly: std::ofstream has no string_view
+    // constructor, and the implicit string_view -> filesystem::path conversion is not
+    // accepted by every standard library.
+    save_visitor(std::string_view filepath) : m_ofs(std::string(filepath), std::ios::binary) {
+        XCDAT_THROW_IF(!m_ofs.good(), "Cannot open the output file");
+    }
+
+    virtual ~save_visitor() {
+        m_ofs.close();
+    }
+
+    //! Writes a vector in its serialized form.
+    template <typename T>
+    void visit(const immutable_vector<T>& vec) {
+        vec.save(m_ofs);
+    }
+
+    //! POD objects are written as raw bytes; any other object is asked to visit its members.
+    template <typename T>
+    void visit(const T& obj) {
+        if constexpr (std::is_pod_v<T>) {
+            m_ofs.write(reinterpret_cast<const char*>(&obj), sizeof(T));
+        } else {
+            // The member-wise visit() functions are non-const, hence the cast.
+            const_cast<T&>(obj).visit(*this);
+        }
+    }
+
+    //! Current write position of the stream, as reported by tellp().
+    std::uint64_t bytes() {
+        return m_ofs.tellp();
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/size_visitor.hpp
+++ b/include/xcdat/size_visitor.hpp
@ -0,0 +1,39 @@
+#pragma once
+
+#include <string_view>
+#include <type_traits>
+
+#include "exception.hpp"
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Visitor that adds up the serialized size of an object's members without
+//! performing any I/O.
+class size_visitor {
+  private:
+    std::uint64_t m_bytes = 0;
+
+  public:
+    size_visitor() = default;
+
+    virtual ~size_visitor() = default;
+
+    //! A vector contributes whatever its memory_in_bytes() reports.
+    template <typename Elem>
+    void visit(const immutable_vector<Elem>& vec) {
+        m_bytes = m_bytes + vec.memory_in_bytes();
+    }
+
+    //! A POD object contributes sizeof(Obj); any other object is asked to visit its members.
+    template <typename Obj>
+    void visit(const Obj& obj) {
+        if constexpr (!std::is_pod_v<Obj>) {
+            const_cast<Obj&>(obj).visit(*this);
+        } else {
+            m_bytes += sizeof(Obj);
+        }
+    }
+
+    //! Total number of bytes accumulated so far.
+    std::uint64_t bytes() {
+        return m_bytes;
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/tail_vector.hpp
+++ b/include/xcdat/tail_vector.hpp
@ -0,0 +1,222 @@
+#pragma once
+
+#include <algorithm>
+#include <functional>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "bit_vector.hpp"
+#include "exception.hpp"
+#include "immutable_vector.hpp"
+
+namespace xcdat {
+
+//! Storage for the key suffixes (TAIL) of the trie.
+//! The suffixes live in one character array; a suffix is addressed by its start
+//! position (tpos). It ends either at a NUL character or, in bin mode, at the
+//! position whose bit in m_terms is set.
+class tail_vector {
+  public:
+    //! A suffix to be stored, together with the node position (npos) that refers to it.
+    struct suffix_type {
+        std::string_view str;
+        std::uint64_t npos;
+
+        // Indexes from the back: [0] is the last character of str.
+        inline char operator[](std::uint64_t i) const {
+            return str[size() - i - 1];
+        }
+        inline std::uint64_t size() const {
+            return str.size();
+        }
+
+        inline const char* begin() const {
+            return str.data();
+        }
+        inline const char* end() const {
+            return str.data() + str.size();
+        }
+
+        inline std::reverse_iterator<const char*> rbegin() const {
+            return std::make_reverse_iterator(str.data() + str.size());
+        }
+        inline std::reverse_iterator<const char*> rend() const {
+            return std::make_reverse_iterator(str.data());
+        }
+    };
+
+    //! Collects suffixes and lays them out in the character array, letting a suffix
+    //! that equals the tail end of an already stored suffix share its storage.
+    class builder {
+      private:
+        // Buffer
+        std::vector<suffix_type> m_suffixes;
+
+        // Released
+        std::vector<char> m_chars;
+        bit_vector::builder m_terms;
+
+      public:
+        builder() = default;
+        virtual ~builder() = default;
+
+        builder(const builder&) = delete;
+        builder& operator=(const builder&) = delete;
+
+        builder(builder&&) noexcept = default;
+        builder& operator=(builder&&) noexcept = default;
+
+        //! Registers a non-empty suffix for node npos. Only the view is stored, so the
+        //! underlying characters must stay valid until complete() has run.
+        //! Throws xcdat::exception (via XCDAT_THROW_IF) if str is empty.
+        void set_suffix(std::string_view str, std::uint64_t npos) {
+            XCDAT_THROW_IF(str.size() == 0, "The given suffix is empty.");
+            m_suffixes.push_back({str, npos});
+        }
+
+        //! Lays out all registered suffixes and reports each start position.
+        //! In bin mode a terminator bit is recorded per character instead of
+        //! appending a NUL after each stored suffix.
+        // setter(npos, tpos): Set units[npos].base = tpos.
+        void complete(bool bin_mode, const std::function<void(std::uint64_t, std::uint64_t)>& setter) {
+            // Order the suffixes by their reversed strings.
+            std::sort(m_suffixes.begin(), m_suffixes.end(), [](const suffix_type& a, const suffix_type& b) {
+                return std::lexicographical_compare(std::rbegin(a), std::rend(a), std::rbegin(b), std::rend(b));
+            });
+
+            // Dummy for an empty suffix
+            m_chars.emplace_back('\0');
+            if (bin_mode) {
+                m_terms.push_back(false);
+            }
+
+            const suffix_type dmmy_suffix = {{nullptr, 0}, 0};
+            const suffix_type* prev_suffix = &dmmy_suffix;
+
+            // Start position of the previously handled suffix.
+            std::uint64_t prev_tpos = 0;
+
+            // Walk the sorted suffixes from last to first.
+            for (std::uint64_t i = m_suffixes.size(); i > 0; --i) {
+                const suffix_type& curr_suffix = m_suffixes[i - 1];
+                XCDAT_THROW_IF(curr_suffix.size() == 0, "A suffix is empty.");
+
+                // Count how many trailing characters the two suffixes have in common
+                // (operator[] indexes from the back).
+                std::uint64_t match = 0;
+                while ((match < curr_suffix.size()) && (match < prev_suffix->size()) &&
+                       ((*prev_suffix)[match] == curr_suffix[match])) {
+                    ++match;
+                }
+
+                if ((match == curr_suffix.size()) && (prev_suffix->size() != 0)) {  // sharable
+                    // curr equals the tail end of prev: point into prev's storage.
+                    setter(curr_suffix.npos, prev_tpos + (prev_suffix->size() - match));
+                    prev_tpos += prev_suffix->size() - match;
+                } else {  // append
+                    setter(curr_suffix.npos, m_chars.size());
+                    prev_tpos = m_chars.size();
+                    std::copy(curr_suffix.begin(), curr_suffix.end(), std::back_inserter(m_chars));
+                    if (bin_mode) {
+                        // Only the last character of the suffix gets its terminator bit set.
+                        for (std::uint64_t j = 1; j < curr_suffix.size(); ++j) {
+                            m_terms.push_back(false);
+                        }
+                        m_terms.push_back(true);
+                    } else {
+                        m_chars.emplace_back('\0');
+                    }
+                }
+
+                prev_suffix = &curr_suffix;
+            }
+        }
+
+        friend class tail_vector;
+    };
+
+  private:
+    immutable_vector<char> m_chars;
+    bit_vector m_terms;
+
+  public:
+    tail_vector() = default;
+    virtual ~tail_vector() = default;
+
+    tail_vector(const tail_vector&) = delete;
+    tail_vector& operator=(const tail_vector&) = delete;
+
+    tail_vector(tail_vector&&) noexcept = default;
+    tail_vector& operator=(tail_vector&&) noexcept = default;
+
+    //! Freezes the character array and terminator bits produced by the builder.
+    explicit tail_vector(builder&& b) : m_chars(b.m_chars), m_terms(b.m_terms) {}
+
+    //! True if terminator bits are in use, i.e. m_terms is non-empty.
+    inline bool bin_mode() const {
+        return m_terms.size() != 0;
+    }
+
+    //! Checks whether the suffix stored at tpos is exactly 'key'.
+    //! An empty key matches only tpos == 0 (the dummy empty suffix).
+    inline bool match(std::string_view key, std::uint64_t tpos) const {
+        if (key.size() == 0) {
+            return tpos == 0;
+        }
+
+        std::uint64_t kpos = 0;
+
+        if (bin_mode()) {
+            do {
+                if (key[kpos] != m_chars[tpos]) {
+                    return false;
+                }
+                kpos += 1;
+                // At the stored suffix's last character the key must be used up as well.
+                if (m_terms[tpos]) {
+                    return kpos == key.size();
+                }
+                tpos += 1;
+            } while (kpos < key.size());
+            // The key ran out before the stored suffix ended.
+            return false;
+        } else {
+            do {
+                // A NUL in the tail means the stored suffix ended before the key did.
+                if (!m_chars[tpos] || key[kpos] != m_chars[tpos]) {
+                    return false;
+                }
+                kpos += 1;
+                tpos += 1;
+            } while (kpos < key.size());
+            // Exact match only if the stored suffix ends here too.
+            return !m_chars[tpos];
+        }
+    }
+
+    //! Checks whether 'key' (which must be non-empty) is a prefix of the suffix stored at tpos.
+    inline bool prefix_match(std::string_view key, std::uint64_t tpos) const {
+        assert(key.size() != 0);
+        std::uint64_t kpos = 0;
+
+        if (bin_mode()) {
+            do {
+                if (key[kpos] != m_chars[tpos]) {
+                    return false;
+                }
+                kpos += 1;
+                // The stored suffix ends here; the key must not be longer than it.
+                if (m_terms[tpos]) {
+                    return kpos == key.size();
+                }
+                tpos += 1;
+            } while (kpos < key.size());
+            return true;
+        } else {
+            do {
+                if (!m_chars[tpos] || key[kpos] != m_chars[tpos]) {
+                    return false;
+                }
+                kpos += 1;
+                tpos += 1;
+            } while (kpos < key.size());
+            return true;
+        }
+    }
+
+    //! Passes the characters of the suffix stored at tpos to 'fn', in order.
+    inline void decode(std::uint64_t tpos, const std::function<void(char)>& fn) const {
+        if (bin_mode()) {
+            // tpos == 0 is the dummy empty suffix: nothing to emit.
+            if (tpos != 0) {
+                // Emit up to and including the character whose terminator bit is set.
+                do {
+                    fn(m_chars[tpos]);
+                } while (!m_terms[tpos++]);
+            }
+        } else {
+            // Emit up to (not including) the terminating NUL.
+            while (m_chars[tpos]) {
+                fn(m_chars[tpos++]);
+            }
+        }
+    }
+
+    //! Length of the character array.
+    inline std::uint64_t size() const {
+        return m_chars.size();
+    }
+
+    //! Visit the members (used by the load/save/mmap/size visitors).
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_chars);
+        visitor.visit(m_terms);
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/trie.hpp
+++ b/include/xcdat/trie.hpp
@ -0,0 +1,468 @@
+#pragma once
+
+#include <functional>
+#include <optional>
+#include <string>
+
+#include "trie_builder.hpp"
+
+namespace xcdat {
+
+//! A compressed string dictionary based on an improved double-array trie.
+//! 'BcVector' is the data type of Base and Check vectors.
+template <class BcVector>
+class trie {
+  public:
+    using trie_type = trie<BcVector>;
+    using bc_vector_type = BcVector;
+
+    static constexpr auto l1_bits = bc_vector_type::l1_bits;
+
+  private:
+    std::uint64_t m_num_keys = 0;
+    code_table m_table;
+    bit_vector m_terms;  // marks the nodes at which a keyword ends
+    bc_vector_type m_bcvec;
+    tail_vector m_tvec;
+
+  public:
+    //! Default constructor
+    trie() = default;
+
+    //! Default destructor
+    virtual ~trie() = default;
+
+    //! Copy constructor (deleted)
+    trie(const trie&) = delete;
+
+    //! Copy assignment (deleted)
+    trie& operator=(const trie&) = delete;
+
+    //! Move constructor
+    trie(trie&&) noexcept = default;
+
+    //! Move assignment
+    trie& operator=(trie&&) noexcept = default;
+
+    //! Build the trie from the input keywords, which are lexicographically sorted and unique.
+    //!
+    //! If bin_mode = false, the NULL character is used for the termination of a keyword.
+    //! If bin_mode = true, bit flags are used instead, and the keywords can contain NULL characters.
+    //! If the input keywords contain NULL characters, bin_mode will be forced to be set to true.
+    //!
+    //! The type 'Strings' and 'Strings::value_type' should be a random iterable container such as std::vector.
+    //! Precisely, they should support the following operations:
+    //!  - size() returns the container size.
+    //!  - operator[](i) accesses the i-th element.
+    //!  - begin() returns the iterator to the beginning.
+    //!  - end() returns the iterator to the end.
+    //! The type 'Strings::value_type::value_type' should be one-byte integer type such as 'char'.
+    template <class Strings>
+    trie(const Strings& keys, bool bin_mode = false) : trie(trie_builder(keys, l1_bits, bin_mode)) {
+        static_assert(sizeof(char) == sizeof(typename Strings::value_type::value_type));
+    }
+
+    //! Check if the binary mode.
+    inline bool bin_mode() const {
+        return m_tvec.bin_mode();
+    }
+
+    //! Get the number of stored keywords.
+    inline std::uint64_t num_keys() const {
+        return m_num_keys;
+    }
+
+    //! Get the alphabet size.
+    inline std::uint64_t alphabet_size() const {
+        return m_table.alphabet_size();
+    }
+
+    //! Get the maximum length of keywords.
+    inline std::uint64_t max_length() const {
+        return m_table.max_length();
+    }
+
+    //! Get the number of trie nodes.
+    inline std::uint64_t num_nodes() const {
+        return m_bcvec.num_nodes();
+    }
+
+    //! Get the number of DA units.
+    inline std::uint64_t num_units() const {
+        return m_bcvec.num_units();
+    }
+
+    //! Get the number of unused DA units.
+    inline std::uint64_t num_free_units() const {
+        return m_bcvec.num_free_units();
+    }
+
+    //! Get the length of TAIL vector.
+    inline std::uint64_t tail_length() const {
+        return m_tvec.size();
+    }
+
+    //! Lookup the ID of the keyword.
+    //! Returns std::nullopt if the keyword is not stored.
+    inline std::optional<std::uint64_t> lookup(std::string_view key) const {
+        std::uint64_t kpos = 0, npos = 0;
+        while (!m_bcvec.is_leaf(npos)) {
+            if (kpos == key.size()) {
+                // The key ends at an internal node: it is stored only if the node is a terminal.
+                if (!m_terms[npos]) {
+                    return std::nullopt;
+                }
+                return npos_to_id(npos);
+            }
+            const std::uint64_t cpos = m_bcvec.base(npos) ^ m_table.get_code(key[kpos++]);
+            if (m_bcvec.check(cpos) != npos) {
+                return std::nullopt;
+            }
+            npos = cpos;
+        }
+
+        // At a leaf the rest of the key must equal the suffix stored in TAIL.
+        const std::uint64_t tpos = m_bcvec.link(npos);
+        if (!m_tvec.match(get_suffix(key, kpos), tpos)) {
+            return std::nullopt;
+        }
+        return npos_to_id(npos);
+    }
+
+    //! Decode the keyword associated with the ID.
+    //! Returns an empty string if the ID is out of range.
+    inline std::string decode(std::uint64_t id) const {
+        std::string decoded;
+        decoded.reserve(max_length());
+        decode(id, decoded);
+        return decoded;
+    }
+
+    //! Decode the keyword associated with the ID and store it in 'decoded'.
+    //! It can avoid reallocation of memory to store the result.
+    //! 'decoded' is left empty if the ID is out of range.
+    inline void decode(std::uint64_t id, std::string& decoded) const {
+        decoded.clear();
+
+        if (num_keys() <= id) {
+            return;
+        }
+
+        std::uint64_t npos = id_to_npos(id);
+        // UINT64_MAX marks "no TAIL suffix" for keywords that end at an internal node.
+        std::uint64_t tpos = m_bcvec.is_leaf(npos) ? m_bcvec.link(npos) : UINT64_MAX;
+
+        // Climb to the root, collecting the edge labels in reverse order.
+        while (npos != 0) {
+            const std::uint64_t ppos = m_bcvec.check(npos);
+            decoded.push_back(m_table.get_char(m_bcvec.base(ppos) ^ npos));
+            npos = ppos;
+        }
+
+        std::reverse(decoded.begin(), decoded.end());
+        if (tpos != 0 && tpos != UINT64_MAX) {
+            m_tvec.decode(tpos, [&](char c) { decoded.push_back(c); });
+        }
+    }
+
+    //! An iterator class for common prefix search.
+    //! It enumerates all the keywords contained as prefixes of a given string.
+    //! It should be instantiated via the function 'make_prefix_iterator'.
+    //! The iterator keeps a view of the query string, which must outlive the iterator.
+    class prefix_iterator {
+      private:
+        const trie_type* m_obj = nullptr;
+        std::string_view m_key;
+        std::uint64_t m_id = 0;
+        std::uint64_t m_kpos = 0;
+        std::uint64_t m_npos = 0;
+        bool is_beg = true;
+        bool is_end = false;
+
+      public:
+        prefix_iterator() = default;
+
+        //! Increment the iterator.
+        //! Return false if the iteration is terminated.
+        inline bool next() {
+            return m_obj != nullptr && m_obj->next_prefix(this);
+        }
+
+        //! Get the result ID.
+        inline std::uint64_t id() const {
+            return m_id;
+        }
+
+        //! Get the result keyword.
+        inline std::string decoded() const {
+            return std::string(m_key.data(), m_kpos);
+        }
+
+        //! Get the reference to the result keyword.
+        //! Note that the referenced data will be changed in the next iteration.
+        inline std::string_view decoded_view() const {
+            return std::string_view(m_key.data(), m_kpos);
+        }
+
+      private:
+        prefix_iterator(const trie_type* obj, std::string_view key) : m_obj(obj), m_key(key) {}
+
+        friend class trie;
+    };
+
+    //! Make the common prefix searcher for the given keyword.
+    inline prefix_iterator make_prefix_iterator(std::string_view key) const {
+        return prefix_iterator(this, key);
+    }
+
+    //! Perform common prefix search for the keyword.
+    inline void prefix_search(std::string_view key,
+                              const std::function<void(std::uint64_t, std::string_view)>& fn) const {
+        auto itr = make_prefix_iterator(key);
+        while (itr.next()) {
+            fn(itr.id(), itr.decoded_view());
+        }
+    }
+
+    //! An iterator class for predictive search.
+    //! It enumerates all the keywords starting with a given string.
+    //! It should be instantiated via the function 'make_predictive_iterator'.
+    class predictive_iterator {
+      public:
+        //! A pending node of the depth-first traversal.
+        struct cursor_type {
+            char label;          // edge label leading to the node
+            std::uint64_t kpos;  // length of the decoded string at the node
+            std::uint64_t npos;  // node position
+        };
+
+      private:
+        const trie_type* m_obj = nullptr;
+        std::string_view m_key;
+        std::uint64_t m_id = 0;
+        std::string m_decoded;
+        std::vector<cursor_type> m_stack;
+        bool is_beg = true;
+        bool is_end = false;
+
+      public:
+        predictive_iterator() = default;
+
+        //! Increment the iterator.
+        //! Return false if the iteration is terminated.
+        inline bool next() {
+            return m_obj != nullptr && m_obj->next_predictive(this);
+        }
+
+        //! Get the result ID.
+        inline std::uint64_t id() const {
+            return m_id;
+        }
+
+        //! Get the result keyword.
+        inline std::string decoded() const {
+            return m_decoded;
+        }
+
+        //! Get the reference to the result keyword.
+        //! Note that the referenced data will be changed in the next iteration.
+        inline std::string_view decoded_view() const {
+            return m_decoded;
+        }
+
+      private:
+        predictive_iterator(const trie_type* obj, std::string_view key) : m_obj(obj), m_key(key) {}
+
+        friend class trie;
+    };
+
+    //! Make the predictive searcher for the keyword.
+    inline predictive_iterator make_predictive_iterator(std::string_view key) const {
+        return predictive_iterator(this, key);
+    }
+
+    //! Perform predictive search for the keyword.
+    inline void predictive_search(std::string_view key,
+                                  const std::function<void(std::uint64_t, std::string_view)>& fn) const {
+        auto itr = make_predictive_iterator(key);
+        while (itr.next()) {
+            fn(itr.id(), itr.decoded_view());
+        }
+    }
+
+    //! An iterator class for enumeration.
+    //! It enumerates all the keywords stored in the trie.
+    //! It should be instantiated via the function 'make_enumerative_iterator'.
+    using enumerative_iterator = predictive_iterator;
+
+    //! Make the enumerator.
+    inline enumerative_iterator make_enumerative_iterator() const {
+        return enumerative_iterator(this, "");
+    }
+
+    //! Enumerate all the keywords and their IDs stored in the trie.
+    inline void enumerate(const std::function<void(std::uint64_t, std::string_view)>& fn) const {
+        auto itr = make_enumerative_iterator();
+        while (itr.next()) {
+            fn(itr.id(), itr.decoded_view());
+        }
+    }
+
+    //! Visit the members (commonly used for I/O).
+    template <class Visitor>
+    void visit(Visitor& visitor) {
+        visitor.visit(m_num_keys);
+        visitor.visit(m_table);
+        visitor.visit(m_terms);
+        visitor.visit(m_bcvec);
+        visitor.visit(m_tvec);
+    }
+
+  private:
+    // Takes over the structures prepared by the builder.
+    template <class Strings>
+    explicit trie(trie_builder<Strings>&& b)
+        : m_num_keys(b.m_keys.size()), m_table(std::move(b.m_table)), m_terms(b.m_terms, true, true),
+          m_bcvec(b.m_units, std::move(b.m_leaves)), m_tvec(std::move(b.m_suffixes)) {}
+
+    // Returns the part of s that starts at position i (i <= s.size()).
+    template <class String>
+    static constexpr String get_suffix(const String& s, std::uint64_t i) {
+        assert(i <= s.size());
+        return s.substr(i, s.size() - i);
+    }
+
+    // Node position -> keyword ID, via rank on the terminal flags.
+    inline std::uint64_t npos_to_id(std::uint64_t npos) const {
+        return m_terms.rank(npos);
+    }
+
+    // Keyword ID -> node position, via select on the terminal flags.
+    inline std::uint64_t id_to_npos(std::uint64_t id) const {
+        return m_terms.select(id);
+    }
+
+    // Advances a prefix iterator to the next keyword that is a prefix of its query.
+    // On success m_id and m_kpos describe the hit; on termination m_id is set to num_keys().
+    inline bool next_prefix(prefix_iterator* itr) const {
+        if (itr->is_end) {
+            return false;
+        }
+
+        if (itr->is_beg) {
+            itr->is_beg = false;
+            // The node the iterator starts at may itself be a terminal.
+            if (m_terms[itr->m_npos]) {
+                itr->m_id = npos_to_id(itr->m_npos);
+                return true;
+            }
+        }
+
+        if (bin_mode() && itr->m_kpos == itr->m_key.size()) {
+            // Is the key terminated at an inner term?
+            itr->is_end = true;
+            itr->m_id = num_keys();
+            return false;
+        }
+
+        while (!m_bcvec.is_leaf(itr->m_npos)) {
+            if (itr->m_kpos == itr->m_key.size()) {
+                // The key is exhausted at an internal node, so no further prefix exists.
+                // This is checked in every mode: reading m_key[m_kpos] at this point
+                // would run past the end of the string_view.
+                itr->is_end = true;
+                itr->m_id = num_keys();
+                return false;
+            }
+
+            const std::uint64_t cpos = m_bcvec.base(itr->m_npos) ^ m_table.get_code(itr->m_key[itr->m_kpos++]);
+
+            if (m_bcvec.check(cpos) != itr->m_npos) {
+                itr->is_end = true;
+                itr->m_id = num_keys();
+                return false;
+            }
+
+            itr->m_npos = cpos;
+            // Report inner terminals here; leaves are handled after the loop.
+            if (!m_bcvec.is_leaf(itr->m_npos) && m_terms[itr->m_npos]) {
+                itr->m_id = npos_to_id(itr->m_npos);
+                return true;
+            }
+        }
+        // A leaf is the last possible hit.
+        itr->is_end = true;
+
+        const std::uint64_t tpos = m_bcvec.link(itr->m_npos);
+        if (!m_tvec.match(get_suffix(itr->m_key, itr->m_kpos), tpos)) {
+            itr->m_id = num_keys();
+            return false;
+        }
+
+        itr->m_kpos = itr->m_key.size();
+        itr->m_id = npos_to_id(itr->m_npos);
+        return true;
+    }
+
+    // Advances a predictive iterator to the next keyword that starts with its query.
+    // The first call descends along the query; later calls continue a depth-first
+    // traversal of the subtree below it using the iterator's explicit stack.
+    inline bool next_predictive(predictive_iterator* itr) const {
+        if (itr->is_end) {
+            return false;
+        }
+
+        if (itr->is_beg) {
+            itr->is_beg = false;
+
+            std::uint64_t kpos = 0;
+            std::uint64_t npos = 0;
+
+            for (; kpos < itr->m_key.size(); ++kpos) {
+                if (m_bcvec.is_leaf(npos)) {
+                    // The query continues into TAIL: at most one keyword can match.
+                    itr->is_end = true;
+                    const std::uint64_t tpos = m_bcvec.link(npos);
+                    if (tpos == 0) {
+                        return false;
+                    }
+                    if (!m_tvec.prefix_match(get_suffix(itr->m_key, kpos), tpos)) {
+                        return false;
+                    }
+                    itr->m_id = npos_to_id(npos);
+                    m_tvec.decode(tpos, [&](char c) { itr->m_decoded.push_back(c); });
+                    return true;
+                }
+
+                const std::uint64_t cpos = m_bcvec.base(npos) ^ m_table.get_code(itr->m_key[kpos]);
+                if (m_bcvec.check(cpos) != npos) {
+                    itr->is_end = true;
+                    return false;
+                }
+
+                npos = cpos;
+                itr->m_decoded.push_back(itr->m_key[kpos]);
+            }
+
+            // Seed the traversal with the node reached by the whole query.
+            if (!itr->m_decoded.empty()) {
+                itr->m_stack.push_back({itr->m_decoded.back(), kpos, npos});
+            } else {
+                itr->m_stack.push_back({'\0', kpos, npos});
+            }
+        }
+
+        while (!itr->m_stack.empty()) {
+            const char label = itr->m_stack.back().label;
+            const std::uint64_t kpos = itr->m_stack.back().kpos;
+            const std::uint64_t npos = itr->m_stack.back().npos;
+
+            itr->m_stack.pop_back();
+
+            // Rewind the decoded string to this node's depth and set its last label.
+            if (0 < kpos) {
+                itr->m_decoded.resize(kpos);
+                itr->m_decoded.back() = label;
+            }
+
+            if (m_bcvec.is_leaf(npos)) {
+                itr->m_id = npos_to_id(npos);
+                m_tvec.decode(m_bcvec.link(npos), [&](char c) { itr->m_decoded.push_back(c); });
+                return true;
+            }
+
+            const std::uint64_t base = m_bcvec.base(npos);
+
+            // Push the children in reverse alphabet order so that they are popped in forward order.
+            for (auto cit = m_table.rbegin(); cit != m_table.rend(); ++cit) {
+                const std::uint64_t cpos = base ^ m_table.get_code(*cit);
+                if (m_bcvec.check(cpos) == npos) {
+                    itr->m_stack.push_back({static_cast<char>(*cit), kpos + 1, cpos});
+                }
+            }
+
+            if (m_terms[npos]) {
+                itr->m_id = npos_to_id(npos);
+                return true;
+            }
+        }
+
+        itr->is_end = true;
+        return false;
+    }
+};
+
+}  // namespace xcdat
--- a/include/xcdat/trie_builder.hpp
+++ b/include/xcdat/trie_builder.hpp
@ -0,0 +1,265 @@
+#pragma once
+
+#include <algorithm>
+#include <iostream>
+#include <string_view>
+
+// #include "bc_vector.hpp"
+#include "code_table.hpp"
+#include "exception.hpp"
+#include "tail_vector.hpp"
+
+namespace xcdat {
+
+// Builds the intermediate representation of a double-array trie from a sorted, unique key set:
+// BASE/CHECK units, leaf/terminal flags and the suffixes stored in the TAIL vector.
+//
+// All the work is done in the constructor; the friend class `trie` then reads the members.
+//
+// Free units are kept in a circular doubly linked list threaded through the units themselves:
+// while a unit is unused, `base` holds the next free position and `check` the previous one
+// (see use_unit() and expand()). Unit `taboo_npos` anchors that list and is never handed out.
+//
+// The builder throws (via XCDAT_THROW_IF) when the input is empty, not unique, or not sorted.
+template <class Strings>
+class trie_builder {
+    template <class>
+    friend class trie;
+
+  public:
+    // One double-array element. For free units the two fields double as list links (next/prev).
+    struct unit_type {
+        std::uint64_t base;
+        std::uint64_t check;
+    };
+
+  private:
+    // Anchor of the free list; also used as the "no free unit" marker in m_heads.
+    static constexpr std::uint64_t taboo_npos = 1;
+    // Number of most recent 256-unit blocks kept open for searching; older ones are closed in expand().
+    static constexpr std::uint64_t free_blocks = 16;
+
+    const Strings& m_keys;
+    const std::uint32_t m_l1_bits;  // # of bits for L1 layer of DACs
+    const std::uint64_t m_l1_size;
+
+    bool m_bin_mode = false;
+
+    code_table m_table;
+    std::vector<unit_type> m_units;
+    bit_vector::builder m_leaves;
+    bit_vector::builder m_terms;
+    bit_vector::builder m_useds;
+    std::vector<std::uint64_t> m_heads;  // for L1 blocks
+    std::vector<std::uint8_t> m_edges;
+    tail_vector::builder m_suffixes;
+
+  public:
+    // Builds everything from `keys`.
+    //  - keys:     sorted, unique, non-empty collection; it is held by reference, not copied.
+    //  - l1_bits:  clamped to at most 8.
+    //  - bin_mode: forced on when the code table reports a NULL character in the keys.
+    explicit trie_builder(const Strings& keys, std::uint32_t l1_bits, bool bin_mode)
+        : m_keys(keys), m_l1_bits(std::min(l1_bits, 8U)), m_l1_size(1ULL << m_l1_bits), m_bin_mode(bin_mode) {
+        XCDAT_THROW_IF(m_keys.size() == 0, "The input dataset is empty.");
+
+        // Reserve
+        {
+            std::uint64_t init_capa = 1;
+            while (init_capa < m_keys.size()) {
+                init_capa <<= 1;
+            }
+            m_units.reserve(init_capa);
+            m_leaves.reserve(init_capa);
+            m_terms.reserve(init_capa);
+            m_useds.reserve(init_capa);
+            m_heads.reserve(init_capa >> m_l1_bits);
+            m_edges.reserve(256);
+        }
+
+        // Initialize an empty list.
+        // (npos - 1 wraps for npos == 0; both ends are patched right after the loop.)
+        for (std::uint64_t npos = 0; npos < 256; ++npos) {
+            m_units.push_back(unit_type{npos + 1, npos - 1});
+            m_leaves.push_back(false);
+            m_terms.push_back(false);
+            m_useds.push_back(false);
+        }
+        m_units[255].base = 0;
+        m_units[0].check = 255;
+
+        for (std::uint64_t npos = 0; npos < 256; npos += m_l1_size) {
+            m_heads.push_back(npos);
+        }
+
+        // Fix the root
+        use_unit(0);
+        m_units[0].check = taboo_npos;
+        m_useds.set_bit(taboo_npos, true);
+        m_heads[taboo_npos >> m_l1_bits] = m_units[taboo_npos].base;
+
+        // Build the code table
+        m_table = code_table(keys);
+        m_bin_mode |= m_table.has_null();
+
+        // Build the BC units
+        arrange(0, m_keys.size(), 0, 0);
+
+        // Finish
+        finish();
+
+        // Build the TAIL vector
+        m_suffixes.complete(m_bin_mode, [&](std::uint64_t npos, std::uint64_t tpos) { m_units[npos].base = tpos; });
+    }
+
+    virtual ~trie_builder() = default;
+
+    trie_builder(const trie_builder&) = delete;
+    trie_builder& operator=(const trie_builder&) = delete;
+
+    trie_builder(trie_builder&&) noexcept = default;
+    trie_builder& operator=(trie_builder&&) noexcept = default;
+
+  private:
+    // Marks `npos` as used and unlinks it from the free list, updating the head of its L1 block.
+    inline void use_unit(std::uint64_t npos) {
+        m_useds.set_bit(npos);
+
+        const auto next = m_units[npos].base;
+        const auto prev = m_units[npos].check;
+        m_units[prev].base = next;
+        m_units[next].check = prev;
+
+        const auto lpos = npos >> m_l1_bits;
+        if (m_heads[lpos] == npos) {
+            // The next free unit becomes the head only if it lies in the same L1 block.
+            m_heads[lpos] = (lpos != next >> m_l1_bits) ? taboo_npos : next;
+        }
+    }
+
+    // Removes every still-free unit of the 256-unit block `bpos` from the free list.
+    // Such units keep their used flag cleared and get base == check == their own position.
+    inline void close_block(std::uint64_t bpos) {
+        const auto beg_npos = bpos * 256;
+        const auto end_npos = beg_npos + 256;
+
+        for (auto npos = beg_npos; npos < end_npos; ++npos) {
+            if (!m_useds[npos]) {
+                use_unit(npos);  // unlink from the free list ...
+                m_useds.set_bit(npos, false);  // ... but keep it flagged as unused
+                m_units[npos].base = npos;
+                m_units[npos].check = npos;
+            }
+        }
+
+        for (auto npos = beg_npos; npos < end_npos; npos += m_l1_size) {
+            m_heads[npos >> m_l1_bits] = taboo_npos;
+        }
+    }
+
+    // Appends one block of 256 free units, splices it at the tail of the free list, and closes
+    // the block that falls out of the `free_blocks` window.
+    void expand() {
+        const auto old_size = static_cast<std::uint64_t>(m_units.size());
+        const auto new_size = old_size + 256;
+
+        for (auto npos = old_size; npos < new_size; ++npos) {
+            m_units.push_back({npos + 1, npos - 1});
+            m_leaves.push_back(false);
+            m_terms.push_back(false);
+            m_useds.push_back(false);
+        }
+
+        {
+            const auto last_npos = m_units[taboo_npos].check;
+            m_units[old_size].check = last_npos;
+            m_units[last_npos].base = old_size;
+            m_units[new_size - 1].base = taboo_npos;
+            m_units[taboo_npos].check = new_size - 1;
+        }
+
+        for (auto npos = old_size; npos < new_size; npos += m_l1_size) {
+            m_heads.push_back(npos);
+        }
+
+        const auto bpos = old_size / 256;
+        if (free_blocks <= bpos) {
+            close_block(bpos - free_blocks);
+        }
+    }
+
+    // Closes blocks until the free list is empty.
+    void finish() {
+        while (m_units[taboo_npos].base != taboo_npos) {
+            auto bpos = m_units[taboo_npos].base / 256;
+            close_block(bpos);
+        }
+    }
+
+    // Recursively places the keys in [beg, end), which share their first `kpos` characters,
+    // below the node at `npos`.
+    //
+    // Throws when a key is duplicated or the keys are not in lexicographical order. The explicit
+    // length guards below make sure no key is indexed at or beyond its size, so detection of bad
+    // input does not depend on what operator[] yields past the end of a key.
+    void arrange(std::uint64_t beg, std::uint64_t end, std::uint64_t kpos, std::uint64_t npos) {
+        if (m_keys[beg].size() == kpos) {
+            m_terms.set_bit(npos, true);
+            if (++beg == end) {  // without link?
+                m_units[npos].base = 0;  // with an empty suffix
+                m_leaves.set_bit(npos, true);
+                return;
+            }
+            // The next key shares the same kpos-character prefix; if it is not longer, it is a duplicate.
+            XCDAT_THROW_IF(m_keys[beg].size() <= kpos, "The input keys are not unique.");
+        } else if (beg + 1 == end) {  // leaf?
+            XCDAT_THROW_IF(m_keys[beg].size() <= kpos, "The input keys are not unique.");
+            m_terms.set_bit(npos, true);
+            m_leaves.set_bit(npos, true);
+            m_suffixes.set_suffix({m_keys[beg].data() + kpos, m_keys[beg].size() - kpos}, npos);
+            return;
+        }
+
+        // fetching edges
+        {
+            m_edges.clear();
+            auto ch = static_cast<std::uint8_t>(m_keys[beg][kpos]);
+            for (auto i = beg + 1; i < end; ++i) {
+                // A key ending at kpos may only come first in its group; anywhere else the input is unsorted.
+                XCDAT_THROW_IF(m_keys[i].size() <= kpos, "The input keys are not in lexicographical order.");
+                const auto next_ch = static_cast<std::uint8_t>(m_keys[i][kpos]);
+                if (ch != next_ch) {
+                    XCDAT_THROW_IF(next_ch < ch, "The input keys are not in lexicographical order.");
+                    m_edges.push_back(ch);
+                    ch = next_ch;
+                }
+            }
+            m_edges.push_back(ch);
+        }
+
+        const auto base = xcheck(npos >> m_l1_bits);
+        if (m_units.size() <= base) {
+            expand();
+        }
+
+        // defining new edges
+        m_units[npos].base = base;
+        for (const auto ch : m_edges) {
+            const auto child_id = base ^ m_table.get_code(ch);
+            use_unit(child_id);
+            m_units[child_id].check = npos;
+        }
+
+        // following the children
+        // (m_edges is reused by the recursive calls, so the groups are re-derived from the keys.)
+        auto i = beg;
+        auto ch = static_cast<std::uint8_t>(m_keys[beg][kpos]);
+        for (auto j = beg + 1; j < end; ++j) {
+            const auto next_ch = static_cast<std::uint8_t>(m_keys[j][kpos]);
+            if (ch != next_ch) {
+                arrange(i, j, kpos + 1, base ^ m_table.get_code(ch));
+                ch = next_ch;
+                i = j;
+            }
+        }
+        arrange(i, end, kpos + 1, base ^ m_table.get_code(ch));
+    }
+
+    // Finds a BASE value for which every edge in m_edges maps to an unused unit.
+    // `lpos` is the L1 block of the parent, which is searched first. When nothing fits, the
+    // returned value is >= m_units.size() and the caller has to expand().
+    inline std::uint64_t xcheck(std::uint64_t lpos) const {
+        if (m_units[taboo_npos].base == taboo_npos) {  // Full?
+            return m_units.size() ^ m_table.get_code(m_edges[0]);
+        }
+
+        // First, search in the same L1 block
+        for (auto i = m_heads[lpos]; i != taboo_npos && i >> m_l1_bits == lpos; i = m_units[i].base) {
+            const auto base = i ^ m_table.get_code(m_edges[0]);
+            if (is_target(base)) {
+                return base;  // base / block_size_ == lpos
+            }
+        }
+
+        // Second, search in the other blocks
+        for (auto i = m_units[taboo_npos].base; i != taboo_npos; i = m_units[i].base) {
+            const auto base = i ^ m_table.get_code(m_edges[0]);
+            if (is_target(base)) {
+                return base;  // base / block_size_ != lpos
+            }
+        }
+        return m_units.size() ^ m_table.get_code(m_edges[0]);
+    }
+
+    // Returns true when no edge in m_edges would land on an already used unit for this `base`.
+    inline bool is_target(std::uint64_t base) const {
+        for (const auto ch : m_edges) {
+            if (m_useds[base ^ m_table.get_code(ch)]) {
+                return false;
+            }
+        }
+        return true;
+    }
+};
+
+}  // namespace xcdat
--- a/sample/CMakeLists.txt
+++ b/sample/CMakeLists.txt
@ -0,0 +1 @@
+# Builds the `sample` executable from sample.cpp.
+add_executable(sample sample.cpp)
--- a/sample/sample.cpp
+++ b/sample/sample.cpp
@ -0,0 +1,92 @@
+#include <iostream>
+#include <string>
+
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+// Walk-through of the library: build a trie from a small key set, save it to a file,
+// memory-map it back, then run lookup, decode, prefix/predictive search and enumeration,
+// printing every result to stdout. The temporary dictionary file is removed at the end.
+int main() {
+    // Dataset of keywords
+    std::vector<std::string> keys = {
+        "AirPods",  "AirTag",  "Mac",  "MacBook", "MacBook_Air", "MacBook_Pro",
+        "Mac_Mini", "Mac_Pro", "iMac", "iPad",    "iPhone",      "iPhone_SE",
+    };
+
+    // The input keys must be sorted and unique (although they have already satisfied in this case).
+    std::sort(keys.begin(), keys.end());
+    keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
+
+    // The trie dictionary type
+    using trie_type = xcdat::trie_8_type;
+
+    // The dictionary filename
+    const char* tmp_filename = "dic.bin";
+
+    // Build and save the trie dictionary.
+    // (The built trie lives only inside this scope; everything below works on the file.)
+    {
+        const trie_type trie(keys);
+        xcdat::save(trie, tmp_filename);
+    }
+
+    // Memory-map the trie dictionary.
+    // NOTE(review): `trie` is created from fin.data(), so presumably `fin` must outlive it - confirm.
+    const mm::file_source<char> fin(tmp_filename, mm::advice::sequential);
+    const auto trie = xcdat::mmap<trie_type>(fin.data());
+
+    // Or, load the trie dictionary on memory.
+    // const auto trie = xcdat::load<trie_type>(tmp_filename);
+
+    // Basic statistics
+    std::cout << "Number of keys: " << trie.num_keys() << std::endl;
+    std::cout << "Number of trie nodes: " << trie.num_nodes() << std::endl;
+    std::cout << "Number of DA units: " << trie.num_units() << std::endl;
+    std::cout << "Memory usage in bytes: " << xcdat::memory_in_bytes(trie) << std::endl;
+
+    // Lookup the ID for a query key.
+    // (UINT64_MAX is printed when lookup() returns no value.)
+    {
+        const auto id = trie.lookup("Mac_Pro");
+        std::cout << "Lookup(Mac_Pro) = " << id.value_or(UINT64_MAX) << std::endl;
+    }
+    {
+        const auto id = trie.lookup("Google_Pixel");
+        std::cout << "Lookup(Google_Pixel) = " << id.value_or(UINT64_MAX) << std::endl;
+    }
+
+    // Decode the key for a query ID.
+    {
+        const auto dec = trie.decode(4);
+        std::cout << "Decode(4) = " << dec << std::endl;
+    }
+
+    // Common prefix search
+    {
+        std::cout << "CommonPrefixSearch(MacBook_Air) = {" << std::endl;
+        auto itr = trie.make_prefix_iterator("MacBook_Air");
+        while (itr.next()) {
+            std::cout << "   (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    // Predictive search
+    {
+        std::cout << "PredictiveSearch(Mac) = {" << std::endl;
+        auto itr = trie.make_predictive_iterator("Mac");
+        while (itr.next()) {
+            std::cout << "   (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    // Enumerate all the keys (in lex order).
+    {
+        std::cout << "Enumerate() = {" << std::endl;
+        auto itr = trie.make_enumerative_iterator();
+        while (itr.next()) {
+            std::cout << "   (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
+        }
+        std::cout << "}" << std::endl;
+    }
+
+    // Delete the temporary dictionary file; the result of the removal is not checked.
+    std::remove(tmp_filename);
+
+    return 0;
+}
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@ -0,0 +1,24 @@
+# One executable and one registered test per component.
+add_executable(test_bit_vector test_bit_vector.cpp)
+add_test(test_bit_vector test_bit_vector)
+
+add_executable(test_compact_vector test_compact_vector.cpp)
+add_test(test_compact_vector test_compact_vector)
+
+add_executable(test_tail_vector test_tail_vector.cpp)
+add_test(test_tail_vector test_tail_vector)
+
+# Variants for which the bc_vector and trie tests are built.
+set(BC_OPTIONS "7" "8")
+
+# test_bc_vector.cpp is compiled once per option with BC_VECTOR_<option> defined.
+foreach(BC_OPTION ${BC_OPTIONS})
+    set(TEST_SRC_NAME test_bc_vector_${BC_OPTION})
+    add_executable(${TEST_SRC_NAME} test_bc_vector.cpp)
+    set_target_properties(${TEST_SRC_NAME} PROPERTIES COMPILE_DEFINITIONS BC_VECTOR_${BC_OPTION})
+    add_test(${TEST_SRC_NAME} ${TEST_SRC_NAME})
+endforeach(BC_OPTION)
+
+# test_trie.cpp is compiled once per option with TRIE_<option> defined.
+foreach(BC_OPTION ${BC_OPTIONS})
+    set(TEST_SRC_NAME test_trie_${BC_OPTION})
+    add_executable(${TEST_SRC_NAME} test_trie.cpp)
+    set_target_properties(${TEST_SRC_NAME} PROPERTIES COMPILE_DEFINITIONS TRIE_${BC_OPTION})
+    add_test(${TEST_SRC_NAME} ${TEST_SRC_NAME})
+endforeach(BC_OPTION)
--- a/tests/doctest/doctest.h
+++ b/tests/doctest/doctest.h
--- a/tests/keys.txt
+++ b/tests/keys.txt
--- a/tests/test_bc_vector.cpp
+++ b/tests/test_bc_vector.cpp
@ -0,0 +1,75 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+
+#include <algorithm>
+#include <random>
+
+#include "doctest/doctest.h"
+#include "test_common.hpp"
+#include "xcdat/bc_vector_7.hpp"
+#include "xcdat/bc_vector_8.hpp"
+
+#ifdef BC_VECTOR_7
+using bc_vector_type = xcdat::bc_vector_7;
+#elif BC_VECTOR_8
+using bc_vector_type = xcdat::bc_vector_8;
+#endif
+
+// Plain BASE/CHECK pair used as input for the bc_vector under test.
+struct bc_unit {
+    std::uint64_t base;
+    std::uint64_t check;
+};
+
+// Returns `n` units whose base and check values are drawn uniformly from [0, maxv].
+// The same `seed` always yields the same sequence (base is drawn before check for each unit).
+std::vector<bc_unit> make_random_units(std::uint64_t n, std::uint64_t maxv, std::uint64_t seed = 13) {
+    std::mt19937_64 rng(seed);
+    std::uniform_int_distribution<std::uint64_t> pick(0, maxv);
+
+    std::vector<bc_unit> units(n);
+    for (auto& unit : units) {
+        unit.base = pick(rng);
+        unit.check = pick(rng);
+    }
+    return units;
+}
+
+// Copies `bits` position by position into a bit_vector builder of the same size.
+xcdat::bit_vector::builder to_bit_vector_builder(const std::vector<bool>& bits) {
+    xcdat::bit_vector::builder builder(bits.size());
+    for (std::uint64_t pos = 0; pos != bits.size(); ++pos) {
+        const bool bit = bits[pos];
+        builder.set_bit(pos, bit);
+    }
+    return builder;
+}
+
+// Returns the number of set bits in `bits`.
+// std::count comes from <algorithm>, which this file already includes.
+std::uint64_t get_num_ones(const std::vector<bool>& bits) {
+    return static_cast<std::uint64_t>(std::count(bits.begin(), bits.end(), true));
+}
+
+// Builds a bc_vector from `bc_units` and `leaves` and checks that every unit reads back unchanged.
+// For leaf positions the stored base value is read through link(); otherwise through base().
+void test_bc_vector(const std::vector<bc_unit>& bc_units, const std::vector<bool>& leaves) {
+    bc_vector_type bc(bc_units, to_bit_vector_builder(leaves));
+
+    REQUIRE_EQ(bc.num_units(), bc_units.size());
+    REQUIRE_EQ(bc.num_leaves(), get_num_ones(leaves));
+
+    for (std::uint64_t i = 0; i < bc.num_units(); i++) {
+        REQUIRE_EQ(bc.is_leaf(i), leaves[i]);
+        if (leaves[i]) {
+            REQUIRE_EQ(bc.link(i), bc_units[i].base);
+        } else {
+            REQUIRE_EQ(bc.base(i), bc_units[i].base);
+        }
+        REQUIRE_EQ(bc.check(i), bc_units[i].check);
+    }
+}
+
+// 10,000 units with values no larger than size - 1; each position is a leaf with probability 0.2.
+TEST_CASE("Test bc_vector 10K in [0,10K)") {
+    const std::uint64_t size = 10000;
+    auto bc_units = make_random_units(size, size - 1);
+    auto leaves = xcdat::test::make_random_bits(size, 0.2);
+    test_bc_vector(bc_units, leaves);
+}
+
+// Same setup, but values span the whole 64-bit range (the upper bound UINT64_MAX is inclusive).
+TEST_CASE("Test bc_vector 10K in [0,UINT64_MAX)") {
+    const std::uint64_t size = 10000;
+    auto bc_units = make_random_units(size, UINT64_MAX);
+    auto leaves = xcdat::test::make_random_bits(size, 0.2);
+    test_bc_vector(bc_units, leaves);
+}
--- a/tests/test_bit_vector.cpp
+++ b/tests/test_bit_vector.cpp
@ -0,0 +1,113 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+
+#include <algorithm>
+#include <random>
+
+#include "doctest/doctest.h"
+#include "test_common.hpp"
+#include "xcdat/bit_vector.hpp"
+
+// Returns the number of set bits in `bits`.
+// std::count comes from <algorithm>, which this file already includes.
+std::uint64_t get_num_ones(const std::vector<bool>& bits) {
+    return static_cast<std::uint64_t>(std::count(bits.begin(), bits.end(), true));
+}
+
+// Reference rank: the number of set bits among the first `i` positions of `bits`.
+// `i` must not exceed bits.size(). std::count comes from the already included <algorithm>.
+std::uint64_t rank_naive(const std::vector<bool>& bits, std::uint64_t i) {
+    const auto last = bits.begin() + static_cast<std::vector<bool>::difference_type>(i);
+    return static_cast<std::uint64_t>(std::count(bits.begin(), last, true));
+}
+
+// Reference select: the position of the n-th set bit (n counted from zero),
+// or bits.size() when `bits` holds n or fewer set bits.
+std::uint64_t select_naive(const std::vector<bool>& bits, std::uint64_t n) {
+    std::uint64_t remaining = n;
+    for (std::uint64_t pos = 0; pos < bits.size(); ++pos) {
+        if (!bits[pos]) {
+            continue;
+        }
+        if (remaining == 0) {
+            return pos;
+        }
+        --remaining;
+    }
+    return bits.size();
+}
+
+// Builds a bit_vector from `bits` and compares size, number of ones, single-bit access and
+// 100 random rank / select queries each against the naive reference implementations.
+void test_rank_select(const std::vector<bool>& bits) {
+    xcdat::bit_vector bv;
+    {
+        xcdat::bit_vector::builder bvb(bits.size());
+        for (std::uint64_t i = 0; i < bits.size(); i++) {
+            bvb.set_bit(i, bits[i]);
+        }
+        // NOTE(review): the two flags presumably enable the rank and select indexes - confirm.
+        bv = xcdat::bit_vector(bvb, true, true);
+    }
+
+    REQUIRE_EQ(bv.size(), bits.size());
+    REQUIRE_EQ(bv.num_ones(), get_num_ones(bits));
+
+    for (std::uint64_t i = 0; i < bits.size(); i++) {
+        REQUIRE_EQ(bv[i], bits[i]);
+    }
+
+    // One engine is shared by both query loops, so their order determines the drawn values.
+    static constexpr std::uint64_t seed = 17;
+    std::mt19937_64 engine(seed);
+
+    {
+        // Rank positions are drawn from [0, size], i.e. including one past the last bit.
+        std::uniform_int_distribution<std::uint64_t> dist(0, bv.size());
+        for (std::uint64_t r = 0; r < 100; r++) {
+            const std::uint64_t i = dist(engine);
+            REQUIRE_EQ(bv.rank(i), rank_naive(bits, i));
+        }
+    }
+    // Select is only queried when at least one bit is set (num_ones() - 1 would wrap otherwise).
+    if (bv.num_ones() != 0) {
+        std::uniform_int_distribution<std::uint64_t> dist(0, bv.num_ones() - 1);
+        for (std::uint64_t r = 0; r < 100; r++) {
+            const std::uint64_t n = dist(engine);
+            REQUIRE_EQ(bv.select(n), select_naive(bits, n));
+        }
+    }
+}
+
+// The builder is sized up front with resize(); bits are then written by position and read back.
+TEST_CASE("Test bit_vector::builder with resize") {
+    const auto bits = xcdat::test::make_random_bits(10000);
+
+    xcdat::bit_vector::builder bvb;
+    bvb.resize(bits.size());
+
+    REQUIRE_EQ(bvb.size(), bits.size());
+
+    for (std::uint64_t i = 0; i < bits.size(); i++) {
+        bvb.set_bit(i, bits[i]);
+    }
+    for (std::uint64_t i = 0; i < bits.size(); i++) {
+        REQUIRE_EQ(bvb[i], bits[i]);
+    }
+}
+
+// The builder only reserves capacity; bits are appended one by one and then read back.
+TEST_CASE("Test bit_vector::builder with push_back") {
+    const auto bits = xcdat::test::make_random_bits(10000);
+
+    xcdat::bit_vector::builder bvb;
+    bvb.reserve(bits.size());
+
+    for (std::uint64_t i = 0; i < bits.size(); i++) {
+        bvb.push_back(bits[i]);
+    }
+
+    REQUIRE_EQ(bvb.size(), bits.size());
+
+    for (std::uint64_t i = 0; i < bits.size(); i++) {
+        REQUIRE_EQ(bvb[i], bits[i]);
+    }
+}
+
+// Default density (0.5).
+TEST_CASE("Test rank/select operations") {
+    const auto bits = xcdat::test::make_random_bits(10000);
+    test_rank_select(bits);
+}
+
+// Density 0.0: no draw is below the threshold, so every bit is zero.
+TEST_CASE("Test rank/select operations (all zeros)") {
+    const auto bits = xcdat::test::make_random_bits(10000, 0.0);
+    test_rank_select(bits);
+}
+
+// Density 1.1 lies above every value drawn from [0, 1), so every bit is one.
+TEST_CASE("Test rank/select operations (all ones)") {
+    const auto bits = xcdat::test::make_random_bits(10000, 1.1);
+    test_rank_select(bits);
+}
--- a/tests/test_common.hpp
+++ b/tests/test_common.hpp
@ -0,0 +1,84 @@
+#pragma once
+
+#include <algorithm>
+#include <iostream>
+#include <random>
+#include <string>
+#include <vector>
+
+// Helpers shared by the unit tests. All generators are deterministic for a given seed.
+// Non-template functions are `inline` because they are defined in a header.
+namespace xcdat::test {
+
+// Sorts `vec`, drops duplicates and returns the result.
+template <class T>
+std::vector<T> to_unique_vec(std::vector<T>&& vec) {
+    std::sort(vec.begin(), vec.end());
+    vec.erase(std::unique(vec.begin(), vec.end()), vec.end());
+    // `vec` is an rvalue-reference parameter; without std::move it would be copied before C++20.
+    return std::move(vec);
+}
+
+// Returns the length of the longest key, or 0 for an empty collection.
+inline std::uint64_t max_length(const std::vector<std::string>& keys) {
+    std::uint64_t n = 0;
+    for (const auto& key : keys) {
+        n = std::max<std::uint64_t>(n, key.size());
+    }
+    return n;
+}
+
+// Returns `n` bits; each bit is set when a value drawn uniformly from [0, 1) is below `dens`.
+inline std::vector<bool> make_random_bits(std::uint64_t n, double dens = 0.5, std::uint64_t seed = 13) {
+    std::mt19937_64 engine(seed);
+    std::uniform_real_distribution<double> dist(0.0, 1.0);
+
+    std::vector<bool> bits(n);
+    for (std::uint64_t i = 0; i < n; i++) {
+        bits[i] = dist(engine) < dens;
+    }
+    return bits;
+}
+
+// Returns `n` integers drawn uniformly from [min, max].
+inline std::vector<std::uint64_t> make_random_ints(std::uint64_t n, std::uint64_t min, std::uint64_t max,
+                                                   std::uint64_t seed = 13) {
+    std::mt19937_64 engine(seed);
+    std::uniform_int_distribution<std::uint64_t> dist(min, max);
+
+    std::vector<std::uint64_t> ints(n);
+    for (std::uint64_t i = 0; i < n; i++) {
+        ints[i] = dist(engine);
+    }
+    return ints;
+}
+
+// Returns `n` strings whose lengths are drawn from [min_m, max_m] and whose characters are
+// drawn from [min_c, max_c]. The result is neither sorted nor unique.
+inline std::vector<std::string> make_random_keys(std::uint64_t n, std::uint64_t min_m, std::uint64_t max_m,  //
+                                                 char min_c = 'A', char max_c = 'Z', std::uint64_t seed = 13) {
+    std::mt19937_64 engine(seed);
+    std::uniform_int_distribution<std::uint64_t> dist_m(min_m, max_m);
+    // Character types are not valid for std::uniform_int_distribution, so draw an int and narrow it.
+    std::uniform_int_distribution<int> dist_c(min_c, max_c);
+
+    std::vector<std::string> keys(n);
+    for (std::uint64_t i = 0; i < n; i++) {
+        keys[i].resize(dist_m(engine));
+        for (std::uint64_t j = 0; j < keys[i].size(); j++) {
+            keys[i][j] = static_cast<char>(dist_c(engine));
+        }
+    }
+    return keys;
+}
+
+// Splits `keys` in place: each key is moved to the returned vector when a value drawn from
+// [0, 1) is not above `ratio`; the remaining keys stay in `keys`. Relative order is kept in both.
+inline std::vector<std::string> extract_keys(std::vector<std::string>& keys, double ratio = 0.1,
+                                             std::uint64_t seed = 13) {
+    std::mt19937_64 engine(seed);
+    std::uniform_real_distribution<double> dist(0.0, 1.0);
+
+    std::vector<std::string> keys1;
+    std::vector<std::string> keys2;
+
+    for (std::uint64_t i = 0; i < keys.size(); ++i) {
+        if (ratio < dist(engine)) {
+            keys1.push_back(keys[i]);
+        } else {
+            keys2.push_back(keys[i]);
+        }
+    }
+
+    keys = std::move(keys1);
+    return keys2;
+}
+
+}  // namespace xcdat::test
--- a/tests/test_compact_vector.cpp
+++ b/tests/test_compact_vector.cpp
@ -0,0 +1,41 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+
+#include <algorithm>
+#include <random>
+
+#include "doctest/doctest.h"
+#include "test_common.hpp"
+#include "xcdat/compact_vector.hpp"
+
+// All-zero input.
+TEST_CASE("Test compact_vector (zero)") {
+    std::vector<std::uint64_t> ints = {0, 0, 0, 0, 0};
+    xcdat::compact_vector cv(ints);
+
+    REQUIRE_EQ(cv.size(), ints.size());
+
+    for (std::uint64_t i = 0; i < ints.size(); i++) {
+        REQUIRE_EQ(cv[i], ints[i]);
+    }
+}
+
+// A handful of values of different magnitudes.
+TEST_CASE("Test compact_vector (tiny)") {
+    std::vector<std::uint64_t> ints = {2, 0, 14, 456, 32, 5544, 23};
+    xcdat::compact_vector cv(ints);
+
+    REQUIRE_EQ(cv.size(), ints.size());
+
+    for (std::uint64_t i = 0; i < ints.size(); i++) {
+        REQUIRE_EQ(cv[i], ints[i]);
+    }
+}
+
+// 10,000 random values in [0, UINT16_MAX].
+TEST_CASE("Test compact_vector (random)") {
+    std::vector<std::uint64_t> ints = xcdat::test::make_random_ints(10000, 0, UINT16_MAX);
+    xcdat::compact_vector cv(ints);
+
+    REQUIRE_EQ(cv.size(), ints.size());
+
+    for (std::uint64_t i = 0; i < ints.size(); i++) {
+        REQUIRE_EQ(cv[i], ints[i]);
+    }
+}
--- a/tests/test_tail_vector.cpp
+++ b/tests/test_tail_vector.cpp
@ -0,0 +1,51 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+
+#include <algorithm>
+#include <random>
+
+#include "doctest/doctest.h"
+#include "test_common.hpp"
+#include "xcdat/tail_vector.hpp"
+
+// Stores every suffix in a tail_vector and checks that each one can be matched and decoded
+// again from the position reported for it by the builder.
+void test_tail_vector(const std::vector<std::string>& sufs, bool bin_mode = false) {
+    xcdat::tail_vector tvec;
+    std::vector<std::uint64_t> idxs(sufs.size());
+
+    {
+        xcdat::tail_vector::builder tvb;
+        // The index of the suffix is passed as its second argument ...
+        for (std::uint64_t i = 0; i < sufs.size(); i++) {
+            tvb.set_suffix(sufs[i], i);
+        }
+        // ... and is handed back as `npos` together with the assigned position `tpos`.
+        tvb.complete(bin_mode, [&](std::uint64_t npos, std::uint64_t tpos) { idxs[npos] = tpos; });
+        tvec = xcdat::tail_vector(std::move(tvb));
+    }
+
+    for (std::uint64_t i = 0; i < sufs.size(); i++) {
+        REQUIRE(tvec.match(sufs[i], idxs[i]));
+    }
+    for (std::uint64_t i = 0; i < sufs.size(); i++) {
+        std::string decoded;
+        tvec.decode(idxs[i], [&](char c) { decoded.push_back(c); });
+        REQUIRE_EQ(sufs[i], decoded);
+    }
+}
+
+// Hand-written suffixes, including a repeated one ("M").
+TEST_CASE("Test xcdat::tail_vector (tiny)") {
+    std::vector<std::string> sufs = {"ML", "STATS", "A", "M", "L", "AKDD", "M", "R", "DD", "OD"};
+    test_tail_vector(sufs);
+}
+
+// 10,000 random suffixes of length 1..30 over a two-letter alphabet.
+TEST_CASE("Test xcdat::tail_vector (random, A--B)") {
+    std::vector<std::string> sufs = xcdat::test::make_random_keys(10000, 1, 30, 'A', 'B');
+    test_tail_vector(sufs);
+}
+
+// 10,000 random suffixes of length 1..30 over 'A'..'Z'.
+TEST_CASE("Test xcdat::tail_vector (random, A--Z)") {
+    std::vector<std::string> sufs = xcdat::test::make_random_keys(10000, 1, 30, 'A', 'Z');
+    test_tail_vector(sufs);
+}
+
+// Characters drawn from INT8_MIN..INT8_MAX; the vector is built with bin_mode enabled.
+TEST_CASE("Test xcdat::tail_vector (random, 0x00--0xFF)") {
+    std::vector<std::string> sufs = xcdat::test::make_random_keys(10000, 1, 30, INT8_MIN, INT8_MAX);
+    test_tail_vector(sufs, true);
+}
--- a/tests/test_trie.cpp
+++ b/tests/test_trie.cpp
@ -0,0 +1,297 @@
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+
+#include <algorithm>
+#include <iostream>
+#include <random>
+#include <string>
+
+#include "doctest/doctest.h"
+#include "mm_file/mm_file.hpp"
+#include "test_common.hpp"
+#include "xcdat.hpp"
+
+#ifdef TRIE_7
+using trie_type = xcdat::trie_7_type;
+#elif TRIE_8
+using trie_type = xcdat::trie_8_type;
+#endif
+
+// Checks the key count and maximum key length, that every key in `keys` is found and decodes
+// back to itself, and that no key in `others` is found.
+void test_basic_operations(const trie_type& trie, const std::vector<std::string>& keys,
+                           const std::vector<std::string>& others) {
+    REQUIRE_EQ(trie.num_keys(), keys.size());
+    REQUIRE_EQ(trie.max_length(), xcdat::test::max_length(keys));
+
+    for (std::uint64_t i = 0; i < keys.size(); i++) {
+        auto id = trie.lookup(keys[i]);
+        REQUIRE(id.has_value());
+        REQUIRE_LT(id.value(), keys.size());
+        auto decoded = trie.decode(id.value());
+        REQUIRE_EQ(keys[i], decoded);
+    }
+
+    for (std::uint64_t i = 0; i < others.size(); i++) {
+        auto id = trie.lookup(others[i]);
+        REQUIRE_FALSE(id.has_value());
+    }
+}
+
+// Runs a common-prefix search for every key and checks each reported (id, string) pair against
+// lookup() and decode().
+void test_prefix_search(const trie_type& trie, const std::vector<std::string>& keys,
+                        const std::vector<std::string>& others) {
+    // Registered keys: the key itself is among the results, so at least one result is expected.
+    for (auto& key : keys) {
+        size_t num_results = 0;
+        auto itr = trie.make_prefix_iterator(key);
+
+        while (itr.next()) {
+            const auto id = itr.id();
+            const auto decoded = itr.decoded_view();
+
+            REQUIRE_LE(decoded.size(), key.size());
+            REQUIRE_EQ(id, trie.lookup(decoded));
+            REQUIRE_EQ(decoded, trie.decode(id));
+
+            num_results += 1;
+        }
+
+        REQUIRE_LE(1, num_results);
+        REQUIRE_LE(num_results, key.size());
+    }
+
+    // Unregistered keys: only strictly shorter results are allowed.
+    for (auto& key : others) {
+        size_t num_results = 0;
+        auto itr = trie.make_prefix_iterator(key);
+
+        while (itr.next()) {
+            const auto id = itr.id();
+            const auto decoded = itr.decoded_view();
+
+            REQUIRE_LT(decoded.size(), key.size());
+            REQUIRE_EQ(id, trie.lookup(decoded));
+            REQUIRE_EQ(decoded, trie.decode(id));
+
+            num_results += 1;
+        }
+
+        REQUIRE_LT(num_results, key.size());
+    }
+}
+
+// Runs a predictive search for every key and checks each reported (id, string) pair against
+// lookup() and decode().
+void test_predictive_search(const trie_type& trie, const std::vector<std::string>& keys,
+                            const std::vector<std::string>& others) {
+    // Registered keys: results are at least as long as the query and there is at least one.
+    for (auto& key : keys) {
+        size_t num_results = 0;
+        auto itr = trie.make_predictive_iterator(key);
+
+        while (itr.next()) {
+            const auto id = itr.id();
+            const auto decoded = itr.decoded_view();
+
+            REQUIRE_LE(key.size(), decoded.size());
+            REQUIRE_EQ(id, trie.lookup(decoded));
+            REQUIRE_EQ(decoded, trie.decode(id));
+
+            num_results += 1;
+        }
+
+        REQUIRE_LE(1, num_results);
+    }
+
+    // Unregistered keys: any result must be strictly longer than the query; none is required.
+    for (auto& key : others) {
+        auto itr = trie.make_predictive_iterator(key);
+
+        while (itr.next()) {
+            const auto id = itr.id();
+            const auto decoded = itr.decoded_view();
+
+            REQUIRE_LT(key.size(), decoded.size());
+            REQUIRE_EQ(id, trie.lookup(decoded));
+            REQUIRE_EQ(decoded, trie.decode(id));
+        }
+    }
+}
+
+// Checks that enumeration yields exactly `keys`, in the same order, and then stops.
+void test_enumerate(const trie_type& trie, const std::vector<std::string>& keys) {
+    auto itr = trie.make_enumerative_iterator();
+    for (auto& key : keys) {
+        REQUIRE(itr.next());
+        REQUIRE_EQ(itr.decoded_view(), key);
+        REQUIRE_EQ(itr.id(), trie.lookup(key));
+    }
+    REQUIRE_FALSE(itr.next());
+}
+
+// Saves `trie` to a temporary file and checks that both a loaded and a memory-mapped copy
+// report the same statistics and pass the basic operations test. The file is removed afterwards.
+void test_io(const trie_type& trie, const std::vector<std::string>& keys, const std::vector<std::string>& others) {
+    const char* tmp_filepath = "tmp.idx";
+
+    // save() is expected to return the same byte count as memory_in_bytes().
+    const std::uint64_t memory = xcdat::memory_in_bytes(trie);
+    REQUIRE_EQ(memory, xcdat::save(trie, tmp_filepath));
+
+    {
+        const auto loaded = xcdat::load<trie_type>(tmp_filepath);
+        REQUIRE_EQ(trie.bin_mode(), loaded.bin_mode());
+        REQUIRE_EQ(trie.num_keys(), loaded.num_keys());
+        REQUIRE_EQ(trie.alphabet_size(), loaded.alphabet_size());
+        REQUIRE_EQ(trie.max_length(), loaded.max_length());
+        REQUIRE_EQ(memory, xcdat::memory_in_bytes(loaded));
+        test_basic_operations(loaded, keys, others);
+    }
+
+    {
+        mm::file_source<char> fin(tmp_filepath, mm::advice::sequential);
+        const auto mapped = xcdat::mmap<trie_type>(fin.data());
+        REQUIRE_EQ(trie.bin_mode(), mapped.bin_mode());
+        REQUIRE_EQ(trie.num_keys(), mapped.num_keys());
+        REQUIRE_EQ(trie.alphabet_size(), mapped.alphabet_size());
+        REQUIRE_EQ(trie.max_length(), mapped.max_length());
+        REQUIRE_EQ(memory, xcdat::memory_in_bytes(mapped));
+        test_basic_operations(mapped, keys, others);
+    }
+
+    std::remove(tmp_filepath);
+}
+
+// Small hand-written key set with exact expected results for each kind of search.
+TEST_CASE("Test trie_type (tiny)") {
+    // Already sorted and unique.
+    std::vector<std::string> keys = {
+        "AirPods",  "AirTag",  "Mac",  "MacBook", "MacBook_Air", "MacBook_Pro",
+        "Mac_Mini", "Mac_Pro", "iMac", "iPad",    "iPhone",      "iPhone_SE",
+    };
+    std::vector<std::string> others = {
+        "Google_Pixel", "iPad_mini", "iPadOS", "iPod", "ThinkPad",
+    };
+
+    trie_type trie(keys);
+    REQUIRE_FALSE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+
+    // Prefix search: registered keys that are prefixes of the query, shortest first.
+    {
+        auto itr = trie.make_prefix_iterator("MacBook_Pro");
+        std::vector<std::string> expected = {"Mac", "MacBook", "MacBook_Pro"};
+        for (const auto& exp : expected) {
+            REQUIRE(itr.next());
+            REQUIRE_EQ(itr.decoded(), exp);
+            REQUIRE_EQ(itr.id(), trie.lookup(exp));
+        }
+        REQUIRE_FALSE(itr.next());
+    }
+    // Predictive search: registered keys that start with the query.
+    {
+        auto itr = trie.make_predictive_iterator("MacBook");
+        std::vector<std::string> expected = {"MacBook", "MacBook_Air", "MacBook_Pro"};
+        for (const auto& exp : expected) {
+            REQUIRE(itr.next());
+            REQUIRE_EQ(itr.decoded(), exp);
+            REQUIRE_EQ(itr.id(), trie.lookup(exp));
+        }
+        REQUIRE_FALSE(itr.next());
+    }
+    // Enumeration: all keys in the order given above.
+    {
+        auto itr = trie.make_enumerative_iterator();
+        for (const auto& key : keys) {
+            REQUIRE(itr.next());
+            REQUIRE_EQ(itr.decoded(), key);
+            REQUIRE_EQ(itr.id(), trie.lookup(key));
+        }
+        REQUIRE_FALSE(itr.next());
+    }
+
+    test_io(trie, keys, others);
+}
+
+// Keys read from "keys.txt" (a relative path); part of them is split off as unregistered queries.
+TEST_CASE("Test trie_type (real)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::load_strings("keys.txt"));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE_FALSE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+
+// Random key sets generated from 10,000 draws (fewer after deduplication and the split into
+// registered keys and `others`), exercised with the full set of checks.
+
+// Two-letter alphabet.
+TEST_CASE("Test trie_type (random 10K, A--B)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(10000, 1, 30, 'A', 'B'));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE_FALSE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+
+// Upper-case letters.
+TEST_CASE("Test trie_type (random 10K, A--Z)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(10000, 1, 30, 'A', 'Z'));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE_FALSE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+
+// Characters drawn from INT8_MIN..INT8_MAX; the trie is expected to report binary mode.
+TEST_CASE("Test trie_type (random 10K, 0x00--0xFF)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(10000, 1, 30, INT8_MIN, INT8_MAX));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+
+// The 100K variants of the random tests are compiled only when NDEBUG is defined.
+#ifdef NDEBUG
+TEST_CASE("Test trie_type (random 100K, A--B)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(100000, 1, 30, 'A', 'B'));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE_FALSE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+
+TEST_CASE("Test trie_type (random 100K, A--Z)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(100000, 1, 30, 'A', 'Z'));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE_FALSE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+
+// Characters drawn from INT8_MIN..INT8_MAX; the trie is expected to report binary mode.
+TEST_CASE("Test trie_type (random 100K, 0x00--0xFF)") {
+    auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(100000, 1, 30, INT8_MIN, INT8_MAX));
+    auto others = xcdat::test::extract_keys(keys);
+
+    trie_type trie(keys);
+    REQUIRE(trie.bin_mode());
+
+    test_basic_operations(trie, keys, others);
+    test_prefix_search(trie, keys, others);
+    test_predictive_search(trie, keys, others);
+    test_enumerate(trie, keys);
+    test_io(trie, keys, others);
+}
+#endif
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@ -0,0 +1,14 @@
+# Command-line tools built from this directory. Each entry is both the
+# executable target name and the stem of its "<name>.cpp" source file.
+set(XCDAT_FILES
+    xcdat_build
+    xcdat_lookup
+    xcdat_decode
+    xcdat_prefix_search
+    xcdat_predictive_search
+    xcdat_enumerate
+    xcdat_benchmark
+)
+
+# One executable per tool, installed into <prefix>/bin.
+foreach(XCDAT_TOOL IN LISTS XCDAT_FILES)
+    add_executable(${XCDAT_TOOL} "${XCDAT_TOOL}.cpp")
+    install(TARGETS ${XCDAT_TOOL} RUNTIME DESTINATION bin)
+endforeach()
--- a/tools/cmd_line_parser/parser.hpp
+++ b/tools/cmd_line_parser/parser.hpp
@ -0,0 +1,158 @@
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <vector>
+
+namespace cmd_line_parser {
+
+/// Small command-line argument parser.
+///
+/// Required arguments are registered with the two-parameter add() and are
+/// matched by position. Optional arguments are registered with a shorthand
+/// flag (e.g. "-n") and are looked up by that flag once all required
+/// positions have been consumed. Positions index into the registration order,
+/// so required arguments are expected to be registered before optional ones.
+struct parser {
+    /// Sentinel meaning "no value"; see parsed().
+    inline static const std::string empty = "";
+
+    /// Stores argc/argv as received by main(); nothing is parsed until parse().
+    parser(int argc, char** argv) : m_argc(argc), m_argv(argv), m_required(0) {}
+
+    /// One registered argument: its shorthand flag (empty for required
+    /// arguments), its current textual value, its description, and whether it
+    /// is an on/off switch that takes no value.
+    struct cmd {
+        std::string shorthand, value, descr;
+        bool is_boolean;
+    };
+
+    /// Parses the stored command line into the registered arguments.
+    ///
+    /// @return true on success; false after printing the help text when too
+    ///         few arguments were given, "-h"/"--help" was seen, an unknown
+    ///         shorthand was used, or a shorthand is missing its value.
+    bool parse() {
+        // argc may be 0; never let `m_argc - 1` wrap around to a huge size_t.
+        const size_t num_given = m_argc > 0 ? size_t(m_argc - 1) : 0;
+        if (num_given < m_required) return abort();
+        size_t k = 0;  // index of the logical argument (a flag and its value count once)
+        for (int i = 1; i < m_argc; ++i, ++k) {
+            std::string parsed(m_argv[i]);
+            if (parsed == "-h" or parsed == "--help") return abort();
+            size_t id = k;
+            bool is_optional = id >= m_required;
+            if (is_optional) {
+                auto it = m_shorthands.find(parsed);
+                if (it == m_shorthands.end()) {
+                    std::cerr << "== error: shorthand '" + parsed + "' not found" << std::endl;
+                    return abort();
+                }
+                id = (*it).second;
+            }
+            assert(id < m_names.size());
+            auto const& name = m_names[id];
+            auto& c = m_cmds[name];
+            if (is_optional) {
+                if (c.is_boolean) {
+                    // The presence of the flag alone switches it on.
+                    parsed = "true";
+                } else {
+                    ++i;
+                    if (i == m_argc) {
+                        std::cerr << "== error: missing value for '" + parsed + "'" << std::endl;
+                        return abort();
+                    }
+                    parsed = m_argv[i];
+                }
+            }
+            c.value = parsed;
+        }
+        return true;
+    }
+
+    /// Prints the usage line followed by one description per argument to stderr.
+    void help() const {
+        // "\033" is the portable spelling of the ESC byte ("\e" is a compiler extension).
+        const char* program = (m_argc > 0 and m_argv != nullptr and m_argv[0] != nullptr) ? m_argv[0] : "";
+        std::cerr << "Usage: \033[1m" << program << "\033[0m [-h,--help]";
+        auto print = [this](bool with_description) {
+            for (size_t i = 0; i != m_names.size(); ++i) {
+                auto const& c = m_cmds.at(m_names[i]);
+                bool is_optional = i >= m_required;
+                if (is_optional) std::cerr << " [\033[1m" << c.shorthand << "\033[0m";
+                if (!c.is_boolean) std::cerr << " \033[4m" << m_names[i] << "\033[0m";
+                if (is_optional) std::cerr << "]";
+                if (with_description) std::cerr << "\n\t" << c.descr << "\n";
+            }
+        };
+        print(false);
+        std::cerr << "\n\n";
+        print(true);
+        std::cerr << " [-h,--help]\n\tPrint this help text and silently exits." << std::endl;
+    }
+
+    /// Registers a required (positional) argument.
+    /// @return false if an argument with this name already exists.
+    bool add(std::string const& name, std::string const& descr) {
+        bool ret = m_cmds.emplace(name, cmd{empty, empty, descr, false}).second;
+        if (ret) {
+            m_names.push_back(name);
+            m_required += 1;
+        }
+        return ret;
+    }
+
+    /// Registers an optional argument selected by `shorthand`.
+    /// Boolean arguments start as "false"; value arguments start empty.
+    /// @return false if an argument with this name already exists.
+    bool add(std::string const& name, std::string const& descr, std::string const& shorthand, bool is_boolean = true) {
+        bool ret = m_cmds.emplace(name, cmd{shorthand, is_boolean ? "false" : empty, descr, is_boolean}).second;
+        if (ret) {
+            m_names.push_back(name);
+            m_shorthands.emplace(shorthand, m_names.size() - 1);
+        }
+        return ret;
+    }
+
+    /// Returns the value of `name` converted to T.
+    /// @throws std::runtime_error if `name` was never registered.
+    template <typename T>
+    T get(std::string const& name) const {
+        auto it = m_cmds.find(name);
+        if (it == m_cmds.end()) {
+            throw std::runtime_error("error: '" + name + "' not found");
+        }
+        auto const& value = (*it).second.value;
+        return parse<T>(value);
+    }
+
+    // added by Kampersanda
+    /// Returns the value of `name` converted to T, or `default_value` when
+    /// the argument is unknown or has no value.
+    template <typename T>
+    T get(std::string const& name, const T& default_value) const {
+        return parsed(name) ? get<T>(name) : default_value;
+    }
+
+    /// True when `name` is registered and currently holds a non-empty value.
+    bool parsed(std::string const& name) const {
+        auto it = m_cmds.find(name);
+        if (it == m_cmds.end() or (*it).second.value == empty) return false;
+        return true;
+    }
+
+    /// Converts the textual `value` to T (string, character, integer,
+    /// floating-point or bool). Text that cannot be converted yields a
+    /// zero/false value.
+    template <typename T>
+    T parse(std::string const& value) const {
+        if constexpr (std::is_same<T, std::string>::value) {
+            return value;
+        } else if constexpr (std::is_same<T, char>::value or std::is_same<T, signed char>::value or
+                             std::is_same<T, unsigned char>::value) {
+            // front() on an empty string is undefined behaviour.
+            return value.empty() ? T{} : static_cast<T>(value.front());
+        } else if constexpr (std::is_same<T, unsigned int>::value or std::is_same<T, int>::value or
+                             std::is_same<T, unsigned short int>::value or std::is_same<T, short int>::value or
+                             std::is_same<T, unsigned long int>::value or std::is_same<T, long int>::value or
+                             std::is_same<T, unsigned long long int>::value or std::is_same<T, long long int>::value) {
+            // strto(u)ll has defined results on overflow and covers the full unsigned 64-bit range.
+            if constexpr (std::is_unsigned<T>::value) {
+                return static_cast<T>(std::strtoull(value.c_str(), nullptr, 10));
+            } else {
+                return static_cast<T>(std::strtoll(value.c_str(), nullptr, 10));
+            }
+        } else if constexpr (std::is_same<T, float>::value or std::is_same<T, double>::value or
+                             std::is_same<T, long double>::value) {
+            return static_cast<T>(std::atof(value.c_str()));
+        } else if constexpr (std::is_same<T, bool>::value) {
+            std::istringstream stream(value);
+            bool ret = false;  // stays false when nothing can be extracted (e.g. empty value)
+            if (value == "true" or value == "false") {
+                stream >> std::boolalpha >> ret;
+            } else {
+                stream >> std::noboolalpha >> ret;
+            }
+            return ret;
+        }
+        assert(false);
+        __builtin_unreachable();
+    }
+
+  private:
+    int m_argc;
+    char** m_argv;
+    size_t m_required;                                     // number of required (positional) arguments
+    std::unordered_map<std::string, cmd> m_cmds;           // name -> argument
+    std::unordered_map<std::string, size_t> m_shorthands;  // shorthand flag -> index into m_names
+    std::vector<std::string> m_names;                      // names in registration order
+
+    // Prints the help text and reports failure.
+    bool abort() const {
+        help();
+        return false;
+    }
+};
+
+}  // namespace cmd_line_parser
--- a/tools/tinyformat/tinyformat.h
+++ b/tools/tinyformat/tinyformat.h
--- a/tools/xcdat_benchmark.cpp
+++ b/tools/xcdat_benchmark.cpp
@ -0,0 +1,148 @@
+#include <chrono>
+#include <random>
+
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Number of timed passes over the query set in each benchmark measurement loop.
+static constexpr int num_trials = 10;
+
+// Declares the command line of the benchmark tool: one required input path
+// followed by optional, value-taking settings.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+
+    // Required positional argument.
+    cli.add("input_keys", "Input filepath of keywords");
+
+    // Optional arguments; `false` means the flag is followed by a value.
+    cli.add("num_samples", "Number of sample keys for searches (default=1000)", "-n", false);
+    cli.add("random_seed", "Random seed for sampling (default=13)", "-s", false);
+    cli.add("binary_mode", "Is binary mode? (default=0)", "-b", false);
+
+    return cli;
+}
+
+// Draws `num_samples` keys uniformly at random (with replacement) from `keys`,
+// using a generator seeded with `random_seed`.
+//
+// The returned views point into `keys`, so `keys` must outlive the result.
+// Returns an empty vector when `keys` is empty, since there is nothing to
+// sample from.
+std::vector<std::string_view> sample_keys(const std::vector<std::string>& keys, std::uint64_t num_samples,
+                                          std::uint64_t random_seed) {
+    if (keys.empty()) {
+        // `keys.size() - 1` would wrap around and index out of bounds.
+        return {};
+    }
+
+    std::mt19937_64 engine(random_seed);
+    std::uniform_int_distribution<std::uint64_t> dist(0, keys.size() - 1);
+
+    std::vector<std::string_view> sampled_keys;
+    sampled_keys.reserve(num_samples);
+    for (std::uint64_t i = 0; i < num_samples; i++) {
+        sampled_keys.emplace_back(keys[dist(engine)]);
+    }
+    return sampled_keys;
+}
+
+// Looks up every key in `trie` and returns the ids in the same order.
+// `.value()` is called on each lookup result, so every key is expected to be
+// present in the trie.
+template <class Trie>
+std::vector<std::uint64_t> extract_ids(const Trie& trie, const std::vector<std::string_view>& keys) {
+    std::vector<std::uint64_t> ids;
+    ids.reserve(keys.size());
+    for (const std::string_view key : keys) {
+        ids.push_back(trie.lookup(key).value());
+    }
+    return ids;
+}
+
+// Builds a Trie from `keys`, prints its key count, memory usage and the
+// construction time, and returns the built trie.
+template <class Trie>
+Trie benchmark_build(const std::vector<std::string>& keys, bool binary_mode) {
+    const auto start_tp = std::chrono::high_resolution_clock::now();
+    Trie trie(keys, binary_mode);
+    const auto stop_tp = std::chrono::high_resolution_clock::now();
+
+    const auto dur_ms = std::chrono::duration_cast<std::chrono::milliseconds>(stop_tp - start_tp);
+    const double time_in_sec = dur_ms.count() / 1000.0;
+    // Kept as an integer: the formatter prints by C++ type, so a double passed
+    // to "%d" would be shown in scientific notation for large sizes.
+    const auto memory_in_bytes = static_cast<std::uint64_t>(xcdat::memory_in_bytes(trie));
+
+    tfm::printfln("Number of keys: %d", trie.num_keys());
+    tfm::printfln("Memory usage in bytes: %d", memory_in_bytes);
+    tfm::printfln("Memory usage in MiB: %g", static_cast<double>(memory_in_bytes) / (1024.0 * 1024.0));
+    tfm::printfln("Construction time in seconds: %g", time_in_sec);
+
+    return trie;
+}
+
+// Times trie.lookup() over `queries` (num_trials passes after one untimed
+// pass) and prints the average time per query in microseconds.
+template <class Trie>
+void benchmark_lookup(const Trie& trie, const std::vector<std::string_view>& queries) {
+    using clock_type = std::chrono::high_resolution_clock;
+
+    // Results are accumulated into a volatile, presumably so the calls cannot be optimized away.
+    volatile std::uint64_t sink = 0;
+
+    // Untimed warm-up pass.
+    for (const auto& q : queries) {
+        sink = sink + trie.lookup(q).value();
+    }
+
+    // Timed passes.
+    const auto begin_tp = clock_type::now();
+    for (int trial = 0; trial < num_trials; ++trial) {
+        for (const auto& q : queries) {
+            sink = sink + trie.lookup(q).value();
+        }
+    }
+    const auto end_tp = clock_type::now();
+
+    const double elapsed_us =
+        static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(end_tp - begin_tp).count());
+
+    tfm::printfln("Lookup time in microsec/query: %g", elapsed_us / (num_trials * queries.size()));
+}
+
+// Times trie.decode() over the ids in `queries` (num_trials passes after one
+// untimed pass) and prints the average time per query in microseconds.
+template <class Trie>
+void benchmark_decode(const Trie& trie, const std::vector<std::uint64_t>& queries) {
+    using clock_type = std::chrono::high_resolution_clock;
+
+    // Results are accumulated into a volatile, presumably so the calls cannot be optimized away.
+    volatile std::uint64_t sink = 0;
+
+    // Untimed warm-up pass.
+    for (const std::uint64_t id : queries) {
+        sink = sink + trie.decode(id).size();
+    }
+
+    // Timed passes.
+    const auto begin_tp = clock_type::now();
+    for (int trial = 0; trial < num_trials; ++trial) {
+        for (const std::uint64_t id : queries) {
+            sink = sink + trie.decode(id).size();
+        }
+    }
+    const auto end_tp = clock_type::now();
+
+    const double elapsed_us =
+        static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(end_tp - begin_tp).count());
+
+    tfm::printfln("Decode time in microsec/query: %g", elapsed_us / (num_trials * queries.size()));
+}
+
+// Runs the full benchmark for one trie type: construction, then lookup of
+// `query_keys`, then decode of the ids those keys map to.
+//
+// `keys` is taken by const reference: it is only read, and copying the whole
+// key set once per trie type is unnecessary.
+template <class Trie>
+void benchmark(const std::vector<std::string>& keys, const std::vector<std::string_view>& query_keys,
+               bool binary_mode) {
+    const auto trie = benchmark_build<Trie>(keys, binary_mode);
+    const auto query_ids = extract_ids(trie, query_keys);
+
+    benchmark_lookup(trie, query_keys);
+    benchmark_decode(trie, query_ids);
+}
+
+// Entry point of the benchmark tool: loads the keys, sorts and de-duplicates
+// them, samples the query keys, and benchmarks both trie types.
+// Returns 1 when the command line is invalid or the input holds no keys.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto keys_path = cli.get<std::string>("input_keys");
+    const auto sample_count = cli.get<std::uint64_t>("num_samples", 1000);
+    const auto seed = cli.get<std::uint64_t>("random_seed", 13);
+    const auto use_binary_mode = cli.get<bool>("binary_mode", false);
+
+    auto keys = xcdat::load_strings(keys_path);
+    if (keys.empty()) {
+        tfm::errorfln("Error: The input dataset is empty.");
+        return 1;
+    }
+
+    // Sort, then drop adjacent duplicates.
+    std::sort(keys.begin(), keys.end());
+    const auto unique_end = std::unique(keys.begin(), keys.end());
+    keys.erase(unique_end, keys.end());
+
+    // The sampled views point into `keys`, which stays alive until main returns.
+    const auto query_keys = sample_keys(keys, sample_count, seed);
+
+    tfm::printfln("** xcdat::trie_7_type **");
+    benchmark<xcdat::trie_7_type>(keys, query_keys, use_binary_mode);
+
+    tfm::printfln("** xcdat::trie_8_type **");
+    benchmark<xcdat::trie_8_type>(keys, query_keys, use_binary_mode);
+
+    return 0;
+}
--- a/tools/xcdat_build.cpp
+++ b/tools/xcdat_build.cpp
@ -0,0 +1,67 @@
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declares the command line of the build tool: required input and output
+// paths followed by optional, value-taking settings.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+
+    // Required positional arguments, in order.
+    cli.add("input_keys", "Input filepath of keywords");
+    cli.add("output_dic", "Output filepath of trie dictionary");
+
+    // Optional arguments; `false` means the flag is followed by a value.
+    cli.add("trie_type", "Trie type: [7|8] (default=7)", "-t", false);
+    cli.add("binary_mode", "Is binary mode? (default=0)", "-b", false);
+
+    return cli;
+}
+
+// Builds a Trie from the keys in "input_keys", prints its statistics and
+// saves it to "output_dic".
+//
+// @return 0 on success, 1 when the input file yields no keys.
+template <class Trie>
+int build(const cmd_line_parser::parser& p) {
+    const auto input_keys = p.get<std::string>("input_keys");
+    const auto output_dic = p.get<std::string>("output_dic");
+    const auto binary_mode = p.get<bool>("binary_mode", false);
+
+    auto keys = xcdat::load_strings(input_keys);
+    if (keys.empty()) {
+        tfm::errorfln("Error: The input dataset is empty.");
+        // Stop here instead of building and saving a dictionary from no keys.
+        return 1;
+    }
+
+    // Sort, then drop adjacent duplicates.
+    std::sort(keys.begin(), keys.end());
+    keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
+
+    const Trie trie(keys, binary_mode);
+    // Kept as an integer: the formatter prints by C++ type, so a double passed
+    // to "%d" would be shown in scientific notation for large sizes.
+    const auto memory_in_bytes = static_cast<std::uint64_t>(xcdat::memory_in_bytes(trie));
+
+    tfm::printfln("Number of keys: %d", trie.num_keys());
+    tfm::printfln("Number of trie nodes: %d", trie.num_nodes());
+    tfm::printfln("Number of DA units: %d", trie.num_units());
+    tfm::printfln("Memory usage in bytes: %d", memory_in_bytes);
+    tfm::printfln("Memory usage in MiB: %g", static_cast<double>(memory_in_bytes) / (1024.0 * 1024.0));
+
+    xcdat::save(trie, output_dic);
+
+    return 0;
+}
+
+// Entry point of the build tool: parses the command line and runs build()
+// with the trie type selected by "-t" (7 by default). Any other value prints
+// the help text and returns 1.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto requested_type = cli.get<int>("trie_type", 7);
+
+    if (requested_type == 7) {
+        return build<xcdat::trie_7_type>(cli);
+    }
+    if (requested_type == 8) {
+        return build<xcdat::trie_8_type>(cli);
+    }
+
+    // Unsupported trie type: show usage and fail.
+    cli.help();
+    return 1;
+}
--- a/tools/xcdat_decode.cpp
+++ b/tools/xcdat_decode.cpp
@ -0,0 +1,53 @@
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declares the command line of the decode tool: a single required
+// dictionary path.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+    cli.add("input_dic", "Input filepath of trie dictionary");
+    return cli;
+}
+
+// Memory-maps the dictionary named by "input_dic", then reads ids from
+// standard input and prints "<id>\t<decoded key>" for each one until input
+// ends or an id fails to parse. Always returns 0.
+template <class Trie>
+int decode(const cmd_line_parser::parser& p) {
+    const auto dic_path = p.get<std::string>("input_dic");
+
+    // The mapping must stay alive for as long as the trie is used.
+    const mm::file_source<char> mapped_file(dic_path.c_str(), mm::advice::sequential);
+    const auto trie = xcdat::mmap<Trie>(mapped_file.data());
+
+    std::uint64_t id;
+    while (std::cin >> id) {
+        const auto decoded = trie.decode(id);
+        tfm::printfln("%d\t%s", id, decoded);
+    }
+
+    return 0;
+}
+
+// Entry point of the decode tool: parses the command line, reads the flag of
+// the dictionary file and runs decode() with the matching trie type. A flag
+// other than 7 or 8 prints the help text and returns 1.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto dic_path = cli.get<std::string>("input_dic");
+    const auto type_flag = xcdat::get_flag(dic_path);
+
+    if (type_flag == 7) {
+        return decode<xcdat::trie_7_type>(cli);
+    }
+    if (type_flag == 8) {
+        return decode<xcdat::trie_8_type>(cli);
+    }
+
+    // Unknown flag: show usage and fail.
+    cli.help();
+    return 1;
+}
--- a/tools/xcdat_enumerate.cpp
+++ b/tools/xcdat_enumerate.cpp
@ -0,0 +1,50 @@
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declares the command line of the enumerate tool: a single required
+// dictionary path.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+    cli.add("input_dic", "Input filepath of trie dictionary");
+    return cli;
+}
+
+// Memory-maps the dictionary named by "input_dic" and prints
+// "<id>\t<key>" for every entry reported by trie.enumerate().
+// Always returns 0.
+template <class Trie>
+int enumerate(const cmd_line_parser::parser& p) {
+    const auto dic_path = p.get<std::string>("input_dic");
+
+    // The mapping must stay alive for as long as the trie is used.
+    const mm::file_source<char> mapped_file(dic_path.c_str(), mm::advice::sequential);
+    const auto trie = xcdat::mmap<Trie>(mapped_file.data());
+
+    const auto print_entry = [](std::uint64_t id, std::string_view str) {
+        tfm::printfln("%d\t%s", id, str);
+    };
+    trie.enumerate(print_entry);
+
+    return 0;
+}
+
+// Entry point of the enumerate tool: parses the command line, reads the flag
+// of the dictionary file and runs enumerate() with the matching trie type.
+// A flag other than 7 or 8 prints the help text and returns 1.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto dic_path = cli.get<std::string>("input_dic");
+    const auto type_flag = xcdat::get_flag(dic_path);
+
+    if (type_flag == 7) {
+        return enumerate<xcdat::trie_7_type>(cli);
+    }
+    if (type_flag == 8) {
+        return enumerate<xcdat::trie_8_type>(cli);
+    }
+
+    // Unknown flag: show usage and fail.
+    cli.help();
+    return 1;
+}
--- a/tools/xcdat_lookup.cpp
+++ b/tools/xcdat_lookup.cpp
@ -0,0 +1,57 @@
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declares the command line of the lookup tool: a single required
+// dictionary path.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+    cli.add("input_dic", "Input filepath of trie dictionary");
+    return cli;
+}
+
+// Memory-maps the dictionary named by "input_dic", then looks up each line
+// read from standard input and prints "<id>\t<line>", using -1 as the id
+// when the line is not found. Always returns 0.
+template <class Trie>
+int lookup(const cmd_line_parser::parser& p) {
+    const auto dic_path = p.get<std::string>("input_dic");
+
+    // The mapping must stay alive for as long as the trie is used.
+    const mm::file_source<char> mapped_file(dic_path.c_str(), mm::advice::sequential);
+    const auto trie = xcdat::mmap<Trie>(mapped_file.data());
+
+    std::string line;
+    while (std::getline(std::cin, line)) {
+        const auto found = trie.lookup(line);
+        if (!found.has_value()) {
+            tfm::printfln("-1\t%s", line);
+            continue;
+        }
+        tfm::printfln("%d\t%s", found.value(), line);
+    }
+
+    return 0;
+}
+
+// Entry point of the lookup tool: parses the command line, reads the flag of
+// the dictionary file and runs lookup() with the matching trie type. A flag
+// other than 7 or 8 prints the help text and returns 1.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto dic_path = cli.get<std::string>("input_dic");
+    const auto type_flag = xcdat::get_flag(dic_path);
+
+    if (type_flag == 7) {
+        return lookup<xcdat::trie_7_type>(cli);
+    }
+    if (type_flag == 8) {
+        return lookup<xcdat::trie_8_type>(cli);
+    }
+
+    // Unknown flag: show usage and fail.
+    cli.help();
+    return 1;
+}
--- a/tools/xcdat_predictive_search.cpp
+++ b/tools/xcdat_predictive_search.cpp
@ -0,0 +1,70 @@
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declares the command line of the predictive-search tool: a required
+// dictionary path and an optional result limit.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+
+    // Required positional argument.
+    cli.add("input_dic", "Input filepath of trie dictionary");
+
+    // Optional argument; `false` means the flag is followed by a value.
+    cli.add("max_num_results", "The max number of results (default=10)", "-n", false);
+
+    return cli;
+}
+
+// Memory-maps the dictionary named by "input_dic", then runs a predictive
+// search for each line read from standard input. For every line it prints
+// the total number of results followed by at most "max_num_results"
+// (default 10) of them as "<id>\t<key>". Always returns 0.
+template <class Trie>
+int predictive_search(const cmd_line_parser::parser& p) {
+    const auto dic_path = p.get<std::string>("input_dic");
+    const auto result_limit = p.get<std::uint64_t>("max_num_results", 10);
+
+    // The mapping must stay alive for as long as the trie is used.
+    const mm::file_source<char> mapped_file(dic_path.c_str(), mm::advice::sequential);
+    const auto trie = xcdat::mmap<Trie>(mapped_file.data());
+
+    // Each result owns a copy of its string.
+    struct result_type {
+        std::uint64_t id;
+        std::string str;
+    };
+    std::vector<result_type> results;
+    results.reserve(1ULL << 10);
+
+    const auto collect = [&results](std::uint64_t id, std::string_view str) {
+        results.push_back(result_type{id, std::string(str)});
+    };
+
+    std::string query;
+    while (std::getline(std::cin, query)) {
+        results.clear();
+        trie.predictive_search(query, collect);
+
+        tfm::printfln("%d found", results.size());
+
+        // All results are counted, but only the first `result_limit` are printed.
+        const std::uint64_t num_shown = std::min<std::uint64_t>(results.size(), result_limit);
+        for (std::uint64_t i = 0; i < num_shown; ++i) {
+            tfm::printfln("%d\t%s", results[i].id, results[i].str);
+        }
+    }
+
+    return 0;
+}
+
+// Entry point of the predictive-search tool: parses the command line, reads
+// the flag of the dictionary file and runs predictive_search() with the
+// matching trie type. A flag other than 7 or 8 prints the help text and
+// returns 1.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto dic_path = cli.get<std::string>("input_dic");
+    const auto type_flag = xcdat::get_flag(dic_path);
+
+    if (type_flag == 7) {
+        return predictive_search<xcdat::trie_7_type>(cli);
+    }
+    if (type_flag == 8) {
+        return predictive_search<xcdat::trie_8_type>(cli);
+    }
+
+    // Unknown flag: show usage and fail.
+    cli.help();
+    return 1;
+}
--- a/tools/xcdat_prefix_search.cpp
+++ b/tools/xcdat_prefix_search.cpp
@ -0,0 +1,66 @@
+#include <mm_file/mm_file.hpp>
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declares the command line of the prefix-search tool: a single required
+// dictionary path.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser cli(argc, argv);
+    cli.add("input_dic", "Input filepath of trie dictionary");
+    return cli;
+}
+
+// Memory-maps the dictionary named by "input_dic", then runs a prefix search
+// for each line read from standard input, printing the number of results
+// followed by every result as "<id>\t<key>". Always returns 0.
+template <class Trie>
+int prefix_search(const cmd_line_parser::parser& p) {
+    const auto dic_path = p.get<std::string>("input_dic");
+
+    // The mapping must stay alive for as long as the trie is used.
+    const mm::file_source<char> mapped_file(dic_path.c_str(), mm::advice::sequential);
+    const auto trie = xcdat::mmap<Trie>(mapped_file.data());
+
+    // Results hold non-owning views and are printed before the next line is read.
+    struct result_type {
+        std::uint64_t id;
+        std::string_view str;
+    };
+
+    std::vector<result_type> results;
+    results.reserve(trie.max_length());
+
+    const auto collect = [&results](std::uint64_t id, std::string_view str) {
+        results.push_back(result_type{id, str});
+    };
+
+    std::string query;
+    while (std::getline(std::cin, query)) {
+        results.clear();
+        trie.prefix_search(query, collect);
+
+        tfm::printfln("%d found", results.size());
+        for (const auto& hit : results) {
+            tfm::printfln("%d\t%s", hit.id, hit.str);
+        }
+    }
+
+    return 0;
+}
+
+// Entry point of the prefix-search tool: parses the command line, reads the
+// flag of the dictionary file and runs prefix_search() with the matching
+// trie type. A flag other than 7 or 8 prints the help text and returns 1.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+    std::ios::sync_with_stdio(false);
+
+    auto cli = make_parser(argc, argv);
+    if (!cli.parse()) {
+        return 1;
+    }
+
+    const auto dic_path = cli.get<std::string>("input_dic");
+    const auto type_flag = xcdat::get_flag(dic_path);
+
+    if (type_flag == 7) {
+        return prefix_search<xcdat::trie_7_type>(cli);
+    }
+    if (type_flag == 8) {
+        return prefix_search<xcdat::trie_8_type>(cli);
+    }
+
+    // Unknown flag: show usage and fail.
+    cli.help();
+    return 1;
+}