diff --git a/README.md b/README.md index 868cbd4..2cb1db9 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ It builds the trie index from a given dataset consisting of keywords separated b $ xcdat_build enwiki-titles.txt idx.bin Number of keys: 15955763 Number of trie nodes: 36441058 +Number of DA units: 36520704 Memory usage in bytes: 1.70618e+08 Memory usage in MiB: 162.714 ``` @@ -128,38 +129,24 @@ $ xcdat_enumerate idx.bin | head -3 ### `xcdat_benchmark` -It measures the performance of Xcdat for a given dataset. +It measures the performance of each available trie type for a given dataset. To perform search operations, it randomly samples `n` queires from the dataset, where `n` is one of the parameters. ``` $ xcdat_benchmark enwiki-titles.txt ** xcdat::trie_7_type ** -Binary mode: 0 -Alphabet size: 198 -Max key length: 253 Number of keys: 15955763 -Number of trie nodes: 36441058 -Number of DA units: 36520704 -Number of free DA units: 79646 -TAIL length: 30776290 Memory usage in bytes: 1.70618e+08 Memory usage in MiB: 162.714 -Construction time in seconds: 11.828 -Lookup time in microsec/query: 0.8259 -Decode time in microsec/query: 1.4545 +Construction time in seconds: 12.907 +Lookup time in microsec/query: 0.4674 +Decode time in microsec/query: 0.8722 ** xcdat::trie_8_type ** -Binary mode: 0 -Alphabet size: 198 -Max key length: 253 Number of keys: 15955763 -Number of trie nodes: 36441035 -Number of DA units: 36515840 -Number of free DA units: 74805 -TAIL length: 30776290 Memory usage in bytes: 1.64104e+08 Memory usage in MiB: 156.502 -Construction time in seconds: 11.966 -Lookup time in microsec/query: 0.844 -Decode time in microsec/query: 1.0029 +Construction time in seconds: 13.442 +Lookup time in microsec/query: 0.7593 +Decode time in microsec/query: 1.2341 ``` ## Sample usage diff --git a/tools/xcdat_benchmark.cpp b/tools/xcdat_benchmark.cpp index c525321..87c8abd 100644 --- a/tools/xcdat_benchmark.cpp +++ b/tools/xcdat_benchmark.cpp @@ -48,14 +48,7 @@ Trie 
benchmark_build(const std::vector& keys, bool binary_mode) { const double time_in_sec = dur_ms.count() / 1000.0; const double memory_in_bytes = xcdat::memory_in_bytes(trie); - tfm::printfln("Binary mode: %d", trie.bin_mode()); - tfm::printfln("Alphabet size: %d", trie.alphabet_size()); - tfm::printfln("Max key length: %d", trie.max_length()); tfm::printfln("Number of keys: %d", trie.num_keys()); - tfm::printfln("Number of trie nodes: %d", trie.num_nodes()); - tfm::printfln("Number of DA units: %d", trie.num_units()); - tfm::printfln("Number of free DA units: %d", trie.num_free_units()); - tfm::printfln("TAIL length: %d", trie.tail_length()); tfm::printfln("Memory usage in bytes: %d", memory_in_bytes); tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0)); tfm::printfln("Construction time in seconds: %g", time_in_sec); diff --git a/tools/xcdat_build.cpp b/tools/xcdat_build.cpp index a75d8c7..a71e07b 100644 --- a/tools/xcdat_build.cpp +++ b/tools/xcdat_build.cpp @@ -33,6 +33,7 @@ int build(const cmd_line_parser::parser& p) { tfm::printfln("Number of keys: %d", trie.num_keys()); tfm::printfln("Number of trie nodes: %d", trie.num_nodes()); + tfm::printfln("Number of DA units: %d", trie.num_units()); tfm::printfln("Memory usage in bytes: %d", memory_in_bytes); tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0));