This commit is contained in:
Shunsuke Kanda 2021-07-02 13:40:38 +09:00
parent af9731e6c2
commit 0362cc0453
3 changed files with 9 additions and 28 deletions

View file

@ -60,6 +60,7 @@ It builds the trie index from a given dataset consisting of keywords separated b
$ xcdat_build enwiki-titles.txt idx.bin $ xcdat_build enwiki-titles.txt idx.bin
Number of keys: 15955763 Number of keys: 15955763
Number of trie nodes: 36441058 Number of trie nodes: 36441058
Number of DA units: 36520704
Memory usage in bytes: 1.70618e+08 Memory usage in bytes: 1.70618e+08
Memory usage in MiB: 162.714 Memory usage in MiB: 162.714
``` ```
@ -128,38 +129,24 @@ $ xcdat_enumerate idx.bin | head -3
### `xcdat_benchmark` ### `xcdat_benchmark`
It measures the performance of Xcdat for a given dataset. It measures the performance of possible tries for a given dataset. To perform search operations, it randomly samples `n` queries from the dataset, where `n` is one of the parameters.
``` ```
$ xcdat_benchmark enwiki-titles.txt $ xcdat_benchmark enwiki-titles.txt
** xcdat::trie_7_type ** ** xcdat::trie_7_type **
Binary mode: 0
Alphabet size: 198
Max key length: 253
Number of keys: 15955763 Number of keys: 15955763
Number of trie nodes: 36441058
Number of DA units: 36520704
Number of free DA units: 79646
TAIL length: 30776290
Memory usage in bytes: 1.70618e+08 Memory usage in bytes: 1.70618e+08
Memory usage in MiB: 162.714 Memory usage in MiB: 162.714
Construction time in seconds: 11.828 Construction time in seconds: 12.907
Lookup time in microsec/query: 0.8259 Lookup time in microsec/query: 0.4674
Decode time in microsec/query: 1.4545 Decode time in microsec/query: 0.8722
** xcdat::trie_8_type ** ** xcdat::trie_8_type **
Binary mode: 0
Alphabet size: 198
Max key length: 253
Number of keys: 15955763 Number of keys: 15955763
Number of trie nodes: 36441035
Number of DA units: 36515840
Number of free DA units: 74805
TAIL length: 30776290
Memory usage in bytes: 1.64104e+08 Memory usage in bytes: 1.64104e+08
Memory usage in MiB: 156.502 Memory usage in MiB: 156.502
Construction time in seconds: 11.966 Construction time in seconds: 13.442
Lookup time in microsec/query: 0.844 Lookup time in microsec/query: 0.7593
Decode time in microsec/query: 1.0029 Decode time in microsec/query: 1.2341
``` ```
## Sample usage ## Sample usage

View file

@ -48,14 +48,7 @@ Trie benchmark_build(const std::vector<std::string>& keys, bool binary_mode) {
const double time_in_sec = dur_ms.count() / 1000.0; const double time_in_sec = dur_ms.count() / 1000.0;
const double memory_in_bytes = xcdat::memory_in_bytes(trie); const double memory_in_bytes = xcdat::memory_in_bytes(trie);
tfm::printfln("Binary mode: %d", trie.bin_mode());
tfm::printfln("Alphabet size: %d", trie.alphabet_size());
tfm::printfln("Max key length: %d", trie.max_length());
tfm::printfln("Number of keys: %d", trie.num_keys()); tfm::printfln("Number of keys: %d", trie.num_keys());
tfm::printfln("Number of trie nodes: %d", trie.num_nodes());
tfm::printfln("Number of DA units: %d", trie.num_units());
tfm::printfln("Number of free DA units: %d", trie.num_free_units());
tfm::printfln("TAIL length: %d", trie.tail_length());
tfm::printfln("Memory usage in bytes: %d", memory_in_bytes); tfm::printfln("Memory usage in bytes: %d", memory_in_bytes);
tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0)); tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0));
tfm::printfln("Construction time in seconds: %g", time_in_sec); tfm::printfln("Construction time in seconds: %g", time_in_sec);

View file

@ -33,6 +33,7 @@ int build(const cmd_line_parser::parser& p) {
tfm::printfln("Number of keys: %d", trie.num_keys()); tfm::printfln("Number of keys: %d", trie.num_keys());
tfm::printfln("Number of trie nodes: %d", trie.num_nodes()); tfm::printfln("Number of trie nodes: %d", trie.num_nodes());
tfm::printfln("Number of DA units: %d", trie.num_units());
tfm::printfln("Memory usage in bytes: %d", memory_in_bytes); tfm::printfln("Memory usage in bytes: %d", memory_in_bytes);
tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0)); tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0));