add
This commit is contained in:
parent
af9731e6c2
commit
0362cc0453
29
README.md
29
README.md
|
@ -60,6 +60,7 @@ It builds the trie index from a given dataset consisting of keywords separated b
|
|||
$ xcdat_build enwiki-titles.txt idx.bin
|
||||
Number of keys: 15955763
|
||||
Number of trie nodes: 36441058
|
||||
Number of DA units: 36520704
|
||||
Memory usage in bytes: 1.70618e+08
|
||||
Memory usage in MiB: 162.714
|
||||
```
|
||||
|
@ -128,38 +129,24 @@ $ xcdat_enumerate idx.bin | head -3
|
|||
|
||||
### `xcdat_benchmark`
|
||||
|
||||
It measures the performance of Xcdat for a given dataset.
|
||||
It measures the performance of the possible trie types for a given dataset. To perform search operations, it randomly samples `n` queries from the dataset, where `n` is one of the parameters.
|
||||
|
||||
```
|
||||
$ xcdat_benchmark enwiki-titles.txt
|
||||
** xcdat::trie_7_type **
|
||||
Binary mode: 0
|
||||
Alphabet size: 198
|
||||
Max key length: 253
|
||||
Number of keys: 15955763
|
||||
Number of trie nodes: 36441058
|
||||
Number of DA units: 36520704
|
||||
Number of free DA units: 79646
|
||||
TAIL length: 30776290
|
||||
Memory usage in bytes: 1.70618e+08
|
||||
Memory usage in MiB: 162.714
|
||||
Construction time in seconds: 11.828
|
||||
Lookup time in microsec/query: 0.8259
|
||||
Decode time in microsec/query: 1.4545
|
||||
Construction time in seconds: 12.907
|
||||
Lookup time in microsec/query: 0.4674
|
||||
Decode time in microsec/query: 0.8722
|
||||
** xcdat::trie_8_type **
|
||||
Binary mode: 0
|
||||
Alphabet size: 198
|
||||
Max key length: 253
|
||||
Number of keys: 15955763
|
||||
Number of trie nodes: 36441035
|
||||
Number of DA units: 36515840
|
||||
Number of free DA units: 74805
|
||||
TAIL length: 30776290
|
||||
Memory usage in bytes: 1.64104e+08
|
||||
Memory usage in MiB: 156.502
|
||||
Construction time in seconds: 11.966
|
||||
Lookup time in microsec/query: 0.844
|
||||
Decode time in microsec/query: 1.0029
|
||||
Construction time in seconds: 13.442
|
||||
Lookup time in microsec/query: 0.7593
|
||||
Decode time in microsec/query: 1.2341
|
||||
```
|
||||
|
||||
## Sample usage
|
||||
|
|
|
@ -48,14 +48,7 @@ Trie benchmark_build(const std::vector<std::string>& keys, bool binary_mode) {
|
|||
const double time_in_sec = dur_ms.count() / 1000.0;
|
||||
const double memory_in_bytes = xcdat::memory_in_bytes(trie);
|
||||
|
||||
tfm::printfln("Binary mode: %d", trie.bin_mode());
|
||||
tfm::printfln("Alphabet size: %d", trie.alphabet_size());
|
||||
tfm::printfln("Max key length: %d", trie.max_length());
|
||||
tfm::printfln("Number of keys: %d", trie.num_keys());
|
||||
tfm::printfln("Number of trie nodes: %d", trie.num_nodes());
|
||||
tfm::printfln("Number of DA units: %d", trie.num_units());
|
||||
tfm::printfln("Number of free DA units: %d", trie.num_free_units());
|
||||
tfm::printfln("TAIL length: %d", trie.tail_length());
|
||||
tfm::printfln("Memory usage in bytes: %d", memory_in_bytes);
|
||||
tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0));
|
||||
tfm::printfln("Construction time in seconds: %g", time_in_sec);
|
||||
|
|
|
@ -33,6 +33,7 @@ int build(const cmd_line_parser::parser& p) {
|
|||
|
||||
tfm::printfln("Number of keys: %d", trie.num_keys());
|
||||
tfm::printfln("Number of trie nodes: %d", trie.num_nodes());
|
||||
tfm::printfln("Number of DA units: %d", trie.num_units());
|
||||
tfm::printfln("Memory usage in bytes: %d", memory_in_bytes);
|
||||
tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0));
|
||||
|
||||
|
|
Loading…
Reference in a new issue