xcdat/sample/sample.cpp

97 lines
3.1 KiB
C++
Raw Normal View History

2021-06-26 16:40:11 +00:00
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include <xcdat.hpp>
int main() {
2021-07-02 12:50:10 +00:00
// Dataset of keywords
2021-06-26 16:40:11 +00:00
std::vector<std::string> keys = {
2021-06-27 03:57:34 +00:00
"AirPods", "AirTag", "Mac", "MacBook", "MacBook_Air", "MacBook_Pro",
"Mac_Mini", "Mac_Pro", "iMac", "iPad", "iPhone", "iPhone_SE",
2021-06-26 16:40:11 +00:00
};
2021-07-08 13:47:59 +00:00
// The input keys must be sorted and unique (already satisfied in this case).
2021-06-26 16:40:11 +00:00
std::sort(keys.begin(), keys.end());
keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
2021-07-08 13:47:59 +00:00
// The trie dictionary type from the four types
using trie_type = xcdat::trie_8_type;
2021-07-03 00:57:23 +00:00
// using trie_type = xcdat::trie_16_type;
2021-07-08 13:47:59 +00:00
// using trie_type = xcdat::trie_7_type;
// using trie_type = xcdat::trie_15_type;
2021-06-27 17:15:09 +00:00
2021-07-02 12:50:10 +00:00
// The dictionary filename
const char* tmp_filename = "dic.bin";
// Build and save the trie dictionary.
2021-07-03 00:57:23 +00:00
try {
2021-06-29 00:06:40 +00:00
const trie_type trie(keys);
2021-07-02 12:50:10 +00:00
xcdat::save(trie, tmp_filename);
2021-07-03 00:57:23 +00:00
} catch (const xcdat::exception& ex) {
std::cerr << ex.what() << std::endl;
return 1;
2021-06-27 17:15:09 +00:00
}
2021-07-03 00:46:04 +00:00
// Load the trie dictionary on memory.
const auto trie = xcdat::load<trie_type>(tmp_filename);
2021-06-26 16:40:11 +00:00
2021-07-08 13:47:59 +00:00
// Or, you can set the continuous memory block via a memory-mapped file.
// const auto trie = xcdat::mmap<trie_type>(mapped_data);
2021-06-29 01:36:00 +00:00
// Basic statistics
2021-07-02 12:50:10 +00:00
std::cout << "Number of keys: " << trie.num_keys() << std::endl;
std::cout << "Number of trie nodes: " << trie.num_nodes() << std::endl;
std::cout << "Number of DA units: " << trie.num_units() << std::endl;
std::cout << "Memory usage in bytes: " << xcdat::memory_in_bytes(trie) << std::endl;
2021-06-29 01:36:00 +00:00
2021-07-02 12:50:10 +00:00
// Lookup the ID for a query key.
2021-06-26 16:40:11 +00:00
{
2021-06-29 01:27:02 +00:00
const auto id = trie.lookup("Mac_Pro");
2021-06-29 01:36:00 +00:00
std::cout << "Lookup(Mac_Pro) = " << id.value_or(UINT64_MAX) << std::endl;
2021-06-29 01:27:02 +00:00
}
{
const auto id = trie.lookup("Google_Pixel");
2021-06-29 01:36:00 +00:00
std::cout << "Lookup(Google_Pixel) = " << id.value_or(UINT64_MAX) << std::endl;
2021-06-29 01:27:02 +00:00
}
2021-07-02 12:50:10 +00:00
// Decode the key for a query ID.
2021-06-29 01:27:02 +00:00
{
const auto dec = trie.decode(4);
2021-06-29 01:36:00 +00:00
std::cout << "Decode(4) = " << dec << std::endl;
2021-06-26 16:40:11 +00:00
}
2021-06-29 01:27:02 +00:00
// Common prefix search
2021-06-26 16:40:11 +00:00
{
2021-06-29 01:36:00 +00:00
std::cout << "CommonPrefixSearch(MacBook_Air) = {" << std::endl;
2021-06-26 16:40:11 +00:00
auto itr = trie.make_prefix_iterator("MacBook_Air");
while (itr.next()) {
2021-06-29 01:27:02 +00:00
std::cout << " (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
2021-06-26 16:40:11 +00:00
}
2021-06-29 01:27:02 +00:00
std::cout << "}" << std::endl;
2021-06-26 16:40:11 +00:00
}
2021-06-29 01:27:02 +00:00
// Predictive search
2021-06-26 16:40:11 +00:00
{
2021-06-29 01:36:00 +00:00
std::cout << "PredictiveSearch(Mac) = {" << std::endl;
2021-06-26 16:40:11 +00:00
auto itr = trie.make_predictive_iterator("Mac");
while (itr.next()) {
2021-06-29 01:27:02 +00:00
std::cout << " (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
}
std::cout << "}" << std::endl;
}
2021-07-08 13:47:59 +00:00
// Enumerate all the keys (in lexicographical order).
2021-06-29 01:27:02 +00:00
{
2021-06-29 01:36:00 +00:00
std::cout << "Enumerate() = {" << std::endl;
2021-06-29 01:27:02 +00:00
auto itr = trie.make_enumerative_iterator();
while (itr.next()) {
std::cout << " (" << itr.decoded_view() << ", " << itr.id() << ")," << std::endl;
2021-06-26 16:40:11 +00:00
}
2021-06-29 01:27:02 +00:00
std::cout << "}" << std::endl;
2021-06-26 16:40:11 +00:00
}
2021-07-02 12:50:10 +00:00
std::remove(tmp_filename);
2021-06-26 16:40:11 +00:00
return 0;
}