xcdat/tools/xcdat_build.cpp

67 lines
1.9 KiB
C++
Raw Normal View History

2021-06-27 17:15:09 +00:00
#include <xcdat.hpp>
#include "cmd_line_parser/parser.hpp"
#include "tinyformat/tinyformat.h"
/// Creates the command-line parser for this tool and registers its arguments.
///
/// Two arguments are registered with only a name and a description
/// (`input_keys`, `output_dic`); two more are registered with a shorthand
/// flag (`-t` for `trie_type`, `-b` for `binary_mode`).
///
/// @param argc  Argument count as received by `main`.
/// @param argv  Argument vector as received by `main`.
/// @return The configured parser; the caller is responsible for calling `parse()`.
cmd_line_parser::parser make_parser(int argc, char** argv) {
    cmd_line_parser::parser parser(argc, argv);

    // File paths: where to read keywords from and where to write the dictionary.
    parser.add("input_keys", "Input filepath of keywords");
    parser.add("output_dic", "Output filepath of trie dictionary");

    // Flag-style options.
    // NOTE(review): the trailing `false` presumably marks the option as not
    // required -- confirm against the cmd_line_parser API.
    parser.add("trie_type", "Trie type: [7|8] (default=7)", "-t", false);
    parser.add("binary_mode", "Is binary mode? (default=0)", "-b", false);

    return parser;
}
/// Builds a trie dictionary of type `Trie` from a keyword file and saves it.
///
/// Loads the keywords from the `input_keys` path, sorts and de-duplicates
/// them, constructs the trie, prints summary statistics, and writes the
/// result to the `output_dic` path.
///
/// @tparam Trie  Trie type to build; it is constructed as `Trie(keys, binary_mode)`.
/// @param p      Parsed command line supplying `input_keys`, `output_dic`, and
///               the optional `binary_mode` (defaults to `false`).
/// @return 0 on success, 1 if no keywords were loaded from the input file.
template <class Trie>
int build(const cmd_line_parser::parser& p) {
    const auto input_keys = p.get<std::string>("input_keys");
    const auto output_dic = p.get<std::string>("output_dic");
    const auto binary_mode = p.get<bool>("binary_mode", false);

    auto keys = xcdat::load_strings(input_keys);
    if (keys.empty()) {
        tfm::errorfln("Error: The input dataset is empty.");
        // Stop here: without this return an empty dictionary would be built
        // and saved, and the tool would report success.
        return 1;
    }

    // The trie is built from a sorted key set without duplicates.
    std::sort(keys.begin(), keys.end());
    keys.erase(std::unique(keys.begin(), keys.end()), keys.end());

    const Trie trie(keys, binary_mode);
    // Kept as double so the MiB figure below is a floating-point division.
    const double memory_in_bytes = xcdat::memory_in_bytes(trie);

    tfm::printfln("Number of keys: %d", trie.num_keys());
    tfm::printfln("Number of trie nodes: %d", trie.num_nodes());
    tfm::printfln("Number of DA units: %d", trie.num_units());
    // Print the byte count as an integer: streaming the double itself can
    // render large values rounded and in scientific notation.
    tfm::printfln("Memory usage in bytes: %d", static_cast<unsigned long long>(memory_in_bytes));
    tfm::printfln("Memory usage in MiB: %g", memory_in_bytes / (1024.0 * 1024.0));

    xcdat::save(trie, output_dic);

    return 0;
}
int main(int argc, char** argv) {
#ifndef NDEBUG
tfm::warnfln("The code is running in debug mode.");
#endif
2021-06-27 17:54:58 +00:00
std::ios::sync_with_stdio(false);
2021-06-27 17:15:09 +00:00
auto p = make_parser(argc, argv);
if (!p.parse()) {
return 1;
}
const auto trie_type = p.get<int>("trie_type", 7);
switch (trie_type) {
case 7:
return build<xcdat::trie_7_type>(p);
2021-06-29 01:45:54 +00:00
case 8:
return build<xcdat::trie_8_type>(p);
2021-06-27 17:15:09 +00:00
default:
break;
}
p.help();
return 1;
}