#include #include #include #include "xcdat.hpp" using namespace xcdat; namespace { constexpr int RUNS = 10; class StopWatch { public: using hrc = std::chrono::high_resolution_clock; StopWatch() : tp_{hrc::now()} {} double sec() const { const auto tp = hrc::now() - tp_; return std::chrono::duration(tp).count(); } double milli_sec() const { const auto tp = hrc::now() - tp_; return std::chrono::duration(tp).count(); } double micro_sec() const { const auto tp = hrc::now() - tp_; return std::chrono::duration(tp).count(); } private: hrc::time_point tp_; }; size_t read_keys(const char* file_name, std::vector& keys) { std::ifstream ifs{file_name}; if (!ifs) { return 0; } size_t size = 0; for (std::string line; std::getline(ifs, line);) { keys.push_back(line); size += line.length() + 1; // with terminator } return size; } std::vector extract_views(const std::vector& keys) { std::vector views(keys.size()); for (size_t i = 0; i < keys.size(); ++i) { views[i] = keys[i]; } return views; }; void show_usage(std::ostream& os) { os << "xcdat build \n"; os << "\t\t1: DACs, 2: FDACs\n"; os << "\t \tInput file name of a set of keys (must be sorted)\n"; os << "\t\tOutput file name of the dictionary (optional)\n"; os << "\t \tIf omitted, .dacs or .fdacs is output\n"; os << "xcdat query \n"; os << "\t \t1: DACs, 2: FDACs\n"; os << "\t \tInput file name of the dictionary\n"; os << "\t\tLimit of #results (optional, default=10)\n"; os << "xcdat bench \n"; os << "\t\t1: DACs, 2: FDACs\n"; os << "\t\tInput file name of the dictionary\n"; os << "\t \tInput file name of keys for benchmark\n"; os.flush(); } template int build(std::vector& args) { if (args.size() != 3 && args.size() != 4) { show_usage(std::cerr); return 1; } std::vector keys_buffer; auto raw_size = read_keys(args[2].c_str(), keys_buffer); if (raw_size == 0) { std::cerr << "open error : " << args[2] << std::endl; return 1; } auto keys = extract_views(keys_buffer); Trie trie; try { StopWatch sw; trie = TrieBuilder::build(keys); std::cout << "constr. time:\t" << sw.sec() << " sec" << std::endl; } catch (const xcdat::TrieBuilder::Exception& ex) { std::cerr << ex.what() << std::endl; return 1; } std::cout << "cmpr. ratio:\t" << static_cast(trie.size_in_bytes()) / raw_size << " over the raw size" << std::endl; std::cout << std::endl; trie.show_stat(std::cout); std::cout << std::endl; std::string out_name; if (args.size() == 4) { out_name = args[3]; } else { out_name = args[2] + (Fast ? ".fdac" : ".dac"); } std::ofstream ofs{out_name}; if (!ofs) { std::cerr << "open error : " << out_name << std::endl; return 1; } trie.write(ofs); std::cout << "output -> " << out_name << std::endl; return 0; } template int query(std::vector& args) { if (args.size() != 3 && args.size() != 4) { show_usage(std::cerr); return 1; } Trie trie; { std::ifstream ifs(args[2]); if (!ifs) { std::cerr << "open error : " << args[2] << std::endl; return 1; } trie = Trie(ifs); } size_t limit = 10; if (args.size() == 4) { limit = std::stoull(args.back()); } std::string query; while (true){ std::cout << "> " << std::flush; std::getline(std::cin, query); if (query.empty()){ break; } std::cout << "Lookup" << std::endl; auto id = trie.lookup(query); if (id == Trie::NOT_FOUND) { std::cout << "not found" << std::endl; } else { std::cout << id << '\t' << query << std::endl; } std::cout << "Common Prefix Lookup" << std::endl; { size_t N = 0; auto it = trie.make_prefix_iterator(query); while (N < limit && it.next()) { std::cout << it.id() << '\t' << it.key() << std::endl; ++N; } size_t M = 0; while (it.next()) { ++M; } if (M != 0) { std::cout << "and more..." << std::endl; } std::cout << N + M << " found" << std::endl; } std::cout << "Predictive Lookup" << std::endl; { size_t N = 0; auto it = trie.make_predictive_iterator(query); while (N < limit && it.next()) { std::cout << it.id() << '\t' << it.key() << std::endl; ++N; } size_t M = 0; while (it.next()) { ++M; } if (M != 0) { std::cout << "and more..." << std::endl; } std::cout << N + M << " found" << std::endl; } } return 0; } template int bench(std::vector& args) { if (args.size() != 4) { show_usage(std::cerr); return 1; } Trie trie; { std::ifstream ifs(args[2]); if (!ifs) { std::cerr << "open error : " << args[2] << std::endl; return 1; } trie = Trie(ifs); } std::vector keys_buffer; if (read_keys(args[3].c_str(), keys_buffer) == 0) { std::cerr << "open error : " << args[3] << std::endl; return 1; } auto keys = extract_views(keys_buffer); std::vector ids(keys.size()); std::cout << "Warm up" << std::endl; for (size_t i = 0; i < keys.size(); ++i) { ids[i] = trie.lookup(keys[i]); if (ids[i] == Trie::NOT_FOUND) { std::cerr << "A non-registered key is included, " << keys_buffer[i] << std::endl; return 1; } } { std::cout << "Lookup benchmark on " << RUNS << " runs" << std::endl; StopWatch sw; for (uint32_t r = 0; r < RUNS; ++r) { for (size_t i = 0; i < keys.size(); ++i) { if (trie.lookup(keys[i]) != ids[i]) { std::cerr << "Critical lookup error ʅ( ՞ਊ՞)ʃ" << std::endl; return 1; } } } std::cout << sw.micro_sec() / RUNS / keys.size() << " us per str" << std::endl; } { std::cout << "Access benchmark on " << RUNS << " runs" << std::endl; StopWatch sw; for (uint32_t r = 0; r < RUNS; ++r) { for (auto id : ids) { auto dec = trie.access(id); if (dec.empty()) { std::cerr << "Critical access error ʅ( ՞ਊ՞)ʃ" << std::endl; return 1; } } } std::cout << sw.micro_sec() / RUNS / ids.size() << " us per ID" << std::endl; } return 0; } } // namespace int main(int argc, const char* argv[]) { if (argc < 3) { show_usage(std::cerr); return 1; } std::vector args; for (int i = 1; i < argc; ++i) { args.emplace_back(std::string{argv[i]}); } bool is_fast; if (args[1][0] == '1') { is_fast = false; } else if (args[1][0] == '2') { is_fast = true; } else { show_usage(std::cerr); return 1; } if (args[0] == "build") { return is_fast ? build(args) : build(args); } else if (args[0] == "query") { return is_fast ? query(args) : query(args); } else if (args[0] == "bench") { return is_fast ? bench(args) : bench(args); } show_usage(std::cerr); return 1; }