diff --git a/a.out b/a.out new file mode 100755 index 0000000..7f2b06a Binary files /dev/null and b/a.out differ diff --git a/src/main.cc b/src/main.cc index c5fc0ea..7ca1079 100644 --- a/src/main.cc +++ b/src/main.cc @@ -2,8 +2,12 @@ #include #include #include +#include +#include #include +// as this is in test phase, i recommend testing with the input being "特別協力組合理事" + struct Entry { std::string_view decoded_view; uint64_t id; @@ -12,42 +16,47 @@ struct Entry { int main(int argc, char* argv[]) { if (argc < 2) { - std::cerr << "Usage: " << argv[0] << " [--debug]" << std::endl; + std::cerr << "Usage: " << argv[0] << " [--debug] [--dict ]" << std::endl; return 1; } - const char* filename = "dict.bin"; - std::string search_string = argv[1]; - bool debug_mode = false; // Debug mode flag + std::string search_string; + bool debug_mode = false; + std::string dict_file = "dict.bin"; // Default dictionary file path - // Check if --debug is passed - if (argc == 3 && std::string(argv[2]) == "--debug") { - debug_mode = true; + // Parse command line arguments + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--debug") == 0) { + debug_mode = true; + } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) { + dict_file = argv[i + 1]; + i++; // Skip the next argument as it's the dictionary file path + } else { + search_string = argv[i]; + } } - std::string raw_output; // To store raw output + if (search_string.empty()) { + std::cerr << "Search string not provided." << std::endl; + return 1; + } - // The trie dictionary type from the four types - using trie_type = xcdat::trie_15_type; - - // Load the trie dictionary from the hardcoded file. - const auto trie = xcdat::load(filename); + std::string raw_output; + const auto trie = xcdat::load(dict_file); std::vector substrings; - // Process the input string iteratively while (!search_string.empty()) { std::vector results; - - // Common prefix search auto itr = trie.make_prefix_iterator(search_string); + while (itr.next()) { results.emplace_back(itr.decoded_view(), itr.id()); } - // Find the smallest common prefix size_t min_length = std::numeric_limits::max(); std::string_view smallest_prefix; + for (const auto& entry : results) { if (entry.decoded_view.size() < min_length) { min_length = entry.decoded_view.size(); @@ -55,35 +64,36 @@ int main(int argc, char* argv[]) { } } - // Subtract the smallest common prefix from the input string if (min_length > 0) { std::string substring = search_string.substr(0, search_string.find(smallest_prefix)); if (!substring.empty()) { substrings.push_back(substring); } std::cout << search_string << " - " << smallest_prefix << " = "; + if (debug_mode) { - raw_output += search_string + " - " + smallest_prefix.data() + " = "; // Accumulate raw output + raw_output += search_string + " - " + std::string(smallest_prefix) + " = "; } + size_t pos = search_string.find(smallest_prefix); if (pos != std::string::npos) { search_string.erase(0, pos + smallest_prefix.length()); } + std::cout << search_string << std::endl; + if (debug_mode) { - raw_output += search_string + '\n'; // Add the result to raw output + raw_output += search_string + '\n'; } - // Remove leading whitespace, if any + while (!search_string.empty() && std::isspace(search_string.front())) { search_string.erase(0, 1); } } else { - // If no common prefix found, break the loop break; } } - // Print the stored raw output if debug mode is active if (debug_mode) { std::cout << "Stored raw output:" << std::endl << raw_output << std::endl; }