diff --git a/a.out b/a.out new file mode 100755 index 0000000..207aa1f Binary files /dev/null and b/a.out differ diff --git a/src/main.cc b/src/main.cc index febd5e0..21209c3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -6,7 +6,7 @@ #include #include -// as this is in test phase, i recommend testing with the input being "特別協力組合理事" + struct Entry { std::string_view decoded_view; diff --git a/src/main.cc~ b/src/main.cc~ new file mode 100644 index 0000000..febd5e0 --- /dev/null +++ b/src/main.cc~ @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include + +// as this is in test phase, i recommend testing with the input being "特別協力組合理事" + +struct Entry { + std::string_view decoded_view; + uint64_t id; + Entry(std::string_view decoded_view, uint64_t id) : decoded_view(decoded_view), id(id) {} +}; + +int main(int argc, char* argv[]) { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [--debug] [--dict ]" << std::endl; + return 1; + } + + std::string search_string; + bool debug_mode = false; + std::string dict_file = "dict.bin"; // Default dictionary file path + + // Parse command line arguments + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--debug") == 0) { + debug_mode = true; + } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) { + dict_file = argv[i + 1]; + i++; // Skip the next argument as it's the dictionary file path + } else { + search_string = argv[i]; + } + } + + if (search_string.empty()) { + std::cerr << "Search string not provided." << std::endl; + return 1; + } + + std::string raw_output; + const auto trie = xcdat::load(dict_file); + + std::vector substrings; + std::vector results; + while (!search_string.empty()) { + auto itr = trie.make_prefix_iterator(search_string); + + while (itr.next()) { + results.emplace_back(itr.decoded_view(), itr.id()); + } + + size_t min_length = std::numeric_limits::max(); + std::string_view smallest_prefix; + + for (const auto& entry : results) { + if (entry.decoded_view.size() < min_length) { + min_length = entry.decoded_view.size(); + smallest_prefix = entry.decoded_view; + } + } + + if (min_length > 0) { + std::string substring = search_string.substr(0, search_string.find(smallest_prefix)); + if (!substring.empty()) { + substrings.push_back(substring); + } + std::cout << search_string << " - " << smallest_prefix << " = "; + + if (debug_mode) { + raw_output += search_string + " - " + std::string(smallest_prefix) + " = "; + } + + size_t pos = search_string.find(smallest_prefix); + if (pos != std::string::npos) { + search_string.erase(0, pos + smallest_prefix.length()); + } + else break; + + std::cout << search_string << std::endl; + + if (smallest_prefix.length() == 0) + break; + + if (debug_mode) { + raw_output += search_string + '\n'; + } + + while (!search_string.empty() && std::isspace(search_string.front())) { + search_string.erase(0, 1); + } + } else { + break; + } + } + + if (debug_mode) { + std::cout << "Stored raw output:" << std::endl << raw_output << std::endl; + } + + for (const auto& sub : substrings) { + std::cout << sub << std::endl; + } + + return 0; +}