add option to select the dictionary path

2024-05-11 16:35:34 -04:00 · 2024-05-11 16:35:34 -04:00 · 6d43fe4a24
parent 8b791f04bc
commit 6d43fe4a24
2 changed files with 33 additions and 23 deletions
--- a/a.out
+++ b/a.out
--- a/src/main.cc
+++ b/src/main.cc
@ -2,8 +2,12 @@
 #include <vector>
 #include <algorithm>
 #include <limits>
+#include <cstring>
+#include <cctype>
 #include <xcdat.hpp>

+// NOTE: this is still in the test phase; a recommended test input is "特別協力組合理事".
+
 struct Entry {
    std::string_view decoded_view;
    uint64_t id;
@ -12,42 +16,47 @@ struct Entry {

 int main(int argc, char* argv[]) {
    if (argc < 2) {
-        std::cerr << "Usage: " << argv[0] << " <search_string> [--debug]" << std::endl;
+        std::cerr << "Usage: " << argv[0] << " <search_string> [--debug] [--dict <path_to_dictionary>]" << std::endl;
        return 1;
    }

-    const char* filename = "dict.bin";
-    std::string search_string = argv[1];
-    bool debug_mode = false; // Debug mode flag
+    std::string search_string;
+    bool debug_mode = false;
+    std::string dict_file = "dict.bin"; // Default dictionary file path

-    // Check if --debug is passed
-    if (argc == 3 && std::string(argv[2]) == "--debug") {
-        debug_mode = true;
+    // Parse command line arguments
+    for (int i = 1; i < argc; ++i) {
+        if (std::strcmp(argv[i], "--debug") == 0) {
+            debug_mode = true;
+        } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
+            dict_file = argv[i + 1];
+            i++; // Skip the next argument as it's the dictionary file path
+        } else {
+            search_string = argv[i];
+        }
    }

-    std::string raw_output; // To store raw output
+    if (search_string.empty()) {
+        std::cerr << "Search string not provided." << std::endl;
+        return 1;
+    }

-    // The trie dictionary type from the four types
-    using trie_type = xcdat::trie_15_type;
-
-    // Load the trie dictionary from the hardcoded file.
-    const auto trie = xcdat::load<trie_type>(filename);
+    std::string raw_output;
+    const auto trie = xcdat::load<xcdat::trie_15_type>(dict_file);

    std::vector<std::string> substrings;

-    // Process the input string iteratively
    while (!search_string.empty()) {
        std::vector<Entry> results;
-
-        // Common prefix search
        auto itr = trie.make_prefix_iterator(search_string);
+
        while (itr.next()) {
            results.emplace_back(itr.decoded_view(), itr.id());
        }

-        // Find the smallest common prefix
        size_t min_length = std::numeric_limits<size_t>::max();
        std::string_view smallest_prefix;
+
        for (const auto& entry : results) {
            if (entry.decoded_view.size() < min_length) {
                min_length = entry.decoded_view.size();
@ -55,35 +64,36 @@ int main(int argc, char* argv[]) {
            }
        }

-        // Subtract the smallest common prefix from the input string
        if (min_length > 0) {
            std::string substring = search_string.substr(0, search_string.find(smallest_prefix));
            if (!substring.empty()) {
                substrings.push_back(substring);
            }
            std::cout << search_string << " - " << smallest_prefix << " = ";
+
            if (debug_mode) {
-                raw_output += search_string + " - " + smallest_prefix.data() + " = "; // Accumulate raw output
+                raw_output += search_string + " - " + std::string(smallest_prefix) + " = ";
            }
+
            size_t pos = search_string.find(smallest_prefix);
            if (pos != std::string::npos) {
                search_string.erase(0, pos + smallest_prefix.length());
            }
+
            std::cout << search_string << std::endl;
+
            if (debug_mode) {
-                raw_output += search_string + '\n'; // Add the result to raw output
+                raw_output += search_string + '\n';
            }
-            // Remove leading whitespace, if any
+
            while (!search_string.empty() && std::isspace(search_string.front())) {
                search_string.erase(0, 1);
            }
        } else {
-            // If no common prefix found, break the loop
            break;
        }
    }

-    // Print the stored raw output if debug mode is active
    if (debug_mode) {
        std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
    }