start

2024-05-10 21:39:29 -04:00 · 2024-05-10 21:39:29 -04:00 · 6fab84f077
parent dc558c1b10
commit 6fab84f077
3 changed files with 1284762 additions and 0 deletions
--- a/res/dict.bin
+++ b/res/dict.bin
--- a/res/word_list.txt
+++ b/res/word_list.txt
--- a/src/main.cc
+++ b/src/main.cc
@ -0,0 +1,78 @@
+#include <algorithm>
+#include <cctype>
+#include <cstdint>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include <xcdat.hpp>
+
+/// One hit produced by a trie prefix search: the matched key text and its id.
+///
+/// `decoded_view` does not own its characters, so an Entry is only usable
+/// while the storage behind that view is still alive and unmodified.
+struct Entry {
+    std::string_view decoded_view;  ///< Text of the matched key (non-owning).
+    uint64_t id;                    ///< Id paired with the matched key.
+
+    /// Stores the given key view and id as-is; no characters are copied.
+    Entry(std::string_view key_view, uint64_t key_id)
+        : decoded_view{key_view}, id{key_id} {}
+};
+
+/// Repeatedly strips the shortest matching dictionary key off the front of
+/// the search string given on the command line.
+///
+/// Each round runs a common-prefix search of the remaining text against the
+/// trie loaded from the hard-coded file "dict.bin", removes the shortest key
+/// that was reported, prints "<before> - <key> = <after>", and then drops any
+/// leading whitespace. The loop stops once the text is used up or no
+/// non-empty key matches.
+///
+/// @param argc Must be 2 (program name plus the search string).
+/// @param argv argv[1] is the string to process.
+/// @return 0 on success, 1 when the argument count is wrong.
+int main(int argc, char* argv[]) {
+    if (argc != 2) {
+        std::cerr << "Usage: " << argv[0] << " <search_string>" << '\n';
+        return 1;
+    }
+
+    const char* filename = "dict.bin";
+    std::string search_string = argv[1];
+
+    // The trie dictionary type from the four types
+    using trie_type = xcdat::trie_15_type;
+
+    // Load the trie dictionary from the hardcoded file.
+    const auto trie = xcdat::load<trie_type>(filename);
+
+    // Text found in front of a match.
+    // NOTE(review): presumably every key reported by the prefix iterator is a
+    // prefix of search_string, in which case the match offset is always 0 and
+    // nothing is ever collected here. Confirm whether the matched keys were
+    // meant to be stored instead.
+    std::vector<std::string> substrings;
+
+    // Process the input string iteratively
+    while (!search_string.empty()) {
+        // Common prefix search: gather every key reported for the remaining text.
+        std::vector<Entry> results;
+        auto itr = trie.make_prefix_iterator(search_string);
+        while (itr.next()) {
+            results.emplace_back(itr.decoded_view(), itr.id());
+        }
+
+        // No key matched. Without this guard nothing would be erased below
+        // and the loop would never terminate.
+        if (results.empty()) {
+            break;
+        }
+
+        // Pick the shortest key; on ties the first one reported wins.
+        const auto shortest = std::min_element(
+            results.begin(), results.end(),
+            [](const Entry& lhs, const Entry& rhs) {
+                return lhs.decoded_view.size() < rhs.decoded_view.size();
+            });
+        const std::string_view smallest_prefix = shortest->decoded_view;
+
+        // A zero-length key would remove nothing, so treat it as "no match".
+        if (smallest_prefix.empty()) {
+            break;
+        }
+
+        // Locate the key once. If it is somehow absent there is nothing to
+        // subtract, and continuing could not make progress.
+        const std::string::size_type pos = search_string.find(smallest_prefix);
+        if (pos == std::string::npos) {
+            break;
+        }
+        if (pos > 0) {
+            substrings.push_back(search_string.substr(0, pos));
+        }
+
+        // Subtract the smallest common prefix from the input string.
+        // smallest_prefix may alias search_string's buffer (TODO confirm
+        // against xcdat), so it is printed and measured before the erase.
+        const std::string::size_type consumed = pos + smallest_prefix.length();
+        std::cout << search_string << " - " << smallest_prefix << " = ";
+        search_string.erase(0, consumed);
+        std::cout << search_string << '\n';
+
+        // Remove leading whitespace, if any. The lambda takes unsigned char
+        // because std::isspace is undefined for negative char values.
+        const auto first_kept = std::find_if_not(
+            search_string.begin(), search_string.end(),
+            [](unsigned char ch) { return std::isspace(ch) != 0; });
+        search_string.erase(search_string.begin(), first_kept);
+    }
+
+    for (const auto& sub : substrings) {
+        std::cout << sub << '\n';
+    }
+
+    return 0;
+}