add option to select the dictionary path
This commit is contained in:
parent
8b791f04bc
commit
6d43fe4a24
54
src/main.cc
54
src/main.cc
|
@ -2,8 +2,12 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <cstring>
|
||||||
|
#include <cctype>
|
||||||
#include <xcdat.hpp>
|
#include <xcdat.hpp>
|
||||||
|
|
||||||
|
// As this is still in the test phase, I recommend testing with the input "特別協力組合理事".
|
||||||
|
|
||||||
struct Entry {
|
struct Entry {
|
||||||
std::string_view decoded_view;
|
std::string_view decoded_view;
|
||||||
uint64_t id;
|
uint64_t id;
|
||||||
|
@ -12,42 +16,47 @@ struct Entry {
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
std::cerr << "Usage: " << argv[0] << " <search_string> [--debug]" << std::endl;
|
std::cerr << "Usage: " << argv[0] << " <search_string> [--debug] [--dict <path_to_dictionary>]" << std::endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* filename = "dict.bin";
|
std::string search_string;
|
||||||
std::string search_string = argv[1];
|
bool debug_mode = false;
|
||||||
bool debug_mode = false; // Debug mode flag
|
std::string dict_file = "dict.bin"; // Default dictionary file path
|
||||||
|
|
||||||
// Check if --debug is passed
|
// Parse command line arguments
|
||||||
if (argc == 3 && std::string(argv[2]) == "--debug") {
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
if (std::strcmp(argv[i], "--debug") == 0) {
|
||||||
debug_mode = true;
|
debug_mode = true;
|
||||||
|
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
||||||
|
dict_file = argv[i + 1];
|
||||||
|
i++; // Skip the next argument as it's the dictionary file path
|
||||||
|
} else {
|
||||||
|
search_string = argv[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string raw_output; // To store raw output
|
if (search_string.empty()) {
|
||||||
|
std::cerr << "Search string not provided." << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
// The trie dictionary type from the four types
|
std::string raw_output;
|
||||||
using trie_type = xcdat::trie_15_type;
|
const auto trie = xcdat::load<xcdat::trie_15_type>(dict_file);
|
||||||
|
|
||||||
// Load the trie dictionary from the hardcoded file.
|
|
||||||
const auto trie = xcdat::load<trie_type>(filename);
|
|
||||||
|
|
||||||
std::vector<std::string> substrings;
|
std::vector<std::string> substrings;
|
||||||
|
|
||||||
// Process the input string iteratively
|
|
||||||
while (!search_string.empty()) {
|
while (!search_string.empty()) {
|
||||||
std::vector<Entry> results;
|
std::vector<Entry> results;
|
||||||
|
|
||||||
// Common prefix search
|
|
||||||
auto itr = trie.make_prefix_iterator(search_string);
|
auto itr = trie.make_prefix_iterator(search_string);
|
||||||
|
|
||||||
while (itr.next()) {
|
while (itr.next()) {
|
||||||
results.emplace_back(itr.decoded_view(), itr.id());
|
results.emplace_back(itr.decoded_view(), itr.id());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find the smallest common prefix
|
|
||||||
size_t min_length = std::numeric_limits<size_t>::max();
|
size_t min_length = std::numeric_limits<size_t>::max();
|
||||||
std::string_view smallest_prefix;
|
std::string_view smallest_prefix;
|
||||||
|
|
||||||
for (const auto& entry : results) {
|
for (const auto& entry : results) {
|
||||||
if (entry.decoded_view.size() < min_length) {
|
if (entry.decoded_view.size() < min_length) {
|
||||||
min_length = entry.decoded_view.size();
|
min_length = entry.decoded_view.size();
|
||||||
|
@ -55,35 +64,36 @@ int main(int argc, char* argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Subtract the smallest common prefix from the input string
|
|
||||||
if (min_length > 0) {
|
if (min_length > 0) {
|
||||||
std::string substring = search_string.substr(0, search_string.find(smallest_prefix));
|
std::string substring = search_string.substr(0, search_string.find(smallest_prefix));
|
||||||
if (!substring.empty()) {
|
if (!substring.empty()) {
|
||||||
substrings.push_back(substring);
|
substrings.push_back(substring);
|
||||||
}
|
}
|
||||||
std::cout << search_string << " - " << smallest_prefix << " = ";
|
std::cout << search_string << " - " << smallest_prefix << " = ";
|
||||||
|
|
||||||
if (debug_mode) {
|
if (debug_mode) {
|
||||||
raw_output += search_string + " - " + smallest_prefix.data() + " = "; // Accumulate raw output
|
raw_output += search_string + " - " + std::string(smallest_prefix) + " = ";
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t pos = search_string.find(smallest_prefix);
|
size_t pos = search_string.find(smallest_prefix);
|
||||||
if (pos != std::string::npos) {
|
if (pos != std::string::npos) {
|
||||||
search_string.erase(0, pos + smallest_prefix.length());
|
search_string.erase(0, pos + smallest_prefix.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << search_string << std::endl;
|
std::cout << search_string << std::endl;
|
||||||
|
|
||||||
if (debug_mode) {
|
if (debug_mode) {
|
||||||
raw_output += search_string + '\n'; // Add the result to raw output
|
raw_output += search_string + '\n';
|
||||||
}
|
}
|
||||||
// Remove leading whitespace, if any
|
|
||||||
while (!search_string.empty() && std::isspace(search_string.front())) {
|
while (!search_string.empty() && std::isspace(search_string.front())) {
|
||||||
search_string.erase(0, 1);
|
search_string.erase(0, 1);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If no common prefix found, break the loop
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print the stored raw output if debug mode is active
|
|
||||||
if (debug_mode) {
|
if (debug_mode) {
|
||||||
std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
|
std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue