hakurei is now usable!
This commit is contained in:
parent
50c02a1324
commit
f7bddc62bf
80
src/main.cc
80
src/main.cc
|
@ -6,14 +6,34 @@
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <xcdat.hpp>
|
#include <xcdat.hpp>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// One dictionary match: the matched key text together with its trie id.
//
// `decoded_view` is a non-owning std::string_view, so the bytes it refers to
// must stay alive (and unmodified) for as long as the Entry is read.
struct Entry {
    std::string_view decoded_view;  // matched key text (not owned)
    uint64_t id;                    // id reported for the matched key

    // Stores both values as given; nothing is copied or validated.
    Entry(std::string_view decoded_view, uint64_t id)
        : decoded_view{decoded_view},
          id{id} {}
};
|
||||||
|
|
||||||
|
// Returns a copy of `str` with its first UTF-8 encoded character removed.
//
// The first byte decides how many bytes are dropped:
//   0xxxxxxx -> 1 byte, 110xxxxx -> 2 bytes, 1110xxxx -> 3 bytes,
//   11110xxx -> 4 bytes.
// Malformed input is handled without throwing and without discarding valid
// text that follows it:
//   * a truncated sequence (lead byte near the end of the string) drops only
//     the bytes that are actually present, instead of calling substr() past
//     the end, which would throw std::out_of_range;
//   * only bytes of the form 10xxxxxx are consumed as continuation bytes, so
//     a lead byte followed by ordinary text does not swallow that text;
//   * a stray continuation byte or an invalid lead byte (0xF8..0xFF) at the
//     front is dropped as a single byte.
// An empty input yields an empty result. For any non-empty input the result
// is strictly shorter, so a caller that loops until the string is empty
// always terminates.
std::string remove_one_utf8_char(const std::string& str) {
    if (str.empty()) {
        return str;
    }

    // Go through unsigned char: plain char may be signed, which would break
    // the shift-based classification for bytes >= 0x80.
    const unsigned char lead = static_cast<unsigned char>(str.front());

    // Sequence length announced by the lead byte; anything unrecognised
    // (ASCII, stray continuation byte, invalid lead) counts as one byte.
    std::string::size_type expected = 1;
    if ((lead >> 5) == 0x6) {
        expected = 2;
    } else if ((lead >> 4) == 0xe) {
        expected = 3;
    } else if ((lead >> 3) == 0x1e) {
        expected = 4;
    }

    // Consume the lead byte, then at most `expected - 1` genuine continuation
    // bytes, never reading past the end of the string.
    std::string::size_type consumed = 1;
    while (consumed < expected && consumed < str.size() &&
           (static_cast<unsigned char>(str[consumed]) & 0xC0) == 0x80) {
        ++consumed;
    }

    return str.substr(consumed);
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
std::cerr << "Usage: " << argv[0] << " <search_string> [--debug] [--dict <path_to_dictionary>]" << std::endl;
|
std::cerr << "Usage: " << argv[0] << " <search_string> [--debug] [--dict <path_to_dictionary>]" << std::endl;
|
||||||
|
@ -46,6 +66,7 @@ int main(int argc, char* argv[]) {
|
||||||
|
|
||||||
std::vector<std::string> substrings;
|
std::vector<std::string> substrings;
|
||||||
std::vector<Entry> results;
|
std::vector<Entry> results;
|
||||||
|
|
||||||
while (!search_string.empty()) {
|
while (!search_string.empty()) {
|
||||||
auto itr = trie.make_prefix_iterator(search_string);
|
auto itr = trie.make_prefix_iterator(search_string);
|
||||||
|
|
||||||
|
@ -53,54 +74,41 @@ int main(int argc, char* argv[]) {
|
||||||
results.emplace_back(itr.decoded_view(), itr.id());
|
results.emplace_back(itr.decoded_view(), itr.id());
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t min_length = std::numeric_limits<size_t>::max();
|
// Add all substrings to the results
|
||||||
std::string_view smallest_prefix;
|
if (!results.empty()) {
|
||||||
|
for (const auto& entry : results) {
|
||||||
for (const auto& entry : results) {
|
substrings.push_back(std::string(entry.decoded_view));
|
||||||
if (entry.decoded_view.size() < min_length) {
|
|
||||||
min_length = entry.decoded_view.size();
|
|
||||||
smallest_prefix = entry.decoded_view;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (min_length > 0) {
|
// Print debug information if in debug mode
|
||||||
std::string substring = search_string.substr(0, search_string.find(smallest_prefix));
|
if (debug_mode) {
|
||||||
if (!substring.empty()) {
|
std::cout << "Original search string: " << search_string << std::endl;
|
||||||
substrings.push_back(substring);
|
}
|
||||||
}
|
|
||||||
std::cout << search_string << " - " << smallest_prefix << " = ";
|
|
||||||
|
|
||||||
if (debug_mode) {
|
// Remove one UTF-8 character from the search string
|
||||||
raw_output += search_string + " - " + std::string(smallest_prefix) + " = ";
|
search_string = remove_one_utf8_char(search_string);
|
||||||
}
|
|
||||||
|
|
||||||
size_t pos = search_string.find(smallest_prefix);
|
// Print debug information if in debug mode
|
||||||
if (pos != std::string::npos) {
|
if (debug_mode) {
|
||||||
search_string.erase(0, pos + smallest_prefix.length());
|
std::cout << "After removing one character: " << search_string << std::endl;
|
||||||
}
|
raw_output += search_string + '\n';
|
||||||
else break;
|
}
|
||||||
|
|
||||||
std::cout << search_string << std::endl;
|
results.clear();
|
||||||
|
|
||||||
if (smallest_prefix.length() == 0)
|
// Remove leading whitespace
|
||||||
break;
|
while (!search_string.empty() && std::isspace(search_string.front())) {
|
||||||
|
search_string.erase(0, 1);
|
||||||
if (debug_mode) {
|
|
||||||
raw_output += search_string + '\n';
|
|
||||||
}
|
|
||||||
|
|
||||||
while (!search_string.empty() && std::isspace(search_string.front())) {
|
|
||||||
search_string.erase(0, 1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print debug information if in debug mode
|
||||||
if (debug_mode) {
|
if (debug_mode) {
|
||||||
std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
|
std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print the substrings
|
||||||
for (const auto& sub : substrings) {
|
for (const auto& sub : substrings) {
|
||||||
std::cout << sub << std::endl;
|
std::cout << sub << std::endl;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue