diff --git a/hakurei b/hakurei new file mode 100755 index 0000000..f3df313 Binary files /dev/null and b/hakurei differ diff --git a/src/error.patch b/src/error.patch new file mode 100644 index 0000000..15b458e --- /dev/null +++ b/src/error.patch @@ -0,0 +1,332 @@ +--- hakurei.cpp 2024-05-19 09:00:00.000000000 +0000 ++++ hakurei.cpp 2024-05-19 09:00:00.000000000 +0000 +@@ -2,6 +2,7 @@ + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + struct Entry { + const std::string_view decoded_view; + const uint64_t id; + inline Entry(const std::string_view decoded_view, const uint64_t id) : decoded_view(decoded_view), id(id) {} + }; + + inline std::pair remove_one_utf8_char(const std::string& str) { + if (str.empty()) { + return {"", str}; + } + const size_t len = str.size(); + size_t i = 0; + while (i < len) { + const unsigned char c = str[i]; + if (c < 0x80) { + return {str.substr(i, 1), str.substr(i + 1)}; + } else if ((c >> 5) == 0x6) { + return {str.substr(i, 2), str.substr(i + 2)}; + } else if ((c >> 4) == 0xe) { + return {str.substr(i, 3), str.substr(i + 3)}; + } else if ((c >> 3) == 0x1e) { + return {str.substr(i, 4), str.substr(i + 4)}; + } + i++; + } + return {"", ""}; + } + + inline std::string get_input(const int argc, char* const argv[], const bool goldendict_mode) { + std::string search_string; + if (!isatty(fileno(stdin))) { + std::getline(std::cin, search_string); + } else { + if (goldendict_mode) { + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) { + search_string = argv[i + 1]; + break; + } + } + } else { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + exit(1); + } + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--debug") == 0) { + // Handle debug mode + } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) { + i++; + } else { + search_string = argv[i]; + break; + } + } + } + } + if (search_string.empty()) { + std::cerr << "Search string not provided." << std::endl; + exit(1); + } + return search_string; + } + + inline std::filesystem::path find_dic_file() { + static const std::vector locations = { + std::filesystem::path("/usr/share/hakurei/"), + std::filesystem::path(std::getenv("HOME")) / ".local/share/hakurei/", + std::filesystem::current_path() + }; + for (const auto& location : locations) { + const auto dict_path = location / "dict.bin"; + if (std::filesystem::exists(dict_path) && std::filesystem::is_regular_file(dict_path)) { + return dict_path; + } + } + throw std::runtime_error("Couldn't find the word list."); + } + + inline void print_debug_info(const std::vector& substrings, const std::string& raw_output) { + std::cout << "Stored raw output:" << std::endl; + for (const auto& str : substrings) { + std::cout << str << std::endl; + } + std::cout << "Raw output:" << std::endl; + std::cout << raw_output << std::endl; + } + + void log_execution(const int argc, char* argv[], const std::string& search_string, const std::string& dict_file, bool debug_mode, bool goldendict_mode) { + std::ofstream log_file; + log_file.open("/tmp/hakurei.log", std::ios_base::app); + if (!log_file) { + std::cerr << "Failed to open log file." << std::endl; + return; + } + + std::time_t now = std::time(nullptr); + log_file << "Hakurei executed at " << std::ctime(&now); + log_file << "Command-line arguments:\n"; + for (int i = 0; i < argc; ++i) { + log_file << "argv[" << i << "]: " << argv[i] << "\n"; + } + log_file << "Parsed parameters:\n"; + log_file << "search_string: " << search_string << "\n"; + log_file << "dict_file: " << dict_file << "\n"; + log_file << "debug_mode: " << debug_mode << "\n"; + log_file << "goldendict_mode: " << goldendict_mode << "\n"; + + log_file.close(); + } + + void log_internal(const std::string& message) { + std::ofstream log_file; + log_file.open("/tmp/hakurei.log", std::ios_base::app); + if (!log_file) { + std::cerr << "Failed to open log file." << std::endl; + return; + } + std::time_t now = std::time(nullptr); + log_file << std::ctime(&now) << ": " << message << std::endl; + log_file.close(); + } + + inline void wrap_html_output( + const std::vector& substrings, + const std::map>& alternatives_map, + std::string& sentence + ) { + // Log the HTML output call + log_internal("HTML output is being called"); + + std::string output_html; + output_html += "
"; + + std::string sentence_copy = sentence; + + output_html += "
"; + + while (!sentence_copy.empty()) { + bool found = false; + for (const auto& substring : substrings) { + if (sentence_copy.rfind(substring, 0) == 0) { + output_html += "
" + substring + ""; + + auto alt_itr = alternatives_map.find(sentence_copy); + if (alt_itr != alternatives_map.end() && !alt_itr->second.empty()) { + output_html += "
    "; + for (const auto& alt : alt_itr->second) { + output_html += "
  • " + alt + "
  • "; + } + output_html += "
"; + } + + output_html += "
"; + output_html += ""; + sentence_copy.erase(0, substring.size()); + found = true; + break; + } + } + if (!found) { + output_html += sentence_copy[0]; + sentence_copy.erase(0, 1); + } + } + + output_html += "
"; + output_html += "
"; + + std::cout << output_html << std::endl; + } + + int main(const int argc, char* argv[]) { + bool debug_mode = false; + bool goldendict_mode = false; + std::string word, sentence; + std::string dict_file; + + log_internal("Program started"); + + try { + dict_file = find_dic_file().string(); + log_internal("Dictionary file located at: " + dict_file); + } catch (const std::exception& e) { + log_internal(std::string("Error: ") + e.what()); + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--debug") == 0) { + debug_mode = true; + log_internal("Debug mode enabled"); + } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) { + dict_file = argv[i + 1]; + log_internal("Dictionary file set to: " + dict_file); + if (!std::filesystem::exists(dict_file)) { + log_internal("Error: Dictionary file not found at " + dict_file); + std::cerr << "Error: Dictionary file not found at " << dict_file << std::endl; + return 1; + } + i++; + } else if (std::strcmp(argv[i], "--goldendict") == 0) { + goldendict_mode = true; + log_internal("GoldenDict mode enabled"); + } else if (std::strcmp(argv[i], "--word") == 0 && i + 1 < argc) { + word = argv[i + 1]; + log_internal("Word set to: " + word); + i++; + } else if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) { + sentence = argv[i + 1]; + log_internal("Sentence set to: " + sentence); + i++; + } + } + + std::string search_string; + try { + search_string = get_input(argc, argv, goldendict_mode); + log_internal("Search string: " + search_string); + } catch (const std::exception& e) { + log_internal(std::string("Error: ") + e.what()); + std::cerr << "Error: Failed to retrieve search string." << std::endl; + exit(1); + } + + if (goldendict_mode && !sentence.empty()) { + search_string = sentence; + log_internal("Search string overridden by sentence: " + search_string); + } + + log_execution(argc, argv, search_string, dict_file, debug_mode, goldendict_mode); + + xcdat::trie_15_type trie; + try { + trie = xcdat::load(dict_file); + log_internal("Trie loaded from dictionary file"); + } catch (const std::exception& e) { + log_internal(std::string("Error: Failed to load trie from dictionary file: ") + e.what()); + std::cerr << "Error: Failed to load trie from dictionary file." << std::endl; + return 1; + } + + std::vector substrings; + std::vector results; + std::map> alternatives_map; + + log_internal("Starting search loop with search string: " + search_string); + while (!search_string.empty()) { + auto itr = trie.make_prefix_iterator(search_string); + + while (itr.next()) { + results.emplace_back(itr.decoded_view(), itr.id()); + log_internal("Found result: " + std::string(itr.decoded_view())); + } + + if (!results.empty()) { + for (const auto& entry : results) { + const std::string substring(entry.decoded_view); + substrings.push_back(substring); + derived_map[substring].push_back(search_string); + alternatives_map[search_string].insert(substring); + log_internal("Substring added: " + substring); + if (!goldendict_mode) { + std::cout << substring << std::endl; + } + } + } + + const auto [removed_char, new_search_string] = remove_one_utf8_char(search_string); + log_internal("Removed character: " + removed_char); + + if (!removed_char.empty() && !goldendict_mode) { + std::cout << removed_char << std::endl; + } + + if (new_search_string == search_string) { + std::cerr << "Error: Search string did not change after removing a character. Exiting to prevent infinite loop." << std::endl; + log_internal("Error: Search string did not change after removing a character. Exiting to prevent infinite loop."); + break; + } + + search_string = new_search_string; + log_internal("New search string: " + search_string); + + if (debug_mode) { + std::cout << "After removing one character: " << search_string << std::endl; + raw_output += search_string + '\n'; + } + + results.clear(); + search_string.erase(0, search_string.find_first_not_of(" \t\n\r\f\v")); + } + + log_internal("Search loop ended"); + if (debug_mode) { + print_debug_info(substrings, raw_output); + } + + if (goldendict_mode) { + wrap_html_output(substrings, alternatives_map, sentence); + log_internal("HTML output wrapped"); + exit(0); + } + + log_internal("Program finished"); + return 0; + } diff --git a/src/hakurei.cpp b/src/hakurei.cpp new file mode 100644 index 0000000..66a32f8 --- /dev/null +++ b/src/hakurei.cpp @@ -0,0 +1,325 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct Entry { + const std::string_view decoded_view; + const uint64_t id; + inline Entry(const std::string_view decoded_view, const uint64_t id) : decoded_view(decoded_view), id(id) {} +}; + +inline std::pair remove_one_utf8_char(const std::string& str) { + if (str.empty()) { + return {"", str}; + } + const size_t len = str.size(); + size_t i = 0; + while (i < len) { + const unsigned char c = str[i]; + if (c < 0x80) { + return {str.substr(i, 1), str.substr(i + 1)}; + } else if ((c >> 5) == 0x6) { + return {str.substr(i, 2), str.substr(i + 2)}; + } else if ((c >> 4) == 0xe) { + return {str.substr(i, 3), str.substr(i + 3)}; + } else if ((c >> 3) == 0x1e) { + return {str.substr(i, 4), str.substr(i + 4)}; + } + i++; + } + return {"", ""}; +} + +inline std::string get_input(const int argc, char* const argv[], const bool goldendict_mode) { + std::string search_string; + if (!isatty(fileno(stdin))) { + std::getline(std::cin, search_string); + } else { + if (goldendict_mode) { + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) { + search_string = argv[i + 1]; + break; + } + } + } else { + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + exit(1); + } + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--debug") == 0) { + // Handle debug mode + } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) { + i++; + } else { + search_string = argv[i]; + break; + } + } + } + } + if (search_string.empty()) { + std::cerr << "Search string not provided." << std::endl; + exit(1); + } + return search_string; +} + +inline std::filesystem::path find_dic_file() { + static const std::vector locations = { + std::filesystem::path("/usr/share/hakurei/"), + std::filesystem::path(std::getenv("HOME")) / ".local/share/hakurei/", + std::filesystem::current_path() + }; + for (const auto& location : locations) { + const auto dict_path = location / "dict.bin"; + if (std::filesystem::exists(dict_path) && std::filesystem::is_regular_file(dict_path)) { + return dict_path; + } + } + throw std::runtime_error("Couldn't find the word list."); +} + +inline void print_debug_info(const std::vector& substrings, const std::string& raw_output) { + std::cout << "Stored raw output:" << std::endl; + for (const auto& str : substrings) { + std::cout << str << std::endl; + } + std::cout << "Raw output:" << std::endl; + std::cout << raw_output << std::endl; +} + + +void log_execution(const int argc, char* argv[], const std::string& search_string, const std::string& dict_file, bool debug_mode, bool goldendict_mode) { + std::ofstream log_file; + log_file.open("/tmp/hakurei.log", std::ios_base::app); + if (!log_file) { + std::cerr << "Failed to open log file." << std::endl; + return; + } + + std::time_t now = std::time(nullptr); + log_file << "Hakurei executed at " << std::ctime(&now); + log_file << "Command-line arguments:\n"; + for (int i = 0; i < argc; ++i) { + log_file << "argv[" << i << "]: " << argv[i] << "\n"; + } + log_file << "Parsed parameters:\n"; + log_file << "search_string: " << search_string << "\n"; + log_file << "dict_file: " << dict_file << "\n"; + log_file << "debug_mode: " << debug_mode << "\n"; + log_file << "goldendict_mode: " << goldendict_mode << "\n"; + + log_file.close(); +} + + +void log_internal(const std::string& message) { + std::ofstream log_file; + log_file.open("/tmp/hakurei.log", std::ios_base::app); + if (!log_file) { + std::cerr << "Failed to open log file." << std::endl; + return; + } + std::time_t now = std::time(nullptr); + log_file << std::ctime(&now) << ": " << message << std::endl; + log_file.close(); +} + + +inline void wrap_html_output( + const std::vector& substrings, + const std::map>& alternatives_map, + std::string& sentence + ) { + // Log the HTML output call + log_internal("HTML output is being called"); + + std::string output_html; + output_html += "
"; + + std::string sentence_copy = sentence; + + output_html += "
"; + + while (!sentence_copy.empty()) { + bool found = false; + for (const auto& substring : substrings) { + if (sentence_copy.rfind(substring, 0) == 0) { + output_html += "
" + substring + ""; + + auto alt_itr = alternatives_map.find(sentence_copy); + if (alt_itr != alternatives_map.end() && !alt_itr->second.empty()) { + output_html += "
    "; + for (const auto& alt : alt_itr->second) { + output_html += "
  • " + alt + "
  • "; + } + output_html += "
"; + } + + output_html += "
"; + output_html += ""; + + sentence_copy = sentence_copy.substr(substring.size()); + found = true; + break; + } + } + if (!found) { + const auto [removed_char, new_sentence_copy] = remove_one_utf8_char(sentence_copy); + if (removed_char.empty()) { + std::cerr << "Error: Unable to remove a character from the sentence. Exiting to prevent infinite loop." << std::endl; + break; + } + output_html += "
" + removed_char + "
"; + sentence_copy = new_sentence_copy; + } + } + + output_html += "
"; + output_html += "
"; + std::cout << output_html << std::endl; +} + + + +int main(const int argc, char* argv[]) { + + std::string dict_file = find_dic_file().string(); // Default dictionary file path + const auto trie = xcdat::load(dict_file); + log_internal("Trie loaded from dictionary file"); + + + bool debug_mode = false; + bool goldendict_mode = false; + std::string word, sentence; + + log_internal("Program started"); + + // Parse command line arguments + for (int i = 1; i < argc; ++i) { + if (std::strcmp(argv[i], "--debug") == 0) { + debug_mode = true; + log_internal("Debug mode enabled"); + } else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) { + dict_file = argv[i + 1]; + log_internal("Dictionary file set to: " + dict_file); + i++; // Skip the next argument as it's the dictionary file path + } else if (std::strcmp(argv[i], "--goldendict") == 0) { + goldendict_mode = true; + log_internal("GoldenDict mode enabled"); + } else if (std::strcmp(argv[i], "--word") == 0 && i + 1 < argc) { + word = argv[i + 1]; + log_internal("Word set to: " + word); + i++; + } else if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) { + sentence = argv[i + 1]; + log_internal("Sentence set to: " + sentence); + i++; + } + } + + std::string search_string = get_input(argc, argv, goldendict_mode); + log_internal("Search string: " + search_string); + if (goldendict_mode && !sentence.empty()) { + search_string = sentence; + log_internal("Search string overridden by sentence: " + search_string); + } + + // Log the execution details + log_execution(argc, argv, search_string, dict_file, debug_mode, goldendict_mode); + + std::string raw_output; + + std::vector substrings; + std::vector results; + std::map> derived_map; + std::map> alternatives_map; + + while (!search_string.empty()) { + auto itr = trie.make_prefix_iterator(search_string); + + while (itr.next()) { + results.emplace_back(itr.decoded_view(), itr.id()); + log_internal("Found result: " + std::string(itr.decoded_view())); + } + + // Add all substrings to the results + if (!results.empty()) { + for (const auto& entry : results) { + const std::string substring(entry.decoded_view); + substrings.push_back(substring); + derived_map[substring].push_back(search_string); + alternatives_map[search_string].insert(substring); + log_internal("Substring added: " + substring); + if (!goldendict_mode) { + std::cout << substring << std::endl; + } + } + } + + // Remove one UTF-8 character from the search string and get the removed character + const auto [removed_char, new_search_string] = remove_one_utf8_char(search_string); + log_internal("Removed character: " + removed_char); + + if (!removed_char.empty() && !goldendict_mode) { + std::cout << removed_char << std::endl; + } + + if (new_search_string == search_string) { + std::cerr << "Error: Search string did not change after removing a character. Exiting to prevent infinite loop." << std::endl; + log_internal("Error: Search string did not change after removing a character. Exiting to prevent infinite loop."); + break; + } + + search_string = new_search_string; + log_internal("New search string: " + search_string); + + // Print debug information if in debug mode + if (debug_mode) { + std::cout << "After removing one character: " << search_string << std::endl; + raw_output += search_string + '\n'; + } + + results.clear(); + + // Remove leading whitespace + search_string.erase(0, search_string.find_first_not_of(" \t\n\r\f\v")); + } + + // Print debug information if in debug mode + if (debug_mode) { + print_debug_info(substrings, raw_output); + } + + // Wrap the output in HTML format for GoldenDict if in GoldenDict mode + if (goldendict_mode) { + wrap_html_output(substrings, alternatives_map, sentence); + exit(0); + } + + log_internal("Program finished"); + return 0; +} diff --git a/src/main.cc b/src/hakurei.cpp.orig similarity index 100% rename from src/main.cc rename to src/hakurei.cpp.orig diff --git a/src/hakurei.cpp.rej b/src/hakurei.cpp.rej new file mode 100644 index 0000000..0831211 --- /dev/null +++ b/src/hakurei.cpp.rej @@ -0,0 +1,11 @@ +--- /dev/null ++++ /dev/null +@@ -144,7 +144,8 @@ + + int main(const int argc, char* argv[]) { + bool debug_mode = false; + bool goldendict_mode = false; + std::string word, sentence; ++ std::string dict_file; + + log_internal("Program started");