improve debugging
This commit is contained in:
parent
b6b47f6ba3
commit
df7ffee51c
131
src/main.cc
131
src/main.cc
|
@ -10,15 +10,15 @@
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include <fstream>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
// Struct to hold the view and id of each entry
|
|
||||||
struct Entry {
|
struct Entry {
|
||||||
const std::string_view decoded_view;
|
const std::string_view decoded_view;
|
||||||
const uint64_t id;
|
const uint64_t id;
|
||||||
inline Entry(const std::string_view decoded_view, const uint64_t id) : decoded_view(decoded_view), id(id) {}
|
inline Entry(const std::string_view decoded_view, const uint64_t id) : decoded_view(decoded_view), id(id) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Function to remove one UTF-8 character and return the character along with the new string
|
|
||||||
inline std::pair<std::string, std::string> remove_one_utf8_char(const std::string& str) {
|
inline std::pair<std::string, std::string> remove_one_utf8_char(const std::string& str) {
|
||||||
if (str.empty()) {
|
if (str.empty()) {
|
||||||
return {"", str};
|
return {"", str};
|
||||||
|
@ -27,13 +27,13 @@ inline std::pair<std::string, std::string> remove_one_utf8_char(const std::strin
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
while (i < len) {
|
while (i < len) {
|
||||||
const unsigned char c = str[i];
|
const unsigned char c = str[i];
|
||||||
if (c < 0x80) { // 1-byte character
|
if (c < 0x80) {
|
||||||
return {str.substr(i, 1), str.substr(i + 1)};
|
return {str.substr(i, 1), str.substr(i + 1)};
|
||||||
} else if ((c >> 5) == 0x6) { // 2-byte character
|
} else if ((c >> 5) == 0x6) {
|
||||||
return {str.substr(i, 2), str.substr(i + 2)};
|
return {str.substr(i, 2), str.substr(i + 2)};
|
||||||
} else if ((c >> 4) == 0xe) { // 3-byte character
|
} else if ((c >> 4) == 0xe) {
|
||||||
return {str.substr(i, 3), str.substr(i + 3)};
|
return {str.substr(i, 3), str.substr(i + 3)};
|
||||||
} else if ((c >> 3) == 0x1e) { // 4-byte character
|
} else if ((c >> 3) == 0x1e) {
|
||||||
return {str.substr(i, 4), str.substr(i + 4)};
|
return {str.substr(i, 4), str.substr(i + 4)};
|
||||||
}
|
}
|
||||||
i++;
|
i++;
|
||||||
|
@ -41,13 +41,11 @@ inline std::pair<std::string, std::string> remove_one_utf8_char(const std::strin
|
||||||
return {"", ""};
|
return {"", ""};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function to check input from command line or piped input
|
|
||||||
inline std::string get_input(const int argc, char* const argv[], const bool goldendict_mode) {
|
inline std::string get_input(const int argc, char* const argv[], const bool goldendict_mode) {
|
||||||
std::string search_string;
|
std::string search_string;
|
||||||
|
if (!isatty(fileno(stdin))) {
|
||||||
if (!isatty(fileno(stdin))) { // If input is piped
|
|
||||||
std::getline(std::cin, search_string);
|
std::getline(std::cin, search_string);
|
||||||
} else { // If input is provided as command line argument
|
} else {
|
||||||
if (goldendict_mode) {
|
if (goldendict_mode) {
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) {
|
if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) {
|
||||||
|
@ -64,7 +62,6 @@ inline std::string get_input(const int argc, char* const argv[], const bool gold
|
||||||
if (std::strcmp(argv[i], "--debug") == 0) {
|
if (std::strcmp(argv[i], "--debug") == 0) {
|
||||||
// Handle debug mode
|
// Handle debug mode
|
||||||
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
||||||
// Skip the next argument as it's the dictionary file path
|
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else {
|
||||||
search_string = argv[i];
|
search_string = argv[i];
|
||||||
|
@ -73,12 +70,10 @@ inline std::string get_input(const int argc, char* const argv[], const bool gold
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (search_string.empty()) {
|
if (search_string.empty()) {
|
||||||
std::cerr << "Search string not provided." << std::endl;
|
std::cerr << "Search string not provided." << std::endl;
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return search_string;
|
return search_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,27 +101,58 @@ inline void print_debug_info(const std::vector<std::string>& substrings, const s
|
||||||
std::cout << raw_output << std::endl;
|
std::cout << raw_output << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Appends a record of the current invocation to /tmp/hakurei.log.
//
// The record consists of a timestamp line, every raw command-line argument
// (argv[0] .. argv[argc - 1]), and the values that main() derived from them.
// If the log file cannot be opened, a message is written to stderr and
// nothing is logged.
//
// argc, argv       - the arguments exactly as received by main()
// search_string    - the text that will be looked up
// dict_file        - path of the dictionary file in use
// debug_mode       - written as 0/1 (default stream bool formatting)
// goldendict_mode  - written as 0/1 (default stream bool formatting)
void log_execution(const int argc, char* argv[], const std::string& search_string, const std::string& dict_file, bool debug_mode, bool goldendict_mode) {
    // Append mode so earlier records are kept.
    std::ofstream out("/tmp/hakurei.log", std::ios_base::app);
    if (out.fail()) {
        std::cerr << "Failed to open log file." << std::endl;
        return;
    }

    // std::ctime() already terminates its text with '\n', so the header line
    // needs no explicit line break.
    const std::time_t timestamp = std::time(nullptr);
    out << "Hakurei executed at " << std::ctime(&timestamp)
        << "Command-line arguments:\n";

    int index = 0;
    while (index < argc) {
        out << "argv[" << index << "]: " << argv[index] << "\n";
        ++index;
    }

    out << "Parsed parameters:\n"
        << "search_string: " << search_string << "\n"
        << "dict_file: " << dict_file << "\n"
        << "debug_mode: " << debug_mode << "\n"
        << "goldendict_mode: " << goldendict_mode << "\n";

    // The stream flushes and closes when `out` goes out of scope.
}
|
||||||
|
|
||||||
|
|
||||||
|
// Appends a single timestamped line of the form "<timestamp>: <message>" to
// /tmp/hakurei.log.
//
// If the log file cannot be opened, a message is written to stderr and the
// entry is dropped; the caller is never interrupted.
//
// message - free-form text to record; written verbatim after the timestamp.
void log_internal(const std::string& message) {
    // Append mode so earlier entries are kept.
    std::ofstream log_file("/tmp/hakurei.log", std::ios_base::app);
    if (!log_file) {
        std::cerr << "Failed to open log file." << std::endl;
        return;
    }

    const std::time_t now = std::time(nullptr);

    // std::ctime() may return a null pointer on failure; streaming a null
    // char* is undefined behaviour, so substitute a placeholder.
    const char* const raw_stamp = std::ctime(&now);
    std::string stamp = (raw_stamp != nullptr) ? raw_stamp : "unknown time";

    // std::ctime() ends its result with '\n'. Strip it so the timestamp and
    // the message share one line instead of being split across two.
    while (!stamp.empty() && (stamp.back() == '\n' || stamp.back() == '\r')) {
        stamp.pop_back();
    }

    // std::endl flushes, so the entry reaches the file even if the process
    // exits abruptly afterwards.
    log_file << stamp << ": " << message << std::endl;
}
|
||||||
|
|
||||||
|
|
||||||
inline void wrap_html_output(
|
inline void wrap_html_output(
|
||||||
const std::vector<std::string>& substrings,
|
const std::vector<std::string>& substrings,
|
||||||
const std::map<std::string, std::set<std::string>>& alternatives_map,
|
const std::map<std::string, std::set<std::string>>& alternatives_map,
|
||||||
std::string& sentence
|
std::string& sentence
|
||||||
) {
|
) {
|
||||||
|
// Log the HTML output call
|
||||||
|
log_internal("<span style=\"color:red;\">HTML output is being called</span>");
|
||||||
|
|
||||||
std::string output_html;
|
std::string output_html;
|
||||||
output_html += "<!DOCTYPE html><html><head><style>";
|
output_html += "<div class=\"hakurei\">";
|
||||||
output_html += ".hakurei { font-size: 2rem; margin-bottom: 0.05em; margin-top: -0.2em; color: #1268c3; font-weight: normal; }";
|
|
||||||
output_html += ".hakurei a { display: inline-block; font-weight: normal; color: royalblue; text-decoration: none; border-bottom: dashed max(1px, calc(1em / 16)) currentColor; }";
|
|
||||||
output_html += ".hakurei a.hakurei-headword { background-color: #ddeeff; border-radius: 0.2rem; font-weight: 500; }";
|
|
||||||
output_html += ".hakurei > ul { --size: 1rem; font-size: var(--size); padding-inline-start: var(--size); margin-block: 2px; }";
|
|
||||||
output_html += ".hakurei .alternatives { --size: 1rem; display: grid; font-size: var(--size); gap: calc( var(--size) / 4); max-width: 100%; margin: 0 auto; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); align-content: start; justify-content: space-around; text-align: left; padding: 5px 0px; }";
|
|
||||||
output_html += ".hakurei .alternatives > ul { list-style-type: none; margin: 0; padding: calc( var(--size) / 4); background-color: hsl(0 0% 50% / 0.05); box-shadow: 0 0 4px hsl(0 0% 0% / 0.1); border-radius: 0.2rem; }";
|
|
||||||
output_html += ".hakurei .alternatives > ul > li { margin-right: 1rem; }";
|
|
||||||
output_html += ".container { display: flex; flex-wrap: wrap; gap: 10px; }"; // Adjusted container class
|
|
||||||
output_html += ".segment { display: inline-block; }"; // Adjusted segment class
|
|
||||||
output_html += "</style></head><body><div class=\"hakurei\">";
|
|
||||||
|
|
||||||
std::string sentence_copy = sentence;
|
std::string sentence_copy = sentence;
|
||||||
|
|
||||||
output_html += "<div class=\"container\">"; // Open container div
|
output_html += "<div class=\"container\">";
|
||||||
|
|
||||||
while (!sentence_copy.empty()) {
|
while (!sentence_copy.empty()) {
|
||||||
bool found = false;
|
bool found = false;
|
||||||
|
@ -134,7 +160,6 @@ inline void wrap_html_output(
|
||||||
if (sentence_copy.rfind(substring, 0) == 0) {
|
if (sentence_copy.rfind(substring, 0) == 0) {
|
||||||
output_html += "<div class=\"segment\"><a class=\"hakurei-headword\" href=\"bword:" + substring + "\">" + substring + "</a>";
|
output_html += "<div class=\"segment\"><a class=\"hakurei-headword\" href=\"bword:" + substring + "\">" + substring + "</a>";
|
||||||
|
|
||||||
// Adding alternatives for the current substring
|
|
||||||
auto alt_itr = alternatives_map.find(sentence_copy);
|
auto alt_itr = alternatives_map.find(sentence_copy);
|
||||||
if (alt_itr != alternatives_map.end() && !alt_itr->second.empty()) {
|
if (alt_itr != alternatives_map.end() && !alt_itr->second.empty()) {
|
||||||
output_html += "<div class=\"alternatives\"><ul>";
|
output_html += "<div class=\"alternatives\"><ul>";
|
||||||
|
@ -144,7 +169,19 @@ inline void wrap_html_output(
|
||||||
output_html += "</ul></div>";
|
output_html += "</ul></div>";
|
||||||
}
|
}
|
||||||
|
|
||||||
output_html += "</div>"; // Close segment div
|
output_html += "</div>";
|
||||||
|
output_html += "<style>";
|
||||||
|
output_html += ".hakurei { font-size: 2rem; margin-bottom: 0.05em; margin-top: -0.2em; color: #1268c3; font-weight: normal; }";
|
||||||
|
output_html += ".hakurei a { display: inline-block; font-weight: normal; color: royalblue; text-decoration: none; border-bottom: dashed max(1px, calc(1em / 16)) currentColor; }";
|
||||||
|
output_html += ".hakurei a.hakurei-headword { background-color: #ddeeff; border-radius: 0.2rem; font-weight: 500; }";
|
||||||
|
output_html += ".hakurei > ul { --size: 1rem; font-size: var(--size); padding-inline-start: var(--size); margin-block: 2px; }";
|
||||||
|
output_html += ".hakurei .alternatives { --size: 1rem; display: grid; font-size: var(--size); gap: calc( var(--size) / 4); max-width: 100%; margin: 0 auto; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); align-content: start; justify-content: space-around; text-align: left; padding: 5px 0px; }";
|
||||||
|
output_html += ".hakurei .alternatives > ul { list-style-type: none; margin: 0; padding: calc( var(--size) / 4); background-color: hsl(0 0% 50% / 0.05); box-shadow: 0 0 4px hsl(0 0% 0% / 0.1); border-radius: 0.2rem; }";
|
||||||
|
output_html += ".hakurei .alternatives > ul > li { margin-right: 1rem; }";
|
||||||
|
output_html += ".container { display: flex; flex-wrap: wrap; gap: 10px; }";
|
||||||
|
output_html += ".segment { display: inline-block; }";
|
||||||
|
output_html += "</style>";
|
||||||
|
|
||||||
sentence_copy = sentence_copy.substr(substring.size());
|
sentence_copy = sentence_copy.substr(substring.size());
|
||||||
found = true;
|
found = true;
|
||||||
break;
|
break;
|
||||||
|
@ -152,48 +189,66 @@ inline void wrap_html_output(
|
||||||
}
|
}
|
||||||
if (!found) {
|
if (!found) {
|
||||||
const auto [removed_char, new_sentence_copy] = remove_one_utf8_char(sentence_copy);
|
const auto [removed_char, new_sentence_copy] = remove_one_utf8_char(sentence_copy);
|
||||||
|
if (removed_char.empty()) {
|
||||||
|
std::cerr << "Error: Unable to remove a character from the sentence. Exiting to prevent infinite loop." << std::endl;
|
||||||
|
break;
|
||||||
|
}
|
||||||
output_html += "<div class=\"segment\">" + removed_char + "</div>";
|
output_html += "<div class=\"segment\">" + removed_char + "</div>";
|
||||||
sentence_copy = new_sentence_copy;
|
sentence_copy = new_sentence_copy;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
output_html += "</div>"; // Close container div
|
output_html += "</div>";
|
||||||
output_html += "</div></body></html>";
|
output_html += "</div>";
|
||||||
std::cout << output_html << std::endl;
|
std::cout << output_html << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main(const int argc, char* argv[]) {
|
int main(const int argc, char* argv[]) {
|
||||||
bool debug_mode = false;
|
bool debug_mode = false;
|
||||||
bool goldendict_mode = false;
|
bool goldendict_mode = false;
|
||||||
std::string word, sentence;
|
std::string word, sentence;
|
||||||
std::string dict_file = find_dic_file().string(); // Default dictionary file path
|
std::string dict_file = find_dic_file().string(); // Default dictionary file path
|
||||||
|
|
||||||
|
log_internal("Program started");
|
||||||
|
|
||||||
// Parse command line arguments
|
// Parse command line arguments
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
if (std::strcmp(argv[i], "--debug") == 0) {
|
if (std::strcmp(argv[i], "--debug") == 0) {
|
||||||
debug_mode = true;
|
debug_mode = true;
|
||||||
|
log_internal("Debug mode enabled");
|
||||||
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
||||||
dict_file = argv[i + 1];
|
dict_file = argv[i + 1];
|
||||||
|
log_internal("Dictionary file set to: " + dict_file);
|
||||||
i++; // Skip the next argument as it's the dictionary file path
|
i++; // Skip the next argument as it's the dictionary file path
|
||||||
} else if (std::strcmp(argv[i], "--goldendict") == 0) {
|
} else if (std::strcmp(argv[i], "--goldendict") == 0) {
|
||||||
goldendict_mode = true;
|
goldendict_mode = true;
|
||||||
|
log_internal("GoldenDict mode enabled");
|
||||||
} else if (std::strcmp(argv[i], "--word") == 0 && i + 1 < argc) {
|
} else if (std::strcmp(argv[i], "--word") == 0 && i + 1 < argc) {
|
||||||
word = argv[i + 1];
|
word = argv[i + 1];
|
||||||
|
log_internal("Word set to: " + word);
|
||||||
i++;
|
i++;
|
||||||
} else if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) {
|
} else if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) {
|
||||||
sentence = argv[i + 1];
|
sentence = argv[i + 1];
|
||||||
|
log_internal("Sentence set to: " + sentence);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string search_string = get_input(argc, argv, goldendict_mode);
|
std::string search_string = get_input(argc, argv, goldendict_mode);
|
||||||
|
log_internal("Search string: " + search_string);
|
||||||
if (goldendict_mode && !sentence.empty()) {
|
if (goldendict_mode && !sentence.empty()) {
|
||||||
search_string = sentence;
|
search_string = sentence;
|
||||||
|
log_internal("Search string overridden by sentence: " + search_string);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Log the execution details
|
||||||
|
log_execution(argc, argv, search_string, dict_file, debug_mode, goldendict_mode);
|
||||||
|
|
||||||
std::string raw_output;
|
std::string raw_output;
|
||||||
const auto trie = xcdat::load<xcdat::trie_15_type>(dict_file);
|
const auto trie = xcdat::load<xcdat::trie_15_type>(dict_file);
|
||||||
|
log_internal("Trie loaded from dictionary file");
|
||||||
|
|
||||||
std::vector<std::string> substrings;
|
std::vector<std::string> substrings;
|
||||||
std::vector<Entry> results;
|
std::vector<Entry> results;
|
||||||
|
@ -205,6 +260,7 @@ int main(const int argc, char* argv[]) {
|
||||||
|
|
||||||
while (itr.next()) {
|
while (itr.next()) {
|
||||||
results.emplace_back(itr.decoded_view(), itr.id());
|
results.emplace_back(itr.decoded_view(), itr.id());
|
||||||
|
log_internal("Found result: " + std::string(itr.decoded_view()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add all substrings to the results
|
// Add all substrings to the results
|
||||||
|
@ -214,6 +270,7 @@ int main(const int argc, char* argv[]) {
|
||||||
substrings.push_back(substring);
|
substrings.push_back(substring);
|
||||||
derived_map[substring].push_back(search_string);
|
derived_map[substring].push_back(search_string);
|
||||||
alternatives_map[search_string].insert(substring);
|
alternatives_map[search_string].insert(substring);
|
||||||
|
log_internal("Substring added: " + substring);
|
||||||
if (!goldendict_mode) {
|
if (!goldendict_mode) {
|
||||||
std::cout << substring << std::endl;
|
std::cout << substring << std::endl;
|
||||||
}
|
}
|
||||||
|
@ -222,12 +279,20 @@ int main(const int argc, char* argv[]) {
|
||||||
|
|
||||||
// Remove one UTF-8 character from the search string and get the removed character
|
// Remove one UTF-8 character from the search string and get the removed character
|
||||||
const auto [removed_char, new_search_string] = remove_one_utf8_char(search_string);
|
const auto [removed_char, new_search_string] = remove_one_utf8_char(search_string);
|
||||||
|
log_internal("Removed character: " + removed_char);
|
||||||
|
|
||||||
if (!removed_char.empty() && !goldendict_mode) {
|
if (!removed_char.empty() && !goldendict_mode) {
|
||||||
std::cout << removed_char << std::endl;
|
std::cout << removed_char << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (new_search_string == search_string) {
|
||||||
|
std::cerr << "Error: Search string did not change after removing a character. Exiting to prevent infinite loop." << std::endl;
|
||||||
|
log_internal("Error: Search string did not change after removing a character. Exiting to prevent infinite loop.");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
search_string = new_search_string;
|
search_string = new_search_string;
|
||||||
|
log_internal("New search string: " + search_string);
|
||||||
|
|
||||||
// Print debug information if in debug mode
|
// Print debug information if in debug mode
|
||||||
if (debug_mode) {
|
if (debug_mode) {
|
||||||
|
@ -249,7 +314,9 @@ int main(const int argc, char* argv[]) {
|
||||||
// Wrap the output in HTML format for GoldenDict if in GoldenDict mode
|
// Wrap the output in HTML format for GoldenDict if in GoldenDict mode
|
||||||
if (goldendict_mode) {
|
if (goldendict_mode) {
|
||||||
wrap_html_output(substrings, alternatives_map, sentence);
|
wrap_html_output(substrings, alternatives_map, sentence);
|
||||||
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_internal("Program finished");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue