add goldendict mode
This commit is contained in:
parent
cd14096781
commit
28f8e8005a
112
src/main.cc
112
src/main.cc
|
@ -7,6 +7,8 @@
|
||||||
#include <xcdat.hpp>
|
#include <xcdat.hpp>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <filesystem>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
// Struct to hold the view and id of each entry
|
// Struct to hold the view and id of each entry
|
||||||
struct Entry {
|
struct Entry {
|
||||||
|
@ -39,42 +41,65 @@ inline std::pair<std::string, std::string> remove_one_utf8_char(const std::strin
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine the search string for this run.
//
// Sources, in priority order:
//   1. GoldenDict mode only: the value following `--sentence` on the command
//      line. This is checked first so the argument is honoured when the
//      process is started without a terminal on stdin; otherwise the piped
//      branch below would swallow the request and find nothing to read.
//   2. stdin is not a terminal: the first line read from stdin.
//   3. Normal mode on a terminal: the first argument that is neither
//      `--debug` nor `--dict <path>`.
//
// Parameters:
//   argc, argv       - the program's command line, as passed to main().
//   goldendict_mode  - true when `--goldendict` was given; defaults to false
//                      so two-argument callers keep working.
//
// Returns the non-empty search string. Prints a message to stderr and calls
// exit(1) when no search string can be determined.
inline std::string get_input(int argc, char* argv[], bool goldendict_mode = false) {
    std::string search_string;

    if (goldendict_mode) {
        // `i + 1 < argc` guarantees that `--sentence` has a value after it.
        for (int i = 1; i + 1 < argc; ++i) {
            if (std::strcmp(argv[i], "--sentence") == 0) {
                search_string = argv[i + 1];
                break;
            }
        }
    }

    if (search_string.empty()) {
        if (!isatty(fileno(stdin))) {  // Input is piped
            std::getline(std::cin, search_string);
        } else if (!goldendict_mode) {  // Input is a command line argument
            if (argc < 2) {
                std::cerr << "Usage: " << argv[0] << " <search_string>" << std::endl;
                exit(1);
            }
            for (int i = 1; i < argc; ++i) {
                if (std::strcmp(argv[i], "--debug") == 0) {
                    // Flag only; main() handles it.
                    continue;
                }
                if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
                    ++i;  // Skip the dictionary file path that follows.
                    continue;
                }
                search_string = argv[i];
                break;
            }
        }
    }

    if (search_string.empty()) {
        std::cerr << "Search string not provided." << std::endl;
        exit(1);
    }

    return search_string;
}
|
||||||
|
|
||||||
|
// Locate the dictionary file `dict.bin`.
//
// Directories are probed in this order and the first one that contains a
// regular file named `dict.bin` wins:
//   1. /usr/share/hakurei/
//   2. $HOME/.local/share/hakurei/   (skipped when HOME is not set)
//   3. the current working directory (skipped when it cannot be determined)
//
// Returns the path of the first `dict.bin` found.
// Throws std::runtime_error when none of the locations contains the file.
auto find_dic_file() -> std::filesystem::path {
    namespace fs = std::filesystem;

    std::vector<fs::path> locations;
    locations.emplace_back("/usr/share/hakurei/");

    // std::getenv returns nullptr when the variable is unset; constructing a
    // path from a null pointer is undefined behaviour, so guard against it.
    if (const char* home = std::getenv("HOME"); home != nullptr) {
        locations.push_back(fs::path(home) / ".local/share/hakurei/");
    }

    std::error_code cwd_error;
    fs::path cwd = fs::current_path(cwd_error);
    if (!cwd_error) {
        locations.push_back(std::move(cwd));
    }

    for (const auto& location : locations) {
        fs::path candidate = location / "dict.bin";
        // The non-throwing overload reports "missing" and "inaccessible" as
        // false, so one unreadable directory does not abort the search.
        std::error_code probe_error;
        if (fs::is_regular_file(candidate, probe_error)) {
            return candidate;
        }
    }

    throw std::runtime_error("Couldn't find the word list.");
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
std::string search_string = get_input(argc, argv);
|
|
||||||
bool debug_mode = false;
|
bool debug_mode = false;
|
||||||
std::string dict_file = "dict.bin"; // Default dictionary file path
|
bool goldendict_mode = false;
|
||||||
|
std::string word, sentence;
|
||||||
|
std::string dict_file = find_dic_file().string(); // Default dictionary file path
|
||||||
|
|
||||||
// Parse command line arguments
|
// Parse command line arguments
|
||||||
for (int i = 1; i < argc; ++i) {
|
for (int i = 1; i < argc; ++i) {
|
||||||
|
@ -83,14 +108,28 @@ int main(int argc, char* argv[]) {
|
||||||
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
} else if (std::strcmp(argv[i], "--dict") == 0 && i + 1 < argc) {
|
||||||
dict_file = argv[i + 1];
|
dict_file = argv[i + 1];
|
||||||
i++; // Skip the next argument as it's the dictionary file path
|
i++; // Skip the next argument as it's the dictionary file path
|
||||||
|
} else if (std::strcmp(argv[i], "--goldendict") == 0) {
|
||||||
|
goldendict_mode = true;
|
||||||
|
} else if (std::strcmp(argv[i], "--word") == 0 && i + 1 < argc) {
|
||||||
|
word = argv[i + 1];
|
||||||
|
i++;
|
||||||
|
} else if (std::strcmp(argv[i], "--sentence") == 0 && i + 1 < argc) {
|
||||||
|
sentence = argv[i + 1];
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string search_string = get_input(argc, argv, goldendict_mode);
|
||||||
|
if (goldendict_mode && !sentence.empty()) {
|
||||||
|
search_string = sentence;
|
||||||
|
}
|
||||||
|
|
||||||
std::string raw_output;
|
std::string raw_output;
|
||||||
const auto trie = xcdat::load<xcdat::trie_15_type>(dict_file);
|
const auto trie = xcdat::load<xcdat::trie_15_type>(dict_file);
|
||||||
|
|
||||||
std::vector<std::string> substrings;
|
std::vector<std::string> substrings;
|
||||||
std::vector<Entry> results;
|
std::vector<Entry> results;
|
||||||
|
std::map<std::string, std::vector<std::string>> derived_map;
|
||||||
|
|
||||||
while (!search_string.empty()) {
|
while (!search_string.empty()) {
|
||||||
auto itr = trie.make_prefix_iterator(search_string);
|
auto itr = trie.make_prefix_iterator(search_string);
|
||||||
|
@ -104,14 +143,17 @@ int main(int argc, char* argv[]) {
|
||||||
for (const auto& entry : results) {
|
for (const auto& entry : results) {
|
||||||
std::string substring(entry.decoded_view);
|
std::string substring(entry.decoded_view);
|
||||||
substrings.push_back(substring);
|
substrings.push_back(substring);
|
||||||
std::cout << substring << std::endl;
|
derived_map[substring].push_back(search_string);
|
||||||
|
if (!goldendict_mode) {
|
||||||
|
std::cout << substring << std::endl;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove one UTF-8 character from the search string and get the removed character
|
// Remove one UTF-8 character from the search string and get the removed character
|
||||||
auto [removed_char, new_search_string] = remove_one_utf8_char(search_string);
|
auto [removed_char, new_search_string] = remove_one_utf8_char(search_string);
|
||||||
|
|
||||||
if (!removed_char.empty()) {
|
if (!removed_char.empty() && !goldendict_mode) {
|
||||||
std::cout << removed_char << std::endl;
|
std::cout << removed_char << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -126,15 +168,33 @@ int main(int argc, char* argv[]) {
|
||||||
results.clear();
|
results.clear();
|
||||||
|
|
||||||
// Remove leading whitespace
|
// Remove leading whitespace
|
||||||
while (!search_string.empty() && std::isspace(search_string.front())) {
|
search_string.erase(0, search_string.find_first_not_of(" \t\n\r\f\v"));
|
||||||
search_string.erase(0, 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print debug information if in debug mode
|
// Print debug information if in debug mode
|
||||||
if (debug_mode) {
|
if (debug_mode) {
|
||||||
std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
|
std::cout << "Stored raw output:" << std::endl;
|
||||||
|
for (const auto& str : substrings) {
|
||||||
|
std::cout << str << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrap the output in HTML format for GoldenDict if in GoldenDict mode
|
||||||
|
if (goldendict_mode) {
|
||||||
|
std::cout << "<!DOCTYPE html><html><head><style>.gd-marisa { font-size: 2rem; margin-bottom: 0.05em; margin-top: -0.2em; color: #1268c3; font-weight: normal; } .gd-marisa a { display: inline-block; font-weight: normal; color: royalblue; text-decoration: none; border-bottom: dashed max(1px, calc(1em / 16)) currentColor; } .gd-marisa a.gd-headword { background-color: #ddeeff; border-radius: 0.2rem; font-weight: 500; } .gd-marisa > ul { --size: 1rem; font-size: var(--size); padding-inline-start: var(--size); margin-block: 2px; } .gd-marisa .alternatives { --size: 1rem; display: grid; font-size: var(--size); gap: calc( var(--size) / 4); max-width: 100%; margin: 0 auto; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); align-content: start; justify-content: space-around; text-align: left; padding: 5px 0px; } .gd-marisa .alternatives > ul { list-style-type: none; margin: 0; padding: calc( var(--size) / 4); background-color: hsl(150deg 30% 60% / 10%); } </style></head><body><div class=\"gd-marisa\">";
|
||||||
|
for (const auto& [key, values] : derived_map) {
|
||||||
|
std::cout << "<a href=\"bword:" << key << "\">" << key << "</a>";
|
||||||
|
if (!values.empty()) {
|
||||||
|
std::cout << "<ul>";
|
||||||
|
for (const auto& value : values) {
|
||||||
|
std::cout << "<li><a href=\"bword:" << value << "\">" << value << "</a></li>";
|
||||||
|
}
|
||||||
|
std::cout << "</ul>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "</div></body></html>";
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue