NOTE(review): line breaks and indentation of this patch were reconstructed
(4-space indent assumed); if context lines fail to match, apply with
`git apply --ignore-whitespace`. The header names of the three context
`#include` lines in the first hunk are missing from the text this patch was
recovered from and must be restored from src/main.cc before applying. The
post-image blob id on the index line predates the substr clamping fix.

diff --git a/src/main.cc b/src/main.cc
index d436b34..609d085 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -5,33 +5,43 @@
 #include
 #include
 #include
+#include <algorithm>
+#include <utility>
 
+// Struct to hold the view and id of each entry
 struct Entry {
     std::string_view decoded_view;
     uint64_t id;
     Entry(std::string_view decoded_view, uint64_t id) : decoded_view(decoded_view), id(id) {}
 };
 
-std::string remove_one_utf8_char(const std::string& str) {
+// Removes the first UTF-8 character from `str`.
+//
+// Returns {removed character, remaining string}. Bytes that cannot start a
+// character (e.g. stray continuation bytes) are skipped and end up in neither
+// part. A character cut short by the end of the string is returned as-is with
+// an empty remainder rather than throwing std::out_of_range. Returns {"", ""}
+// when no lead byte is found.
+std::pair<std::string, std::string> remove_one_utf8_char(const std::string& str) {
     if (str.empty()) {
-        return str;
+        return {"", str};
     }
     size_t len = str.size();
     size_t i = 0;
     while (i < len) {
         unsigned char c = str[i];
         if (c < 0x80) { // 1-byte character
-            return str.substr(i + 1);
+            return {str.substr(i, 1), str.substr(i + 1)};
         } else if ((c >> 5) == 0x6) { // 2-byte character
-            return str.substr(i + 2);
+            return {str.substr(i, 2), str.substr(std::min(i + 2, len))};
         } else if ((c >> 4) == 0xe) { // 3-byte character
-            return str.substr(i + 3);
+            return {str.substr(i, 3), str.substr(std::min(i + 3, len))};
         } else if ((c >> 3) == 0x1e) { // 4-byte character
-            return str.substr(i + 4);
+            return {str.substr(i, 4), str.substr(std::min(i + 4, len))};
         }
         i++;
     }
-    return "";
+    return {"", ""};
 }
 
 int main(int argc, char* argv[]) {
@@ -77,17 +87,20 @@ int main(int argc, char* argv[]) {
     // Add all substrings to the results
     if (!results.empty()) {
         for (const auto& entry : results) {
-            substrings.push_back(std::string(entry.decoded_view));
+            std::string substring(entry.decoded_view);
+            substrings.push_back(substring);
+            std::cout << substring << std::endl;
         }
     }
 
-    // Print debug information if in debug mode
-    if (debug_mode) {
-        std::cout << "Original search string: " << search_string << std::endl;
+    // Remove one UTF-8 character from the search string and get the removed character
+    auto [removed_char, new_search_string] = remove_one_utf8_char(search_string);
+
+    if (!removed_char.empty()) {
+        std::cout << removed_char << std::endl;
     }
 
-    // Remove one UTF-8 character from the search string
-    search_string = remove_one_utf8_char(search_string);
+    search_string = new_search_string;
 
     // Print debug information if in debug mode
     if (debug_mode) {
@@ -108,10 +121,5 @@ int main(int argc, char* argv[]) {
         std::cout << "Stored raw output:" << std::endl << raw_output << std::endl;
     }
 
-    // Print the substrings
-    for (const auto& sub : substrings) {
-        std::cout << sub << std::endl;
-    }
-
     return 0;
 }