Merge pull request #2 from kampersanda/fix-cps

Fix bug in common prefix search
This commit is contained in:
Shunsuke Kanda 2022-07-10 14:59:38 +09:00 committed by GitHub
commit 7534644813
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 17 deletions

View file

@@ -2,6 +2,7 @@
#include <algorithm> #include <algorithm>
#include <functional> #include <functional>
#include <optional>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
@@ -166,31 +167,42 @@ class tail_vector {
} }
} }
inline bool prefix_match(std::string_view key, std::uint64_t tpos) const { // Returns epos-tpos+1 if TAIL[tpos..epos] is a prefix of key.
assert(key.size() != 0); inline std::optional<std::uint64_t> prefix_match(std::string_view key, std::uint64_t tpos) const {
std::uint64_t kpos = 0; if (tpos == 0) {
// suffix is empty, always matched.
return 0;
}
if (key.size() == 0) {
// When key is empty, match fails since the suffix is not empty here.
return std::nullopt;
}
std::uint64_t kpos = 0;
if (bin_mode()) { if (bin_mode()) {
do { do {
if (key[kpos] != m_chars[tpos]) { if (key[kpos] != m_chars[tpos]) {
return false; return std::nullopt;
} }
kpos += 1; kpos += 1;
if (m_terms[tpos]) { if (m_terms[tpos]) {
return kpos == key.size(); return kpos;
} }
tpos += 1; tpos += 1;
} while (kpos < key.size()); } while (kpos < key.size());
return true; return kpos;
} else { } else {
do { do {
if (!m_chars[tpos] || key[kpos] != m_chars[tpos]) { if (!m_chars[tpos]) {
return false; return kpos;
}
if (key[kpos] != m_chars[tpos]) {
return std::nullopt;
} }
kpos += 1; kpos += 1;
tpos += 1; tpos += 1;
} while (kpos < key.size()); } while (kpos < key.size());
return true; return kpos;
} }
} }

View file

@@ -166,7 +166,7 @@ class trie {
class prefix_iterator { class prefix_iterator {
private: private:
const trie_type* m_obj = nullptr; const trie_type* m_obj = nullptr;
std::string m_key; std::string_view m_key;
std::uint64_t m_id = 0; std::uint64_t m_id = 0;
std::uint64_t m_kpos = 0; std::uint64_t m_kpos = 0;
std::uint64_t m_npos = 0; std::uint64_t m_npos = 0;
@@ -231,7 +231,7 @@ class trie {
private: private:
const trie_type* m_obj = nullptr; const trie_type* m_obj = nullptr;
std::string m_key; std::string_view m_key;
std::uint64_t m_id = 0; std::uint64_t m_id = 0;
std::string m_decoded; std::string m_decoded;
std::vector<cursor_type> m_stack; std::vector<cursor_type> m_stack;
@@ -317,8 +317,7 @@ class trie {
: m_num_keys(b.m_keys.size()), m_table(std::move(b.m_table)), m_terms(b.m_terms, true, true), : m_num_keys(b.m_keys.size()), m_table(std::move(b.m_table)), m_terms(b.m_terms, true, true),
m_bcvec(b.m_units, std::move(b.m_leaves)), m_tvec(std::move(b.m_suffixes)) {} m_bcvec(b.m_units, std::move(b.m_leaves)), m_tvec(std::move(b.m_suffixes)) {}
template <class String> static constexpr std::string_view get_suffix(std::string_view s, std::uint64_t i) {
static constexpr String get_suffix(const String& s, std::uint64_t i) {
assert(i <= s.size()); assert(i <= s.size());
return s.substr(i, s.size() - i); return s.substr(i, s.size() - i);
} }
@@ -376,12 +375,12 @@ class trie {
itr->is_end = true; itr->is_end = true;
const std::uint64_t tpos = m_bcvec.link(itr->m_npos); const std::uint64_t tpos = m_bcvec.link(itr->m_npos);
if (!m_tvec.match(get_suffix(itr->m_key, itr->m_kpos), tpos)) { const auto matched = m_tvec.prefix_match(get_suffix(itr->m_key, itr->m_kpos), tpos);
if (!matched.has_value()) {
itr->m_id = num_keys(); itr->m_id = num_keys();
return false; return false;
} }
itr->m_kpos += matched.value();
itr->m_kpos = itr->m_key.size();
itr->m_id = npos_to_id(itr->m_npos); itr->m_id = npos_to_id(itr->m_npos);
return true; return true;
} }

View file

@@ -167,7 +167,7 @@ TEST_CASE("Test " TRIE_NAME " (tiny)") {
test_basic_operations(trie, keys, others); test_basic_operations(trie, keys, others);
{ {
auto itr = trie.make_prefix_iterator("MacBook_Pro"); auto itr = trie.make_prefix_iterator("MacBook_Pro_13inch");
std::vector<std::string> expected = {"Mac", "MacBook", "MacBook_Pro"}; std::vector<std::string> expected = {"Mac", "MacBook", "MacBook_Pro"};
for (const auto& exp : expected) { for (const auto& exp : expected) {
REQUIRE(itr.next()); REQUIRE(itr.next());