fix the interface
This commit is contained in:
parent
4ea523bc4f
commit
e216688f2f
|
@ -34,6 +34,7 @@ message(STATUS "CXX_FLAGS_RELEASE are ${CMAKE_CXX_FLAGS_RELEASE}")
|
|||
include_directories(include)
|
||||
|
||||
add_subdirectory(sample)
|
||||
|
||||
add_subdirectory(tools)
|
||||
|
||||
enable_testing()
|
||||
|
|
|
@ -6,9 +6,44 @@
|
|||
#include "xcdat/io.hpp"
|
||||
#include "xcdat/trie.hpp"
|
||||
|
||||
#include "xcdat/load_visitor.hpp"
|
||||
#include "xcdat/mmap_visitor.hpp"
|
||||
#include "xcdat/save_visitor.hpp"
|
||||
#include "xcdat/size_visitor.hpp"
|
||||
|
||||
namespace xcdat {
|
||||
|
||||
using trie_7_type = trie<bc_vector_7>;
|
||||
using trie_8_type = trie<bc_vector_8>;
|
||||
|
||||
template <class Trie>
|
||||
static Trie mmap(const char* address) {
|
||||
Trie idx;
|
||||
mmap_visitor visitor(address);
|
||||
visitor.visit(idx);
|
||||
return idx;
|
||||
}
|
||||
|
||||
template <class Trie>
|
||||
static Trie load(std::string_view filepath) {
|
||||
Trie idx;
|
||||
load_visitor visitor(filepath);
|
||||
visitor.visit(idx);
|
||||
return idx;
|
||||
}
|
||||
|
||||
template <class Trie>
|
||||
static std::uint64_t save(const Trie& idx, std::string_view filepath) {
|
||||
save_visitor visitor(filepath);
|
||||
visitor.visit(const_cast<Trie&>(idx));
|
||||
return visitor.bytes();
|
||||
}
|
||||
|
||||
template <class Trie>
|
||||
static std::uint64_t memory_in_bytes(const Trie& idx) {
|
||||
size_visitor visitor;
|
||||
visitor.visit(const_cast<Trie&>(idx));
|
||||
return visitor.bytes();
|
||||
}
|
||||
|
||||
} // namespace xcdat
|
||||
|
|
|
@ -11,6 +11,7 @@ class bc_vector_7 {
|
|||
public:
|
||||
static constexpr std::uint32_t l1_bits = 7;
|
||||
static constexpr std::uint32_t max_levels = 4;
|
||||
|
||||
static constexpr std::uint64_t block_size_l1 = 1ULL << 7;
|
||||
static constexpr std::uint64_t block_size_l2 = 1ULL << 15;
|
||||
static constexpr std::uint64_t block_size_l3 = 1ULL << 31;
|
||||
|
@ -37,11 +38,6 @@ class bc_vector_7 {
|
|||
|
||||
template <class BcUnits>
|
||||
explicit bc_vector_7(const BcUnits& bc_units, bit_vector::builder&& leaves) {
|
||||
build(bc_units, std::move(leaves));
|
||||
}
|
||||
|
||||
template <class BcUnits>
|
||||
void build(const BcUnits& bc_units, bit_vector::builder&& leaves) {
|
||||
std::vector<std::uint8_t> ints_l1;
|
||||
std::vector<std::uint16_t> ints_l2;
|
||||
std::vector<std::uint32_t> ints_l3;
|
||||
|
@ -110,15 +106,15 @@ class bc_vector_7 {
|
|||
}
|
||||
|
||||
// release
|
||||
m_ints_l1.steal(ints_l1);
|
||||
m_ints_l2.steal(ints_l2);
|
||||
m_ints_l3.steal(ints_l3);
|
||||
m_ints_l4.steal(ints_l4);
|
||||
m_ints_l1.build(ints_l1);
|
||||
m_ints_l2.build(ints_l2);
|
||||
m_ints_l3.build(ints_l3);
|
||||
m_ints_l4.build(ints_l4);
|
||||
for (std::uint32_t j = 0; j < m_ranks.size(); ++j) {
|
||||
m_ranks[j].steal(ranks[j]);
|
||||
m_ranks[j].build(ranks[j]);
|
||||
}
|
||||
m_links.build(links);
|
||||
m_leaves.build(leaves, true, false);
|
||||
m_links = compact_vector(links);
|
||||
m_leaves = bit_vector(leaves, true, false);
|
||||
}
|
||||
|
||||
inline std::uint64_t base(std::uint64_t i) const {
|
||||
|
|
|
@ -32,11 +32,6 @@ class bc_vector_8 {
|
|||
|
||||
template <class BcUnits>
|
||||
explicit bc_vector_8(const BcUnits& bc_units, bit_vector::builder&& leaves) {
|
||||
build(bc_units, std::move(leaves));
|
||||
}
|
||||
|
||||
template <class BcUnits>
|
||||
void build(const BcUnits& bc_units, bit_vector::builder&& leaves) {
|
||||
std::array<std::vector<std::uint8_t>, max_levels> bytes;
|
||||
std::array<bit_vector::builder, max_levels - 1> next_flags;
|
||||
std::vector<std::uint64_t> links;
|
||||
|
@ -81,13 +76,13 @@ class bc_vector_8 {
|
|||
}
|
||||
|
||||
// release
|
||||
for (uint8_t i = 0; i < m_num_levels; ++i) {
|
||||
m_bytes[i].steal(bytes[i]);
|
||||
m_nexts[i].build(next_flags[i], true, false);
|
||||
for (std::uint32_t i = 0; i < m_num_levels; ++i) {
|
||||
m_bytes[i].build(bytes[i]);
|
||||
m_nexts[i] = bit_vector(next_flags[i], true, false);
|
||||
}
|
||||
m_bytes[m_num_levels].steal(bytes[m_num_levels]);
|
||||
m_links.build(links);
|
||||
m_leaves.build(leaves, true, false);
|
||||
m_bytes[m_num_levels].build(bytes[m_num_levels]);
|
||||
m_links = compact_vector(links);
|
||||
m_leaves = bit_vector(leaves, true, false);
|
||||
}
|
||||
|
||||
inline std::uint64_t base(std::uint64_t i) const {
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <numeric>
|
||||
|
||||
#include "essentials/essentials.hpp"
|
||||
#include <vector>
|
||||
|
||||
#include "bit_tools.hpp"
|
||||
#include "immutable_vector.hpp"
|
||||
|
||||
namespace xcdat {
|
||||
|
||||
//! Rank9 implementation
|
||||
class bit_vector {
|
||||
public:
|
||||
class builder {
|
||||
|
@ -56,12 +52,12 @@ class bit_vector {
|
|||
}
|
||||
|
||||
inline void resize(std::uint64_t size) {
|
||||
m_bits.resize(essentials::words_for(size), 0ULL);
|
||||
m_bits.resize(words_for(size), 0ULL);
|
||||
m_size = size;
|
||||
}
|
||||
|
||||
inline void reserve(std::uint64_t capacity) {
|
||||
m_bits.reserve(essentials::words_for(capacity));
|
||||
m_bits.reserve(words_for(capacity));
|
||||
}
|
||||
|
||||
inline std::uint64_t size() const {
|
||||
|
@ -92,17 +88,10 @@ class bit_vector {
|
|||
bit_vector& operator=(bit_vector&&) noexcept = default;
|
||||
|
||||
explicit bit_vector(builder& b, bool enable_rank = false, bool enable_select = false) {
|
||||
build(b, enable_rank, enable_select);
|
||||
}
|
||||
|
||||
void build(builder& b, bool enable_rank = false, bool enable_select = false) {
|
||||
m_bits.steal(b.m_bits);
|
||||
m_bits.build(b.m_bits);
|
||||
m_size = b.m_size;
|
||||
m_num_ones = std::accumulate(m_bits.begin(), m_bits.end(), 0ULL,
|
||||
[](std::uint64_t acc, std::uint64_t x) { return acc + bit_tools::popcount(x); });
|
||||
m_rank_hints.clear();
|
||||
m_select_hints.clear();
|
||||
|
||||
if (enable_rank) {
|
||||
build_rank_hints();
|
||||
}
|
||||
|
@ -172,6 +161,10 @@ class bit_vector {
|
|||
return {x / N, x % N};
|
||||
}
|
||||
|
||||
//! Returns the number of 64-bit words needed to hold nbits bits, i.e. ceil(nbits / 64).
//! The result is computed without forming nbits + 63, so bit counts close to
//! the top of the 64-bit range do not wrap around to a tiny word count.
static std::uint64_t words_for(std::uint64_t nbits) {
    return nbits / 64 + (nbits % 64 != 0 ? 1 : 0);
}
|
||||
|
||||
inline std::uint64_t num_blocks() const {
|
||||
return m_rank_hints.size() / 2 - 1;
|
||||
}
|
||||
|
@ -258,7 +251,7 @@ class bit_vector {
|
|||
}
|
||||
|
||||
// Release
|
||||
m_rank_hints.steal(rank_hints);
|
||||
m_rank_hints.build(rank_hints);
|
||||
}
|
||||
|
||||
void build_select_hints() {
|
||||
|
@ -271,7 +264,7 @@ class bit_vector {
|
|||
}
|
||||
}
|
||||
select_hints.push_back(num_blocks());
|
||||
m_select_hints.steal(select_hints);
|
||||
m_select_hints.build(select_hints);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -29,12 +29,7 @@ class code_table {
|
|||
code_table& operator=(code_table&&) noexcept = default;
|
||||
|
||||
template <class Strings>
|
||||
explicit code_table(const Strings& keys) {
|
||||
build(keys);
|
||||
}
|
||||
|
||||
template <class Strings>
|
||||
void build(const Strings& keys) {
|
||||
code_table(const Strings& keys) {
|
||||
std::array<counter_type, 256> counter;
|
||||
for (std::uint32_t ch = 0; ch < 256; ++ch) {
|
||||
counter[ch] = {static_cast<std::uint8_t>(ch), 0};
|
||||
|
@ -55,7 +50,7 @@ class code_table {
|
|||
alphabet.push_back(cf.ch);
|
||||
}
|
||||
}
|
||||
m_alphabet.steal(alphabet);
|
||||
m_alphabet.build(alphabet);
|
||||
}
|
||||
|
||||
std::sort(counter.begin(), counter.end(),
|
||||
|
|
|
@ -1,14 +1,11 @@
|
|||
#pragma once
|
||||
|
||||
#include "essentials/essentials.hpp"
|
||||
|
||||
#include "bit_tools.hpp"
|
||||
#include "exception.hpp"
|
||||
#include "immutable_vector.hpp"
|
||||
|
||||
namespace xcdat {
|
||||
|
||||
//! A compressed integer vector.
|
||||
class compact_vector {
|
||||
private:
|
||||
std::uint64_t m_size = 0;
|
||||
|
@ -27,21 +24,14 @@ class compact_vector {
|
|||
compact_vector& operator=(compact_vector&&) noexcept = default;
|
||||
|
||||
template <class Vec>
|
||||
explicit compact_vector(const Vec& vec) {
|
||||
build(vec);
|
||||
}
|
||||
|
||||
template <class Vec>
|
||||
void build(const Vec& vec) {
|
||||
compact_vector(const Vec& vec) {
|
||||
XCDAT_THROW_IF(vec.size() == 0, "The input vector is empty.");
|
||||
|
||||
const std::uint64_t maxv = *std::max_element(vec.begin(), vec.end());
|
||||
|
||||
m_size = vec.size();
|
||||
m_bits = needed_bits(maxv);
|
||||
m_bits = needed_bits(*std::max_element(vec.begin(), vec.end()));
|
||||
m_mask = (1ULL << m_bits) - 1;
|
||||
|
||||
std::vector<std::uint64_t> chunks(essentials::words_for(m_size * m_bits));
|
||||
std::vector<std::uint64_t> chunks(words_for(m_size * m_bits));
|
||||
|
||||
for (std::uint64_t i = 0; i < m_size; i++) {
|
||||
const auto [quo, mod] = decompose(i * m_bits);
|
||||
|
@ -53,7 +43,7 @@ class compact_vector {
|
|||
chunks[quo + 1] |= (vec[i] & m_mask) >> diff;
|
||||
}
|
||||
}
|
||||
m_chunks.steal(chunks);
|
||||
m_chunks.build(chunks);
|
||||
}
|
||||
|
||||
inline std::uint64_t operator[](std::uint64_t i) const {
|
||||
|
@ -90,6 +80,10 @@ class compact_vector {
|
|||
//! Splits x into (x / 64, x % 64): the index of a 64-bit word and the
//! position inside that word.
static std::tuple<std::uint64_t, std::uint64_t> decompose(std::uint64_t x) {
    const std::uint64_t word = x >> 6;  // same as x / 64 for unsigned x
    const std::uint64_t offset = x & 63;  // same as x % 64 for unsigned x
    return std::make_tuple(word, offset);
}
|
||||
|
||||
//! Returns how many 64-bit words are required to store nbits bits (rounded up).
//! Uses quotient plus remainder test instead of (nbits + 63) / 64 so that the
//! addition cannot overflow for very large bit counts.
static std::uint64_t words_for(std::uint64_t nbits) {
    const std::uint64_t full_words = nbits / 64;
    const bool has_partial_word = (nbits % 64) != 0;
    return full_words + (has_partial_word ? 1 : 0);
}
|
||||
};
|
||||
|
||||
} // namespace xcdat
|
|
@ -1,679 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include <dirent.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <locale>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __GNUG__
|
||||
#include <cxxabi.h> // for name demangling
|
||||
#endif
|
||||
|
||||
namespace essentials {
|
||||
|
||||
// Writes "<local date> <local time>: <msg>" followed by a newline to std::cout
// and flushes the stream.
//
// The timestamp is formatted with "%F %T" through the time_put facet of the
// current global locale. If the local time cannot be obtained the timestamp
// is omitted and only ": <msg>" is printed.
//
// Declared inline because the definition lives in a header; without it every
// translation unit including the header would emit its own external
// definition and linking would fail.
inline void logger(std::string const& msg) {
    time_t t = std::time(nullptr);
    // std::localtime may return a null pointer; never pass that on to put().
    if (const std::tm* local = std::localtime(&t)) {
        std::locale loc;
        const std::time_put<char>& tp = std::use_facet<std::time_put<char>>(loc);
        const char* fmt = "%F %T";
        tp.put(std::cout, std::cout, ' ', local, fmt, fmt + strlen(fmt));
    }
    std::cout << ": " << msg << std::endl;
}
|
||||
|
||||
static const uint64_t GB = 1000 * 1000 * 1000;
|
||||
static const uint64_t GiB = uint64_t(1) << 30;
|
||||
static const uint64_t MB = 1000 * 1000;
|
||||
static const uint64_t MiB = uint64_t(1) << 20;
|
||||
static const uint64_t KB = 1000;
|
||||
static const uint64_t KiB = uint64_t(1) << 10;
|
||||
|
||||
// Expresses a byte count in multiples of `unit` (for example one of the
// KB/MiB/GB constants of this header) as a floating point value.
//
// Declared inline because it is defined in a header and would otherwise be
// defined once per including translation unit.
inline double convert(size_t bytes, uint64_t unit) {
    return static_cast<double>(bytes) / unit;
}
|
||||
|
||||
// Size accounted for a vector-like container: the bytes of its elements plus
// one value of the container's size_type.
template <typename T>
size_t vec_bytes(T const& vec) {
    // sizeof is unevaluated, so front() is never actually called here.
    const size_t payload = vec.size() * sizeof(vec.front());
    const size_t length_field = sizeof(typename T::size_type);
    return payload + length_field;
}
|
||||
|
||||
// Size in bytes of a POD value; rejects non-POD types at compile time.
template <typename T>
size_t pod_bytes(T const& pod) {
    static_assert(std::is_pod<T>::value);
    static_cast<void>(pod);  // only the static type of the argument matters
    return sizeof(T);
}
|
||||
|
||||
// Returns the size in bytes of the file `filename`, determined by opening it
// in binary mode positioned at its end and reading the stream position.
//
// Throws std::runtime_error if the file cannot be opened or if the stream
// position cannot be obtained (tellg() reports failure as position -1, which
// must not be converted into a huge size_t).
//
// Declared inline because the definition lives in a header.
inline size_t file_size(char const* filename) {
    std::ifstream is(filename, std::ios::binary | std::ios::ate);
    if (!is.good()) {
        throw std::runtime_error("Error in opening binary "
                                 "file.");
    }
    const std::ifstream::pos_type end_pos = is.tellg();
    if (end_pos == std::ifstream::pos_type(-1)) {
        throw std::runtime_error("Error in reading the size of binary file.");
    }
    // The stream is closed by its destructor.
    return static_cast<size_t>(end_pos);
}
|
||||
|
||||
// Returns the number of WordType-sized words needed to hold `bits` bits,
// rounding up. Computed as quotient plus a remainder test so that the
// intermediate value cannot wrap around for bit counts near 2^64.
template <typename WordType = uint64_t>
uint64_t words_for(uint64_t bits) {
    constexpr uint64_t word_bits = sizeof(WordType) * 8;
    return bits / word_bits + (bits % word_bits != 0 ? 1 : 0);
}
|
||||
|
||||
// Passes `value` through an empty extended-asm statement as a read-write
// register operand ("+r"). The statement emits no instructions; it is marked
// volatile so the compiler keeps it.
// Presumably intended as a benchmarking aid that stops the optimizer from
// discarding the computation of `value` - confirm against callers.
// NOTE(review): relies on GCC-style extended asm, and the "r" constraint
// assumes the value fits in a general purpose register.
template <typename T>
inline void do_not_optimize_away(T&& value) {
    asm volatile("" : "+r"(value));
}
|
||||
|
||||
// Returns the peak resident set size of the calling process in bytes, or 0
// if getrusage() fails.
//
// ru_maxrss is reported in bytes on Apple platforms and in kilobytes of 1024
// bytes elsewhere (Linux), so the non-Apple value is scaled by 1024.
//
// Declared inline because the definition lives in a header.
inline uint64_t maxrss_in_bytes() {
    struct rusage ru;
    if (getrusage(RUSAGE_SELF, &ru) != 0) {
        return 0;
    }
#ifdef __APPLE__
    return static_cast<uint64_t>(ru.ru_maxrss);
#else
    return static_cast<uint64_t>(ru.ru_maxrss) * 1024;
#endif
}
|
||||
|
||||
// Fills `val` with sizeof(T) raw bytes taken from `is`. Only POD types are
// accepted (checked at compile time).
template <typename T>
void load_pod(std::istream& is, T& val) {
    static_assert(std::is_pod<T>::value);
    char* const raw = reinterpret_cast<char*>(&val);
    is.read(raw, sizeof(T));
}
|
||||
|
||||
template <typename T, typename Allocator>
|
||||
void load_vec(std::istream& is, std::vector<T, Allocator>& vec) {
|
||||
size_t n;
|
||||
load_pod(is, n);
|
||||
vec.resize(n);
|
||||
is.read(reinterpret_cast<char*>(vec.data()), static_cast<std::streamsize>(sizeof(T) * n));
|
||||
}
|
||||
|
||||
// Emits the sizeof(T) raw bytes of `val` to `os`. Only POD types are accepted
// (checked at compile time).
template <typename T>
void save_pod(std::ostream& os, T const& val) {
    static_assert(std::is_pod<T>::value);
    char const* const raw = reinterpret_cast<char const*>(&val);
    os.write(raw, sizeof(T));
}
|
||||
|
||||
template <typename T, typename Allocator>
|
||||
void save_vec(std::ostream& os, std::vector<T, Allocator> const& vec) {
|
||||
static_assert(std::is_pod<T>::value);
|
||||
size_t n = vec.size();
|
||||
save_pod(os, n);
|
||||
os.write(reinterpret_cast<char const*>(vec.data()), static_cast<std::streamsize>(sizeof(T) * n));
|
||||
}
|
||||
|
||||
// Collects rows of (name, value) string pairs and prints each row as one
// line of the form {"name": "value", ...}.
//
// Values are stored as strings and always printed inside double quotes.
// Names and values are written verbatim; no JSON escaping is applied.
struct json_lines {
    // A single name/value pair of a row.
    struct property {
        property(std::string n, std::string v) : name(n), value(v) {}

        std::string name;
        std::string value;
    };

    // Starts a new, empty row; subsequent add() calls append to it.
    void new_line() {
        m_properties.push_back(std::vector<property>());
    }

    // Appends a property to the current row, starting the first row on demand.
    // Values convertible to std::string (string literals, char pointers,
    // std::string) are stored as they are; every other value is converted
    // with std::to_string.
    template <typename T>
    void add(std::string name, T value) {
        if (m_properties.empty()) {
            new_line();
        }
        if constexpr (std::is_convertible<T, std::string>::value) {
            m_properties.back().emplace_back(name, std::string(value));
        } else {
            m_properties.back().emplace_back(name, std::to_string(value));
        }
    }

    // Writes all rows to the file `filename`.
    void save_to_file(char const* filename) const {
        std::ofstream out(filename);
        print_to(out);
        out.close();
    }

    // Prints the most recent row to std::cerr; does nothing when no row exists
    // (calling back() on the empty container would be undefined behaviour).
    void print_line() const {
        if (m_properties.empty()) {
            return;
        }
        print_line_to(m_properties.back(), std::cerr);
    }

    // Prints all rows to std::cerr.
    void print() const {
        print_to(std::cerr);
    }

private:
    std::vector<std::vector<property>> m_properties;

    // Writes one row to `device`, separating properties with ", ".
    template <typename T>
    void print_line_to(std::vector<property> const& properties, T& device) const {
        device << "{";
        for (uint64_t i = 0; i != properties.size(); ++i) {
            auto const& p = properties[i];
            device << "\"" << p.name << "\": \"" << p.value << "\"";
            if (i != properties.size() - 1) {
                device << ", ";
            }
        }
        device << "}\n";
    }

    // Writes every row to `device`, one per line.
    template <typename T>
    void print_to(T& device) const {
        for (auto const& properties : m_properties) {
            print_line_to(properties, device);
        }
    }
};
|
||||
|
||||
// Stopwatch that records the duration of every start()/stop() pair.
//
// Each measurement is taken with ClockType, converted to DurationType and
// stored as the tick count of that duration (as a double). The statistics
// below operate on the recorded measurements; with no measurements recorded
// min(), max(), elapsed() and average() all return 0.0.
template <typename ClockType, typename DurationType>
struct timer {
    // Marks the beginning of a measurement.
    void start() {
        m_start = ClockType::now();
    }

    // Ends the current measurement and records the time since start().
    void stop() {
        m_stop = ClockType::now();
        auto elapsed = std::chrono::duration_cast<DurationType>(m_stop - m_start);
        m_timings.push_back(elapsed.count());
    }

    // Number of recorded measurements.
    size_t runs() const {
        return m_timings.size();
    }

    // Discards all recorded measurements.
    void reset() {
        m_timings.clear();
    }

    // Smallest recorded measurement (0.0 when nothing was recorded).
    double min() const {
        if (m_timings.empty()) {
            return 0.0;
        }
        return *std::min_element(m_timings.begin(), m_timings.end());
    }

    // Largest recorded measurement (0.0 when nothing was recorded).
    double max() const {
        if (m_timings.empty()) {
            return 0.0;
        }
        return *std::max_element(m_timings.begin(), m_timings.end());
    }

    // Drops the oldest measurement, if any.
    void discard_first() {
        if (runs()) {
            m_timings.erase(m_timings.begin());
        }
    }

    // Drops the smallest measurement, but only if more than one exists.
    void discard_min() {
        if (runs() > 1) {
            m_timings.erase(std::min_element(m_timings.begin(), m_timings.end()));
        }
    }

    // Drops the largest measurement, but only if more than one exists.
    void discard_max() {
        if (runs() > 1) {
            m_timings.erase(std::max_element(m_timings.begin(), m_timings.end()));
        }
    }

    // Sum of all recorded measurements.
    double elapsed() const {
        return std::accumulate(m_timings.begin(), m_timings.end(), 0.0);
    }

    // Mean of the recorded measurements; 0.0 instead of NaN when there are none.
    double average() const {
        if (m_timings.empty()) {
            return 0.0;
        }
        return elapsed() / runs();
    }

private:
    typename ClockType::time_point m_start;
    typename ClockType::time_point m_stop;
    std::vector<double> m_timings;
};
|
||||
|
||||
typedef std::chrono::high_resolution_clock clock_type;
|
||||
typedef std::chrono::microseconds duration_type;
|
||||
typedef timer<clock_type, duration_type> timer_type;
|
||||
|
||||
// Derives a seed from the current system_clock time: the tick count since
// the clock's epoch, truncated to unsigned.
//
// The truncation is made explicit, and the function is declared inline
// because its definition lives in a header.
inline unsigned get_random_seed() {
    const auto ticks = std::chrono::system_clock::now().time_since_epoch().count();
    return static_cast<unsigned>(ticks);
}
|
||||
|
||||
// Source of uniformly distributed integers in [from, to], driven by a
// 64-bit Mersenne Twister seeded with `seed` (13 unless specified).
template <typename IntType>
struct uniform_int_rng {
    uniform_int_rng(IntType from, IntType to, unsigned seed = 13)
        : m_rng(seed)
        , m_distr(from, to) {}

    // Draws the next value from the distribution.
    IntType gen() {
        const IntType drawn = m_distr(m_rng);
        return drawn;
    }

private:
    std::mt19937_64 m_rng;
    std::uniform_int_distribution<IntType> m_distr;
};
|
||||
|
||||
// Visitor that fills a data structure from a binary file.
//
// POD values are read as raw bytes; vectors are read as a size_t element
// count followed by their elements; any other type is asked to visit() this
// loader itself. The loader keeps separate tallies of the bytes read for
// single POD values and for the payloads of POD vectors.
struct loader {
    // Opens `filename` in binary mode; throws std::runtime_error on failure.
    loader(char const* filename) : m_num_bytes_pods(0), m_num_bytes_vecs_of_pods(0), m_is(filename, std::ios::binary) {
        if (m_is.good()) {
            return;
        }
        throw std::runtime_error("Error in opening binary "
                                 "file.");
    }

    ~loader() {
        m_is.close();
    }

    // Reads one value: raw bytes for POD types, delegation for everything else.
    template <typename T>
    void visit(T& val) {
        if constexpr (!std::is_pod<T>::value) {
            val.visit(*this);
        } else {
            load_pod(m_is, val);
            m_num_bytes_pods += pod_bytes(val);
        }
    }

    // Reads a vector: element count first, then the elements.
    template <typename T, typename Allocator>
    void visit(std::vector<T, Allocator>& vec) {
        size_t count;
        visit(count);
        vec.resize(count);
        if constexpr (std::is_pod<T>::value) {
            // POD elements arrive as one contiguous byte block.
            const auto payload = static_cast<std::streamsize>(sizeof(T) * count);
            m_is.read(reinterpret_cast<char*>(vec.data()), payload);
            m_num_bytes_vecs_of_pods += count * sizeof(T);
        } else {
            for (auto& element : vec) {
                visit(element);
            }
        }
    }

    // Current read position of the underlying stream.
    size_t bytes() {
        return m_is.tellg();
    }

    // Bytes read so far for single POD values (vector counts included).
    size_t bytes_pods() {
        return m_num_bytes_pods;
    }

    // Bytes read so far for the element payloads of POD vectors.
    size_t bytes_vecs_of_pods() {
        return m_num_bytes_vecs_of_pods;
    }

private:
    size_t m_num_bytes_pods;
    size_t m_num_bytes_vecs_of_pods;
    std::ifstream m_is;
};
|
||||
|
||||
// Visitor that writes a data structure to a binary file.
//
// POD values are written as raw bytes, POD vectors through save_vec, vectors
// of other types as a size_t element count followed by each element, and any
// other type is asked to visit() this saver itself.
struct saver {
    // Opens `filename` in binary mode; throws std::runtime_error on failure.
    saver(char const* filename) : m_os(filename, std::ios::binary) {
        if (m_os.good()) {
            return;
        }
        throw std::runtime_error("Error in opening binary "
                                 "file.");
    }

    ~saver() {
        m_os.close();
    }

    // Writes one value: raw bytes for POD types, delegation for everything else.
    template <typename T>
    void visit(T& val) {
        if constexpr (!std::is_pod<T>::value) {
            val.visit(*this);
        } else {
            save_pod(m_os, val);
        }
    }

    // Writes a vector: element count first, then the elements.
    template <typename T, typename Allocator>
    void visit(std::vector<T, Allocator>& vec) {
        if constexpr (!std::is_pod<T>::value) {
            size_t count = vec.size();
            visit(count);
            for (auto& element : vec) {
                visit(element);
            }
        } else {
            save_vec(m_os, vec);
        }
    }

    // Current write position of the underlying stream.
    size_t bytes() {
        return m_os.tellp();
    }

private:
    std::ofstream m_os;
};
|
||||
|
||||
// Returns the human readable form of a mangled C++ symbol or type name as
// produced by typeid(...).name().
//
// When the name cannot be demangled (or the compiler does not provide
// <cxxabi.h>, which this header only includes under __GNUG__), the input is
// returned unchanged instead of constructing a std::string from a null
// pointer.
//
// Declared inline because the definition lives in a header.
inline std::string demangle(char const* mangled_name) {
#ifdef __GNUG__
    int status = 0;
    // The returned buffer is malloc-allocated and must be released with free.
    std::unique_ptr<char, decltype(&std::free)> ptr(__cxxabiv1::__cxa_demangle(mangled_name, nullptr, nullptr, &status),
                                                    &std::free);
    if (status == 0 && ptr != nullptr) {
        return ptr.get();
    }
#endif
    return mangled_name;
}
|
||||
|
||||
// Visitor that records the size of a data structure as a tree of named
// nodes. Each node stores a byte count, a depth and a label; print() writes
// the tree with the share of every node relative to a total.
//
// NOTE(review): m_current points into the object's own node tree (initially
// at m_root), so a copied or moved sizer would still refer to the source
// object's nodes - confirm it is never copied.
struct sizer {
    // Creates the root node (0 bytes, depth 0, labelled root_name) and makes
    // it the current node.
    sizer(std::string const& root_name = "") : m_root(0, 0, root_name), m_current(&m_root) {}

    // One entry of the size tree.
    struct node {
        node(size_t b, size_t d, std::string const& n = "") : bytes(b), depth(d), name(n) {}

        size_t bytes;                // bytes attributed to this node
        size_t depth;                // nesting level, used for indentation when printing
        std::string name;            // label shown by print()
        std::vector<node> children;  // nodes recorded below this one
    };

    // Records one value. A POD value becomes a child of the current node,
    // labelled with its demangled type name, and its size is added to the
    // current node. Any other type is asked to visit() this sizer itself.
    template <typename T>
    void visit(T& val) {
        if constexpr (std::is_pod<T>::value) {
            node n(pod_bytes(val), m_current->depth + 1, demangle(typeid(T).name()));
            m_current->children.push_back(n);
            m_current->bytes += n.bytes;
        } else {
            val.visit(*this);
        }
    }

    // Records a vector. A vector of PODs becomes a single child sized by
    // vec_bytes(). For other element types the element count is charged to
    // the current node and every element gets its own child node, which is
    // made current while that element is visited.
    template <typename T, typename Allocator>
    void visit(std::vector<T, Allocator>& vec) {
        if constexpr (std::is_pod<T>::value) {
            node n(vec_bytes(vec), m_current->depth + 1, demangle(typeid(std::vector<T>).name()));
            m_current->children.push_back(n);
            m_current->bytes += n.bytes;
        } else {
            size_t n = vec.size();
            m_current->bytes += pod_bytes(n);
            node* parent = m_current;
            for (auto& v : vec) {
                // This `n` shadows the element count declared above.
                node n(0, parent->depth + 1, demangle(typeid(T).name()));
                parent->children.push_back(n);
                // Re-point at the stored copy after every push_back, since
                // growing `children` may relocate its elements.
                m_current = &parent->children.back();
                visit(v);
                // Propagate what the element accumulated up to the parent.
                parent->bytes += m_current->bytes;
            }
            m_current = parent;
        }
    }

    // Prints node `n` and, recursively, its children to `device`. Every line
    // shows the label, the byte count and its percentage of total_bytes.
    // The indentation is four spaces per depth level; a child line is
    // additionally preceded by the indentation of its parent.
    template <typename Device>
    void print(node const& n, size_t total_bytes, Device& device) const {
        auto indent = std::string(n.depth * 4, ' ');
        device << indent << "'" << n.name << "' - bytes = " << n.bytes << " (" << n.bytes * 100.0 / total_bytes << "%)"
               << std::endl;
        for (auto const& child : n.children) {
            device << indent;
            print(child, total_bytes, device);
        }
    }

    // Prints the whole tree, using the root's byte count as the total.
    template <typename Device>
    void print(Device& device) const {
        print(m_root, bytes(), device);
    }

    // Total number of bytes recorded at the root.
    size_t bytes() const {
        return m_root.bytes;
    }

private:
    node m_root;      // root of the size tree
    node* m_current;  // node that currently receives children and bytes
};
|
||||
|
||||
// Allocator that either behaves like std::allocator<T> or hands out one
// fixed, externally provided address.
//
// With a null address (the default) allocate()/deallocate() forward to
// std::allocator<T>. With a non-null address allocate() returns that address
// for every request, regardless of n, and deallocate() does nothing, so the
// memory behind the address is never released by this allocator.
// NOTE(review): the fixed-address mode does not check that n elements fit
// behind m_addr; presumably the caller sizes the buffer - confirm.
template <typename T>
struct allocator : std::allocator<T> {
    typedef T value_type;

    // Default mode: plain std::allocator behaviour.
    allocator() : m_addr(nullptr) {}

    // Fixed-address mode: every allocation returns addr.
    allocator(T* addr) : m_addr(addr) {}

    T* allocate(size_t n) {
        if (m_addr == nullptr) return std::allocator<T>::allocate(n);
        return m_addr;
    }

    void deallocate(T* p, size_t n) {
        // Memory handed out in fixed-address mode is not owned here.
        if (m_addr == nullptr) return std::allocator<T>::deallocate(p, n);
    }

private:
    T* m_addr;  // fixed address to hand out, or nullptr for default behaviour
};
|
||||
|
||||
// Loads a data structure from a file so that the payloads of its POD vectors
// are placed one after another in a single malloc-ed buffer owned by this
// object.
//
// NOTE(review): the buffer is released with free() in the destructor, but
// the type is copyable and allocate() overwrites m_begin without freeing a
// previous buffer - confirm it is neither copied nor reused.
struct contiguous_memory_allocator {
    contiguous_memory_allocator() : m_begin(nullptr), m_end(nullptr), m_size(0) {}

    // Second-pass visitor: re-reads the file and gives every POD vector an
    // allocator that points at the next free position of the buffer.
    struct visitor {
        // `begin`/`size` describe the buffer to fill; `filename` is opened in
        // binary mode. Throws std::runtime_error if the file cannot be opened.
        visitor(uint8_t* begin, size_t size, char const* filename)
            : m_begin(begin), m_end(begin), m_size(size), m_is(filename, std::ios::binary) {
            if (!m_is.good()) {
                throw std::runtime_error("Error in opening binary "
                                         "file.");
            }
        }

        ~visitor() {
            m_is.close();
        }

        // Reads one value: raw bytes for POD types, delegation otherwise.
        template <typename T>
        void visit(T& val) {
            if constexpr (std::is_pod<T>::value) {
                load_pod(m_is, val);
            } else {
                val.visit(*this);
            }
        }

        // Reads a vector. A POD vector is replaced by an empty vector whose
        // allocator targets the current end of the buffer, then loaded with
        // load_vec, after which the buffer position advances by the payload
        // size. Other vectors are read element by element.
        template <typename T, typename Allocator>
        void visit(std::vector<T, Allocator>& vec) {
            if constexpr (std::is_pod<T>::value) {
                vec = std::vector<T, Allocator>(make_allocator<T>());
                load_vec(m_is, vec);
                consume(vec.size() * sizeof(T));
            } else {
                size_t n;
                visit(n);
                vec.resize(n);
                for (auto& v : vec) visit(v);
            }
        }

        // First unused byte of the buffer.
        uint8_t* end() {
            return m_end;
        }

        // Capacity of the buffer in bytes.
        size_t size() const {
            return m_size;
        }

        // Number of buffer bytes handed out so far.
        size_t allocated() const {
            assert(m_end >= m_begin);
            return m_end - m_begin;
        }

        // Allocator that returns the current end of the buffer.
        template <typename T>
        allocator<T> make_allocator() {
            return allocator<T>(reinterpret_cast<T*>(m_end));
        }

        // Marks num_bytes of the buffer as used; throws std::runtime_error if
        // that would exceed the capacity. Does nothing without a buffer.
        void consume(size_t num_bytes) {
            if (m_end == nullptr) return;
            if (allocated() + num_bytes > size()) {
                throw std::runtime_error("allocation failed");
            }
            m_end += num_bytes;
        }

    private:
        uint8_t* m_begin;    // start of the buffer
        uint8_t* m_end;      // next free byte of the buffer
        size_t m_size;       // capacity of the buffer in bytes
        std::ifstream m_is;  // file being read
    };

    // Loads data_structure from `filename` in two passes: a plain loader pass
    // determines the total payload size of all POD vectors, a buffer of that
    // size is malloc-ed, and a visitor pass reads the file again placing the
    // payloads into the buffer. Returns the stream position reported by the
    // first pass. Throws std::runtime_error if malloc returns a null pointer.
    // NOTE(review): malloc(0) may legitimately return a null pointer, which
    // would throw for a structure without POD vector payload - confirm.
    template <typename T>
    size_t allocate(T& data_structure, char const* filename) {
        loader l(filename);
        l.visit(data_structure);
        m_size = l.bytes_vecs_of_pods();
        m_begin = reinterpret_cast<uint8_t*>(malloc(m_size));
        if (m_begin == nullptr) throw std::runtime_error("malloc failed");
        visitor v(m_begin, m_size, filename);
        v.visit(data_structure);
        m_end = v.end();
        return l.bytes();
    }

    ~contiguous_memory_allocator() {
        free(m_begin);
    }

    // Start of the owned buffer (nullptr before allocate()).
    uint8_t* begin() {
        return m_begin;
    }

    // End of the used part of the owned buffer.
    uint8_t* end() {
        return m_end;
    }

    // Capacity of the owned buffer in bytes.
    size_t size() const {
        return m_size;
    }

private:
    uint8_t* m_begin;  // owned buffer, released with free()
    uint8_t* m_end;    // end of the used part of the buffer
    size_t m_size;     // capacity of the buffer in bytes
};
|
||||
|
||||
// Constructs a Visitor for `filename`, lets it visit data_structure and
// returns the byte count the visitor reports afterwards.
template <typename T, typename Visitor>
size_t visit(T& data_structure, char const* filename) {
    Visitor v(filename);
    v.visit(data_structure);
    const size_t processed = v.bytes();
    return processed;
}
|
||||
|
||||
template <typename T>
|
||||
size_t load(T& data_structure, char const* filename) {
|
||||
return visit<T, loader>(data_structure, filename);
|
||||
}
|
||||
|
||||
// Loads data_structure from `filename` through the allocator object the
// structure exposes via get_allocator(), returning whatever its allocate()
// call returns.
template <typename T>
size_t load_with_custom_memory_allocation(T& data_structure, char const* filename) {
    auto&& memory = data_structure.get_allocator();
    return memory.allocate(data_structure, filename);
}
|
||||
|
||||
template <typename T>
|
||||
size_t save(T& data_structure, char const* filename) {
|
||||
return visit<T, saver>(data_structure, filename);
|
||||
}
|
||||
|
||||
template <typename T, typename Device>
|
||||
size_t print_size(T& data_structure, Device& device) {
|
||||
sizer visitor(demangle(typeid(T).name()));
|
||||
visitor.visit(data_structure);
|
||||
visitor.print(device);
|
||||
return visitor.bytes();
|
||||
}
|
||||
|
||||
#if defined(__CYGWIN__) || defined(_WIN32) || defined(_WIN64)
|
||||
#else
|
||||
// Alphabetically sorted listing of the entries of a directory, obtained with
// scandir() when the object is constructed.
//
// The object owns the entry array returned by scandir() and frees it in the
// destructor. Copying is disabled because a copy would free the same array a
// second time.
struct directory {
    // Description of one directory entry.
    struct file_name {
        std::string name;       // entry name as reported by scandir()
        std::string fullpath;   // "<directory name>/<entry name>"
        std::string extension;  // text after the last '.', empty if there is none
    };

    ~directory() {
        for (int i = 0; i != items(); ++i) {
            free(m_items_names[i]);
        }
        free(m_items_names);
    }

    // Scans the directory `name`; throws std::runtime_error if scandir() fails.
    directory(std::string const& name) : m_name(name), m_items_names(nullptr), m_n(0) {
        m_n = scandir(m_name.c_str(), &m_items_names, nullptr, alphasort);
        if (m_n < 0) {
            throw std::runtime_error("error during scandir");
        }
    }

    directory(directory const&) = delete;
    directory& operator=(directory const&) = delete;

    // Name the directory was opened with.
    std::string const& name() const {
        return m_name;
    }

    // Number of entries found by scandir().
    int items() const {
        return m_n;
    }

    // Forward iterator over the entries; dereferencing builds a file_name.
    struct iterator {
        iterator(directory const* d, int i) : m_d(d), m_i(i) {}

        file_name operator*() {
            file_name fn;
            fn.name = m_d->m_items_names[m_i]->d_name;
            fn.fullpath = m_d->name() + "/" + fn.name;
            // A name without any '.' has no extension. Without this check
            // npos + 1 wraps around to 0 and the whole name would be
            // reported as the extension.
            size_t p = fn.name.find_last_of(".");
            if (p != std::string::npos) {
                fn.extension = fn.name.substr(p + 1);
            }
            return fn;
        }

        void operator++() {
            ++m_i;
        }

        // Iterators are compared by position only.
        bool operator==(iterator const& rhs) const {
            return m_i == rhs.m_i;
        }

        bool operator!=(iterator const& rhs) const {
            return !(*this == rhs);
        }

    private:
        directory const* m_d;  // listing being iterated
        int m_i;               // index of the current entry
    };

    iterator begin() {
        return iterator(this, 0);
    }

    iterator end() {
        return iterator(this, items());
    }

private:
    std::string m_name;             // directory name passed to the constructor
    struct dirent** m_items_names;  // entry array allocated by scandir()
    int m_n;                        // number of entries in m_items_names
};
|
||||
#endif
|
||||
|
||||
// Creates the directory `name` with mode 0777 (subject to the process umask)
// and returns true on success. On failure returns false; if the failure was
// caused by an already existing entry a notice is printed to std::cerr.
//
// Declared inline because the definition lives in a header.
inline bool create_directory(std::string const& name) {
    if (mkdir(name.c_str(), 0777) != 0) {
        if (errno == EEXIST) {
            std::cerr << "directory already exists" << std::endl;
        }
        return false;
    }
    return true;
}
|
||||
|
||||
// Removes the directory `name` with rmdir() and returns whether that
// succeeded.
//
// Declared inline because the definition lives in a header.
inline bool remove_directory(std::string const& name) {
    return rmdir(name.c_str()) == 0;
}
|
||||
|
||||
} // namespace essentials
|
|
@ -1,14 +1,20 @@
|
|||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
|
||||
namespace xcdat {
|
||||
|
||||
template <class T>
|
||||
class immutable_vector {
|
||||
private:
|
||||
std::vector<T> m_vec;
|
||||
std::unique_ptr<T[]> m_allocator;
|
||||
std::uint64_t m_size = 0;
|
||||
const T* m_data = nullptr;
|
||||
|
||||
public:
|
||||
immutable_vector() = default;
|
||||
|
@ -20,54 +26,81 @@ class immutable_vector {
|
|||
immutable_vector(immutable_vector&&) noexcept = default;
|
||||
immutable_vector& operator=(immutable_vector&&) noexcept = default;
|
||||
|
||||
explicit immutable_vector(std::vector<T>&& vec) {
|
||||
steal(vec);
|
||||
void clear() {
|
||||
m_allocator.reset();
|
||||
m_size = 0;
|
||||
m_data = nullptr;
|
||||
}
|
||||
|
||||
void steal(std::vector<T>& vec) {
|
||||
template <class Vector>
|
||||
immutable_vector(const Vector& vec) {
|
||||
build(vec);
|
||||
}
|
||||
|
||||
template <class Vector>
|
||||
void build(const Vector& vec) {
|
||||
clear();
|
||||
if (vec.size() != 0) {
|
||||
m_vec = std::move(vec);
|
||||
m_vec.shrink_to_fit();
|
||||
} else {
|
||||
clear();
|
||||
m_allocator = std::make_unique<T[]>(vec.size());
|
||||
std::copy_n(vec.data(), vec.size(), m_allocator.get());
|
||||
m_size = vec.size();
|
||||
m_data = m_allocator.get();
|
||||
}
|
||||
}
|
||||
|
||||
void clear() {
|
||||
*this = immutable_vector<T>();
|
||||
std::uint64_t mmap(const char* address) {
|
||||
clear();
|
||||
m_size = *reinterpret_cast<const std::uint64_t*>(address);
|
||||
m_data = reinterpret_cast<const T*>(address + sizeof(std::uint64_t));
|
||||
return sizeof(std::uint64_t) + m_size * sizeof(T);
|
||||
}
|
||||
|
||||
void load(std::ifstream& ifs) {
|
||||
clear();
|
||||
ifs.read(reinterpret_cast<char*>(&m_size), sizeof(m_size));
|
||||
if (m_size != 0) {
|
||||
m_allocator = std::make_unique<T[]>(m_size);
|
||||
ifs.read(reinterpret_cast<char*>(m_allocator.get()), sizeof(T) * m_size);
|
||||
m_data = m_allocator.get();
|
||||
}
|
||||
}
|
||||
|
||||
//! Writes the element count followed by the raw bytes of the elements to ofs.
void save(std::ofstream& ofs) const {
    const char* const header = reinterpret_cast<const char*>(&m_size);
    const char* const payload = reinterpret_cast<const char*>(m_data);
    ofs.write(header, sizeof(m_size));
    ofs.write(payload, sizeof(T) * m_size);
}
|
||||
|
||||
inline std::uint64_t memory_in_bytes() const {
|
||||
return sizeof(m_size) + sizeof(T) * m_size;
|
||||
}
|
||||
|
||||
inline std::uint64_t size() const {
|
||||
return m_vec.size();
|
||||
return m_size;
|
||||
}
|
||||
|
||||
inline auto begin() const {
|
||||
return m_vec.begin();
|
||||
inline const T* begin() const {
|
||||
return m_data;
|
||||
}
|
||||
|
||||
inline auto end() const {
|
||||
return m_vec.end();
|
||||
inline const T* end() const {
|
||||
return m_data + m_size;
|
||||
}
|
||||
|
||||
inline auto rbegin() const {
|
||||
return m_vec.rbegin();
|
||||
return std::make_reverse_iterator(end());
|
||||
}
|
||||
|
||||
inline auto rend() const {
|
||||
return m_vec.rend();
|
||||
return std::make_reverse_iterator(begin());
|
||||
}
|
||||
|
||||
inline const T& operator[](std::uint64_t i) const {
|
||||
return m_vec[i];
|
||||
assert(i < m_size);
|
||||
return m_data[i];
|
||||
}
|
||||
|
||||
inline const T* data() const {
|
||||
return m_vec.data();
|
||||
}
|
||||
|
||||
template <class Visitor>
|
||||
void visit(Visitor& visitor) {
|
||||
visitor.visit(m_vec);
|
||||
return m_data;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -19,4 +19,32 @@ namespace xcdat::io {
|
|||
return strs;
|
||||
}
|
||||
|
||||
//! Reads sizeof(T) raw bytes from `is` into `val`; T must be a POD type.
template <typename T>
void load_pod(std::istream& is, T& val) {
    static_assert(std::is_pod<T>::value);
    void* const target = &val;
    is.read(static_cast<char*>(target), sizeof(T));
}
|
||||
|
||||
//! Read a vector stored as an element count (size_t) followed by the raw
//! element bytes, i.e. the layout produced by save_vec.
//!
//! T must be POD because the elements are filled by a raw byte copy. If the
//! count cannot be read, vec is cleared and the function returns with the
//! stream's fail state set, instead of resizing to an indeterminate count.
template <typename T, typename Allocator>
void load_vec(std::istream& is, std::vector<T, Allocator>& vec) {
    static_assert(std::is_pod<T>::value);

    size_t n = 0;
    is.read(reinterpret_cast<char*>(&n), sizeof(n));
    if (!is) {
        vec.clear();
        return;
    }

    vec.resize(n);
    is.read(reinterpret_cast<char*>(vec.data()), static_cast<std::streamsize>(sizeof(T) * n));
}
|
||||
|
||||
//! Write the sizeof(T) raw bytes of val to the stream (T must be POD).
template <typename T>
void save_pod(std::ostream& os, T const& val) {
    static_assert(std::is_pod_v<T>);
    char const* const raw = reinterpret_cast<char const*>(&val);
    os.write(raw, sizeof(T));
}
|
||||
|
||||
//! Write a vector as its element count (size_t) followed by the raw bytes
//! of its elements (T must be POD).
template <typename T, typename Allocator>
void save_vec(std::ostream& os, std::vector<T, Allocator> const& vec) {
    static_assert(std::is_pod_v<T>);

    // Header: the number of elements.
    const size_t count = vec.size();
    os.write(reinterpret_cast<char const*>(&count), sizeof(count));

    // Payload: the elements themselves.
    const auto payload_bytes = static_cast<std::streamsize>(sizeof(T) * count);
    os.write(reinterpret_cast<char const*>(vec.data()), payload_bytes);
}
|
||||
|
||||
} // namespace xcdat::io
|
||||
|
|
43
include/xcdat/load_visitor.hpp
Normal file
43
include/xcdat/load_visitor.hpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once

#include <cstdint>
#include <fstream>
#include <string>
#include <string_view>
#include <type_traits>

#include "exception.hpp"
#include "immutable_vector.hpp"

namespace xcdat {

//! Visitor that fills an object from a binary file.
//!
//! POD members are read as raw bytes, immutable_vector members load
//! themselves from the stream, and any other type is asked to forward its
//! own members through its visit() method.
class load_visitor {
  private:
    std::ifstream m_ifs;

  public:
    //! Open the file in binary mode; throws if it cannot be opened.
    //!
    //! std::ifstream has no constructor taking std::string_view, so the path
    //! is copied into a std::string first (which also guarantees a
    //! null-terminated name).
    load_visitor(std::string_view filepath) : m_ifs(std::string(filepath), std::ios::binary) {
        XCDAT_THROW_IF(!m_ifs.good(), "Cannot open the input file");
    }

    virtual ~load_visitor() {
        m_ifs.close();
    }

    //! Load a vector; throws if the stream reports a read failure.
    template <class T>
    void visit(immutable_vector<T>& vec) {
        vec.load(m_ifs);
        XCDAT_THROW_IF(m_ifs.fail(), "Failed to read from the input file");
    }

    //! Load a POD value as raw bytes, or recurse into obj.visit() otherwise.
    //! A failed POD read throws rather than leaving obj with unread content.
    template <class T>
    void visit(T& obj) {
        if constexpr (std::is_pod_v<T>) {
            m_ifs.read(reinterpret_cast<char*>(&obj), sizeof(T));
            XCDAT_THROW_IF(m_ifs.fail(), "Failed to read from the input file");
        } else {
            obj.visit(*this);
        }
    }

    //! Current read position of the input stream.
    std::uint64_t bytes() {
        return m_ifs.tellg();
    }
};

}  // namespace xcdat
|
39
include/xcdat/mmap_visitor.hpp
Normal file
39
include/xcdat/mmap_visitor.hpp
Normal file
|
@ -0,0 +1,39 @@
|
|||
#pragma once

#include <cstdint>
#include <cstring>
#include <type_traits>

#include "immutable_vector.hpp"

namespace xcdat {

//! Visitor that fills an object from a memory region starting at a given
//! address.
//!
//! POD members are copied out of the region, immutable_vector members are
//! pointed at it through their mmap() method, and any other type is asked to
//! forward its own members through its visit() method.
class mmap_visitor {
  private:
    const char* m_base = nullptr;  // start of the region
    const char* m_cur = nullptr;  // next unread byte

  public:
    mmap_visitor(const char* base) : m_base(base), m_cur(base) {}

    virtual ~mmap_visitor() = default;

    //! Map a vector at the cursor and advance by the byte count it reports.
    template <typename T>
    void visit(immutable_vector<T>& vec) {
        m_cur += vec.mmap(m_cur);
    }

    //! Copy a POD value from the cursor, or recurse into obj.visit().
    template <typename T>
    void visit(T& obj) {
        if constexpr (std::is_pod_v<T>) {
            // The cursor is a plain byte offset and need not be aligned for
            // T, so copy the bytes instead of dereferencing a casted pointer.
            std::memcpy(&obj, m_cur, sizeof(T));
            m_cur += sizeof(T);
        } else {
            obj.visit(*this);
        }
    }

    //! Number of bytes consumed from the region so far.
    std::uint64_t bytes() const {
        return static_cast<std::uint64_t>(m_cur - m_base);
    }
};

}  // namespace xcdat
|
43
include/xcdat/save_visitor.hpp
Normal file
43
include/xcdat/save_visitor.hpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
#pragma once

#include <cstdint>
#include <fstream>
#include <string>
#include <string_view>
#include <type_traits>

#include "exception.hpp"
#include "immutable_vector.hpp"

namespace xcdat {

//! Visitor that writes an object to a binary file.
//!
//! POD members are written as raw bytes, immutable_vector members save
//! themselves to the stream, and any other type is asked to forward its own
//! members through its visit() method.
class save_visitor {
  private:
    std::ofstream m_ofs;

  public:
    //! Open the file in binary mode; throws if it cannot be opened.
    //!
    //! std::ofstream has no constructor taking std::string_view, so the path
    //! is copied into a std::string first (which also guarantees a
    //! null-terminated name).
    save_visitor(std::string_view filepath) : m_ofs(std::string(filepath), std::ios::binary) {
        XCDAT_THROW_IF(!m_ofs.good(), "Cannot open the output file");
    }

    virtual ~save_visitor() {
        m_ofs.close();
    }

    //! Save a vector through its own save() method.
    template <typename T>
    void visit(const immutable_vector<T>& vec) {
        vec.save(m_ofs);
    }

    //! Save a POD value as raw bytes, or recurse into obj.visit() otherwise.
    template <typename T>
    void visit(const T& obj) {
        if constexpr (std::is_pod_v<T>) {
            m_ofs.write(reinterpret_cast<const char*>(&obj), sizeof(T));
        } else {
            // visit() is declared non-const on the visited types, hence the
            // cast; this visitor only reads from obj.
            const_cast<T&>(obj).visit(*this);
        }
    }

    //! Current write position of the output stream.
    std::uint64_t bytes() {
        return m_ofs.tellp();
    }
};

}  // namespace xcdat
|
39
include/xcdat/size_visitor.hpp
Normal file
39
include/xcdat/size_visitor.hpp
Normal file
|
@ -0,0 +1,39 @@
|
|||
#pragma once

#include <cstdint>
#include <string_view>
#include <type_traits>

#include "exception.hpp"
#include "immutable_vector.hpp"

namespace xcdat {

//! Visitor that accumulates the byte size of an object.
//!
//! POD members contribute sizeof(T), immutable_vector members contribute
//! their memory_in_bytes(), and any other type is asked to forward its own
//! members through its visit() method.
class size_visitor {
  private:
    std::uint64_t m_bytes = 0;

  public:
    size_visitor() = default;

    virtual ~size_visitor() = default;

    //! Add the size reported by the vector.
    template <typename T>
    void visit(const immutable_vector<T>& vec) {
        m_bytes += vec.memory_in_bytes();
    }

    //! Add sizeof(T) for a POD value, or recurse into obj.visit() otherwise.
    template <typename T>
    void visit(const T& obj) {
        if constexpr (std::is_pod_v<T>) {
            m_bytes += sizeof(T);
        } else {
            // visit() is declared non-const on the visited types, hence the
            // cast; this visitor does not modify obj.
            const_cast<T&>(obj).visit(*this);
        }
    }

    //! Total number of bytes accumulated so far.
    std::uint64_t bytes() const {
        return m_bytes;
    }
};

}  // namespace xcdat
|
|
@ -129,15 +129,7 @@ class tail_vector {
|
|||
tail_vector(tail_vector&&) noexcept = default;
|
||||
tail_vector& operator=(tail_vector&&) noexcept = default;
|
||||
|
||||
explicit tail_vector(builder&& b) {
|
||||
m_chars.steal(b.m_chars);
|
||||
m_terms.build(b.m_terms);
|
||||
}
|
||||
|
||||
void build(builder&& b) {
|
||||
m_chars.steal(b.m_chars);
|
||||
m_terms.build(b.m_terms);
|
||||
}
|
||||
explicit tail_vector(builder&& b) : m_chars(b.m_chars), m_terms(b.m_terms) {}
|
||||
|
||||
inline bool bin_mode() const {
|
||||
return m_terms.size() != 0;
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
#include "essentials/essentials.hpp"
|
||||
#include "trie_builder.hpp"
|
||||
|
||||
namespace xcdat {
|
||||
|
@ -23,7 +22,7 @@ namespace xcdat {
|
|||
template <class BcVector>
|
||||
class trie {
|
||||
public:
|
||||
using this_type = trie<BcVector>;
|
||||
using trie_type = trie<BcVector>;
|
||||
using bc_vector_type = BcVector;
|
||||
|
||||
static constexpr auto l1_bits = bc_vector_type::l1_bits;
|
||||
|
@ -55,23 +54,7 @@ class trie {
|
|||
trie& operator=(trie&&) noexcept = default;
|
||||
|
||||
template <class Strings>
|
||||
static this_type build(const Strings& keys, bool bin_mode = false) {
|
||||
return this_type(trie_builder(keys, l1_bits, bin_mode));
|
||||
}
|
||||
|
||||
static this_type load(std::string_view filepath) {
|
||||
this_type obj;
|
||||
essentials::load(obj, filepath.data());
|
||||
return obj;
|
||||
}
|
||||
|
||||
std::uint64_t save(std::string_view filepath) const {
|
||||
return essentials::save(const_cast<this_type&>(*this), filepath.data());
|
||||
}
|
||||
|
||||
std::uint64_t memory_in_bytes() const {
|
||||
return essentials::visit<this_type, essentials::sizer>(const_cast<this_type&>(*this), "");
|
||||
}
|
||||
explicit trie(const Strings& keys, bool bin_mode = false) : trie(trie_builder(keys, l1_bits, bin_mode)) {}
|
||||
|
||||
//! Check the binary mode.
|
||||
inline bool bin_mode() const {
|
||||
|
@ -155,7 +138,7 @@ class trie {
|
|||
*/
|
||||
class prefix_iterator {
|
||||
private:
|
||||
const this_type* m_obj = nullptr;
|
||||
const trie_type* m_obj = nullptr;
|
||||
std::string_view m_key;
|
||||
std::uint64_t m_id = 0;
|
||||
std::uint64_t m_kpos = 0;
|
||||
|
@ -181,7 +164,7 @@ class trie {
|
|||
}
|
||||
|
||||
private:
|
||||
prefix_iterator(const this_type* obj, std::string_view key) : m_obj(obj), m_key(key) {}
|
||||
prefix_iterator(const trie_type* obj, std::string_view key) : m_obj(obj), m_key(key) {}
|
||||
|
||||
friend class trie;
|
||||
};
|
||||
|
@ -211,7 +194,7 @@ class trie {
|
|||
};
|
||||
|
||||
private:
|
||||
const this_type* m_obj = nullptr;
|
||||
const trie_type* m_obj = nullptr;
|
||||
std::string_view m_key;
|
||||
std::uint64_t m_id = 0;
|
||||
std::string m_decoded;
|
||||
|
@ -237,7 +220,7 @@ class trie {
|
|||
}
|
||||
|
||||
private:
|
||||
predictive_iterator(const this_type* obj, std::string_view key) : m_obj(obj), m_key(key) {}
|
||||
predictive_iterator(const trie_type* obj, std::string_view key) : m_obj(obj), m_key(key) {}
|
||||
|
||||
friend class trie;
|
||||
};
|
||||
|
|
|
@ -81,7 +81,7 @@ class trie_builder {
|
|||
m_heads[taboo_npos >> m_l1_bits] = m_units[taboo_npos].base;
|
||||
|
||||
// Build the code table
|
||||
m_table.build(keys);
|
||||
m_table = code_table(keys);
|
||||
m_bin_mode |= m_table.has_null();
|
||||
|
||||
// Build the BC units
|
||||
|
|
|
@ -3,9 +3,10 @@
|
|||
|
||||
#include <xcdat.hpp>
|
||||
|
||||
using xcdat_trie = xcdat::trie_8_type;
|
||||
using trie_type = xcdat::trie_8_type;
|
||||
|
||||
int main() {
|
||||
// Input keys
|
||||
std::vector<std::string> keys = {
|
||||
"AirPods", "AirTag", "Mac", "MacBook", "MacBook_Air", "MacBook_Pro",
|
||||
"Mac_Mini", "Mac_Pro", "iMac", "iPad", "iPhone", "iPhone_SE",
|
||||
|
@ -19,12 +20,12 @@ int main() {
|
|||
|
||||
// Build and save the trie index
|
||||
{
|
||||
const auto trie = xcdat_trie::build(keys);
|
||||
trie.save(index_filename);
|
||||
const trie_type trie(keys);
|
||||
xcdat::save(trie, index_filename);
|
||||
}
|
||||
|
||||
// Load the trie index
|
||||
const auto trie = xcdat_trie::load(index_filename);
|
||||
const auto trie = xcdat::load<trie_type>(index_filename);
|
||||
|
||||
std::cout << "Basic operations" << std::endl;
|
||||
{
|
||||
|
|
|
@ -35,7 +35,7 @@ void test_rank_select(const std::vector<bool>& bits) {
|
|||
for (std::uint64_t i = 0; i < bits.size(); i++) {
|
||||
bvb.set_bit(i, bits[i]);
|
||||
}
|
||||
bv.build(bvb, true, true);
|
||||
bv = xcdat::bit_vector(bvb, true, true);
|
||||
}
|
||||
|
||||
REQUIRE_EQ(bv.size(), bits.size());
|
||||
|
|
|
@ -124,7 +124,7 @@ TEST_CASE("Test trie_type (tiny)") {
|
|||
"Google_Pixel", "iPad_mini", "iPadOS", "iPod", "ThinkPad",
|
||||
};
|
||||
|
||||
auto trie = trie_type::build(keys);
|
||||
trie_type trie(keys);
|
||||
REQUIRE_FALSE(trie.bin_mode());
|
||||
|
||||
test_basic_operations(trie, keys, others);
|
||||
|
@ -163,7 +163,7 @@ TEST_CASE("Test trie_type (real)") {
|
|||
auto keys = xcdat::test::to_unique_vec(xcdat::io::load_strings("keys.txt"));
|
||||
auto others = xcdat::test::extract_keys(keys);
|
||||
|
||||
auto trie = trie_type::build(keys);
|
||||
trie_type trie(keys);
|
||||
REQUIRE_FALSE(trie.bin_mode());
|
||||
|
||||
test_basic_operations(trie, keys, others);
|
||||
|
@ -176,7 +176,7 @@ TEST_CASE("Test trie_type (random 10K, A--B)") {
|
|||
auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(10000, 1, 30, 'A', 'B'));
|
||||
auto others = xcdat::test::extract_keys(keys);
|
||||
|
||||
auto trie = trie_type::build(keys);
|
||||
trie_type trie(keys);
|
||||
REQUIRE_FALSE(trie.bin_mode());
|
||||
|
||||
test_basic_operations(trie, keys, others);
|
||||
|
@ -189,7 +189,7 @@ TEST_CASE("Test trie_type (random 10K, A--Z)") {
|
|||
auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(10000, 1, 30, 'A', 'Z'));
|
||||
auto others = xcdat::test::extract_keys(keys);
|
||||
|
||||
auto trie = trie_type::build(keys);
|
||||
trie_type trie(keys);
|
||||
REQUIRE_FALSE(trie.bin_mode());
|
||||
|
||||
test_basic_operations(trie, keys, others);
|
||||
|
@ -202,7 +202,7 @@ TEST_CASE("Test trie_type (random 10K, 0x00--0xFF)") {
|
|||
auto keys = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(10000, 1, 30, INT8_MIN, INT8_MAX));
|
||||
auto others = xcdat::test::extract_keys(keys);
|
||||
|
||||
auto trie = trie_type::build(keys);
|
||||
trie_type trie(keys);
|
||||
REQUIRE(trie.bin_mode());
|
||||
|
||||
test_basic_operations(trie, keys, others);
|
||||
|
@ -210,3 +210,44 @@ TEST_CASE("Test trie_type (random 10K, 0x00--0xFF)") {
|
|||
test_predictive_search(trie, keys, others);
|
||||
test_enumerate(trie, keys);
|
||||
}
|
||||
|
||||
#ifdef NDEBUG
|
||||
// Larger variant of the random A--B test (100K keys per the test name);
// this line sits after the `#ifdef NDEBUG` guard.
TEST_CASE("Test trie_type (random 100K, A--B)") {
    auto inserted = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(100000, 1, 30, 'A', 'B'));
    auto held_out = xcdat::test::extract_keys(inserted);

    trie_type index(inserted);
    REQUIRE_FALSE(index.bin_mode());

    // Run every query type against the same index.
    test_basic_operations(index, inserted, held_out);
    test_prefix_search(index, inserted, held_out);
    test_predictive_search(index, inserted, held_out);
    test_enumerate(index, inserted);
}
|
||||
|
||||
// Larger variant of the random A--Z test (100K keys per the test name).
TEST_CASE("Test trie_type (random 100K, A--Z)") {
    auto inserted = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(100000, 1, 30, 'A', 'Z'));
    auto held_out = xcdat::test::extract_keys(inserted);

    trie_type index(inserted);
    REQUIRE_FALSE(index.bin_mode());

    // Run every query type against the same index.
    test_basic_operations(index, inserted, held_out);
    test_prefix_search(index, inserted, held_out);
    test_predictive_search(index, inserted, held_out);
    test_enumerate(index, inserted);
}
|
||||
|
||||
// Larger variant of the random full-byte-range test (100K keys per the test
// name); unlike the letter-only cases, the index is required to be in
// binary mode.
TEST_CASE("Test trie_type (random 100K, 0x00--0xFF)") {
    auto inserted = xcdat::test::to_unique_vec(xcdat::test::make_random_keys(100000, 1, 30, INT8_MIN, INT8_MAX));
    auto held_out = xcdat::test::extract_keys(inserted);

    trie_type index(inserted);
    REQUIRE(index.bin_mode());

    // Run every query type against the same index.
    test_basic_operations(index, inserted, held_out);
    test_prefix_search(index, inserted, held_out);
    test_predictive_search(index, inserted, held_out);
    test_enumerate(index, inserted);
}
|
||||
#endif
|
|
@ -1,3 +1,5 @@
|
|||
#include <chrono>
|
||||
|
||||
#include <xcdat.hpp>
|
||||
|
||||
#include "cmd_line_parser/parser.hpp"
|
||||
|
@ -28,22 +30,21 @@ int build(const cmd_line_parser::parser& p) {
|
|||
keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
|
||||
}
|
||||
|
||||
essentials::timer<essentials::clock_type, std::chrono::seconds> timer;
|
||||
timer.start();
|
||||
const auto trie = Trie::build(keys);
|
||||
timer.stop();
|
||||
const auto start_tp = std::chrono::high_resolution_clock::now();
|
||||
const Trie trie(keys);
|
||||
const auto stop_tp = std::chrono::high_resolution_clock::now();
|
||||
|
||||
const double construction_time_in_sec = timer.average();
|
||||
const double memory_in_bytes = trie.memory_in_bytes();
|
||||
const double time_in_sec = std::chrono::duration_cast<std::chrono::seconds>(stop_tp - start_tp).count();
|
||||
const double memory_in_bytes = xcdat::memory_in_bytes(trie);
|
||||
|
||||
tfm::printfln("construction_time_in_sec: %g", construction_time_in_sec);
|
||||
tfm::printfln("time_in_sec: %g", time_in_sec);
|
||||
tfm::printfln("memory_in_bytes: %d", memory_in_bytes);
|
||||
tfm::printfln("memory_in_MiB: %g", memory_in_bytes / essentials::MiB);
|
||||
tfm::printfln("memory_in_MiB: %g", memory_in_bytes / (1024.0 * 1024.0));
|
||||
tfm::printfln("number_of_keys: %d", trie.num_keys());
|
||||
tfm::printfln("alphabet_size: %d", trie.alphabet_size());
|
||||
tfm::printfln("max_length: %d", trie.max_length());
|
||||
|
||||
trie.save(output_idx);
|
||||
xcdat::save(trie, output_idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ cmd_line_parser::parser make_parser(int argc, char** argv) {
|
|||
template <class Trie>
|
||||
int decode(const cmd_line_parser::parser& p) {
|
||||
const auto input_idx = p.get<std::string>("input_idx");
|
||||
const auto trie = Trie::load(input_idx);
|
||||
const auto trie = xcdat::load<Trie>(input_idx);
|
||||
|
||||
for (std::uint64_t id; std::cin >> id;) {
|
||||
const auto dec = trie.decode(id);
|
||||
|
|
|
@ -13,7 +13,7 @@ cmd_line_parser::parser make_parser(int argc, char** argv) {
|
|||
template <class Trie>
|
||||
int enumerate(const cmd_line_parser::parser& p) {
|
||||
const auto input_idx = p.get<std::string>("input_idx");
|
||||
const auto trie = Trie::load(input_idx);
|
||||
const auto trie = xcdat::load<Trie>(input_idx);
|
||||
|
||||
trie.enumerate([&](std::uint64_t id, std::string_view str) { tfm::printfln("%d\t%s", id, str); });
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@ cmd_line_parser::parser make_parser(int argc, char** argv) {
|
|||
template <class Trie>
|
||||
int lookup(const cmd_line_parser::parser& p) {
|
||||
const auto input_idx = p.get<std::string>("input_idx");
|
||||
const auto trie = Trie::load(input_idx);
|
||||
const auto trie = xcdat::load<Trie>(input_idx);
|
||||
|
||||
for (std::string str; std::getline(std::cin, str);) {
|
||||
const auto id = trie.lookup(str);
|
||||
|
|
|
@ -16,13 +16,12 @@ int predictive_search(const cmd_line_parser::parser& p) {
|
|||
const auto input_idx = p.get<std::string>("input_idx");
|
||||
const auto max_num_results = p.get<std::uint64_t>("max_num_results", 10);
|
||||
|
||||
const auto trie = Trie::load(input_idx);
|
||||
const auto trie = xcdat::load<Trie>(input_idx);
|
||||
|
||||
struct result_type {
|
||||
std::uint64_t id;
|
||||
std::string str;
|
||||
};
|
||||
|
||||
std::vector<result_type> results;
|
||||
results.reserve(1ULL << 10);
|
||||
|
||||
|
|
|
@ -13,7 +13,8 @@ cmd_line_parser::parser make_parser(int argc, char** argv) {
|
|||
template <class Trie>
|
||||
int prefix_search(const cmd_line_parser::parser& p) {
|
||||
const auto input_idx = p.get<std::string>("input_idx");
|
||||
const auto trie = Trie::load(input_idx);
|
||||
|
||||
const auto trie = xcdat::load<Trie>(input_idx);
|
||||
|
||||
struct result_type {
|
||||
std::uint64_t id;
|
||||
|
|
Loading…
Reference in a new issue