2021-06-25 17:18:57 +00:00
|
|
|
#pragma once
|
2021-06-26 14:48:29 +00:00
|
|
|
|
2021-07-02 23:12:35 +00:00
|
|
|
#include <cstdint>
#include <fstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "xcdat/bc_vector_15.hpp"
#include "xcdat/bc_vector_16.hpp"
#include "xcdat/bc_vector_7.hpp"
#include "xcdat/bc_vector_8.hpp"
#include "xcdat/load_visitor.hpp"
#include "xcdat/mmap_visitor.hpp"
#include "xcdat/save_visitor.hpp"
#include "xcdat/size_visitor.hpp"
#include "xcdat/trie.hpp"
|
2021-06-29 00:06:40 +00:00
|
|
|
|
2021-06-26 22:40:15 +00:00
|
|
|
namespace xcdat {
|
|
|
|
|
2021-07-03 00:46:04 +00:00
|
|
|
//! The trie type with standard DACs using 8-bit integers
|
2021-06-27 04:12:35 +00:00
|
|
|
using trie_8_type = trie<bc_vector_8>;
|
2021-07-03 00:46:04 +00:00
|
|
|
|
|
|
|
//! The trie type with standard DACs using 16-bit integers
|
2021-07-02 23:12:35 +00:00
|
|
|
using trie_16_type = trie<bc_vector_16>;
|
|
|
|
|
2021-07-03 00:46:04 +00:00
|
|
|
//! The trie type with pointer-based DACs using 7-bit integers (for the 1st layer)
|
2021-07-02 06:00:59 +00:00
|
|
|
using trie_7_type = trie<bc_vector_7>;
|
2021-07-03 00:46:04 +00:00
|
|
|
|
|
|
|
//! The trie type with pointer-based DACs using 15-bit integers (for the 1st layer)
|
2021-07-02 23:12:35 +00:00
|
|
|
using trie_15_type = trie<bc_vector_15>;
|
2021-06-26 22:40:15 +00:00
|
|
|
|
2021-07-02 13:18:55 +00:00
|
|
|
//! Set the continuous memory block to a new trie instance (for a memory-mapped file).
|
2021-06-29 00:06:40 +00:00
|
|
|
template <class Trie>
|
2021-06-29 00:27:37 +00:00
|
|
|
[[maybe_unused]] Trie mmap(const char* address) {
|
2021-06-29 00:06:40 +00:00
|
|
|
mmap_visitor visitor(address);
|
2021-06-29 03:10:08 +00:00
|
|
|
|
|
|
|
std::uint32_t flag;
|
2021-06-29 03:02:51 +00:00
|
|
|
visitor.visit(flag);
|
2021-07-02 12:50:10 +00:00
|
|
|
XCDAT_THROW_IF(flag != Trie::l1_bits, "The input dictionary type is different.");
|
2021-06-29 03:10:08 +00:00
|
|
|
|
2021-06-29 03:02:51 +00:00
|
|
|
Trie idx;
|
2021-06-29 00:06:40 +00:00
|
|
|
visitor.visit(idx);
|
|
|
|
return idx;
|
|
|
|
}
|
|
|
|
|
2021-07-02 12:50:10 +00:00
|
|
|
//! Load the trie dictionary from the file.
|
2021-06-29 00:06:40 +00:00
|
|
|
template <class Trie>
|
2021-06-29 00:27:37 +00:00
|
|
|
[[maybe_unused]] Trie load(std::string_view filepath) {
|
2021-06-29 00:06:40 +00:00
|
|
|
load_visitor visitor(filepath);
|
2021-06-29 03:10:08 +00:00
|
|
|
|
|
|
|
std::uint32_t flag;
|
2021-06-29 03:02:51 +00:00
|
|
|
visitor.visit(flag);
|
2021-07-02 12:50:10 +00:00
|
|
|
XCDAT_THROW_IF(flag != Trie::l1_bits, "The input dictionary type is different.");
|
2021-06-29 03:10:08 +00:00
|
|
|
|
2021-06-29 03:02:51 +00:00
|
|
|
Trie idx;
|
2021-06-29 00:06:40 +00:00
|
|
|
visitor.visit(idx);
|
|
|
|
return idx;
|
|
|
|
}
|
|
|
|
|
2021-07-02 12:50:10 +00:00
|
|
|
//! Save the trie dictionary to the file and returns the file size in bytes.
|
2021-06-29 00:06:40 +00:00
|
|
|
template <class Trie>
|
2021-06-29 00:27:37 +00:00
|
|
|
[[maybe_unused]] std::uint64_t save(const Trie& idx, std::string_view filepath) {
|
2021-06-29 00:06:40 +00:00
|
|
|
save_visitor visitor(filepath);
|
2021-06-29 03:10:08 +00:00
|
|
|
visitor.visit(static_cast<std::uint32_t>(Trie::l1_bits)); // flag
|
2021-06-29 00:06:40 +00:00
|
|
|
visitor.visit(const_cast<Trie&>(idx));
|
|
|
|
return visitor.bytes();
|
|
|
|
}
|
|
|
|
|
2021-07-02 12:50:10 +00:00
|
|
|
//! Get the dictionary size in bytes.
|
2021-06-29 00:06:40 +00:00
|
|
|
template <class Trie>
|
2021-06-29 00:27:37 +00:00
|
|
|
[[maybe_unused]] std::uint64_t memory_in_bytes(const Trie& idx) {
|
2021-06-29 00:06:40 +00:00
|
|
|
size_visitor visitor;
|
2021-06-29 03:10:08 +00:00
|
|
|
visitor.visit(static_cast<std::uint32_t>(Trie::l1_bits)); // flag
|
2021-06-29 00:06:40 +00:00
|
|
|
visitor.visit(const_cast<Trie&>(idx));
|
|
|
|
return visitor.bytes();
|
|
|
|
}
|
|
|
|
|
2021-07-02 12:50:10 +00:00
|
|
|
//! Get the flag indicating the trie dictionary type, embedded by the function 'save'.
|
2021-07-02 05:37:03 +00:00
|
|
|
//! The flag corresponds to trie::l1_bits and will be used to detect the trie type from the file.
|
2021-06-29 03:10:08 +00:00
|
|
|
[[maybe_unused]] std::uint32_t get_flag(std::string_view filepath) {
|
|
|
|
std::ifstream ifs(filepath);
|
|
|
|
XCDAT_THROW_IF(!ifs.good(), "Cannot open the input file");
|
|
|
|
|
|
|
|
std::uint32_t flag;
|
|
|
|
ifs.read(reinterpret_cast<char*>(&flag), sizeof(flag));
|
|
|
|
return flag;
|
|
|
|
}
|
|
|
|
|
2021-07-02 05:37:03 +00:00
|
|
|
//! Load the keywords from the file.
|
|
|
|
[[maybe_unused]] std::vector<std::string> load_strings(std::string_view filepath, char delim = '\n') {
|
2021-06-29 00:27:37 +00:00
|
|
|
std::ifstream ifs(filepath);
|
2021-07-01 21:14:55 +00:00
|
|
|
XCDAT_THROW_IF(!ifs.good(), "Cannot open the input file");
|
|
|
|
|
2021-06-29 00:27:37 +00:00
|
|
|
std::vector<std::string> strs;
|
2021-07-02 05:37:03 +00:00
|
|
|
for (std::string str; std::getline(ifs, str, delim);) {
|
2021-06-29 00:27:37 +00:00
|
|
|
strs.push_back(str);
|
|
|
|
}
|
|
|
|
return strs;
|
|
|
|
}
|
|
|
|
|
2021-06-27 05:06:08 +00:00
|
|
|
} // namespace xcdat
|