xcdat/include/xcdat.hpp

97 lines
3 KiB
C++
Raw Normal View History

2021-06-25 17:18:57 +00:00
#pragma once
2021-06-26 14:48:29 +00:00
2021-07-02 23:12:35 +00:00
#include "xcdat/bc_vector_15.hpp"
#include "xcdat/bc_vector_16.hpp"
2021-06-27 05:06:08 +00:00
#include "xcdat/bc_vector_7.hpp"
2021-06-27 04:12:35 +00:00
#include "xcdat/bc_vector_8.hpp"
2021-06-29 00:06:40 +00:00
#include "xcdat/load_visitor.hpp"
#include "xcdat/mmap_visitor.hpp"
#include "xcdat/save_visitor.hpp"
#include "xcdat/size_visitor.hpp"
2021-06-29 00:27:37 +00:00
#include "xcdat/trie.hpp"
2021-06-29 00:06:40 +00:00
2021-06-26 22:40:15 +00:00
namespace xcdat {
2021-07-03 00:46:04 +00:00
//! The trie type with standard DACs using 8-bit integers
2021-06-27 04:12:35 +00:00
using trie_8_type = trie<bc_vector_8>;
2021-07-03 00:46:04 +00:00
//! The trie type with standard DACs using 16-bit integers
2021-07-02 23:12:35 +00:00
using trie_16_type = trie<bc_vector_16>;
2021-07-03 00:46:04 +00:00
//! The trie type with pointer-based DACs using 7-bit integers (for the 1st layer)
2021-07-02 06:00:59 +00:00
using trie_7_type = trie<bc_vector_7>;
2021-07-03 00:46:04 +00:00
//! The trie type with pointer-based DACs using 15-bit integers (for the 1st layer)
2021-07-02 23:12:35 +00:00
using trie_15_type = trie<bc_vector_15>;
2021-06-26 22:40:15 +00:00
2021-07-02 13:18:55 +00:00
//! Set the continuous memory block to a new trie instance (for a memory-mapped file).
2021-06-29 00:06:40 +00:00
template <class Trie>
2021-06-29 00:27:37 +00:00
[[maybe_unused]] Trie mmap(const char* address) {
2021-06-29 00:06:40 +00:00
mmap_visitor visitor(address);
2021-06-29 03:10:08 +00:00
std::uint32_t flag;
2021-06-29 03:02:51 +00:00
visitor.visit(flag);
2021-07-02 12:50:10 +00:00
XCDAT_THROW_IF(flag != Trie::l1_bits, "The input dictionary type is different.");
2021-06-29 03:10:08 +00:00
2021-06-29 03:02:51 +00:00
Trie idx;
2021-06-29 00:06:40 +00:00
visitor.visit(idx);
return idx;
}
2021-07-02 12:50:10 +00:00
//! Load the trie dictionary from the file.
2021-06-29 00:06:40 +00:00
template <class Trie>
2021-06-29 00:27:37 +00:00
[[maybe_unused]] Trie load(std::string_view filepath) {
2021-06-29 00:06:40 +00:00
load_visitor visitor(filepath);
2021-06-29 03:10:08 +00:00
std::uint32_t flag;
2021-06-29 03:02:51 +00:00
visitor.visit(flag);
2021-07-02 12:50:10 +00:00
XCDAT_THROW_IF(flag != Trie::l1_bits, "The input dictionary type is different.");
2021-06-29 03:10:08 +00:00
2021-06-29 03:02:51 +00:00
Trie idx;
2021-06-29 00:06:40 +00:00
visitor.visit(idx);
return idx;
}
2021-07-02 12:50:10 +00:00
//! Save the trie dictionary to the file and returns the file size in bytes.
2021-06-29 00:06:40 +00:00
template <class Trie>
2021-06-29 00:27:37 +00:00
[[maybe_unused]] std::uint64_t save(const Trie& idx, std::string_view filepath) {
2021-06-29 00:06:40 +00:00
save_visitor visitor(filepath);
2021-06-29 03:10:08 +00:00
visitor.visit(static_cast<std::uint32_t>(Trie::l1_bits)); // flag
2021-06-29 00:06:40 +00:00
visitor.visit(const_cast<Trie&>(idx));
return visitor.bytes();
}
2021-07-02 12:50:10 +00:00
//! Get the dictionary size in bytes.
2021-06-29 00:06:40 +00:00
template <class Trie>
2021-06-29 00:27:37 +00:00
[[maybe_unused]] std::uint64_t memory_in_bytes(const Trie& idx) {
2021-06-29 00:06:40 +00:00
size_visitor visitor;
2021-06-29 03:10:08 +00:00
visitor.visit(static_cast<std::uint32_t>(Trie::l1_bits)); // flag
2021-06-29 00:06:40 +00:00
visitor.visit(const_cast<Trie&>(idx));
return visitor.bytes();
}
2021-07-02 12:50:10 +00:00
//! Get the flag indicating the trie dictionary type, embedded by the function 'save'.
2021-07-02 05:37:03 +00:00
//! The flag corresponds to trie::l1_bits and will be used to detect the trie type from the file.
2021-06-29 03:10:08 +00:00
[[maybe_unused]] std::uint32_t get_flag(std::string_view filepath) {
std::ifstream ifs(filepath);
XCDAT_THROW_IF(!ifs.good(), "Cannot open the input file");
std::uint32_t flag;
ifs.read(reinterpret_cast<char*>(&flag), sizeof(flag));
return flag;
}
2021-07-02 05:37:03 +00:00
//! Load the keywords from the file.
[[maybe_unused]] std::vector<std::string> load_strings(std::string_view filepath, char delim = '\n') {
2021-06-29 00:27:37 +00:00
std::ifstream ifs(filepath);
2021-07-01 21:14:55 +00:00
XCDAT_THROW_IF(!ifs.good(), "Cannot open the input file");
2021-06-29 00:27:37 +00:00
std::vector<std::string> strs;
2021-07-02 05:37:03 +00:00
for (std::string str; std::getline(ifs, str, delim);) {
2021-06-29 00:27:37 +00:00
strs.push_back(str);
}
return strs;
}
2021-06-27 05:06:08 +00:00
} // namespace xcdat