From 992c74bb799a1cb2fb94fe16196efc5a653f694e Mon Sep 17 00:00:00 2001 From: Shunsuke Kanda Date: Tue, 29 Jun 2021 09:23:02 +0900 Subject: [PATCH] minor --- sample/sample.cpp | 9 +- tests/mm_file/mm_file.hpp | 177 ++++++++++++++++++++++++++++++++++++++ tools/xcdat_build.cpp | 2 +- 3 files changed, 182 insertions(+), 6 deletions(-) create mode 100644 tests/mm_file/mm_file.hpp diff --git a/sample/sample.cpp b/sample/sample.cpp index 34ba332..8ea1e04 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -3,8 +3,6 @@ #include -using trie_type = xcdat::trie_8_type; - int main() { // Input keys std::vector keys = { @@ -12,19 +10,20 @@ int main() { "Mac_Mini", "Mac_Pro", "iMac", "iPad", "iPhone", "iPhone_SE", }; - // The dataset must be sorted and unique (although it is not needed for the keys). + // The input keys must be sorted and unique (although these keys already satisfy both conditions). std::sort(keys.begin(), keys.end()); keys.erase(std::unique(keys.begin(), keys.end()), keys.end()); + using trie_type = xcdat::trie_8_type; const std::string index_filename = "tmp.idx"; - // Build and save the trie index + // Build and save the trie index. { const trie_type trie(keys); xcdat::save(trie, index_filename); } - // Load the trie index + // Load the trie index. 
const auto trie = xcdat::load(index_filename); std::cout << "Basic operations" << std::endl; diff --git a/tests/mm_file/mm_file.hpp b/tests/mm_file/mm_file.hpp new file mode 100644 index 0000000..b95031d --- /dev/null +++ b/tests/mm_file/mm_file.hpp @@ -0,0 +1,177 @@ +#pragma once + +#include +#include +#include +#include +#include // close(fd) +#include + +namespace mm { + +namespace advice { +static const int normal = POSIX_MADV_NORMAL; +static const int random = POSIX_MADV_RANDOM; +static const int sequential = POSIX_MADV_SEQUENTIAL; +} // namespace advice + +template +struct file { + file() { + init(); + } + + ~file() { + close(); + } + + file(file const&) = delete; // non construction-copyable + file& operator=(file const&) = delete; // non copyable + + bool is_open() const { + return m_fd != -1; + } + + void close() { + if (is_open()) { + if (munmap((char*)m_data, m_size) == -1) { + throw std::runtime_error("munmap failed when closing file"); + } + ::close(m_fd); + init(); + } + } + + size_t bytes() const { + return m_size; + } + + size_t size() const { + return m_size / sizeof(T); + } + + T* data() const { + return m_data; + } + + struct iterator { + iterator(T* addr, size_t offset = 0) : m_ptr(addr + offset) {} + + T operator*() { + return *m_ptr; + } + + void operator++() { + ++m_ptr; + } + + bool operator==(iterator const& rhs) const { + return m_ptr == rhs.m_ptr; + } + + bool operator!=(iterator const& rhs) const { + return !((*this) == rhs); + } + + private: + T* m_ptr; + }; + + iterator begin() const { + return iterator(m_data); + } + + iterator end() const { + return iterator(m_data, size()); + } + +protected: + int m_fd; + size_t m_size; + T* m_data; + + void init() { + m_fd = -1; + m_size = 0; + m_data = nullptr; + } + + void check_fd() { + if (m_fd == -1) throw std::runtime_error("cannot open file"); + } +}; + +template +Pointer mmap(int fd, size_t size, int prot) { + static const size_t offset = 0; + Pointer p = + static_cast(::mmap(NULL, size, 
prot, MAP_SHARED, fd, offset)); + if (p == MAP_FAILED) throw std::runtime_error("mmap failed"); + return p; +} + +template +struct file_source : public file { + typedef file base; + + file_source() {} + + file_source(std::string const& path, int adv = advice::normal) { + open(path, adv); + } + + void open(std::string const& path, int adv = advice::normal) { + base::m_fd = ::open(path.c_str(), O_RDONLY); + base::check_fd(); + struct stat fs; + if (fstat(base::m_fd, &fs) == -1) { + throw std::runtime_error("cannot stat file"); + } + base::m_size = fs.st_size; + base::m_data = mmap(base::m_fd, base::m_size, PROT_READ); + if (posix_madvise((void*)base::m_data, base::m_size, adv)) { + throw std::runtime_error("madvise failed"); + } + } +}; + +template +struct file_sink : public file { + typedef file base; + + file_sink() {} + + file_sink(std::string const& path) { + open(path); + } + + file_sink(std::string const& path, size_t n) { + open(path, n); + } + + void open(std::string const& path) { + static const mode_t mode = 0600; // read/write + base::m_fd = ::open(path.c_str(), O_RDWR, mode); + base::check_fd(); + struct stat fs; + if (fstat(base::m_fd, &fs) == -1) { + throw std::runtime_error("cannot stat file"); + } + base::m_size = fs.st_size; + base::m_data = + mmap(base::m_fd, base::m_size, PROT_READ | PROT_WRITE); + } + + void open(std::string const& path, size_t n) { + static const mode_t mode = 0600; // read/write + base::m_fd = ::open(path.c_str(), O_RDWR | O_CREAT | O_TRUNC, mode); + base::check_fd(); + base::m_size = n * sizeof(T); + ftruncate(base::m_fd, + base::m_size); // truncate the file at the new size + base::m_data = + mmap(base::m_fd, base::m_size, PROT_READ | PROT_WRITE); + } +}; + +} // namespace mm \ No newline at end of file diff --git a/tools/xcdat_build.cpp b/tools/xcdat_build.cpp index 630d275..1a19733 100644 --- a/tools/xcdat_build.cpp +++ b/tools/xcdat_build.cpp @@ -9,7 +9,7 @@ cmd_line_parser::parser make_parser(int argc, char** argv) { 
cmd_line_parser::parser p(argc, argv); p.add("input_keys", "Input filepath of data keys"); p.add("output_idx", "Output filepath of trie index"); - p.add("trie_type", "Type of trie impl. from [7|8], (default=7)", "-t", false); + p.add("trie_type", "Type of trie impl from 7 or 8 (default=7)", "-t", false); p.add("to_unique", "Make unique the input keys? (default=0)", "-u", false); return p; }