From eebe7ac0dbbc87e862adc27cd4f993feab7aacb3 Mon Sep 17 00:00:00 2001 From: Shunsuke Kanda Date: Mon, 28 Jun 2021 02:15:09 +0900 Subject: [PATCH] update --- .gitignore | 2 +- CMakeLists.txt | 10 +- include/xcdat.hpp | 22 - include/xcdat/trie.hpp | 33 +- sample/sample.cpp | 24 +- {test => tests}/CMakeLists.txt | 0 {test => tests}/doctest/doctest.h | 0 {test => tests}/keys.txt | 0 {test => tests}/test_bc_vector.cpp | 0 {test => tests}/test_bit_vector.cpp | 0 {test => tests}/test_common.hpp | 0 {test => tests}/test_compact_vector.cpp | 0 {test => tests}/test_tail_vector.cpp | 0 {test => tests}/test_trie.cpp | 0 tools/CMakeLists.txt | 1 + tools/cmd_line_parser/parser.hpp | 158 ++++ tools/tinyformat/tinyformat.h | 1155 +++++++++++++++++++++++ tools/xcdat_build.cpp | 70 ++ 18 files changed, 1427 insertions(+), 48 deletions(-) rename {test => tests}/CMakeLists.txt (100%) rename {test => tests}/doctest/doctest.h (100%) rename {test => tests}/keys.txt (100%) rename {test => tests}/test_bc_vector.cpp (100%) rename {test => tests}/test_bit_vector.cpp (100%) rename {test => tests}/test_common.hpp (100%) rename {test => tests}/test_compact_vector.cpp (100%) rename {test => tests}/test_tail_vector.cpp (100%) rename {test => tests}/test_trie.cpp (100%) create mode 100644 tools/CMakeLists.txt create mode 100644 tools/cmd_line_parser/parser.hpp create mode 100644 tools/tinyformat/tinyformat.h create mode 100644 tools/xcdat_build.cpp diff --git a/.gitignore b/.gitignore index 166ad49..550e86d 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,7 @@ *.app # My Definition -build/ +build*/ cmake-build-debug/ .idea/ .DS_Store diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f9e405..c643265 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,9 +33,11 @@ message(STATUS "CXX_FLAGS_RELEASE are ${CMAKE_CXX_FLAGS_RELEASE}") include_directories(include) -enable_testing() -add_subdirectory(test) - add_subdirectory(sample) +add_subdirectory(tools) -file(COPY 
${CMAKE_SOURCE_DIR}/test/keys.txt DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/test) +enable_testing() +add_subdirectory(tests) + + +file(COPY ${CMAKE_SOURCE_DIR}/tests/keys.txt DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tests) diff --git a/include/xcdat.hpp b/include/xcdat.hpp index e66b944..2ee025c 100644 --- a/include/xcdat.hpp +++ b/include/xcdat.hpp @@ -11,26 +11,4 @@ namespace xcdat { using trie_7_type = trie; using trie_8_type = trie; -template -static Trie build(const Strings& keys, bool bin_mode = false) { - return Trie(trie_builder(keys, Trie::bc_vector_type::l1_bits, bin_mode)); -} - -template -static Trie load(std::string_view filename) { - Trie trie; - essentials::load(trie, filename.data()); - return trie; -} - -template -static std::uint64_t save(Trie& trie, std::string_view filename) { - return essentials::save(trie, filename.data()); -} - -template -static std::uint64_t get_memory_in_bytes(Trie& trie) { - return essentials::visit(trie, ""); -} - } // namespace xcdat diff --git a/include/xcdat/trie.hpp b/include/xcdat/trie.hpp index b0ccae7..9b95193 100644 --- a/include/xcdat/trie.hpp +++ b/include/xcdat/trie.hpp @@ -26,6 +26,8 @@ class trie { using this_type = trie; using bc_vector_type = BcVector; + static constexpr auto l1_bits = bc_vector_type::l1_bits; + private: std::uint64_t m_num_keys = 0; code_table m_table; @@ -52,19 +54,23 @@ class trie { //! Move constructor trie& operator=(trie&&) noexcept = default; - template - explicit trie(trie_builder&& b) - : m_num_keys(b.m_keys.size()), m_table(std::move(b.m_table)), m_terms(b.m_terms, true, true), - m_bcvec(b.m_units, std::move(b.m_leaves)), m_tvec(std::move(b.m_suffixes)) {} - - /** - * Build the trie dictioanry from the input keywords. - * @param[in] key The query keyword. - * @return The associated ID if found. 
- */ template static this_type build(const Strings& keys, bool bin_mode = false) { - return this_type(trie_builder(keys, bc_vector_type::l1_bits, bin_mode)); + return this_type(trie_builder(keys, l1_bits, bin_mode)); + } + + static this_type load(std::string_view filepath) { + this_type obj; + essentials::load(obj, filepath.data()); + return obj; + } + + std::uint64_t save(std::string_view filepath) const { + return essentials::save(const_cast(*this), filepath.data()); + } + + std::uint64_t memory_in_bytes() const { + return essentials::visit(const_cast(*this), ""); } //! Check the binary mode. @@ -271,6 +277,11 @@ class trie { } private: + template + explicit trie(trie_builder&& b) + : m_num_keys(b.m_keys.size()), m_table(std::move(b.m_table)), m_terms(b.m_terms, true, true), + m_bcvec(b.m_units, std::move(b.m_leaves)), m_tvec(std::move(b.m_suffixes)) {} + template static constexpr String get_suffix(const String& s, std::uint64_t i) { assert(i <= s.size()); diff --git a/sample/sample.cpp b/sample/sample.cpp index 399aeb5..84d194c 100644 --- a/sample/sample.cpp +++ b/sample/sample.cpp @@ -3,17 +3,28 @@ #include +using xcdat_trie = xcdat::trie_8_type; + int main() { std::vector keys = { "AirPods", "AirTag", "Mac", "MacBook", "MacBook_Air", "MacBook_Pro", "Mac_Mini", "Mac_Pro", "iMac", "iPad", "iPhone", "iPhone_SE", }; - // The dataset must be sorted and unique. + // The dataset must be sorted and unique (although it is not needed for the keys). 
std::sort(keys.begin(), keys.end()); keys.erase(std::unique(keys.begin(), keys.end()), keys.end()); - auto trie = xcdat::build(keys); + const std::string index_filename = "tmp.idx"; + + // Build and save the trie index + { + const auto trie = xcdat_trie::build(keys); + trie.save(index_filename); + } + + // Load the trie index + const auto trie = xcdat_trie::load(index_filename); std::cout << "Basic operations" << std::endl; { @@ -41,14 +52,7 @@ int main() { } } - std::string index_filename = "tmp.idx"; - std::cout << "mem: " << xcdat::save(trie, index_filename) << std::endl; - - { - auto ohter = xcdat::load(index_filename); - std::cout << "num_keys:" << ohter.num_keys() << std::endl; - std::cout << "mem: " << xcdat::get_memory_in_bytes(ohter) << std::endl; - } + std::remove(index_filename.c_str()); return 0; } diff --git a/test/CMakeLists.txt b/tests/CMakeLists.txt similarity index 100% rename from test/CMakeLists.txt rename to tests/CMakeLists.txt diff --git a/test/doctest/doctest.h b/tests/doctest/doctest.h similarity index 100% rename from test/doctest/doctest.h rename to tests/doctest/doctest.h diff --git a/test/keys.txt b/tests/keys.txt similarity index 100% rename from test/keys.txt rename to tests/keys.txt diff --git a/test/test_bc_vector.cpp b/tests/test_bc_vector.cpp similarity index 100% rename from test/test_bc_vector.cpp rename to tests/test_bc_vector.cpp diff --git a/test/test_bit_vector.cpp b/tests/test_bit_vector.cpp similarity index 100% rename from test/test_bit_vector.cpp rename to tests/test_bit_vector.cpp diff --git a/test/test_common.hpp b/tests/test_common.hpp similarity index 100% rename from test/test_common.hpp rename to tests/test_common.hpp diff --git a/test/test_compact_vector.cpp b/tests/test_compact_vector.cpp similarity index 100% rename from test/test_compact_vector.cpp rename to tests/test_compact_vector.cpp diff --git a/test/test_tail_vector.cpp b/tests/test_tail_vector.cpp similarity index 100% rename from 
test/test_tail_vector.cpp rename to tests/test_tail_vector.cpp diff --git a/test/test_trie.cpp b/tests/test_trie.cpp similarity index 100% rename from test/test_trie.cpp rename to tests/test_trie.cpp diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000..8ba4dde --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1 @@ +add_executable(xcdat_build xcdat_build.cpp) diff --git a/tools/cmd_line_parser/parser.hpp b/tools/cmd_line_parser/parser.hpp new file mode 100644 index 0000000..39948f7 --- /dev/null +++ b/tools/cmd_line_parser/parser.hpp @@ -0,0 +1,158 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cmd_line_parser { + +struct parser { + inline static const std::string empty = ""; + + parser(int argc, char** argv) : m_argc(argc), m_argv(argv), m_required(0) {} + + struct cmd { + std::string shorthand, value, descr; + bool is_boolean; + }; + + bool parse() { + if (size_t(m_argc - 1) < m_required) return abort(); + size_t k = 0; + for (int i = 1; i != m_argc; ++i, ++k) { + std::string parsed(m_argv[i]); + if (parsed == "-h" or parsed == "--help") return abort(); + size_t id = k; + bool is_optional = id >= m_required; + if (is_optional) { + auto it = m_shorthands.find(parsed); + if (it == m_shorthands.end()) { + std::cerr << "== error: shorthand '" + parsed + "' not found" << std::endl; + return abort(); + } + id = (*it).second; + } + assert(id < m_names.size()); + auto const& name = m_names[id]; + auto& c = m_cmds[name]; + if (is_optional) { + if (c.is_boolean) { + parsed = "true"; + } else { + ++i; + if (i == m_argc) return abort(); + parsed = m_argv[i]; + } + } + c.value = parsed; + } + return true; + } + + void help() const { + std::cerr << "Usage: \e[1m" << m_argv[0] << "\e[0m [-h,--help]"; + auto print = [this](bool with_description) { + for (size_t i = 0; i != m_names.size(); ++i) { + auto const& c = m_cmds.at(m_names[i]); + bool is_optional = i >= m_required; + if 
(is_optional) std::cerr << " [\e[1m" << c.shorthand << "\e[0m"; + if (!c.is_boolean) std::cerr << " \e[4m" << m_names[i] << "\e[0m"; + if (is_optional) std::cerr << "]"; + if (with_description) std::cerr << "\n\t" << c.descr << "\n"; + } + }; + print(false); + std::cerr << "\n\n"; + print(true); + std::cerr << " [-h,--help]\n\tPrint this help text and silently exits." << std::endl; + } + + bool add(std::string const& name, std::string const& descr) { + bool ret = m_cmds.emplace(name, cmd{empty, empty, descr, false}).second; + if (ret) { + m_names.push_back(name); + m_required += 1; + } + return ret; + } + + bool add(std::string const& name, std::string const& descr, std::string const& shorthand, bool is_boolean = true) { + bool ret = m_cmds.emplace(name, cmd{shorthand, is_boolean ? "false" : empty, descr, is_boolean}).second; + if (ret) { + m_names.push_back(name); + m_shorthands.emplace(shorthand, m_names.size() - 1); + } + return ret; + } + + template + T get(std::string const& name) const { + auto it = m_cmds.find(name); + if (it == m_cmds.end()) { + throw std::runtime_error("error: '" + name + "' not found"); + } + auto const& value = (*it).second.value; + return parse(value); + } + + // added by Kampersanda + template + T get(std::string const& name, const T& default_value) const { + return parsed(name) ? 
get(name) : default_value; + } + + bool parsed(std::string const& name) const { + auto it = m_cmds.find(name); + if (it == m_cmds.end() or (*it).second.value == empty) return false; + return true; + } + + template + T parse(std::string const& value) const { + if constexpr (std::is_same::value) { + return value; + } else if constexpr (std::is_same::value or std::is_same::value or + std::is_same::value) { + return value.front(); + } else if constexpr (std::is_same::value or std::is_same::value or + std::is_same::value or std::is_same::value) { + return std::atoi(value.c_str()); + } else if constexpr (std::is_same::value or std::is_same::value or + std::is_same::value or std::is_same::value) { + return std::atoll(value.c_str()); + } else if constexpr (std::is_same::value or std::is_same::value or + std::is_same::value) { + return std::atof(value.c_str()); + } else if constexpr (std::is_same::value) { + std::istringstream stream(value); + bool ret; + if (value == "true" or value == "false") { + stream >> std::boolalpha >> ret; + } else { + stream >> std::noboolalpha >> ret; + } + return ret; + } + assert(false); + __builtin_unreachable(); + } + + private: + int m_argc; + char** m_argv; + size_t m_required; + std::unordered_map m_cmds; + std::unordered_map m_shorthands; + std::vector m_names; + + bool abort() const { + help(); + return false; + } +}; + +} // namespace cmd_line_parser \ No newline at end of file diff --git a/tools/tinyformat/tinyformat.h b/tools/tinyformat/tinyformat.h new file mode 100644 index 0000000..55f3046 --- /dev/null +++ b/tools/tinyformat/tinyformat.h @@ -0,0 +1,1155 @@ +// tinyformat.h +// Copyright (C) 2011, Chris Foster [chris42f (at) gmail (d0t) com] +// +// Boost Software License - Version 1.0 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, 
and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +//------------------------------------------------------------------------------ +// Tinyformat: A minimal type safe printf replacement +// +// tinyformat.h is a type safe printf replacement library in a single C++ +// header file. Design goals include: +// +// * Type safety and extensibility for user defined types. +// * C99 printf() compatibility, to the extent possible using std::ostream +// * POSIX extension for positional arguments +// * Simplicity and minimalism. A single header file to include and distribute +// with your projects. 
+// * Augment rather than replace the standard stream formatting mechanism +// * C++98 support, with optional C++11 niceties +// +// +// Main interface example usage +// ---------------------------- +// +// To print a date to std::cout for American usage: +// +// std::string weekday = "Wednesday"; +// const char* month = "July"; +// size_t day = 27; +// long hour = 14; +// int min = 44; +// +// tfm::printf("%s, %s %d, %.2d:%.2d\n", weekday, month, day, hour, min); +// +// POSIX extension for positional arguments is available. +// The ability to rearrange formatting arguments is an important feature +// for localization because the word order may vary in different languages. +// +// Previous example for German usage. Arguments are reordered: +// +// tfm::printf("%1$s, %3$d. %2$s, %4$d:%5$.2d\n", weekday, month, day, hour, min); +// +// The strange types here emphasize the type safety of the interface; it is +// possible to print a std::string using the "%s" conversion, and a +// size_t using the "%d" conversion. A similar result could be achieved +// using either of the tfm::format() functions. One prints on a user provided +// stream: +// +// tfm::format(std::cerr, "%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// +// The other returns a std::string: +// +// std::string date = tfm::format("%s, %s %d, %.2d:%.2d\n", +// weekday, month, day, hour, min); +// std::cout << date; +// +// These are the three primary interface functions. There is also a +// convenience function printfln() which appends a newline to the usual result +// of printf() for super simple logging. +// +// +// User defined format functions +// ----------------------------- +// +// Simulating variadic templates in C++98 is pretty painful since it requires +// writing out the same function for each desired number of arguments. To make +// this bearable tinyformat comes with a set of macros which are used +// internally to generate the API, but which may also be used in user code. 
+// +// The three macros TINYFORMAT_ARGTYPES(n), TINYFORMAT_VARARGS(n) and +// TINYFORMAT_PASSARGS(n) will generate a list of n argument types, +// type/name pairs and argument names respectively when called with an integer +// n between 1 and 16. We can use these to define a macro which generates the +// desired user defined function with n arguments. To generate all 16 user +// defined function bodies, use the macro TINYFORMAT_FOREACH_ARGNUM. For an +// example, see the implementation of printf() at the end of the source file. +// +// Sometimes it's useful to be able to pass a list of format arguments through +// to a non-template function. The FormatList class is provided as a way to do +// this by storing the argument list in a type-opaque way. Continuing the +// example from above, we construct a FormatList using makeFormatList(): +// +// FormatListRef formatList = tfm::makeFormatList(weekday, month, day, hour, min); +// +// The format list can now be passed into any non-template function and used +// via a call to the vformat() function: +// +// tfm::vformat(std::cout, "%s, %s %d, %.2d:%.2d\n", formatList); +// +// +// Additional API information +// -------------------------- +// +// Error handling: Define TINYFORMAT_ERROR to customize the error handling for +// format strings which are unsupported or have the wrong number of format +// specifiers (calls assert() by default). +// +// User defined types: Uses operator<< for user defined types by default. +// Overload formatValue() for more control. + +#ifndef TINYFORMAT_H_INCLUDED +#define TINYFORMAT_H_INCLUDED + +namespace tinyformat {} +//------------------------------------------------------------------------------ +// Config section. Customize to your liking! + +// Namespace alias to encourage brevity +namespace tfm = tinyformat; + +// Error handling; calls assert() by default. 
+// #define TINYFORMAT_ERROR(reasonString) your_error_handler(reasonString) + +// Define for C++11 variadic templates which make the code shorter & more +// general. If you don't define this, C++11 support is autodetected below. +// #define TINYFORMAT_USE_VARIADIC_TEMPLATES + +//------------------------------------------------------------------------------ +// Implementation details. +#include +#include +#include + +#ifndef TINYFORMAT_ASSERT +#include +#define TINYFORMAT_ASSERT(cond) assert(cond) +#endif + +#ifndef TINYFORMAT_ERROR +#include +#define TINYFORMAT_ERROR(reason) assert(0 && reason) +#endif + +#if !defined(TINYFORMAT_USE_VARIADIC_TEMPLATES) && !defined(TINYFORMAT_NO_VARIADIC_TEMPLATES) +#ifdef __GXX_EXPERIMENTAL_CXX0X__ +#define TINYFORMAT_USE_VARIADIC_TEMPLATES +#endif +#endif + +#if defined(__GLIBCXX__) && __GLIBCXX__ < 20080201 +// std::showpos is broken on old libstdc++ as provided with macOS. See +// http://gcc.gnu.org/ml/libstdc++/2007-11/msg00075.html +#define TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND +#endif + +#ifdef __APPLE__ +// Workaround macOS linker warning: Xcode uses different default symbol +// visibilities for static libs vs executables (see issue #25) +#define TINYFORMAT_HIDDEN __attribute__((visibility("hidden"))) +#else +#define TINYFORMAT_HIDDEN +#endif + +namespace tinyformat { + +//------------------------------------------------------------------------------ +namespace detail { + +// Test whether type T1 is convertible to type T2 +template +struct is_convertible { + private: + // two types of different size + struct fail { + char dummy[2]; + }; + struct succeed { + char dummy; + }; + // Try to convert a T1 to a T2 by plugging into tryConvert + static fail tryConvert(...); + static succeed tryConvert(const T2&); + static const T1& makeT1(); + + public: +#ifdef _MSC_VER + // Disable spurious loss of precision warnings in tryConvert(makeT1()) +#pragma warning(push) +#pragma warning(disable : 4244) +#pragma warning(disable : 4267) 
+#endif + // Standard trick: the (...) version of tryConvert will be chosen from + // the overload set only if the version taking a T2 doesn't match. + // Then we compare the sizes of the return types to check which + // function matched. Very neat, in a disgusting kind of way :) + static const bool value = sizeof(tryConvert(makeT1())) == sizeof(succeed); +#ifdef _MSC_VER +#pragma warning(pop) +#endif +}; + +// Detect when a type is not a wchar_t string +template +struct is_wchar { + typedef int tinyformat_wchar_is_not_supported; +}; +template <> +struct is_wchar {}; +template <> +struct is_wchar {}; +template +struct is_wchar {}; +template +struct is_wchar {}; + +// Format the value by casting to type fmtT. This default implementation +// should never be called. +template ::value> +struct formatValueAsType { + static void invoke(std::ostream& /*out*/, const T& /*value*/) { + TINYFORMAT_ASSERT(0); + } +}; +// Specialized version for types that can actually be converted to fmtT, as +// indicated by the "convertible" template parameter. +template +struct formatValueAsType { + static void invoke(std::ostream& out, const T& value) { + out << static_cast(value); + } +}; + +#ifdef TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND +template ::value> +struct formatZeroIntegerWorkaround { + static bool invoke(std::ostream& /**/, const T& /**/) { + return false; + } +}; +template +struct formatZeroIntegerWorkaround { + static bool invoke(std::ostream& out, const T& value) { + if (static_cast(value) == 0 && out.flags() & std::ios::showpos) { + out << "+0"; + return true; + } + return false; + } +}; +#endif // TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND + +// Convert an arbitrary type to integer. The version with convertible=false +// throws an error. 
+template ::value> +struct convertToInt { + static int invoke(const T& /*value*/) { + TINYFORMAT_ERROR( + "tinyformat: Cannot convert from argument type to " + "integer for use as variable width or precision"); + return 0; + } +}; +// Specialization for convertToInt when conversion is possible +template +struct convertToInt { + static int invoke(const T& value) { + return static_cast(value); + } +}; + +// Format at most ntrunc characters to the given stream. +template +inline void formatTruncated(std::ostream& out, const T& value, int ntrunc) { + std::ostringstream tmp; + tmp << value; + std::string result = tmp.str(); + out.write(result.c_str(), (std::min)(ntrunc, static_cast(result.size()))); +} +#define TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(type) \ + inline void formatTruncated(std::ostream& out, type* value, int ntrunc) { \ + std::streamsize len = 0; \ + while (len < ntrunc && value[len] != 0) ++len; \ + out.write(value, len); \ + } +// Overload for const char* and char*. Could overload for signed & unsigned +// char too, but these are technically unneeded for printf compatibility. +TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(const char) +TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR(char) +#undef TINYFORMAT_DEFINE_FORMAT_TRUNCATED_CSTR + +} // namespace detail + +//------------------------------------------------------------------------------ +// Variable formatting functions. May be overridden for user-defined types if +// desired. + +/// Format a value into a stream, delegating to operator<< by default. +/// +/// Users may override this for their own types. When this function is called, +/// the stream flags will have been modified according to the format string. +/// The format specification is provided in the range [fmtBegin, fmtEnd). For +/// truncating conversions, ntrunc is set to the desired maximum number of +/// characters, for example "%.7s" calls formatValue with ntrunc = 7. 
+/// +/// By default, formatValue() uses the usual stream insertion operator +/// operator<< to format the type T, with special cases for the %c and %p +/// conversions. +template +inline void formatValue(std::ostream& out, const char* /*fmtBegin*/, const char* fmtEnd, int ntrunc, const T& value) { +#ifndef TINYFORMAT_ALLOW_WCHAR_STRINGS + // Since we don't support printing of wchar_t using "%ls", make it fail at + // compile time in preference to printing as a void* at runtime. + typedef typename detail::is_wchar::tinyformat_wchar_is_not_supported DummyType; + (void)DummyType(); // avoid unused type warning with gcc-4.8 +#endif + // The mess here is to support the %c and %p conversions: if these + // conversions are active we try to convert the type to a char or const + // void* respectively and format that instead of the value itself. For the + // %p conversion it's important to avoid dereferencing the pointer, which + // could otherwise lead to a crash when printing a dangling (const char*). + const bool canConvertToChar = detail::is_convertible::value; + const bool canConvertToVoidPtr = detail::is_convertible::value; + if (canConvertToChar && *(fmtEnd - 1) == 'c') + detail::formatValueAsType::invoke(out, value); + else if (canConvertToVoidPtr && *(fmtEnd - 1) == 'p') + detail::formatValueAsType::invoke(out, value); +#ifdef TINYFORMAT_OLD_LIBSTDCPLUSPLUS_WORKAROUND + else if (detail::formatZeroIntegerWorkaround::invoke(out, value)) /**/ + ; +#endif + else if (ntrunc >= 0) { + // Take care not to overread C strings in truncating conversions like + // "%.4s" where at most 4 characters may be read. 
+ detail::formatTruncated(out, value, ntrunc); + } else + out << value; +} + +// Overloaded version for char types to support printing as an integer +#define TINYFORMAT_DEFINE_FORMATVALUE_CHAR(charType) \ + inline void formatValue(std::ostream& out, const char* /*fmtBegin*/, const char* fmtEnd, int /**/, \ + charType value) { \ + switch (*(fmtEnd - 1)) { \ + case 'u': \ + case 'd': \ + case 'i': \ + case 'o': \ + case 'X': \ + case 'x': \ + out << static_cast(value); \ + break; \ + default: \ + out << value; \ + break; \ + } \ + } +// per 3.9.1: char, signed char and unsigned char are all distinct types +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(signed char) +TINYFORMAT_DEFINE_FORMATVALUE_CHAR(unsigned char) +#undef TINYFORMAT_DEFINE_FORMATVALUE_CHAR + +//------------------------------------------------------------------------------ +// Tools for emulating variadic templates in C++98. The basic idea here is +// stolen from the boost preprocessor metaprogramming library and cut down to +// be just general enough for what we need. + +#define TINYFORMAT_ARGTYPES(n) TINYFORMAT_ARGTYPES_##n +#define TINYFORMAT_VARARGS(n) TINYFORMAT_VARARGS_##n +#define TINYFORMAT_PASSARGS(n) TINYFORMAT_PASSARGS_##n +#define TINYFORMAT_PASSARGS_TAIL(n) TINYFORMAT_PASSARGS_TAIL_##n + +// To keep it as transparent as possible, the macros below have been generated +// using python via the excellent cog.py code generation script. This avoids +// the need for a bunch of complex (but more general) preprocessor tricks as +// used in boost.preprocessor. +// +// To rerun the code generation in place, use `cog.py -r tinyformat.h` +// (see http://nedbatchelder.com/code/cog). Alternatively you can just create +// extra versions by hand. 
+ +/*[[[cog +maxParams = 16 + +def makeCommaSepLists(lineTemplate, elemTemplate, startInd=1): + for j in range(startInd,maxParams+1): + list = ', '.join([elemTemplate % {'i':i} for i in range(startInd,j+1)]) + cog.outl(lineTemplate % {'j':j, 'list':list}) + +makeCommaSepLists('#define TINYFORMAT_ARGTYPES_%(j)d %(list)s', + 'class T%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_VARARGS_%(j)d %(list)s', + 'const T%(i)d& v%(i)d') + +cog.outl() +makeCommaSepLists('#define TINYFORMAT_PASSARGS_%(j)d %(list)s', 'v%(i)d') + +cog.outl() +cog.outl('#define TINYFORMAT_PASSARGS_TAIL_1') +makeCommaSepLists('#define TINYFORMAT_PASSARGS_TAIL_%(j)d , %(list)s', + 'v%(i)d', startInd = 2) + +cog.outl() +cog.outl('#define TINYFORMAT_FOREACH_ARGNUM(m) \\\n ' + + ' '.join(['m(%d)' % (j,) for j in range(1,maxParams+1)])) +]]]*/ +#define TINYFORMAT_ARGTYPES_1 class T1 +#define TINYFORMAT_ARGTYPES_2 class T1, class T2 +#define TINYFORMAT_ARGTYPES_3 class T1, class T2, class T3 +#define TINYFORMAT_ARGTYPES_4 class T1, class T2, class T3, class T4 +#define TINYFORMAT_ARGTYPES_5 class T1, class T2, class T3, class T4, class T5 +#define TINYFORMAT_ARGTYPES_6 class T1, class T2, class T3, class T4, class T5, class T6 +#define TINYFORMAT_ARGTYPES_7 class T1, class T2, class T3, class T4, class T5, class T6, class T7 +#define TINYFORMAT_ARGTYPES_8 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8 +#define TINYFORMAT_ARGTYPES_9 class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9 +#define TINYFORMAT_ARGTYPES_10 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10 +#define TINYFORMAT_ARGTYPES_11 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11 +#define TINYFORMAT_ARGTYPES_12 \ + class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, \ + class T12 
+#define TINYFORMAT_ARGTYPES_13 \
+    class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, \
+    class T12, class T13
+#define TINYFORMAT_ARGTYPES_14 \
+    class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, \
+    class T12, class T13, class T14
+#define TINYFORMAT_ARGTYPES_15 \
+    class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, \
+    class T12, class T13, class T14, class T15
+#define TINYFORMAT_ARGTYPES_16 \
+    class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9, class T10, class T11, \
+    class T12, class T13, class T14, class T15, class T16
+
+#define TINYFORMAT_VARARGS_1 const T1& v1
+#define TINYFORMAT_VARARGS_2 const T1 &v1, const T2 &v2
+#define TINYFORMAT_VARARGS_3 const T1 &v1, const T2 &v2, const T3 &v3
+#define TINYFORMAT_VARARGS_4 const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4
+#define TINYFORMAT_VARARGS_5 const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5
+#define TINYFORMAT_VARARGS_6 const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6
+#define TINYFORMAT_VARARGS_7 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7
+#define TINYFORMAT_VARARGS_8 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8
+#define TINYFORMAT_VARARGS_9 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9
+#define TINYFORMAT_VARARGS_10 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10
+#define TINYFORMAT_VARARGS_11 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10, const T11 &v11
+#define TINYFORMAT_VARARGS_12 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10, const T11 &v11, const T12 &v12
+#define TINYFORMAT_VARARGS_13 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10, const T11 &v11, const T12 &v12, const T13 &v13
+#define TINYFORMAT_VARARGS_14 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10, const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14
+#define TINYFORMAT_VARARGS_15 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10, const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14, const T15 &v15
+#define TINYFORMAT_VARARGS_16 \
+    const T1 &v1, const T2 &v2, const T3 &v3, const T4 &v4, const T5 &v5, const T6 &v6, const T7 &v7, const T8 &v8, \
+    const T9 &v9, const T10 &v10, const T11 &v11, const T12 &v12, const T13 &v13, const T14 &v14, const T15 &v15, \
+    const T16 &v16
+
+#define TINYFORMAT_PASSARGS_1 v1
+#define TINYFORMAT_PASSARGS_2 v1, v2
+#define TINYFORMAT_PASSARGS_3 v1, v2, v3
+#define TINYFORMAT_PASSARGS_4 v1, v2, v3, v4
+#define TINYFORMAT_PASSARGS_5 v1, v2, v3, v4, v5
+#define TINYFORMAT_PASSARGS_6 v1, v2, v3, v4, v5, v6
+#define TINYFORMAT_PASSARGS_7 v1, v2, v3, v4, v5, v6, v7
+#define TINYFORMAT_PASSARGS_8 v1, v2, v3, v4, v5, v6, v7, v8
+#define TINYFORMAT_PASSARGS_9 v1, v2, v3, v4, v5, v6, v7, v8, v9
+#define TINYFORMAT_PASSARGS_10 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10
+#define TINYFORMAT_PASSARGS_11 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+#define TINYFORMAT_PASSARGS_12 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12
+#define TINYFORMAT_PASSARGS_13 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13
+#define TINYFORMAT_PASSARGS_14 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14
+#define TINYFORMAT_PASSARGS_15 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15
+#define TINYFORMAT_PASSARGS_16 v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16
+
+#define TINYFORMAT_PASSARGS_TAIL_1
+#define TINYFORMAT_PASSARGS_TAIL_2 , v2
+#define TINYFORMAT_PASSARGS_TAIL_3 , v2, v3
+#define TINYFORMAT_PASSARGS_TAIL_4 , v2, v3, v4
+#define TINYFORMAT_PASSARGS_TAIL_5 , v2, v3, v4, v5
+#define TINYFORMAT_PASSARGS_TAIL_6 , v2, v3, v4, v5, v6
+#define TINYFORMAT_PASSARGS_TAIL_7 , v2, v3, v4, v5, v6, v7
+#define TINYFORMAT_PASSARGS_TAIL_8 , v2, v3, v4, v5, v6, v7, v8
+#define TINYFORMAT_PASSARGS_TAIL_9 , v2, v3, v4, v5, v6, v7, v8, v9
+#define TINYFORMAT_PASSARGS_TAIL_10 , v2, v3, v4, v5, v6, v7, v8, v9, v10
+#define TINYFORMAT_PASSARGS_TAIL_11 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
+#define TINYFORMAT_PASSARGS_TAIL_12 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12
+#define TINYFORMAT_PASSARGS_TAIL_13 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13
+#define TINYFORMAT_PASSARGS_TAIL_14 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14
+#define TINYFORMAT_PASSARGS_TAIL_15 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15
+#define TINYFORMAT_PASSARGS_TAIL_16 , v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16
+
+#define TINYFORMAT_FOREACH_ARGNUM(m) \
+    m(1) m(2) m(3) m(4) m(5) m(6) m(7) m(8) m(9) m(10) m(11) m(12) m(13) m(14) m(15) m(16)
+//[[[end]]]
+// Summary of the generated lists above, for an argument count n:
+//   TINYFORMAT_ARGTYPES_n        class T1, ..., class Tn
+//   TINYFORMAT_VARARGS_n         const T1& v1, ..., const Tn& vn
+//   TINYFORMAT_PASSARGS_n        v1, ..., vn
+//   TINYFORMAT_PASSARGS_TAIL_n   , v2, ..., vn   (expands to nothing for n == 1)
+//   TINYFORMAT_FOREACH_ARGNUM(m) m(1) m(2) ... m(16)
+
+namespace detail {
+
+// Type-opaque holder for an argument to format(), with associated actions on
+// the type held as explicit function pointers. This allows FormatArg's for
+// each argument to be allocated as a homogeneous array inside FormatList
+// whereas a naive implementation based on inheritance does not.
+class FormatArg {
+  public:
+    // Empty holder: no value and no type-specific actions are bound.
+    // format() and toInt() check for this state with TINYFORMAT_ASSERT.
+    FormatArg() : m_value(NULL), m_formatImpl(NULL), m_toIntImpl(NULL) {}
+
+    // Bind to `value` by address (no copy is made) and record the
+    // formatImpl<T> / toIntImpl<T> instantiations that know its static type.
+    // The referenced object must stay alive for as long as this FormatArg is
+    // used.
+    template <typename T>
+    FormatArg(const T& value)
+        : m_value(static_cast<const void*>(&value)), m_formatImpl(&formatImpl<T>), m_toIntImpl(&toIntImpl<T>) {}
+
+    // Format the held value into `out`. [fmtBegin, fmtEnd) is the text of the
+    // format spec and `ntrunc` the truncation length; all are handed to the
+    // bound formatter unchanged.
+    void format(std::ostream& out, const char* fmtBegin, const char* fmtEnd, int ntrunc) const {
+        TINYFORMAT_ASSERT(m_value);
+        TINYFORMAT_ASSERT(m_formatImpl);
+        m_formatImpl(out, fmtBegin, fmtEnd, ntrunc, m_value);
+    }
+
+    // Convert the held value to int through convertToInt<T> (used for
+    // variable width/precision arguments).
+    int toInt() const {
+        TINYFORMAT_ASSERT(m_value);
+        TINYFORMAT_ASSERT(m_toIntImpl);
+        return m_toIntImpl(m_value);
+    }
+
+  private:
+    // Type-restoring trampoline: casts `value` back to `const T*` and forwards
+    // to formatValue().
+    template <typename T>
+    TINYFORMAT_HIDDEN static void formatImpl(std::ostream& out, const char* fmtBegin, const char* fmtEnd, int ntrunc,
+                                             const void* value) {
+        formatValue(out, fmtBegin, fmtEnd, ntrunc, *static_cast<const T*>(value));
+    }
+
+    // Type-restoring trampoline for the int conversion.
+    template <typename T>
+    TINYFORMAT_HIDDEN static int toIntImpl(const void* value) {
+        return convertToInt<T>::invoke(*static_cast<const T*>(value));
+    }
+
+    const void* m_value;
+    void (*m_formatImpl)(std::ostream& out, const char* fmtBegin, const char* fmtEnd, int ntrunc, const void* value);
+    int (*m_toIntImpl)(const void* value);
+};
+
+// Parse and return an integer from the string c, as atoi()
+// On return, c is set to one past the end of the integer.
+// NOTE(review): there is no overflow guard; a digit run whose value exceeds
+// the range of int overflows `i`.
+inline int parseIntAndAdvance(const char*& c) {
+    int i = 0;
+    for (; *c >= '0' && *c <= '9'; ++c) i = 10 * i + (*c - '0');
+    return i;
+}
+
+// Parse width or precision `n` from format string pointer `c`, and advance it
+// to the next character. If an indirection is requested with `*`, the argument
+// is read from `args[argIndex]` and `argIndex` is incremented (or read
+// from `args[n]` in positional mode). Returns true if one or more
+// characters were read.
+inline bool parseWidthOrPrecision(int& n, const char*& c, bool positionalMode, const detail::FormatArg* args,
+                                  int& argIndex, int numArgs) {
+    if (*c >= '0' && *c <= '9') {
+        n = parseIntAndAdvance(c);
+    } else if (*c == '*') {
+        ++c;
+        n = 0;
+        if (positionalMode) {
+            int pos = parseIntAndAdvance(c) - 1;
+            if (*c != '$') TINYFORMAT_ERROR("tinyformat: Non-positional argument used after a positional one");
+            if (pos >= 0 && pos < numArgs)
+                n = args[pos].toInt();
+            else
+                TINYFORMAT_ERROR("tinyformat: Positional argument out of range");
+            // Step over the '$'. If TINYFORMAT_ERROR returned above, *c may be
+            // the terminating NUL; never advance past it.
+            if (*c != '\0') ++c;
+        } else {
+            if (argIndex < numArgs)
+                n = args[argIndex++].toInt();
+            else
+                TINYFORMAT_ERROR("tinyformat: Not enough arguments to read variable width or precision");
+        }
+    } else {
+        return false;
+    }
+    return true;
+}
+
+// Print literal part of format string and return next format spec position.
+//
+// Skips over any occurrences of '%%', printing a literal '%' to the output.
+// The position of the first % character of the next nontrivial format spec is
+// returned, or the end of string.
+inline const char* printFormatStringLiteral(std::ostream& out, const char* fmt) {
+    const char* c = fmt;
+    for (;; ++c) {
+        if (*c == '\0') {
+            out.write(fmt, c - fmt);
+            return c;
+        } else if (*c == '%') {
+            out.write(fmt, c - fmt);
+            if (*(c + 1) != '%') return c;
+            // for "%%", tack trailing % onto next literal section.
+            fmt = ++c;
+        }
+    }
+}
+
+// Parse a format string and set the stream state accordingly.
+//
+// The format mini-language recognized here is meant to be the one from C99,
+// with the form "%[flags][width][.precision][length]type" with POSIX
+// positional arguments extension.
+//
+// POSIX positional arguments extension:
+// Conversions can be applied to the nth argument after the format in
+// the argument list, rather than to the next unused argument. In this case,
+// the conversion specifier character % (see below) is replaced by the sequence
+// "%n$", where n is a decimal integer in the range [1,{NL_ARGMAX}],
+// giving the position of the argument in the argument list. This feature
+// provides for the definition of format strings that select arguments
+// in an order appropriate to specific languages.
+//
+// The format can contain either numbered argument conversion specifications
+// (that is, "%n$" and "*m$"), or unnumbered argument conversion specifications
+// (that is, % and * ), but not both. The only exception to this is that %%
+// can be mixed with the "%n$" form. The results of mixing numbered and
+// unnumbered argument specifications in a format string are undefined.
+// When numbered argument specifications are used, specifying the Nth argument
+// requires that all the leading arguments, from the first to the (N-1)th,
+// are specified in the format string.
+//
+// In format strings containing the "%n$" form of conversion specification,
+// numbered arguments in the argument list can be referenced from the format
+// string as many times as required.
+//
+// Formatting options which can't be natively represented using the ostream
+// state are returned in spacePadPositive (for space padded positive numbers)
+// and ntrunc (for truncating conversions). argIndex is incremented if
+// necessary to pull out variable width and precision. The function returns a
+// pointer to the character after the end of the current format spec.
+inline const char* streamStateFromFormat(std::ostream& out, bool& positionalMode, bool& spacePadPositive, int& ntrunc,
+                                         const char* fmtStart, const detail::FormatArg* args, int& argIndex,
+                                         int numArgs) {
+    TINYFORMAT_ASSERT(*fmtStart == '%');
+    // Reset stream state to defaults.
+    out.width(0);
+    out.precision(6);
+    out.fill(' ');
+    // Reset most flags; ignore irrelevant unitbuf & skipws.
+    out.unsetf(std::ios::adjustfield | std::ios::basefield | std::ios::floatfield | std::ios::showbase |
+               std::ios::boolalpha | std::ios::showpoint | std::ios::showpos | std::ios::uppercase);
+    bool precisionSet = false;
+    bool widthSet = false;
+    int widthExtra = 0;
+    const char* c = fmtStart + 1;
+
+    // 1) Parse an argument index (if followed by '$') or a width possibly
+    // preceded with '0' flag.
+    if (*c >= '0' && *c <= '9') {
+        const char tmpc = *c;
+        int value = parseIntAndAdvance(c);
+        if (*c == '$') {
+            // value is an argument index
+            if (value > 0 && value <= numArgs)
+                argIndex = value - 1;
+            else
+                TINYFORMAT_ERROR("tinyformat: Positional argument out of range");
+            ++c;
+            positionalMode = true;
+        } else if (positionalMode) {
+            TINYFORMAT_ERROR("tinyformat: Non-positional argument used after a positional one");
+        } else {
+            if (tmpc == '0') {
+                // Use internal padding so that numeric values are
+                // formatted correctly, eg -00010 rather than 000-10
+                out.fill('0');
+                out.setf(std::ios::internal, std::ios::adjustfield);
+            }
+            if (value != 0) {
+                // Nonzero value means that we parsed width.
+                widthSet = true;
+                out.width(value);
+            }
+        }
+    } else if (positionalMode) {
+        TINYFORMAT_ERROR("tinyformat: Non-positional argument used after a positional one");
+    }
+    // 2) Parse flags and width if we did not do it in previous step.
+    if (!widthSet) {
+        // Parse flags
+        for (;; ++c) {
+            switch (*c) {
+                case '#':
+                    out.setf(std::ios::showpoint | std::ios::showbase);
+                    continue;
+                case '0':
+                    // overridden by left alignment ('-' flag)
+                    if (!(out.flags() & std::ios::left)) {
+                        // Use internal padding so that numeric values are
+                        // formatted correctly, eg -00010 rather than 000-10
+                        out.fill('0');
+                        out.setf(std::ios::internal, std::ios::adjustfield);
+                    }
+                    continue;
+                case '-':
+                    out.fill(' ');
+                    out.setf(std::ios::left, std::ios::adjustfield);
+                    continue;
+                case ' ':
+                    // overridden by show positive sign, '+' flag.
+                    if (!(out.flags() & std::ios::showpos)) spacePadPositive = true;
+                    continue;
+                case '+':
+                    out.setf(std::ios::showpos);
+                    spacePadPositive = false;
+                    widthExtra = 1;
+                    continue;
+                default:
+                    break;
+            }
+            break;
+        }
+        // Parse width
+        int width = 0;
+        widthSet = parseWidthOrPrecision(width, c, positionalMode, args, argIndex, numArgs);
+        if (widthSet) {
+            if (width < 0) {
+                // negative widths correspond to '-' flag set
+                out.fill(' ');
+                out.setf(std::ios::left, std::ios::adjustfield);
+                width = -width;
+            }
+            out.width(width);
+        }
+    }
+    // 3) Parse precision
+    if (*c == '.') {
+        ++c;
+        int precision = 0;
+        parseWidthOrPrecision(precision, c, positionalMode, args, argIndex, numArgs);
+        // Presence of `.` indicates precision set, unless the inferred value
+        // was negative in which case the default is used.
+        precisionSet = precision >= 0;
+        if (precisionSet) out.precision(precision);
+    }
+    // 4) Ignore any C99 length modifier
+    while (*c == 'l' || *c == 'h' || *c == 'L' || *c == 'j' || *c == 'z' || *c == 't') {
+        ++c;
+    }
+    // 5) We're up to the conversion specifier character.
+    // Set stream flags based on conversion specifier (thanks to the
+    // boost::format class for forging the way here).
+    bool intConversion = false;
+    switch (*c) {
+        case 'u':
+        case 'd':
+        case 'i':
+            out.setf(std::ios::dec, std::ios::basefield);
+            intConversion = true;
+            break;
+        case 'o':
+            out.setf(std::ios::oct, std::ios::basefield);
+            intConversion = true;
+            break;
+        case 'X':
+            out.setf(std::ios::uppercase);
+            // Falls through
+        case 'x':
+        case 'p':
+            out.setf(std::ios::hex, std::ios::basefield);
+            intConversion = true;
+            break;
+        case 'E':
+            out.setf(std::ios::uppercase);
+            // Falls through
+        case 'e':
+            out.setf(std::ios::scientific, std::ios::floatfield);
+            out.setf(std::ios::dec, std::ios::basefield);
+            break;
+        case 'F':
+            out.setf(std::ios::uppercase);
+            // Falls through
+        case 'f':
+            out.setf(std::ios::fixed, std::ios::floatfield);
+            break;
+        case 'A':
+            out.setf(std::ios::uppercase);
+            // Falls through
+        case 'a':
+#ifdef _MSC_VER
+            // Workaround
+            // https://developercommunity.visualstudio.com/content/problem/520472/hexfloat-stream-output-does-not-ignore-precision-a.html
+            // by always setting maximum precision on MSVC to avoid precision
+            // loss for doubles.
+            out.precision(13);
+#endif
+            out.setf(std::ios::fixed | std::ios::scientific, std::ios::floatfield);
+            break;
+        case 'G':
+            out.setf(std::ios::uppercase);
+            // Falls through
+        case 'g':
+            out.setf(std::ios::dec, std::ios::basefield);
+            // As in boost::format, let stream decide float format.
+            out.flags(out.flags() & ~std::ios::floatfield);
+            break;
+        case 'c':
+            // Handled as special case inside formatValue()
+            break;
+        case 's':
+            if (precisionSet) ntrunc = static_cast<int>(out.precision());
+            // Make %s print Booleans as "true" and "false"
+            out.setf(std::ios::boolalpha);
+            break;
+        case 'n':
+            // Not supported - will cause problems!
+            TINYFORMAT_ERROR("tinyformat: %n conversion spec not supported");
+            break;
+        case '\0':
+            TINYFORMAT_ERROR(
+                "tinyformat: Conversion spec incorrectly "
+                "terminated by end of string");
+            return c;
+        default:
+            break;
+    }
+    if (intConversion && precisionSet && !widthSet) {
+        // "precision" for integers gives the minimum number of digits (to be
+        // padded with zeros on the left). This isn't really supported by the
+        // iostreams, but we can approximately simulate it with the width if
+        // the width isn't otherwise used.
+        out.width(out.precision() + widthExtra);
+        out.setf(std::ios::internal, std::ios::adjustfield);
+        out.fill('0');
+    }
+    return c + 1;
+}
+
+//------------------------------------------------------------------------------
+// Format args[0..numArgs) into `out` following the printf-style string `fmt`.
+// The stream's width, precision, flags and fill are saved on entry and put
+// back before the function returns normally.
+inline void formatImpl(std::ostream& out, const char* fmt, const detail::FormatArg* args, int numArgs) {
+    // Saved stream state
+    std::streamsize origWidth = out.width();
+    std::streamsize origPrecision = out.precision();
+    std::ios::fmtflags origFlags = out.flags();
+    char origFill = out.fill();
+
+    // "Positional mode" means all format specs should be of the form "%n$..."
+    // with `n` an integer. We detect this in `streamStateFromFormat`.
+    bool positionalMode = false;
+    int argIndex = 0;
+    while (true) {
+        fmt = printFormatStringLiteral(out, fmt);
+        if (*fmt == '\0') {
+            if (!positionalMode && argIndex < numArgs) {
+                TINYFORMAT_ERROR("tinyformat: Not enough conversion specifiers in format string");
+            }
+            break;
+        }
+        bool spacePadPositive = false;
+        int ntrunc = -1;
+        const char* fmtEnd =
+            streamStateFromFormat(out, positionalMode, spacePadPositive, ntrunc, fmt, args, argIndex, numArgs);
+        // NB: argIndex may be incremented by reading variable width/precision
+        // in `streamStateFromFormat`, so do the bounds check here.
+        if (argIndex >= numArgs) {
+            TINYFORMAT_ERROR("tinyformat: Too many conversion specifiers in format string");
+            // Leave the loop instead of returning so that the stream state
+            // saved above is still restored below.
+            break;
+        }
+        const FormatArg& arg = args[argIndex];
+        // Format the arg into the stream.
+        if (!spacePadPositive) {
+            arg.format(out, fmt, fmtEnd, ntrunc);
+        } else {
+            // The following is a special case with no direct correspondence
+            // between stream formatting and the printf() behaviour. Simulate
+            // it crudely by formatting into a temporary string stream and
+            // munging the resulting string.
+            std::ostringstream tmpStream;
+            tmpStream.copyfmt(out);
+            tmpStream.setf(std::ios::showpos);
+            arg.format(tmpStream, fmt, fmtEnd, ntrunc);
+            std::string result = tmpStream.str();  // allocates... yuck.
+            for (size_t i = 0, iend = result.size(); i < iend; ++i) {
+                if (result[i] == '+') result[i] = ' ';
+            }
+            out << result;
+        }
+        if (!positionalMode) ++argIndex;
+        fmt = fmtEnd;
+    }
+
+    // Restore stream state
+    out.width(origWidth);
+    out.precision(origPrecision);
+    out.flags(origFlags);
+    out.fill(origFill);
+}
+
+}  // namespace detail
+
+/// List of template arguments format(), held in a type-opaque way.
+///
+/// A const reference to FormatList (typedef'd as FormatListRef) may be
+/// conveniently used to pass arguments to non-template functions: All type
+/// information has been stripped from the arguments, leaving just enough of a
+/// common interface to perform formatting as required.
+class FormatList {
+  public:
+    /// View over `N` FormatArg objects starting at `args`; the array is not
+    /// copied or owned.
+    FormatList(detail::FormatArg* args, int N) : m_args(args), m_N(N) {}
+
+    friend void vformat(std::ostream& out, const char* fmt, const FormatList& list);
+
+  private:
+    const detail::FormatArg* m_args;
+    int m_N;
+};
+
+/// Reference to type-opaque format list for passing to vformat()
+typedef const FormatList& FormatListRef;
+
+namespace detail {
+
+// Format list subclass with fixed storage to avoid dynamic allocation
+template <int N>
+class FormatListN : public FormatList {
+  public:
+#ifdef TINYFORMAT_USE_VARIADIC_TEMPLATES
+    template <typename... Args>
+    FormatListN(const Args&... args) : FormatList(&m_formatterStore[0], N), m_formatterStore{FormatArg(args)...} {
+        static_assert(sizeof...(args) == N, "Number of args must be N");
+    }
+#else  // C++98 version
+    void init(int) {}
+#define TINYFORMAT_MAKE_FORMATLIST_CONSTRUCTOR(n) \
+    \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    FormatListN(TINYFORMAT_VARARGS(n)) : FormatList(&m_formatterStore[0], n) { \
+        TINYFORMAT_ASSERT(n == N); \
+        init(0, TINYFORMAT_PASSARGS(n)); \
+    } \
+    \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    void init(int i, TINYFORMAT_VARARGS(n)) { \
+        m_formatterStore[i] = FormatArg(v1); \
+        init(i + 1 TINYFORMAT_PASSARGS_TAIL(n)); \
+    }
+
+    TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_FORMATLIST_CONSTRUCTOR)
+#undef TINYFORMAT_MAKE_FORMATLIST_CONSTRUCTOR
+#endif
+
+  private:
+    FormatArg m_formatterStore[N];
+};
+
+// Special 0-arg version - MSVC says zero-sized C array in struct is nonstandard
+template <>
+class FormatListN<0> : public FormatList {
+  public:
+    FormatListN() : FormatList(0, 0) {}
+};
+
+}  // namespace detail
+
+//------------------------------------------------------------------------------
+// Primary API functions
+
+#ifdef TINYFORMAT_USE_VARIADIC_TEMPLATES
+
+/// Make type-agnostic format list from list of template arguments.
+///
+/// The exact return type of this function is an implementation detail and
+/// shouldn't be relied upon. Instead it should be stored as a FormatListRef:
+///
+/// FormatListRef formatList = makeFormatList( /*...*/ );
+template <typename... Args>
+detail::FormatListN<sizeof...(Args)> makeFormatList(const Args&... args) {
+    return detail::FormatListN<sizeof...(args)>(args...);
+}
+
+#else  // C++98 version
+
+inline detail::FormatListN<0> makeFormatList() {
+    return detail::FormatListN<0>();
+}
+#define TINYFORMAT_MAKE_MAKEFORMATLIST(n) \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    detail::FormatListN<n> makeFormatList(TINYFORMAT_VARARGS(n)) { \
+        return detail::FormatListN<n>(TINYFORMAT_PASSARGS(n)); \
+    }
+TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_MAKEFORMATLIST)
+#undef TINYFORMAT_MAKE_MAKEFORMATLIST
+
+#endif
+
+/// Format list of arguments to the stream according to the given format string.
+///
+/// The name vformat() is chosen for the semantic similarity to vprintf(): the
+/// list of format arguments is held in a single function argument.
+inline void vformat(std::ostream& out, const char* fmt, FormatListRef list) {
+    detail::formatImpl(out, fmt, list.m_args, list.m_N);
+}
+
+#ifdef TINYFORMAT_USE_VARIADIC_TEMPLATES
+
+/// Format list of arguments to the stream according to given format string.
+template <typename... Args>
+void format(std::ostream& out, const char* fmt, const Args&... args) {
+    vformat(out, fmt, makeFormatList(args...));
+}
+
+/// Format list of arguments according to the given format string and return
+/// the result as a string.
+template <typename... Args>
+std::string format(const char* fmt, const Args&... args) {
+    std::ostringstream oss;
+    format(oss, fmt, args...);
+    return oss.str();
+}
+
+/// Format list of arguments to std::cout, according to the given format string
+template <typename... Args>
+void printf(const char* fmt, const Args&... args) {
+    format(std::cout, fmt, args...);
+}
+
+/// Format list of arguments to `out`, followed by a newline character.
+template <typename... Args>
+void printfln(std::ostream& out, const char* fmt, const Args&... args) {
+    format(out, fmt, args...);
+    out << '\n';
+}
+
+/// Additional functions by Kampersanda
+///
+/// printfln(fmt, ...) formats to std::cout and appends a newline character.
+/// errorf / warnf write "ERROR: " / "WARNING: " plus the formatted message to
+/// std::cerr wrapped in red / yellow terminal color codes; reportf writes the
+/// formatted message to std::cout in green. Each emits a color reset code
+/// afterwards. The *ln variants additionally end the line with std::endl.
+template <typename... Args>
+void printfln(const char* fmt, const Args&... args) {
+    format(std::cout, fmt, args...);
+    std::cout << '\n';
+}
+
+template <typename... Args>
+inline void errorf(const char* fmt, const Args&... args) {
+    static const char* RED_COLOR = "\033[0;31m";
+    std::cerr << RED_COLOR << "ERROR: ";
+    format(std::cerr, fmt, args...);
+    std::cerr << "\033[0;0m";
+}
+template <typename... Args>
+inline void warnf(const char* fmt, const Args&... args) {
+    static const char* YELLOW_COLOR = "\033[0;33m";
+    std::cerr << YELLOW_COLOR << "WARNING: ";
+    format(std::cerr, fmt, args...);
+    std::cerr << "\033[0;0m";
+}
+template <typename... Args>
+inline void reportf(const char* fmt, const Args&... args) {
+    static const char* GREEN_COLOR = "\033[0;32m";
+    std::cout << GREEN_COLOR;
+    format(std::cout, fmt, args...);
+    std::cout << "\033[0;0m";
+}
+template <typename... Args>
+inline void errorfln(const char* fmt, const Args&... args) {
+    static const char* RED_COLOR = "\033[0;31m";
+    std::cerr << RED_COLOR << "ERROR: ";
+    format(std::cerr, fmt, args...);
+    std::cerr << "\033[0;0m" << std::endl;
+}
+template <typename... Args>
+inline void warnfln(const char* fmt, const Args&... args) {
+    static const char* YELLOW_COLOR = "\033[0;33m";
+    std::cerr << YELLOW_COLOR << "WARNING: ";
+    format(std::cerr, fmt, args...);
+    std::cerr << "\033[0;0m" << std::endl;
+}
+template <typename... Args>
+inline void reportfln(const char* fmt, const Args&... args) {
+    static const char* GREEN_COLOR = "\033[0;32m";
+    std::cout << GREEN_COLOR;
+    format(std::cout, fmt, args...);
+    std::cout << "\033[0;0m" << std::endl;
+}
+
+#else  // C++98 version
+
+inline void format(std::ostream& out, const char* fmt) {
+    vformat(out, fmt, makeFormatList());
+}
+
+inline std::string format(const char* fmt) {
+    std::ostringstream oss;
+    format(oss, fmt);
+    return oss.str();
+}
+
+inline void printf(const char* fmt) {
+    format(std::cout, fmt);
+}
+
+inline void printfln(const char* fmt) {
+    format(std::cout, fmt);
+    std::cout << '\n';
+}
+
+#define TINYFORMAT_MAKE_FORMAT_FUNCS(n) \
+    \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    void format(std::ostream& out, const char* fmt, TINYFORMAT_VARARGS(n)) { \
+        vformat(out, fmt, makeFormatList(TINYFORMAT_PASSARGS(n))); \
+    } \
+    \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    std::string format(const char* fmt, TINYFORMAT_VARARGS(n)) { \
+        std::ostringstream oss; \
+        format(oss, fmt, TINYFORMAT_PASSARGS(n)); \
+        return oss.str(); \
+    } \
+    \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    void printf(const char* fmt, TINYFORMAT_VARARGS(n)) { \
+        format(std::cout, fmt, TINYFORMAT_PASSARGS(n)); \
+    } \
+    \
+    template <TINYFORMAT_ARGTYPES(n)> \
+    void printfln(const char* fmt, TINYFORMAT_VARARGS(n)) { \
+        format(std::cout, fmt, TINYFORMAT_PASSARGS(n)); \
+        std::cout << '\n'; \
+    }
+
+TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_FORMAT_FUNCS)
+#undef TINYFORMAT_MAKE_FORMAT_FUNCS
+
+#endif
+
+}  // namespace tinyformat
+
+#endif  // TINYFORMAT_H_INCLUDED
diff --git a/tools/xcdat_build.cpp b/tools/xcdat_build.cpp
new file mode 100644
index 0000000..587243c
--- /dev/null
+++ b/tools/xcdat_build.cpp
@@ -0,0 +1,92 @@
+#include <algorithm>
+#include <chrono>
+#include <string>
+
+#include <xcdat.hpp>
+
+#include "cmd_line_parser/parser.hpp"
+#include "tinyformat/tinyformat.h"
+
+// Declare the command-line interface: two positional file paths plus the
+// optional -t (trie type) and -u (deduplicate keys) switches.
+cmd_line_parser::parser make_parser(int argc, char** argv) {
+    cmd_line_parser::parser p(argc, argv);
+    p.add("input_keys", "Input filepath of data keys");
+    p.add("output_idx", "Output filepath of trie index");
+    p.add("trie_type", "Type of trie impl. from [7|8], (default=7)", "-t", false);
+    p.add("to_unique", "Make unique the input keys? (default=0)", "-u", false);
+    return p;
+}
+
+// Load the keys, build a `Trie` from them and print build statistics.
+// Returns the process exit code: 0 on success, 1 if no keys were loaded.
+// NOTE(review): the explicit template arguments in this file (p.get<...>,
+// essentials::timer<...>, build<...>) were lost in extraction and have been
+// reconstructed from usage; confirm against the upstream sources.
+template <class Trie>
+int build(const cmd_line_parser::parser& p) {
+    const auto input_keys = p.get<std::string>("input_keys");
+    const auto output_idx = p.get<std::string>("output_idx");
+    const auto to_unique = p.get<bool>("to_unique", false);
+
+    auto keys = xcdat::io::load_strings(input_keys);
+    if (keys.empty()) {
+        // errorfln() already prefixes "ERROR: "; bail out instead of building
+        // a trie from nothing.
+        tfm::errorfln("The input dataset is empty.");
+        return 1;
+    }
+
+    if (to_unique) {
+        std::sort(keys.begin(), keys.end());
+        keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
+    }
+
+    essentials::timer<essentials::clock_type, std::chrono::seconds> timer;
+    timer.start();
+    const auto trie = Trie::build(keys);
+    timer.stop();
+
+    const double construction_time_in_sec = timer.average();
+    // Keep the byte count in the type memory_in_bytes() returns (presumably an
+    // integer): as a double, "%d" printed large sizes in scientific notation.
+    const auto memory_in_bytes = trie.memory_in_bytes();
+
+    tfm::printfln("construction_time_in_sec: %g", construction_time_in_sec);
+    tfm::printfln("memory_in_bytes: %d", memory_in_bytes);
+    tfm::printfln("memory_in_MiB: %g", static_cast<double>(memory_in_bytes) / essentials::MiB);
+    tfm::printfln("number_of_keys: %d", trie.num_keys());
+    tfm::printfln("alphabet_size: %d", trie.alphabet_size());
+    tfm::printfln("max_length: %d", trie.max_length());
+
+    // NOTE(review): `output_idx` is read but the trie is never written to it;
+    // saving the index is presumably still to be added.
+    return 0;
+}
+
+// Entry point: parse the arguments and dispatch on the requested trie type.
+// Exits with 1 (after printing the help text) for an unsupported type.
+int main(int argc, char** argv) {
+#ifndef NDEBUG
+    tfm::warnfln("The code is running in debug mode.");
+#endif
+
+    auto p = make_parser(argc, argv);
+    if (!p.parse()) {
+        return 1;
+    }
+
+    const auto trie_type = p.get<int>("trie_type", 7);
+
+    switch (trie_type) {
+        case 7:
+            return build<xcdat::trie_7_type>(p);
+        case 8:
+            return build<xcdat::trie_8_type>(p);
+        default:
+            break;
+    }
+
+    p.help();
+    return 1;
+}
\ No newline at end of file