/** @addtogroup Strings * @{ */ /** @file format.hh * * @brief APIs for type safe formatting using C-style format strings. * * libostd provides a powerful formatting system that lets you format into * arbitrary output ranges using C-style format strings. It's type safe * and supports custom object formatting without heap allocations as well * as formatting of ranges, tuples and more. * * @include format.cc * * @copyright See COPYING.md in the project tree for further information. */ #ifndef OSTD_FORMAT_HH #define OSTD_FORMAT_HH #include #include #include #include #include #include #include #include #include #include #include #include namespace ostd { /** @addtogroup Strings * @{ */ /** @brief An enumeration defining flags for C-style formatting marks. * * Used inside ostd::format_spec. The C-style formatting mark has a flags * section and each of these enum items represents one. They can be combined * using the standard bitwise operators. */ enum format_flags { FMT_FLAG_DASH = 1 << 0, ///< The dash (`-`) flag. FMT_FLAG_ZERO = 1 << 1, ///< The zero (`0`) flag. FMT_FLAG_SPACE = 1 << 2, ///< The space (` `) flag. FMT_FLAG_PLUS = 1 << 3, ///< The plus (`+`) flag. FMT_FLAG_HASH = 1 << 4, ///< The hash (`#`) flag. FMT_FLAG_AT = 1 << 5 ///< The at (`@`) flag. }; /** @brief Thrown when format string does not properly match the arguments. */ struct OSTD_EXPORT format_error: std::runtime_error { using std::runtime_error::runtime_error; /* empty, for vtable placement */ virtual ~format_error(); }; struct format_spec; /** @brief Specialize this to format custom objects. * * The formatting system provides a way to format arbitrary objects. By default * it's empty as all the formatting logic is builtin. To specialize for your * own object, you simply do this: * * ~~~{.cc} * template<> * struct format_traits { * template * static void to_format(foo const &v, R &writer, ostd::format_spec const &fs) { * // custom formatting here * // writer is just an output range (see ostd::output_range) * } * }; * ~~~ * * Obviously, you can passthrough the formatting, for example when your type * contains a member and you want to format your type exactly as if it was * the member, you just put this in your `to_format`: * * ~~~{.cc} * fs.format_value(writer, v.my_member); * ~~~ * * Anything that writes into the output range will do. The output range is * exactly the same output range the outer format call is formatting into, * so for example when someone is formatting into an ostd::appender(), * it will be just that. * * This may be specialized in other libostd modules as well. */ template struct format_traits {}; /* implementation helpers */ namespace detail { inline int parse_fmt_flags(string_range &fmt, int ret) { while (!fmt.empty()) { switch (fmt.front()) { case '-': ret |= FMT_FLAG_DASH; fmt.pop_front(); break; case '+': ret |= FMT_FLAG_PLUS; fmt.pop_front(); break; case '#': ret |= FMT_FLAG_HASH; fmt.pop_front(); break; case '@': ret |= FMT_FLAG_AT; fmt.pop_front(); break; case '0': ret |= FMT_FLAG_ZERO; fmt.pop_front(); break; case ' ': ret |= FMT_FLAG_SPACE; fmt.pop_front(); break; default: goto retflags; } } retflags: return ret; } inline std::size_t read_digits(string_range &fmt, char *buf) { std::size_t ret = 0; for (; !fmt.empty() && isdigit(fmt.front()); ++ret) { *buf++ = fmt.front(); fmt.pop_front(); } *buf = '\0'; return ret; } /* 0 .. not allowed * 1 .. floating point * 2 .. character * 3 .. binary * 4 .. octal * 5 .. decimal * 6 .. hexadecimal * 7 .. string * 8 .. custom object */ static inline constexpr unsigned char const fmt_specs[] = { /* uppercase spec set */ 1, 3, 8, 8, /* A B C D */ 1, 1, 1, 8, /* E F G H */ 8, 8, 8, 8, /* I J K L */ 8, 8, 8, 8, /* M N O P */ 8, 8, 8, 8, /* Q R S T */ 8, 8, 8, 6, /* U V W X */ 8, 8, /* Y Z */ /* ascii filler */ 0, 0, 0, 0, 0, 0, /* lowercase spec set */ 1, 3, 2, 5, /* a b c d */ 1, 1, 1, 8, /* e f g h */ 8, 8, 8, 8, /* i j k l */ 8, 8, 4, 8, /* m n o p */ 8, 8, 7, 8, /* q r s t */ 8, 8, 8, 6, /* u v w x */ 8, 8, /* y z */ /* ascii filler */ 0, 0, 0, 0, 0 }; static inline constexpr int const fmt_bases[] = { 0, 0, 0, 2, 8, 10, 16, 0 }; /* non-printable escapes up to 0x20 (space) */ static inline constexpr char const *fmt_escapes[] = { "\\0" , "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a" , "\\b" , "\\t" , "\\n" , "\\v" , "\\f" , "\\r" , "\\x0E", "\\x0F", "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F", /* we want to escape double quotes... */ nullptr, nullptr, "\\\"", nullptr, nullptr, nullptr, nullptr, "\\\'" }; inline char const *escape_fmt_char(char v, char quote) { if ((static_cast(v) < 0x20) || (v == quote)) { return fmt_escapes[std::size_t(v)]; } else if (v == 0x7F) { return "\\x7F"; } return nullptr; } /* retrieve width/precision */ template inline int get_arg_param(std::size_t idx, T const &val, A const &...args) { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format args"}; } else { return get_arg_param(idx - 1, args...); } } else { if constexpr(!std::is_integral_v) { throw format_error{"invalid argument for width/precision"}; } else { if constexpr(std::is_signed_v) { if (val < 0) { throw format_error{ "width/precision cannot be negative" }; } } return int(val); } } } /* ugly ass check for whether a type is tuple-like, like tuple itself, * pair, array, possibly other types added later or overridden... */ template inline auto tuple_like_test(int) -> typename std::is_integral::value)>::type; template inline std::false_type tuple_like_test(...); template static inline constexpr bool is_tuple_like = decltype(tuple_like_test(0))::value; /* test if format traits are available for the type */ template inline auto test_tofmt(int) -> typename std::is_void< decltype(format_traits::to_format( std::declval(), std::declval(), std::declval() )) >::type; template inline std::false_type test_tofmt(...); template static inline constexpr bool fmt_tofmt_test = decltype(test_tofmt(0))::value; template inline int ac_to_mb(C c, F const &f, char *buf) { std::mbstate_t mb{}; C const *fromn; char *ton; auto ret = f.out(mb, &c, &c + 1, fromn, buf, &buf[MB_LEN_MAX], ton); if (ret != std::codecvt_base::ok) { return -1; } return int(ton - &buf[0]); } inline int wc_to_mb_loc(wchar_t c, char *buf, std::locale const &loc) { auto &f = std::use_facet>(loc); return ac_to_mb(c, f, buf); } } /** @brief A structure implementing type safe C-style formatting. * * It can be constructed either to represent a specific format specifier or * with a format string to format an entire string (in which case it will * parse the string and format it with the individual intermediate markings). * * It stores information about the current format specifier (when constructed * as one or when parsing the format string) as well as the rest of the current * format string. See read_until_spec() and rest() for more information. * * # Regular format specifiers * * The formatter is considerably more elaborate than C-style printf. Its * basic format specifiers superficially look the same: * * ~~~ * %[position$][flags][width][.precision]specifier * ~~~ * * Position is the optional position of the argument in the pack starting * with 1. It can be mixed with format specifiers without explicit position, * unlike what POSIX says; the next specifier without explicit position * will use the position after the largest explicit position used so far. * For example, `%3$s %1$s %s` will use position 4 for the last specifier. * * ## Flags * * * The `-` flag will left-justify within the given width (right by default). * * The `+` flag applies to numbers and will force sign to always show, even * for positive numbers (by default, only negative ones get a sign). * * The ` ` (space) flag applies to numbers and will force a space to be * written in place of sign for positive numbers (no effect with `+`). * * The `#` flag applies to integers, floats and ranges. For integers, it * will add the prefixes `0x`, `0X`, `0b` or `0B` when formatted as hex * or binary, lowercase or uppercase. For floats, it will force the output * to always contain a decimal point/comma. For ranges, it will cause * automatic expansion of values into items if the values are tuples. * * The `@` flag will escape the value according to the rules. * * The '0' flag will left-pad numbers with zeroes instead of spaces when * needed (according to width). * * ## Width * * Width can be specified either as a number in the format string or as `*` * in which case it will be an integer argument (any integral type, must be * equal or larger than zero, otherwise ostd::format_error is thrown). When * an argument, the position of the argument is right before the actual * value to format, unless precision is also an argument, in which case it's * right before the precision argument. When the position of the value to * format is explicit in the format string, the position refers to the value * to format and width/precision are before that. * * Width defines the minimum number of characters to be printed. If the value * ends up being shorter, it's padded with spaces (or zeroes when formatting * a number and the zero flag is used). The value is not truncated if it's * actually longer than the width. * * ## Precision * * Precision can also be specified as a number or as an argument. When both * width and precision are an argument, width is first. For integers, it * specifies the default number of digits to be written. If the value is * shorter than the precision, the result is padded with leading zeroes. * If it's longer, no truncation happens. A precision of 0 means that no * character is written for the value 0. For floats, it's the number of * digits to be written after decimal point or comma. When not specified, * it's 6. For strings, it's the maximum number of characters to be printed. * By default all characters are printed. When escaping strings, the quotes * are not counted into the precision and escape sequences count as a single * character. * * # Range formatting * * The system also allows advanced formatting for ranges. The specifier * then looks different: * * ~~~ * %[position$][flags](contents%) * ~~~ * * The `contents` is a format specifier for each item of the range followed * by a separator. For example: * * ~~~ * %(%s, %) * ~~~ * * In this case, `%s` is the specifier and `, ` is the separator. You can * also explicitly delimit the separator: * * ~~~ * %(%s%|, %) * ~~~ * * The first part is used to format items and the separator is put between * each two items. * * Two flags are used by this format. Normally, each item of the range is * formatted as is, using a single specifier, even if the item is a tuple-like * value. Using the `#` flag you can expand tuple-like items into multiple * values. So when formatting a range over an associative map, you can do this: * * ~~~ * %#(%s: %s%|, %) * ~~~ * * to format key and value separately. * * You can also use the `@` flag. It will cause the `@` flag to be applied to * every item of the range, therefore escaping each one. Nested range formats * are also affected by this. There is no way to unapply the flag once you * set it. * * # Tuple formatting * * Additionally, the system also supports advanced formatting for tuples. * The syntax is similar: * * ~~~ * %[position$][flags] * ~~~ * * There are no delimiters here. The `contents` is simply a regular format * string, with a format specifier for each tuple item. * * You can use the `@` flag just like you can use it with ranges. No other * flag can be used when formatting tuples. * * # Specifiers * * Now for the basic specifiers themselves: * * * `a`, `A` - hexadecimal float like C printf (lowercase, uppercase). * * `b`, `B` - binary integers (lowercase, uppercase). * * `c` - character values. * * `d` - decimal integers. * * `e`, `E` - floats in scientific notation (lowercase, uppercase). * * `f`, `F` - decimal floating point (lowercase, uppercase). * * `g`, `G` - shortest representation (`e`/`E` or `f`/`F`). * * `o` - octal integers. * * `s` - any value with its default format. * * `x`, `X` - hexadecimal integers (lowercase, uppercase). * * You can use the `s` specifier to format any value that can be formatted * at no extra cost. Because the system is type safe, how a value is meant * to be formatted is decided from the type that is passed in, not the format * specifier itself. * * Signedness and size of integer types is determined from the value itself, * which applies universally to all integer formatting with all bases. Also, * the lowercase/uppercase distinction for binary only applies when a prefix * is added (using the `#` flag). * * All letters (uppercase and lowercase) are available for custom formatting. * * # Format order and rules * * The rules for formatting values go as follows: * * * First it's checked whether the value can be custom formatted using a * specialization of ostd::format_traits. If it can, it's formatted using * that, the current `format_spec` is passed in as it is and no extra * checks are made. Any letter can be used to format custom objects. * * Then it's checked if the value is convertible to ostd::string_range. * If it is, it's formatted as a string. Only the `s` specifier is allowed. * * Then it's checked if the value is a tuple-like object. The value is one * if `std::tuple_size::value` is valid. If it is, the tuple-like object * is formatted as `` by default. You need to use * the `s` specifier only to format tuples like this. The items are all * formatted using the `s` specifier. * * Then ranges are tested in a similar way. The default format for ranges * is `{ITEM, ITEM, ITEM, ...}`. The `s` specifier must be used. The items * are all formatted with `s` too. * * Then bools are formatted. If the `s` specifier is used, the bool is * formatted as `true` or `false`. Otherwise it's converted to `int` and * formatted using the specifier (might error depending on the specifier). * * Then character values are formatted. The `c` and `s` specifiers are * allowed. * * Pointers are formatted then. If the `s` specifier is used, the pointer * will be formatted as hex with the `0x` prefix. Otherwise it's converted * to `std::size_t` and formatted with the specifier (might error depending * on the specifier). * * Then integers are formatted. Using the `s` specifier is like using the * `d` specifier. * * Floats follow. Using `s` is like using `g`. * * When everything is exhausted, ostd::format_error is thrown. * * # Escaping * * String and character values are subject to escaping if the `@` flag is * used. Strings are put into double quotes and any unprintable values in * them are converted into escape sequences. Quotes (single and double) * are also escaped. Character values are put into single quotes and * unprintable characters are converted into escape sequences as well. * For known escape sequences, simple readable versions are used, particularly * `a`, `b`, `t`, `n`, `v`, `f`, `r`. For other unprintables, the hexadecimal * escape format is used. * * When printing tuples and ranges with the `s` specifier and the `@` flag * is used, all of their items are escaped. If the items are tuples or ranges, * their own items are also escaped. The `@` flag doesn't escape anything else, * unless you implement support for escaping in your own custom objects. * * # Locale awareness * * The system is locale-aware but uses the C locale by default. Provide an * explicit locale object when you want things formatted using a specific * locale. This will affect formatting of decimal separators and thousands * grouping particularly. * * # Errors * * If a specifier is not allowed for a value, ostd::format_error is thrown. */ struct format_spec { /** @brief Constructs with a format string and the default locale. * * If you use this constructor, there won't be a specific formatting * specifier set in here so you won't be able to get its properties, * but you will be able to format into a range with some arguments. * You can also manually parse the format string, see read_until_spec(). * * The locale used here is the C locale. */ format_spec(string_range fmt): p_fmt{fmt}, p_loc{std::locale::classic()} {} /** @brief Constructs with a format string and a locale. * * Like format_spec(string_range), but with an explicit locale. */ format_spec(string_range fmt, std::locale const &loc): p_fmt(fmt), p_loc(loc) {} /** @brief Constructs a specific format specifier. * * See ostd::format_flags for flags. The `spec` argument is the format * specifier (for example `s`). It doesn't support tuple/range formatting * nor positional arguments. * * Uses the default (global) locale. The locale is then potentially used * for formatting values. */ format_spec(char spec, int flags = 0): p_flags(flags), p_spec(spec), p_loc() {} /** @brief Constructs a specific format specifier with a locale. * * Like format_spec(char, int) but uses an explicit locale. */ format_spec(char spec, std::locale const &loc, int flags = 0): p_flags(flags), p_spec(spec), p_loc(loc) {} /** @brief Parses the format string if constructed with one. * * This reads the format string, writing each character of it into * `writer`, until it encounters a valid format specifier. It then * stops there and returns `true`. If no format specifier was read, * it returns `false`. When a format specifier is read, this structure * then represents it. * * It's used by format() to parse the string. */ template bool read_until_spec(R &writer) { if (p_fmt.empty()) { return false; } while (!p_fmt.empty()) { if (p_fmt.front() == '%') { p_fmt.pop_front(); if (p_fmt.front() == '%') { goto plain; } return read_spec(); } plain: writer.put(p_fmt.front()); p_fmt.pop_front(); } return false; } /** @brief Gets the yet not parsed portion of the format string. * * If no read_until_spec() was called, this returns the entire format * string. Otherwise, it returns the format string from the point * after the format specifier this structure currently represents. */ string_range rest() const { return p_fmt; } /** @brief Overrides the currently set locale. * * @returns The old locale. */ std::locale imbue(std::locale const &loc) { std::locale ret{p_loc}; p_loc = loc; return ret; } /** @brief Retrieves the currently used locale for the format state. */ std::locale getloc() const { return p_loc; } /** @brief Gets the width of the format specifier. * * If explicitly specified (say `%5s`) it will return the number that * was in the format specifier. If explicitly set with set_width(), * it will return that. If not set at all, it will return 0. * * @see has_width(), precision() */ int width() const { return p_width; } /** @brief Gets the precision of the format specifier. * * If explicitly specified (say `%.5f`) it will return the number that * was in the format specifier. If explicitly set with set_precision(), * it will return that. If not set at all, it will return 0. * * @see has_precision(), width() */ int precision() const { return p_precision; } /** @brief Gets whether a width was specified somehow. * * If the width was provided direclty as part of the format specifier * or with an explicit argument (see set_width()), this will return * `true`. Otherwise, it will return `false`. * * You can get the actual width using width(). * * @see has_precision(), arg_width() */ bool has_width() const { return p_has_width; } /** @brief Gets whether a precision was specified somehow. * * If the precision was provided direclty as part of the format specifier * or with an explicit argument (see set_precision()), this will return * `true`. Otherwise, it will return `false`. * * You can get the actual width using precision(). * * @see has_width(), arg_precision() */ bool has_precision() const { return p_has_precision; } /** @brief Gets whether a width was specified as an explicit argument. * * This is true if the width was specified using `*` in the format * specifier. Also set by set_width_arg(). * * @see has_width() */ bool arg_width() const { return p_arg_width; } /** @brief Gets whether a precision was specified as an explicit argument. * * This is true if the precision was specified using `*` in the format * specifier. Also set by set_precision_arg(). * * @see has_width() */ bool arg_precision() const { return p_arg_precision; } /** @brief Sets the width from an argument pack. * * The `idx` parameter specifies the index (starting with 0) of the * width argument in the followup pack. * * The return value of width() will then be the argument's value. * It will also make has_width() and arg_width() return true (if * they previously didn't). * * @throws ostd::format_error when `idx` is out of bounds or the argument * has an invalid type. * * @see set_width(), set_precision_arg(); */ template void set_width_arg(std::size_t idx, A const &...args) { p_width = detail::get_arg_param(idx, args...); p_has_width = p_arg_width = true; } /** @brief Sets the width to an explicit number. * * The return value of width() will then be the given value. It will * also make has_width() return true and arg_width() return false. * * @see set_width_arg(), set_precision() */ void set_width(int v) { p_width = v; p_has_width = true; p_arg_width = false; } /** @brief Sets the precision from an argument pack. * * The `idx` parameter specifies the index (starting with 0) of the * precision argument in the followup pack. * * The return value of precision() will then be the argument's value. * It will also make has_precision() and arg_precision() return true (if * they previously didn't). * * @throws ostd::format_error when `idx` is out of bounds or the argument * has an invalid type. * * @see set_precision(), set_width_arg(); */ template void set_precision_arg(std::size_t idx, A const &...args) { p_precision = detail::get_arg_param(idx, args...); p_has_precision = p_arg_precision = true; } /** @brief Sets the precision to an explicit number. * * The return value of precision() will then be the given value. It will * also make has_precision() return true and arg_precision() return false. * * @see set_precision_arg(), set_width() */ void set_precision(int v) { p_precision = v; p_has_precision = true; p_arg_precision = false; } /** @brief Gets the combination of flags for the current specifier. */ int flags() const { return p_flags; } /** @brief Gets the base char for the specifier. */ char spec() const { return p_spec; } /** @brief Gets the position of the matching argument in the pack. * * This applies for when the position in the format specifier was * explicitly set (for example `%5$s` will have index 5) to refer * to a specific argument in the pack. Keep in mind that these * start with 1 (1st argument, 5th argument etc) to match the POSIX * conventions on this. If the position was not specified, this just * returns 0. */ unsigned char index() const { return p_index; } /** @brief Gets the inner part of a range or tuple format specifier. * * For ranges, this does not include the separator, you need to use * nested_sep() to get the separator. For example, given the * `%(%s, %)` specifier, this returns `%s` and for `%(%s%|, %)` * it returns the same. When formatting tuples, this behaves identically, * for example for `%<%s, %f%>` this returns `%s, %f`. For simple * specifiers this returns an empty slice. */ string_range nested() const { return p_nested; } /** @brief Gets the separator of a complex range format specifier. * * For example for `%(%s, %)` this returns `, `. With an explicit * delimiter, for example for `%(%s%|, %)`, this returns the same * thing as well. For simple specifiers and tuple specifiers this * returns an empty slice. */ string_range nested_sep() const { return p_nested_sep; } /** @brief Returns true if this specifier is for a tuple. */ bool is_tuple() const { return p_is_tuple; } /** @brief Returns true if this specifier is for a tuple or a range. */ bool is_nested() const { return p_is_nested; } /** @brief Formats into a range with the given arguments. * * When a valid format string is currently present, this formats * into the given range using that format string and the provided * arguments. * * @throws ostd::format_error when the format string and args don't match. * * @see format_value() */ template R &&format(R &&writer, A const &...args) { write_fmt(writer, args...); return std::forward(writer); } /** @brief Formats a single value into a range. * * When this currently represents a valid format specifier, you can * use this to format a single value with that specifier. This is very * useful for example when formatting custom objects, see the example * in ostd::format_traits. * * @throws ostd::format_error when the specifier and the value don't match. * * @see format() */ template R &&format_value(R &&writer, T const &val) const { write_arg(writer, 0, val); return std::forward(writer); } private: string_range p_nested; string_range p_nested_sep; int p_flags = 0; /* internal, for initial set of flags */ int p_gflags = 0; int p_width = 0; int p_precision = 0; bool p_has_width = false; bool p_has_precision = false; bool p_arg_width = false; bool p_arg_precision = false; char p_spec = '\0'; unsigned char p_index = 0; bool p_is_tuple = false; bool p_is_nested = false; bool read_until_dummy() { while (!p_fmt.empty()) { if (p_fmt.front() == '%') { p_fmt.pop_front(); if (p_fmt.front() == '%') { goto plain; } return read_spec(); } plain: p_fmt.pop_front(); } return false; } bool read_spec_range(bool tuple = false) { int sflags = p_flags; p_fmt.pop_front(); string_range begin_inner(p_fmt); if (!read_until_dummy()) { p_is_nested = false; return false; } /* skip to the last spec in case multiple specs are present */ string_range curfmt(p_fmt); while (read_until_dummy()) { curfmt = p_fmt; } p_fmt = curfmt; /* restore in case the inner spec read changed them */ p_flags = sflags; /* find delimiter or ending */ string_range begin_delim(p_fmt); string_range p = find(begin_delim, '%'); char need = tuple ? '>' : ')'; for (; !p.empty(); p = find(p, '%')) { p.pop_front(); /* escape, skip */ if (p.front() == '%') { p.pop_front(); continue; } /* found end, in that case delimiter is after spec */ if (p.front() == need) { p_is_tuple = tuple; if (tuple) { p_nested = begin_inner.slice( 0, std::size_t(&p[0] - &begin_inner[0] - 1) ); p_nested_sep = nullptr; } else { p_nested = begin_inner.slice( 0, std::size_t(&begin_delim[0] - &begin_inner[0]) ); p_nested_sep = begin_delim.slice( 0, std::size_t(&p[0] - &begin_delim[0] - 1) ); } p.pop_front(); p_fmt = p; p_is_nested = true; return true; } /* found actual delimiter start... */ if ((p.front() == '|') && !tuple) { p_nested = begin_inner.slice( 0, std::size_t(&p[0] - &begin_inner[0] - 1) ); p.pop_front(); p_nested_sep = p; for (p = find(p, '%'); !p.empty(); p = find(p, '%')) { p.pop_front(); if (p.front() == ')') { p_nested_sep = p_nested_sep.slice( 0, std::size_t(&p[0] - &p_nested_sep[0] - 1) ); p.pop_front(); p_fmt = p; p_is_nested = true; return true; } } p_is_nested = false; return false; } } p_is_nested = false; return false; } bool read_spec() { std::size_t ndig = detail::read_digits(p_fmt, p_buf); bool havepos = false; p_index = 0; /* parse index */ if (p_fmt.front() == '$') { if (ndig <= 0) return false; /* no pos given */ int idx = atoi(p_buf); if (idx <= 0 || idx > 255) return false; /* bad index */ p_index = static_cast(idx); p_fmt.pop_front(); havepos = true; } /* parse flags */ p_flags = p_gflags; std::size_t skipd = 0; if (havepos || !ndig) { p_flags |= detail::parse_fmt_flags(p_fmt, 0); } else { for (std::size_t i = 0; i < ndig; ++i) { if (p_buf[i] != '0') { break; } ++skipd; } if (skipd) { p_flags |= FMT_FLAG_ZERO; } if (skipd == ndig) { p_flags |= detail::parse_fmt_flags(p_fmt, p_flags); } } /* range/array/tuple formatting */ if ( ((p_fmt.front() == '(') || (p_fmt.front() == '<')) && (havepos || !(ndig - skipd)) ) { return read_spec_range(p_fmt.front() == '<'); } /* parse width */ p_width = 0; p_has_width = false; p_arg_width = false; if (!havepos && ndig && (ndig - skipd)) { p_width = atoi(p_buf + skipd); p_has_width = true; } else if (detail::read_digits(p_fmt, p_buf)) { p_width = atoi(p_buf); p_has_width = true; } else if (p_fmt.front() == '*') { p_arg_width = p_has_width = true; p_fmt.pop_front(); } /* parse precision */ p_precision = 0; p_has_precision = false; p_arg_precision = false; if (p_fmt.front() != '.') { goto fmtchar; } p_fmt.pop_front(); if (detail::read_digits(p_fmt, p_buf)) { p_precision = atoi(p_buf); p_has_precision = true; } else if (p_fmt.front() == '*') { p_arg_precision = p_has_precision = true; p_fmt.pop_front(); } else { return false; } fmtchar: p_spec = p_fmt.front(); p_fmt.pop_front(); return ((p_spec | 32) >= 'a') && ((p_spec | 32) <= 'z'); } template void write_spaces( R &writer, std::size_t n, bool left, char c = ' ' ) const { if (left == bool(p_flags & FMT_FLAG_DASH)) { return; } for (int w = p_width - int(n); --w >= 0; writer.put(c)); } template void write_char_raw(R &writer, char val) const { writer.put(val); } template void write_char_raw(R &writer, char16_t val) const { write_char_raw(writer, char32_t(val)); } template void write_char_raw(R &writer, char32_t val) const { if (!utf::encode(writer, val)) { utf::replace(writer); } } template void write_char_raw(R &writer, wchar_t val) const { if constexpr(sizeof(wchar_t) == sizeof(char32_t)) { write_char_raw(writer, char32_t(val)); } else if constexpr(sizeof(wchar_t) == sizeof(char16_t)) { write_char_raw(writer, char16_t(val)); } else { write_char_raw(writer, char(val)); } } /* string base writer */ template void write_str(R &writer, bool escape, basic_char_range val) const { std::size_t n = val.size(); if (has_precision()) { n = std::min(n, std::size_t(precision())); } write_spaces(writer, n, true); if (escape) { writer.put('"'); for (std::size_t i = 0; i < n; ++i) { if (val.empty()) { break; } C c = val.front(); if (c <= 0x7F) { char const *esc = detail::escape_fmt_char(c, '"'); if (esc) { range_put_all(writer, string_range{esc}); } else { write_char_raw(writer, c); } val.pop_front(); } else if (!utf::encode(writer, val)) { utf::replace(writer); val.pop_front(); } } writer.put('"'); } else { if constexpr(std::is_same_v, char>) { range_put_all(writer, val.slice(0, n)); } else { for (std::size_t i = 0; i < n; ++i) { if (val.empty()) { break; } if (!utf::encode(writer, val)) { utf::replace(writer); val.pop_front(); } } } } write_spaces(writer, n, false); } /* char values */ template void write_char(R &writer, bool escape, C val) const { if (escape && (val <= 0x7F)) { char const *esc = detail::escape_fmt_char(val, '\''); if (esc) { char buf[6]; buf[0] = '\''; std::size_t elen = std::strlen(esc); std::memcpy(buf + 1, esc, elen); buf[elen + 1] = '\''; write_val(writer, false, ostd::string_range{ buf, buf + elen + 2 }); return; } } write_spaces(writer, 1 + escape * 2, true); if (escape) { writer.put('\''); write_char_raw(writer, val); writer.put('\''); } else { write_char_raw(writer, val); } write_spaces(writer, 1 + escape * 2, false); } template void write_int(R &writer, bool ptr, bool neg, T val) const { using UT = std::make_unsigned_t; /* binary representation is the longest */ char buf[sizeof(T) * CHAR_BIT]; std::size_t ndig = 0; char isp = spec(); if (isp == 's') { isp = (ptr ? 'x' : 'd'); } unsigned char specn = detail::fmt_specs[isp - 65]; if (specn <= 2 || specn > 7) { throw format_error{"cannot format integers with the given spec"}; } /* 32 for lowercase variants, 0 for uppercase */ char cmask = char((isp >= 'a') << 5); UT base = UT(detail::fmt_bases[specn]); bool zeroval = !val; if (zeroval) { ndig = 1; buf[0] = '0'; } else { UT uval; if (neg) { if (base != 10) { uval = UT(val); neg = false; } else { uval = UT(-val); } } else { uval = UT(val); } for (; uval; uval /= base) { auto vb = char(uval % base); buf[ndig++] = (vb + "70"[vb < 10]) | cmask; } } std::size_t tdig = ndig; if (has_precision()) { int prec = precision(); if (std::size_t(prec) > tdig) { tdig = std::size_t(prec); } else if (!prec && zeroval) { tdig = 0; } } /* here starts the bullshit */ auto const &fac = std::use_facet>(p_loc); char tseps[MB_LEN_MAX]; int ntsep = detail::wc_to_mb_loc(fac.thousands_sep(), tseps, p_loc); auto const &grp = fac.grouping(); auto grpp = reinterpret_cast(grp.data()); std::size_t nseps = 0, sreps = 0; std::size_t total = tdig; if (!ptr && (ntsep >= 0)) { int cndig = int(ndig); while (*grpp) { cndig -= *grpp; if (cndig > 0) { ++nseps; if (!grpp[1]) { ++sreps; continue; } } else { break; } ++grpp; } total += nseps * std::size_t(ntsep); } /* here ends the bullshit */ int fl = flags(); bool lsgn = fl & FMT_FLAG_PLUS; bool lsp = fl & FMT_FLAG_SPACE; bool zero = fl & FMT_FLAG_ZERO; bool sign = neg + lsgn + lsp; char pfx = '\0'; if (((fl & FMT_FLAG_HASH) || ptr) && ((specn == 3) || (specn == 6))) { pfx = ("XB"[(specn == 3)]) | cmask; } /* leading spaces if they apply */ if (!zero) { write_spaces(writer, total + (!!pfx * 2) + sign, true, ' '); } /* sign (either forced or a minus) */ if (sign) { writer.put(neg ? '-' : *((" \0+") + lsgn * 2)); } /* prefix such as 0x */ if (pfx) { writer.put('0'); writer.put(pfx); } /* if we chose to pad with leading zeroes instead */ if (zero) { write_spaces(writer, total + (!!pfx * 2) + sign, true, '0'); } /* number itself (with potential thousands grouping) */ if (total) { /* potential higher precision, no grouping applies */ for (std::size_t i = 0; i < (tdig - ndig); ++i) { writer.put('0'); } /* the rest of the number, with thousands grouping */ unsigned char grpn = *grpp; for (std::size_t i = 0; i < ndig; ++i) { if (nseps) { if (!grpn) { for (int j = 0; j < ntsep; ++j) { writer.put(tseps[j]); } if (sreps) { --sreps; } else { --grpp; } grpn = *grpp; --nseps; } if (grpn) { --grpn; } } writer.put(buf[ndig - i - 1]); } } write_spaces(writer, total + sign + (!!pfx * 2), false); } /* floating point */ template void write_float(R &writer, T val) const { char isp = spec(); unsigned char specn = detail::fmt_specs[isp - 65]; if (specn != 1 && specn != 7) { throw format_error{"cannot format floats with the given spec"}; } /* null streambuf because it's only used to read flags etc */ std::ios st{nullptr}; st.imbue(p_loc); st.width(width()); st.precision(has_precision() ? precision() : 6); typename std::ios_base::fmtflags fl{}; if (!(isp & 32)) { fl |= std::ios_base::uppercase; } /* equivalent of printf 'g' or 'G' by default */ if ((isp | 32) == 'f') { fl |= std::ios_base::fixed; } else if ((isp | 32) == 'e') { fl |= std::ios_base::scientific; } else if ((isp | 32) == 'a') { fl |= std::ios_base::fixed | std::ios_base::scientific; } if (p_flags & FMT_FLAG_DASH) { fl |= std::ios_base::right; } if (p_flags & FMT_FLAG_PLUS) { fl |= std::ios_base::showpos; } else if ((p_flags & FMT_FLAG_SPACE) && !std::signbit(val)) { /* only if no sign is shown... num_put does not * support this so we have to do it on our own */ writer.put(' '); } if (p_flags & FMT_FLAG_HASH) { fl |= std::ios_base::showpoint; } st.flags(fl); /* this is also bullshit */ fmt_num_put nump; nump.put( fmt_out{&writer, &p_loc}, st, (p_flags & FMT_FLAG_ZERO) ? L'0' : L' ', std::conditional_t, T, double>(val) ); } template void write_val( R &writer, [[maybe_unused]] bool escape, T const &val ) const { /* stuff fhat can be custom-formatted goes first */ if constexpr(detail::fmt_tofmt_test())>) { format_traits::to_format(val, writer, *this); return; } /* second best, we can convert to string slice */ if constexpr(std::is_constructible_v) { if (spec() != 's') { throw format_error{"strings need the '%s' spec"}; } write_str(writer, escape, val); return; } /* we can convert to UTF-32 slice */ if constexpr(std::is_constructible_v) { if (spec() != 's') { throw format_error{"strings need the '%s' spec"}; } write_str(writer, escape, val); return; } /* we can convert to UTF-16 slice */ if constexpr(std::is_constructible_v) { if (spec() != 's') { throw format_error{"strings need the '%s' spec"}; } write_str(writer, escape, val); return; } /* we can convert to wide slice */ if constexpr(std::is_constructible_v) { if (spec() != 's') { throw format_error{"strings need the '%s' spec"}; } write_str(writer, escape, val); return; } /* tuples */ if constexpr(detail::is_tuple_like) { if (spec() != 's') { throw format_error{"ranges need the '%s' spec"}; } writer.put('<'); write_tuple_val<0, std::tuple_size::value>( writer, escape, ", ", val ); writer.put('>'); return; } /* ranges */ if constexpr(detail::iterable_test) { if (spec() != 's') { throw format_error{"tuples need the '%s' spec"}; } writer.put('{'); write_range_val(writer, [&writer, this](auto const &rval, bool esc) { format_spec sp{'s', p_loc, esc ? FMT_FLAG_AT : 0}; sp.write_arg(writer, 0, rval); }, ", ", val, escape); writer.put('}'); return; } /* bools, check if printing as string, otherwise convert to int */ if constexpr(std::is_same_v) { if (spec() == 's') { write_val(writer, escape, ("false\0true") + (6 * val)); } else { write_val(writer, escape, int(val)); } return; } /* character values */ if constexpr(utf::is_character) { if (spec() == 's' || spec() == 'c') { write_char(writer, escape, val); return; } /* characters are also integers so try that too */ } /* pointers, write as pointer with %s and otherwise as unsigned... * char pointers are handled by the string case above */ if constexpr(std::is_pointer_v) { write_int(writer, (spec() == 's'), false, std::size_t(val)); return; } /* integers */ if constexpr(std::is_integral_v && !std::is_same_v) { if constexpr(std::is_signed_v) { /* signed integers */ write_int(writer, false, val < 0, val); } else { /* unsigned integers */ write_int(writer, false, false, val); } return; } /* floats */ if constexpr(std::is_floating_point_v) { write_float(writer, val); return; } /* we ran out of options, failure */ throw format_error{"the value cannot be formatted"}; } /* actual writer */ template void write_arg( R &writer, std::size_t idx, T const &val, A const &...args ) const { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { write_arg(writer, idx - 1, args...); } } else { write_val(writer, p_flags & FMT_FLAG_AT, val); } } template inline void write_range_item( R &writer, bool escape, [[maybe_unused]] bool expandval, string_range fmt, T const &item ) const { if constexpr(detail::is_tuple_like) { if (expandval) { std::apply([ this, &writer, &fmt, flags = escape ? FMT_FLAG_AT : 0 ](auto const &...args) mutable { format_spec sp{fmt, p_loc}; sp.p_gflags |= flags; sp.write_fmt(writer, args...); }, item); return; } } format_spec sp{fmt, p_loc}; if (escape) { sp.p_gflags |= FMT_FLAG_AT; } sp.write_fmt(writer, item); } template void write_range_val( R &writer, F &&func, [[maybe_unused]] string_range sep, T const &val, A const &...args ) const { if constexpr(detail::iterable_test) { auto range = ostd::iter(val); if (range.empty()) { return; } for (;;) { func(range.front(), args...); range.pop_front(); if (range.empty()) { break; } range_put_all(writer, sep); } } else { throw format_error{"invalid value for ranged format"}; } } /* range writer */ template void write_range( R &writer, std::size_t idx, bool expandval, string_range sep, T const &val, A const &...args ) const { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { write_range(writer, idx - 1, expandval, sep, args...); } } else { write_range_val(writer, [ fmt = rest(), this, &writer ](auto const &rval, bool expval, bool escape) { this->write_range_item( writer, escape, expval, fmt, rval ); }, sep, val, expandval, p_gflags & FMT_FLAG_AT); } } template void write_tuple_val( R &writer, bool escape, [[maybe_unused]] string_range sep, T const &tup ) const { format_spec sp{'s', p_loc, escape ? FMT_FLAG_AT : 0}; sp.write_arg(writer, 0, std::get(tup)); if constexpr(I < (N - 1)) { range_put_all(writer, sep); write_tuple_val(writer, escape, sep, tup); } } template void write_tuple( R &writer, std::size_t idx, T const &val, A const &...args ) { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { write_tuple(writer, idx - 1, args...); } } else { if constexpr(detail::is_tuple_like) { std::apply([this, &writer](auto const &...vals) mutable { this->write_fmt(writer, vals...); }, val); } else { throw format_error{"invalid value for tuple format"}; } } } template void write_fmt(R &writer, A const &...args) { std::size_t argidx = 1; while (read_until_spec(writer)) { std::size_t argpos = index(); if (is_nested()) { if (!argpos) { argpos = argidx++; } else if (argpos > argidx) { argidx = argpos + 1; } format_spec nspec(nested(), p_loc); nspec.p_gflags |= (p_flags & FMT_FLAG_AT); if (is_tuple()) { nspec.write_tuple(writer, argpos - 1, args...); } else { nspec.write_range( writer, argpos - 1, (flags() & FMT_FLAG_HASH), nested_sep(), args... ); } continue; } if (!argpos) { argpos = argidx++; if (arg_width()) { set_width_arg(argpos - 1, args...); argpos = argidx++; } if (arg_precision()) { set_precision_arg(argpos - 1, args...); argpos = argidx++; } } else { bool argprec = arg_precision(); if (argprec) { if (argpos <= 1) { throw format_error{"argument precision not given"}; } set_precision_arg(argpos - 2, args...); } if (arg_width()) { if (argpos <= (std::size_t(argprec) + 1)) { throw format_error{"argument width not given"}; } set_width_arg(argpos - 2 - argprec, args...); } if (argpos > argidx) { argidx = argpos + 1; } } write_arg(writer, argpos - 1, args...); } } template void write_fmt(R &writer) { if (read_until_spec(writer)) { throw format_error{"format spec without format arguments"}; } } /* but most of all, this part is the biggest bullshit */ template struct fmt_out { using iterator_category = std::output_iterator_tag; using value_type = wchar_t; using pointer = wchar_t *; using reference = wchar_t &; using difference_type = std::streamoff; fmt_out &operator=(wchar_t c) { char buf[MB_LEN_MAX]; int j = detail::wc_to_mb_loc(c, buf, *p_loc); for (int i = 0; i < j; ++i) { p_out->put(buf[i]); } return *this; } fmt_out &operator*() { return *this; } fmt_out &operator++() { return *this; } fmt_out &operator++(int) { return *this; } R *p_out; std::locale const *p_loc; }; template struct fmt_num_put final: std::num_put> { fmt_num_put(std::size_t refs = 0): std::num_put>(refs) {} ~fmt_num_put() {} }; string_range p_fmt; std::locale p_loc; char p_buf[32]; }; /** @brief Formats into an output range using a format string and arguments. * * Uses the default constructed std::locale (the current global locale) * for locale specific formatting. There is also a version that takes an * explicit locale. * * This is just a simple wrapper, equivalent to: * * ~~~{.cc} * return ostd::format_spec{fmt}.format(std::forward(writer), args...); * ~~~ */ template inline R &&format(R &&writer, string_range fmt, A const &...args) { return format_spec{fmt}.format(std::forward(writer), args...); } /** @brief Formats into an output range using a format string and arguments. * * This version uses `loc` as a locale. There is also a version that uses * the global locale by default. * * This is just a simple wrapper, equivalent to: * * ~~~{.cc} * return ostd::format_spec{fmt, loc}.format(std::forward(writer), args...); * ~~~ */ template inline R &&format( R &&writer, std::locale const &loc, string_range fmt, A const &...args ) { return format_spec{fmt, loc}.format(std::forward(writer), args...); } /** @} */ } /* namespace ostd */ #endif /** @} */