/** @addtogroup Strings
 * @{
 */

/** @file format.hh
 *
 * @brief APIs for type safe formatting using C-style format strings.
 *
 * libostd provides a powerful formatting system that lets you format into
 * arbitrary output ranges using C-style format strings. It's type safe
 * and supports custom object formatting without heap allocations as well
 * as formatting of ranges, tuples and more.
 *
 * @copyright See COPYING.md in the project tree for further information.
 */

#ifndef OSTD_FORMAT_HH
#define OSTD_FORMAT_HH

/* NOTE(review): the header names of the next eight includes are missing in
 * this copy of the file (as are other angle-bracketed spans further down,
 * such as template parameter lists) -- restore them from the original source.
 */
#include
#include
#include
#include
#include
#include
#include
#include

#include "ostd/algorithm.hh"
#include "ostd/string.hh"

namespace ostd {

/** @addtogroup Strings
 * @{
 */

/** @brief An enumeration defining flags for C-style formatting marks.
 *
 * Used inside ostd::format_spec. The C-style formatting mark has a flags
 * section and each of these enum items represents one. They can be combined
 * using the standard bitwise operators.
 */
enum format_flags {
    FMT_FLAG_DASH  = 1 << 0, ///< The dash (`-`) flag.
    FMT_FLAG_ZERO  = 1 << 1, ///< The zero (`0`) flag.
    FMT_FLAG_SPACE = 1 << 2, ///< The space (` `) flag.
    FMT_FLAG_PLUS  = 1 << 3, ///< The plus (`+`) flag.
    FMT_FLAG_HASH  = 1 << 4, ///< The hash (`#`) flag.
    FMT_FLAG_AT    = 1 << 5  ///< The at (`@`) flag.
};

/** @brief Thrown when format string does not properly match the arguments.
 *
 * Inherits all constructors of std::runtime_error.
 */
struct format_error: std::runtime_error {
    using std::runtime_error::runtime_error;
};

/* forward declaration, the structure itself is defined further below */
struct format_spec;

/** @brief Specialize this to format custom objects.
 *
 * The formatting system provides a way to format arbitrary objects. By default
 * it's empty as all the formatting logic is builtin.
To specialize for your * own object, you simply do this: * * ~~~{.cc} * template<> * struct format_traits { * template * static void to_format(foo const &v, R &writer, ostd::format_spec const &fs) { * // custom formatting here * // writer is just an output range (see ostd::output_range) * } * }; * ~~~ * * Obviously, you can passthrough the formatting, for example when your type * contains a member and you want to format your type exactly as if it was * the member, you just put this in your `to_format`: * * ~~~{.cc} * fs.format_value(writer, v.my_member); * ~~~ * * Anything that writes into the output range will do. The output range is * exactly the same output range the outer format call is formatting into, * so for example when someone is formatting into an ostd::appender_range, * it will be just that. * * This may be specialized in other libostd modules as well. */ template struct format_traits {}; /* implementation helpers */ namespace detail { inline int parse_fmt_flags(string_range &fmt, int ret) { while (!fmt.empty()) { switch (fmt.front()) { case '-': ret |= FMT_FLAG_DASH; fmt.pop_front(); break; case '+': ret |= FMT_FLAG_PLUS; fmt.pop_front(); break; case '#': ret |= FMT_FLAG_HASH; fmt.pop_front(); break; case '@': ret |= FMT_FLAG_AT; fmt.pop_front(); break; case '0': ret |= FMT_FLAG_ZERO; fmt.pop_front(); break; case ' ': ret |= FMT_FLAG_SPACE; fmt.pop_front(); break; default: goto retflags; } } retflags: return ret; } inline size_t read_digits(string_range &fmt, char *buf) { size_t ret = 0; for (; !fmt.empty() && isdigit(fmt.front()); ++ret) { *buf++ = fmt.front(); fmt.pop_front(); } *buf = '\0'; return ret; } /* 0 .. not allowed * 1 .. floating point * 2 .. character * 3 .. binary * 4 .. octal * 5 .. decimal * 6 .. hexadecimal * 7 .. string * 8 .. 
custom object */
    /* Spec category (one of the values listed above) for every character
     * from 'A' up to 0x7F. The integer and float writers of format_spec
     * look it up as `fmt_specs[spec - 65]`.
     */
    static constexpr unsigned char const fmt_specs[] = {
        /* uppercase spec set */
        1, 3, 8, 8, /* A B C D */
        1, 1, 1, 8, /* E F G H */
        8, 8, 8, 8, /* I J K L */
        8, 8, 8, 8, /* M N O P */
        8, 8, 8, 8, /* Q R S T */
        8, 8, 8, 6, /* U V W X */
        8, 8,       /* Y Z */

        /* ascii filler */
        0, 0, 0, 0, 0, 0,

        /* lowercase spec set */
        1, 3, 2, 5, /* a b c d */
        1, 1, 1, 8, /* e f g h */
        8, 8, 8, 8, /* i j k l */
        8, 8, 4, 8, /* m n o p */
        8, 8, 7, 8, /* q r s t */
        8, 8, 8, 6, /* u v w x */
        8, 8,       /* y z */

        /* ascii filler */
        0, 0, 0, 0, 0
    };

    /* Numeric base for each spec category 0 .. 7 from the list above;
     * 0 for the categories that are not integer bases.
     */
    static constexpr int const fmt_bases[] = {
        0, 0, 0, 2, 8, 10, 16, 0
    };

    /* Escape sequences indexed by character code, covering codes 0 .. 39;
     * nullptr means the character is written as it is.
     */
    /* non-printable escapes up to 0x20 (space) */
    static constexpr char const *fmt_escapes[] = {
        "\\0"  , "\\x01", "\\x02", "\\x03", "\\x04", "\\x05",
        "\\x06", "\\a"  , "\\b"  , "\\t"  , "\\n"  , "\\v"  ,
        "\\f"  , "\\r"  , "\\x0E", "\\x0F", "\\x10", "\\x11",
        "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
        "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D",
        "\\x1E", "\\x1F",
        /* we want to escape double quotes... */
        nullptr, nullptr, "\\\"", nullptr, nullptr, nullptr,
        nullptr, "\\\'"
    };

    /* Returns the escape sequence to write instead of `v`, or nullptr when
     * `v` can be written as it is.
     *
     * Characters below 0x20 and the character equal to `quote` are looked
     * up in fmt_escapes, 0x7F is handled separately. The table only has
     * entries up to code 39 (the single quote), so `quote` has to be a
     * character within that range; the callers in this file pass either
     * a double or a single quote.
     */
    inline char const *escape_fmt_char(char v, char quote) {
        if ((v >= 0 && v < 0x20) || (v == quote)) {
            return fmt_escapes[size_t(v)];
        } else if (v == 0x7F) {
            return "\\x7F";
        }
        return nullptr;
    }

    /* Returns the `idx`-th (starting with 0) argument of the pack as an int
     * to be used as width or precision.
     *
     * Throws format_error when the pack has no argument at `idx`, when the
     * argument is not of an integral type or when it is negative. The value
     * is converted with a plain int() conversion, there is no range check
     * for values that do not fit.
     */
    /* retrieve width/precision */
    template inline int get_arg_param(size_t idx, T const &val, A const &...args) {
        if (idx) {
            /* not there yet, drop the first argument and recurse */
            if constexpr(!sizeof...(A)) {
                throw format_error{"not enough format args"};
            } else {
                return get_arg_param(idx - 1, args...);
            }
        } else {
            if constexpr(!std::is_integral_v) {
                throw format_error{"invalid argument for width/precision"};
            } else {
                if constexpr(std::is_signed_v) {
                    if (val < 0) {
                        throw format_error{
                            "width/precision cannot be negative"
                        };
                    }
                }
                return int(val);
            }
        }
    }

    /* ugly ass check for whether a type is tuple-like, like tuple itself,
     * pair, array, possibly other types added later or overridden...
*/
    /* preferred overload, only viable when the `std::tuple_size` member
     * type named in its parameter exists
     */
    template std::true_type tuple_like_test(typename std::tuple_size::type *);

    /* fallback overload, accepts anything */
    template std::false_type tuple_like_test(...);

    /* value of whichever overload a call with `0` resolves to */
    template constexpr bool is_tuple_like = decltype(tuple_like_test(0))::value;

    /* test if format traits are available for the type */
    /* preferred overload, only viable when the `to_format` call expression
     * in its parameter type is well-formed
     */
    template static std::true_type test_tofmt(decltype(format_traits::to_format(
        std::declval(), std::declval(), std::declval()
    )) *);

    /* fallback overload, accepts anything */
    template static std::false_type test_tofmt(...);

    /* value of whichever overload a call with `0` resolves to */
    template constexpr bool fmt_tofmt_test = decltype(test_tofmt(0))::value;
} /* namespace detail */

/** @brief A structure implementing type safe C-style formatting.
 *
 * It can be constructed either to represent a specific format specifier or
 * with a format string to format an entire string (in which case it will
 * parse the string and format it with the individual intermediate markings).
 *
 * It stores information about the current format specifier (when constructed
 * as one or when parsing the format string) as well as the rest of the current
 * format string. See read_until_spec() and rest() for more information.
 *
 * # Regular format specifiers
 *
 * The formatter is considerably more elaborate than C-style printf. Its
 * basic format specifiers superficially look the same:
 *
 * ~~~
 * %[position$][flags][width][.precision]specifier
 * ~~~
 *
 * Position is the optional position of the argument in the pack starting
 * with 1. It can be mixed with format specifiers without explicit position,
 * unlike what POSIX says; the next specifier without explicit position
 * will use the position after the largest explicit position used so far.
 * For example, `%3$s %1$s %s` will use position 4 for the last specifier.
 *
 * ## Flags
 *
 * * The `-` flag will left-justify within the given width (right by default).
 * * The `+` flag applies to numbers and will force sign to always show, even
 *   for positive numbers (by default, only negative ones get a sign).
* * The ` ` (space) flag applies to numbers and will force a space to be * written in place of sign for positive numbers (no effect with `+`). * * The `#` flag applies to integers, floats and ranges. For integers, it * will add the prefixes `0x`, `0X`, `0b` or `0B` when formatted as hex * or binary, lowercase or uppercase. For floats, it will force the output * to always contain a decimal point/comma. For ranges, it will cause * automatic expansion of values into items if the values are tuples. * * The `@` flag will escape the value according to the rules. * * The '0' flag will left-pad numbers with zeroes instead of spaces when * needed (according to width). * * ## Width * * Width can be specified either as a number in the format string or as `*` * in which case it will be an integer argument (any integral type, must be * equal or larger than zero, otherwise ostd::format_error is thrown). When * an argument, the position of the argument is right before the actual * value to format, unless precision is also an argument, in which case it's * right before the precision argument. When the position of the value to * format is explicit in the format string, the position refers to the value * to format and width/precision are before that. * * Width defines the minimum number of characters to be printed. If the value * ends up being shorter, it's padded with spaces (or zeroes when formatting * a number and the zero flag is used). The value is not truncated if it's * actually longer than the width. * * ## Precision * * Precision can also be specified as a number or as an argument. When both * width and precision are an argument, width is first. For integers, it * specifies the default number of digits to be written. If the value is * shorter than the precision, the result is padded with leading zeroes. * If it's longer, no truncation happens. A precision of 0 means that no * character is written for the value 0. 
For floats, it's the number of * digits to be written after decimal point or comma. When not specified, * it's 6. For strings, it's the maximum number of characters to be printed. * By default all characters are printed. When escaping strings, the quotes * are not counted into the precision and escape sequences count as a single * character. * * # Range formatting * * The system also allows advanced formatting for ranges. The specifier * then looks different: * * ~~~ * %[position$][flags](contents%) * ~~~ * * The `contents` is a format specifier for each item of the range followed * by a separator. For example: * * ~~~ * %(%s, %) * ~~~ * * In this case, `%s` is the specifier and `, ` is the separator. You can * also explicitly delimit the separator: * * ~~~ * %(%s%|, %) * ~~~ * * The first part is used to format items and the separator is put between * each two items. * * Two flags are used by this format. Normally, each item of the range is * formatted as is, using a single specifier, even if the item is a tuple-like * value. Using the `#` flag you can expand tuple-like items into multiple * values. So when formatting a range over an associative map, you can do this: * * ~~~ * %#(%s: %s%|, %) * ~~~ * * to format key and value separately. * * You can also use the `@` flag. It will cause the `@` flag to be applied to * every item of the range, therefore escaping each one. Nested range formats * are also affected by this. There is no way to unapply the flag once you * set it. * * # Tuple formatting * * Additionally, the system also supports advanced formatting for tuples. * The syntax is similar: * * ~~~ * %[position$][flags]<contents%> * ~~~ * * There are no delimiters here. The `contents` is simply a regular format * string, with a format specifier for each tuple item. * * You can use the `@` flag just like you can use it with ranges. No other * flag can be used when formatting tuples. 
* * # Specifiers * * Now for the basic specifiers themselves: * * * `a`, `A` - hexadecimal float like C printf (lowercase, uppercase). * * `b`, `B` - binary integers (lowercase, uppercase). * * `c` - character values. * * `d` - decimal integers. * * `e`, `E` - floats in scientific notation (lowercase, uppercase). * * `f`, `F` - decimal floating point (lowercase, uppercase). * * `g`, `G` - shortest representation (`e`/`E` or `f`/`F`). * * `o` - octal integers. * * `s` - any value with its default format. * * `x`, `X` - hexadecimal integers (lowercase, uppercase). * * You can use the `s` specifier to format any value that can be formatted * at no extra cost. Because the system is type safe, how a value is meant * to be formatted is decided from the type that is passed in, not the format * specifier itself. * * Signedness and size of integer types is determined from the value itself, * which applies universally to all integer formatting with all bases. Also, * the lowercase/uppercase distinction for binary only applies when a prefix * is added (using the `#` flag). * * All letters (uppercase and lowercase) are available for custom formatting. * * # Format order and rules * * The rules for formatting values go as follows: * * * First it's checked whether the value can be custom formatted using a * specialization of ostd::format_traits. If it can, it's formatted using * that, the current `format_spec` is passed in as it is and no extra * checks are made. Any letter can be used to format custom objects. * * Then it's checked if the value is convertible to ostd::string_range. * If it is, it's formatted as a string. Only the `s` specifier is allowed. * * Then it's checked if the value is a tuple-like object. The value is one * if `std::tuple_size<T>::value` is valid. If it is, the tuple-like object * is formatted as `<ITEM, ITEM, ITEM, ...>` by default. Only the `s` * specifier can be used to format tuples like this. The items are all * formatted using the `s` specifier. 
* * Then ranges are tested in a similar way. The default format for ranges * is `{ITEM, ITEM, ITEM, ...}`. The `s` specifier must be used. The items * are all formatted with `s` too. * * Then bools are formatted. If the `s` specifier is used, the bool is * formatted as `true` or `false`. Otherwise it's converted to `int` and * formatted using the specifier (might error depending on the specifier). * * Then character values are formatted. The `c` and `s` specifiers are * allowed. * * Pointers are formatted then. If the `s` specifier is used, the pointer * will be formatted as hex with the `0x` prefix. Otherwise it's converted * to `size_t` and formatted with the specifier (might error depending on * the specifier). * * Then integers are formatted. Using the `s` specifier is like using the * `d` specifier. * * Floats follow. Using `s` is like using `g`. * * When everything is exhausted, ostd::format_error is thrown. * * # Escaping * * String and character values are subject to escaping if the `@` flag is * used. Strings are put into double quotes and any unprintable values in * them are converted into escape sequences. Quotes (single and double) * are also escaped. Character values are put into single quotes and * unprintable characters are converted into escape sequences as well. * For known escape sequences, simple readable versions are used, particularly * `a`, `b`, `t`, `n`, `v`, `f`, `r`. For other unprintables, the hexadecimal * escape format is used. * * When printing tuples and ranges with the `s` specifier and the `@` flag * is used, all of their items are escaped. If the items are tuples or ranges, * their own items are also escaped. The `@` flag doesn't escape anything else, * unless you implement support for escaping in your own custom objects. * * # Locale awareness * * The system also makes use of locales. When formatting integers, thousands * grouping rules from the locale apply (no matter the base). 
When formatting
 * floats, a locale specific decimal separator is used and thousands grouping
 * also applies.
 *
 * # Errors
 *
 * If a specifier is not allowed for a value, ostd::format_error is thrown.
 */
struct format_spec {
    /** @brief Constructs with a format string and the default locale.
     *
     * If you use this constructor, there won't be a specific formatting
     * specifier set in here so you won't be able to get its properties,
     * but you will be able to format into a range with some arguments.
     * You can also manually parse the format string, see read_until_spec().
     *
     * The locale used here is the default (global) locale.
     *
     * @param fmt The format string.
     */
    format_spec(string_range fmt): p_fmt(fmt), p_loc() {}

    /** @brief Constructs with a format string and a locale.
     *
     * Like format_spec(string_range), but with an explicit locale.
     *
     * @param fmt The format string.
     * @param loc The locale to use.
     */
    format_spec(string_range fmt, std::locale const &loc):
        p_fmt(fmt), p_loc(loc)
    {}

    /** @brief Constructs a specific format specifier.
     *
     * See ostd::format_flags for flags. The `spec` argument is the format
     * specifier (for example `s`). It doesn't support tuple/range formatting
     * nor positional arguments.
     *
     * Uses the default (global) locale. The locale is then potentially used
     * for formatting values.
     *
     * @param spec The format specifier character.
     * @param flags A combination of ostd::format_flags, none by default.
     */
    format_spec(char spec, int flags = 0):
        p_flags(flags), p_spec(spec), p_loc()
    {}

    /** @brief Constructs a specific format specifier with a locale.
     *
     * Like format_spec(char, int) but uses an explicit locale.
     *
     * @param spec The format specifier character.
     * @param loc The locale to use.
     * @param flags A combination of ostd::format_flags, none by default.
     */
    format_spec(char spec, std::locale const &loc, int flags = 0):
        p_flags(flags), p_spec(spec), p_loc(loc)
    {}

    /** @brief Parses the format string if constructed with one.
     *
     * This reads the format string, writing each character of it into
     * `writer`, until it encounters a valid format specifier. It then
     * stops there and returns `true`. If no format specifier was read,
     * it returns `false`. When a format specifier is read, this structure
     * then represents it.
     *
     * A `%%` sequence is not a specifier, a single `%` is written into
     * `writer` for it. The result is also `false` when a `%` is followed
     * by something that cannot be parsed as a specifier.
     *
     * It's used by format() to parse the string.
*/ template bool read_until_spec(R &writer) { if (p_fmt.empty()) { return false; } while (!p_fmt.empty()) { if (p_fmt.front() == '%') { p_fmt.pop_front(); if (p_fmt.front() == '%') { goto plain; } return read_spec(); } plain: writer.put(p_fmt.front()); p_fmt.pop_front(); } return false; } /** @brief Gets the yet not parsed portion of the format string. * * If no read_until_spec() was called, this returns the entire format * string. Otherwise, it returns the format string from the point * after the format specifier this structure currently represents. */ string_range rest() const { return p_fmt; } /** @brief Overrides the currently set locale. * * @returns The old locale. */ std::locale imbue(std::locale const &loc) { std::locale ret{p_loc}; p_loc = loc; return ret; } /** @brief Retrieves the currently used locale for the format state. */ std::locale getloc() const { return p_loc; } /** @brief Gets the width of the format specifier. * * If explicitly specified (say `%5s`) it will return the number that * was in the format specifier. If explicitly set with set_width(), * it will return that. If not set at all, it will return 0. * * @see has_width(), precision() */ int width() const { return p_width; } /** @brief Gets the precision of the format specifier. * * If explicitly specified (say `%.5f`) it will return the number that * was in the format specifier. If explicitly set with set_precision(), * it will return that. If not set at all, it will return 0. * * @see has_precision(), width() */ int precision() const { return p_precision; } /** @brief Gets whether a width was specified somehow. * * If the width was provided direclty as part of the format specifier * or with an explicit argument (see set_width()), this will return * `true`. Otherwise, it will return `false`. * * You can get the actual width using width(). * * @see has_precision(), arg_width() */ bool has_width() const { return p_has_width; } /** @brief Gets whether a precision was specified somehow. 
*
     * If the precision was provided directly as part of the format specifier
     * or with an explicit argument (see set_precision()), this will return
     * `true`. Otherwise, it will return `false`.
     *
     * You can get the actual precision using precision().
     *
     * @see has_width(), arg_precision()
     */
    bool has_precision() const {
        return p_has_precision;
    }

    /** @brief Gets whether a width was specified as an explicit argument.
     *
     * This is true if the width was specified using `*` in the format
     * specifier. Also set by set_width_arg().
     *
     * @see has_width()
     */
    bool arg_width() const {
        return p_arg_width;
    }

    /** @brief Gets whether a precision was specified as an explicit argument.
     *
     * This is true if the precision was specified using `*` in the format
     * specifier. Also set by set_precision_arg().
     *
     * @see has_precision()
     */
    bool arg_precision() const {
        return p_arg_precision;
    }

    /** @brief Sets the width from an argument pack.
     *
     * The `idx` parameter specifies the index (starting with 0) of the
     * width argument in the followup pack.
     *
     * The return value of width() will then be the argument's value.
     * It will also make has_width() and arg_width() return true (if
     * they previously didn't).
     *
     * @throws ostd::format_error when `idx` is out of bounds or the argument
     *         has an invalid type or a negative value.
     *
     * @see set_width(), set_precision_arg()
     */
    template void set_width_arg(size_t idx, A const &...args) {
        p_width = detail::get_arg_param(idx, args...);
        p_has_width = p_arg_width = true;
    }

    /** @brief Sets the width to an explicit number.
     *
     * The return value of width() will then be the given value. It will
     * also make has_width() return true and arg_width() return false.
     *
     * @see set_width_arg(), set_precision()
     */
    void set_width(int v) {
        p_width = v;
        p_has_width = true;
        p_arg_width = false;
    }

    /** @brief Sets the precision from an argument pack.
     *
     * The `idx` parameter specifies the index (starting with 0) of the
     * precision argument in the followup pack.
*
     * The return value of precision() will then be the argument's value.
     * It will also make has_precision() and arg_precision() return true (if
     * they previously didn't).
     *
     * @throws ostd::format_error when `idx` is out of bounds or the argument
     *         has an invalid type or a negative value.
     *
     * @see set_precision(), set_width_arg()
     */
    template void set_precision_arg(size_t idx, A const &...args) {
        p_precision = detail::get_arg_param(idx, args...);
        p_has_precision = p_arg_precision = true;
    }

    /** @brief Sets the precision to an explicit number.
     *
     * The return value of precision() will then be the given value. It will
     * also make has_precision() return true and arg_precision() return false.
     *
     * @see set_precision_arg(), set_width()
     */
    void set_precision(int v) {
        p_precision = v;
        p_has_precision = true;
        p_arg_precision = false;
    }

    /** @brief Gets the combination of flags for the current specifier.
     *
     * @see ostd::format_flags
     */
    int flags() const {
        return p_flags;
    }

    /** @brief Gets the base char for the specifier. */
    char spec() const {
        return p_spec;
    }

    /** @brief Gets the position of the matching argument in the pack.
     *
     * This applies for when the position in the format specifier was
     * explicitly set (for example `%5$s` will have index 5) to refer
     * to a specific argument in the pack. Keep in mind that these
     * start with 1 (1st argument, 5th argument etc) to match the POSIX
     * conventions on this. If the position was not specified, this just
     * returns 0.
     *
     * The format string parser only accepts explicit positions from
     * 1 to 255.
     */
    unsigned char index() const {
        return p_index;
    }

    /** @brief Gets the inner part of a range or tuple format specifier.
     *
     * For ranges, this does not include the separator, you need to use
     * nested_sep() to get the separator. For example, given the
     * `%(%s, %)` specifier, this returns `%s` and for `%(%s%|, %)`
     * it returns the same. When formatting tuples, this behaves identically,
     * for example for `%<%s, %f%>` this returns `%s, %f`. For simple
     * specifiers this returns an empty slice.
*/
    string_range nested() const {
        return p_nested;
    }

    /** @brief Gets the separator of a complex range format specifier.
     *
     * For example for `%(%s, %)` this returns `, `. With an explicit
     * delimiter, for example for `%(%s%|, %)`, this returns the same
     * thing as well. For simple specifiers and tuple specifiers this
     * returns an empty slice.
     */
    string_range nested_sep() const {
        return p_nested_sep;
    }

    /** @brief Returns true if this specifier is for a tuple. */
    bool is_tuple() const {
        return p_is_tuple;
    }

    /** @brief Returns true if this specifier is for a tuple or a range. */
    bool is_nested() const {
        return p_is_nested;
    }

    /** @brief Formats into a range with the given arguments.
     *
     * When a valid format string is currently present, this formats
     * into the given range using that format string and the provided
     * arguments.
     *
     * @param writer The output range to format into.
     * @param args The values referred to by the format string.
     *
     * @returns The forwarded `writer`.
     *
     * @throws ostd::format_error when the format string and args don't match.
     *
     * @see format_value()
     */
    template R &&format(R &&writer, A const &...args) {
        write_fmt(writer, args...);
        return std::forward(writer);
    }

    /** @brief Formats a single value into a range.
     *
     * When this currently represents a valid format specifier, you can
     * use this to format a single value with that specifier. This is very
     * useful for example when formatting custom objects, see the example
     * in ostd::format_traits.
     *
     * @throws ostd::format_error when the specifier and the value don't match.
* * @see format() */ template R &&format_value(R &&writer, T const &val) const { write_arg(writer, 0, val); return std::forward(writer); } private: string_range p_nested; string_range p_nested_sep; int p_flags = 0; /* internal, for initial set of flags */ int p_gflags = 0; int p_width = 0; int p_precision = 0; bool p_has_width = false; bool p_has_precision = false; bool p_arg_width = false; bool p_arg_precision = false; char p_spec = '\0'; unsigned char p_index = 0; bool p_is_tuple = false; bool p_is_nested = false; bool read_until_dummy() { while (!p_fmt.empty()) { if (p_fmt.front() == '%') { p_fmt.pop_front(); if (p_fmt.front() == '%') { goto plain; } return read_spec(); } plain: p_fmt.pop_front(); } return false; } bool read_spec_range(bool tuple = false) { int sflags = p_flags; p_fmt.pop_front(); string_range begin_inner(p_fmt); if (!read_until_dummy()) { p_is_nested = false; return false; } /* skip to the last spec in case multiple specs are present */ string_range curfmt(p_fmt); while (read_until_dummy()) { curfmt = p_fmt; } p_fmt = curfmt; /* restore in case the inner spec read changed them */ p_flags = sflags; /* find delimiter or ending */ string_range begin_delim(p_fmt); string_range p = find(begin_delim, '%'); char need = tuple ? '>' : ')'; for (; !p.empty(); p = find(p, '%')) { p.pop_front(); /* escape, skip */ if (p.front() == '%') { p.pop_front(); continue; } /* found end, in that case delimiter is after spec */ if (p.front() == need) { p_is_tuple = tuple; if (tuple) { p_nested = begin_inner.slice(0, &p[0] - &begin_inner[0] - 1); p_nested_sep = nullptr; } else { p_nested = begin_inner.slice( 0, &begin_delim[0] - &begin_inner[0] ); p_nested_sep = begin_delim.slice( 0, &p[0] - &begin_delim[0] - 1 ); } p.pop_front(); p_fmt = p; p_is_nested = true; return true; } /* found actual delimiter start... 
*/ if ((p.front() == '|') && !tuple) { p_nested = begin_inner.slice(0, &p[0] - &begin_inner[0] - 1); p.pop_front(); p_nested_sep = p; for (p = find(p, '%'); !p.empty(); p = find(p, '%')) { p.pop_front(); if (p.front() == ')') { p_nested_sep = p_nested_sep.slice( 0, &p[0] - &p_nested_sep[0] - 1 ); p.pop_front(); p_fmt = p; p_is_nested = true; return true; } } p_is_nested = false; return false; } } p_is_nested = false; return false; } bool read_spec() { size_t ndig = detail::read_digits(p_fmt, p_buf); bool havepos = false; p_index = 0; /* parse index */ if (p_fmt.front() == '$') { if (ndig <= 0) return false; /* no pos given */ int idx = atoi(p_buf); if (idx <= 0 || idx > 255) return false; /* bad index */ p_index = static_cast(idx); p_fmt.pop_front(); havepos = true; } /* parse flags */ p_flags = p_gflags; size_t skipd = 0; if (havepos || !ndig) { p_flags |= detail::parse_fmt_flags(p_fmt, 0); } else { for (size_t i = 0; i < ndig; ++i) { if (p_buf[i] != '0') { break; } ++skipd; } if (skipd) { p_flags |= FMT_FLAG_ZERO; } if (skipd == ndig) { p_flags |= detail::parse_fmt_flags(p_fmt, p_flags); } } /* range/array/tuple formatting */ if ( ((p_fmt.front() == '(') || (p_fmt.front() == '<')) && (havepos || !(ndig - skipd)) ) { return read_spec_range(p_fmt.front() == '<'); } /* parse width */ p_width = 0; p_has_width = false; p_arg_width = false; if (!havepos && ndig && (ndig - skipd)) { p_width = atoi(p_buf + skipd); p_has_width = true; } else if (detail::read_digits(p_fmt, p_buf)) { p_width = atoi(p_buf); p_has_width = true; } else if (p_fmt.front() == '*') { p_arg_width = p_has_width = true; p_fmt.pop_front(); } /* parse precision */ p_precision = 0; p_has_precision = false; p_arg_precision = false; if (p_fmt.front() != '.') { goto fmtchar; } p_fmt.pop_front(); if (detail::read_digits(p_fmt, p_buf)) { p_precision = atoi(p_buf); p_has_precision = true; } else if (p_fmt.front() == '*') { p_arg_precision = p_has_precision = true; p_fmt.pop_front(); } else { return false; } 
fmtchar: p_spec = p_fmt.front(); p_fmt.pop_front(); return ((p_spec | 32) >= 'a') && ((p_spec | 32) <= 'z'); } template void write_spaces(R &writer, size_t n, bool left, char c = ' ') const { if (left == bool(p_flags & FMT_FLAG_DASH)) { return; } for (int w = p_width - int(n); --w >= 0; writer.put(c)); } /* string base writer */ template void write_str(R &writer, bool escape, string_range val) const { size_t n = val.size(); if (has_precision()) { n = std::min(n, size_t(precision())); } write_spaces(writer, n, true); if (escape) { writer.put('"'); for (size_t i = 0; i < n; ++i) { if (val.empty()) { break; } char c = val.front(); char const *esc = detail::escape_fmt_char(c, '"'); if (esc) { range_put_all(writer, string_range{esc}); } else { writer.put(c); } val.pop_front(); } writer.put('"'); } else { range_put_all(writer, val.slice(0, n)); } write_spaces(writer, n, false); } /* char values */ template void write_char(R &writer, bool escape, char val) const { if (escape) { char const *esc = detail::escape_fmt_char(val, '\''); if (esc) { char buf[6]; buf[0] = '\''; size_t elen = strlen(esc); memcpy(buf + 1, esc, elen); buf[elen + 1] = '\''; write_val(writer, false, ostd::string_range{ buf, buf + elen + 2 }); return; } } write_spaces(writer, 1 + escape * 2, true); if (escape) { writer.put('\''); writer.put(val); writer.put('\''); } else { writer.put(val); } write_spaces(writer, 1 + escape * 2, false); } template void write_int(R &writer, bool ptr, bool neg, T val) const { /* binary representation is the biggest, assume grouping */ char buf[sizeof(T) * CHAR_BIT * 2]; size_t n = 0; char isp = spec(); if (isp == 's') { isp = (ptr ? 
'x' : 'd'); } unsigned char specn = detail::fmt_specs[isp - 65]; if (specn <= 2 || specn > 7) { throw format_error{"cannot format integers with the given spec"}; } /* 32 for lowercase variants, 0 for uppercase */ int cmask = ((isp >= 'a') << 5); int base = detail::fmt_bases[specn]; bool zval = !val; if (zval) { buf[n++] = '0'; } auto const &fac = std::use_facet>(p_loc); auto const &grp = fac.grouping(); char tsep = fac.thousands_sep(); auto grpp = reinterpret_cast(grp.data()); unsigned char grpn = *grpp; for (; val; val /= base) { if (!ptr && *grpp) { if (!grpn) { buf[n++] = tsep; if (*(grpp + 1)) { ++grpp; } grpn = *grpp; } if (grpn) { --grpn; } } T vb = val % base; buf[n++] = (vb + "70"[vb < 10]) | cmask; } size_t tn = n; if (has_precision()) { int prec = precision(); if (size_t(prec) > tn) { tn = size_t(prec); } else if (!prec && zval) { tn = 0; } } int fl = flags(); bool lsgn = fl & FMT_FLAG_PLUS; bool lsp = fl & FMT_FLAG_SPACE; bool zero = fl & FMT_FLAG_ZERO; bool sign = neg + lsgn + lsp; char pfx = '\0'; if (((fl & FMT_FLAG_HASH) || ptr) && ((specn == 3) || (specn == 6))) { pfx = ("XB"[(specn == 3)]) | cmask; } if (!zero) { write_spaces(writer, tn + (!!pfx * 2) + sign, true, ' '); } if (sign) { writer.put(neg ? '-' : *((" \0+") + lsgn * 2)); } if (pfx) { writer.put('0'); writer.put(pfx); } if (zero) { write_spaces(writer, tn + (!!pfx * 2) + sign, true, '0'); } if (tn) { for (size_t i = 0; i < (tn - n); ++i) { writer.put('0'); } for (size_t i = 0; i < n; ++i) { writer.put(buf[n - i - 1]); } } write_spaces(writer, tn + sign + (!!pfx * 2), false); } /* floating point */ template void write_float(R &writer, T val) const { char isp = spec(); unsigned char specn = detail::fmt_specs[isp - 65]; if (specn != 1 && specn != 7) { throw format_error{"cannot format floats with the given spec"}; } /* null streambuf because it's only used to read flags etc */ std::ios st{nullptr}; st.imbue(p_loc); st.width(width()); st.precision(has_precision() ? 
precision() : 6); typename std::ios_base::fmtflags fl = 0; if (!(isp & 32)) { fl |= std::ios_base::uppercase; } /* equivalent of printf 'g' or 'G' by default */ if ((isp | 32) == 'f') { fl |= std::ios_base::fixed; } else if ((isp | 32) == 'e') { fl |= std::ios_base::scientific; } else if ((isp | 32) == 'a') { fl |= std::ios_base::fixed | std::ios_base::scientific; } if (p_flags & FMT_FLAG_DASH) { fl |= std::ios_base::right; } if (p_flags & FMT_FLAG_PLUS) { fl |= std::ios_base::showpos; } else if ((p_flags & FMT_FLAG_SPACE) && !signbit(val)) { /* only if no sign is shown... num_put does not * support this so we have to do it on our own */ writer.put(' '); } if (p_flags & FMT_FLAG_HASH) { fl |= std::ios_base::showpoint; } st.flags(fl); fmt_num_put nump; nump.put( fmt_out{&writer}, st, (p_flags & FMT_FLAG_ZERO) ? '0' : ' ', val ); } template void write_val(R &writer, bool escape, T const &val) const { /* stuff fhat can be custom-formatted goes first */ if constexpr(detail::fmt_tofmt_test>) { format_traits::to_format(val, writer, *this); return; } /* second best, we can convert to string slice */ if constexpr(std::is_constructible_v) { if (spec() != 's') { throw format_error{"strings need the '%s' spec"}; } write_str(writer, escape, val); return; } /* tuples */ if constexpr(detail::is_tuple_like) { if (spec() != 's') { throw format_error{"ranges need the '%s' spec"}; } writer.put('<'); write_tuple_val<0, std::tuple_size::value>( writer, escape, ", ", val ); writer.put('>'); return; } /* ranges */ if constexpr(detail::iterable_test) { if (spec() != 's') { throw format_error{"tuples need the '%s' spec"}; } writer.put('{'); write_range_val(writer, [&writer, escape, this](auto const &rval) { format_spec sp{'s', p_loc, escape ? 
FMT_FLAG_AT : 0}; sp.write_arg(writer, 0, rval); }, ", ", val); writer.put('}'); return; } /* bools, check if printing as string, otherwise convert to int */ if constexpr(std::is_same_v) { if (spec() == 's') { write_val(writer, escape, ("false\0true") + (6 * val)); } else { write_val(writer, escape, int(val)); } return; } /* character values */ if constexpr(std::is_same_v) { if (spec() != 's' && spec() != 'c') { throw format_error{"cannot format chars with the given spec"}; } write_char(writer, escape, val); return; } /* pointers, write as pointer with %s and otherwise as unsigned... * char pointers are handled by the string case above */ if constexpr(std::is_pointer_v) { write_int(writer, (spec() == 's'), false, size_t(val)); return; } /* integers */ if constexpr(std::is_integral_v) { if constexpr(std::is_signed_v) { /* signed integers */ using UT = std::make_unsigned_t; write_int( writer, false, val < 0, (val < 0) ? static_cast(-val) : static_cast(val) ); } else { /* unsigned integers */ write_int(writer, false, false, val); } return; } /* floats */ if constexpr(std::is_floating_point_v) { write_float(writer, val); return; } /* we ran out of options, failure */ throw format_error{"the value cannot be formatted"}; } /* actual writer */ template void write_arg( R &writer, size_t idx, T const &val, A const &...args ) const { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { write_arg(writer, idx - 1, args...); } } else { write_val(writer, p_flags & FMT_FLAG_AT, val); } } template inline void write_range_item( R &writer, bool escape, bool expandval, string_range fmt, T const &item ) const { if constexpr(detail::is_tuple_like) { if (expandval) { std::apply([&writer, escape, &fmt, this]( auto const &...args ) mutable { format_spec sp{fmt, p_loc}; if (escape) { sp.p_gflags |= FMT_FLAG_AT; } sp.write_fmt(writer, args...); }, item); return; } } format_spec sp{fmt, p_loc}; if (escape) { sp.p_gflags |= FMT_FLAG_AT; } 
sp.write_fmt(writer, item); } template void write_range_val( R &writer, F &&func, string_range sep, T const &val ) const { if constexpr(detail::iterable_test) { auto range = ostd::iter(val); if (range.empty()) { return; } for (;;) { func(range.front()); range.pop_front(); if (range.empty()) { break; } range_put_all(writer, sep); } } else { throw format_error{"invalid value for ranged format"}; } } /* range writer */ template void write_range( R &writer, size_t idx, bool expandval, string_range sep, T const &val, A const &...args ) const { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { write_range(writer, idx - 1, expandval, sep, args...); } } else { write_range_val(writer, [ this, &writer, escape = p_gflags & FMT_FLAG_AT, expandval, fmt = rest() ](auto const &rval) { this->write_range_item( writer, escape, expandval, fmt, rval ); }, sep, val); } } template void write_tuple_val( R &writer, bool escape, string_range sep, T const &tup ) const { format_spec sp{'s', p_loc, escape ? 
FMT_FLAG_AT : 0}; sp.write_arg(writer, 0, std::get(tup)); if constexpr(I < (N - 1)) { range_put_all(writer, sep); write_tuple_val(writer, escape, sep, tup); } } template void write_tuple( R &writer, size_t idx, T const &val, A const &...args ) { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { write_tuple(writer, idx - 1, args...); } } else { if constexpr(detail::is_tuple_like) { std::apply([this, &writer, &val](auto const &...vals) mutable { this->write_fmt(writer, vals...); }, val); } else { throw format_error{"invalid value for tuple format"}; } } } template void write_fmt(R &writer, A const &...args) { size_t argidx = 1; while (read_until_spec(writer)) { size_t argpos = index(); if (is_nested()) { if (!argpos) { argpos = argidx++; } else if (argpos > argidx) { argidx = argpos + 1; } format_spec nspec(nested(), p_loc); nspec.p_gflags |= (p_flags & FMT_FLAG_AT); if (is_tuple()) { nspec.write_tuple(writer, argpos - 1, args...); } else { nspec.write_range( writer, argpos - 1, (flags() & FMT_FLAG_HASH), nested_sep(), args... 
); } continue; } if (!argpos) { argpos = argidx++; if (arg_width()) { set_width_arg(argpos - 1, args...); argpos = argidx++; } if (arg_precision()) { set_precision_arg(argpos - 1, args...); argpos = argidx++; } } else { bool argprec = arg_precision(); if (argprec) { if (argpos <= 1) { throw format_error{"argument precision not given"}; } set_precision_arg(argpos - 2, args...); } if (arg_width()) { if (argpos <= (size_t(argprec) + 1)) { throw format_error{"argument width not given"}; } set_width_arg(argpos - 2 - argprec, args...); } if (argpos > argidx) { argidx = argpos + 1; } } write_arg(writer, argpos - 1, args...); } } template void write_fmt(R &writer) { if (read_until_spec(writer)) { throw format_error{"format spec without format arguments"}; } } template struct fmt_out { using iterator_category = std::output_iterator_tag; using value_type = char; using pointer = char *; using reference = char &; using difference_type = typename std::char_traits::off_type; fmt_out &operator=(char c) { p_out->put(c); return *this; } fmt_out &operator*() { return *this; } fmt_out &operator++() { return *this; } fmt_out &operator++(int) { return *this; } R *p_out; }; template struct fmt_num_put final: std::num_put> { fmt_num_put(size_t refs = 0): std::num_put>(refs) {} ~fmt_num_put() {} }; string_range p_fmt; std::locale p_loc; char p_buf[32]; }; /** @brief Formats into an output range using a format string and arguments. * * Uses the default constructed std::locale (the current global locale) * for locale specific formatting. There is also a version that takes an * explicit locale. * * This is just a simple wrapper, equivalent to: * * ~~~{.cc} * return ostd::format_spec{fmt}.format(std::forward(writer), args...); * ~~~ */ template inline R &&format(R &&writer, string_range fmt, A const &...args) { return format_spec{fmt}.format(std::forward(writer), args...); } /** @brief Formats into an output range using a format string and arguments. * * This version uses `loc` as a locale. 
There is also a version that uses * the global locale by default. * * This is just a simple wrapper, equivalent to: * * ~~~{.cc} * return ostd::format_spec{fmt, loc}.format(std::forward(writer), args...); * ~~~ */ template inline R &&format( R &&writer, std::locale const &loc, string_range fmt, A const &...args ) { return format_spec{fmt, loc}.format(std::forward(writer), args...); } /** @} */ } /* namespace ostd */ #endif /** @} */