/* libostd/ostd/format.hh
 *
 * 1494 lines
 * 50 KiB
 * C++ */

/** @addtogroup Strings
* @{
*/
/** @file format.hh
*
* @brief APIs for type safe formatting using C-style format strings.
*
* libostd provides a powerful formatting system that lets you format into
* arbitrary output ranges using C-style format strings. It's type safe
* and supports custom object formatting without heap allocations as well
* as formatting of ranges, tuples and more.
*
* @copyright See COPYING.md in the project tree for further information.
*/
#ifndef OSTD_FORMAT_HH
#define OSTD_FORMAT_HH
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <cmath>
#include <limits>
#include <utility>
#include <stdexcept>
#include <locale>
#include <ios>
#include "ostd/algorithm.hh"
#include "ostd/string.hh"
namespace ostd {
/** @addtogroup Strings
* @{
*/
/** @brief An enumeration defining flags for C-style formatting marks.
*
* Used inside ostd::format_spec. The C-style formatting mark has a flags
* section and each of these enum items represents one. They can be combined
* using the standard bitwise operators.
*/
enum format_flags {
    /* one bit per flag so that they can be freely OR-ed together */
    FMT_FLAG_DASH  = 0x01, ///< The dash (`-`) flag.
    FMT_FLAG_ZERO  = 0x02, ///< The zero (`0`) flag.
    FMT_FLAG_SPACE = 0x04, ///< The space (` `) flag.
    FMT_FLAG_PLUS  = 0x08, ///< The plus (`+`) flag.
    FMT_FLAG_HASH  = 0x10, ///< The hash (`#`) flag.
    FMT_FLAG_AT    = 0x20  ///< The at (`@`) flag.
};
/** @brief Thrown when format string does not properly match the arguments. */
struct format_error: public std::runtime_error {
    /* carries no state of its own, so every std::runtime_error
     * constructor is simply inherited as it is
     */
    using std::runtime_error::runtime_error;
};
struct format_spec;
/** @brief Specialize this to format custom objects.
*
* The formatting system provides a way to format arbitrary objects. By default
* it's empty as all the formatting logic is builtin. To specialize for your
* own object, you simply do this:
*
* ~~~{.cc}
* template<>
* struct format_traits<foo> {
* template<typename R>
* static void to_format(foo const &v, R &writer, ostd::format_spec const &fs) {
* // custom formatting here
* // writer is just an output range (see ostd::output_range)
* }
* };
* ~~~
*
* Obviously, you can passthrough the formatting, for example when your type
* contains a member and you want to format your type exactly as if it was
* the member, you just put this in your `to_format`:
*
* ~~~{.cc}
* fs.format_value(writer, v.my_member);
* ~~~
*
* Anything that writes into the output range will do. The output range is
* exactly the same output range the outer format call is formatting into,
* so for example when someone is formatting into an ostd::appender_range,
* it will be just that.
*
* This may be specialized in other libostd modules as well.
*/
template<typename T>
struct format_traits {
    /* deliberately empty in the primary template; a specialization
     * provides a static `to_format` to enable custom formatting of T
     */
};
/* implementation helpers */
namespace detail {
/* Consumes every leading flag character of `fmt` and returns `ret` with
 * the matching FMT_FLAG_* bits added. Stops at the first character that
 * is not a flag (which is left in `fmt`) or at the end of the range.
 */
inline int parse_fmt_flags(string_range &fmt, int ret) {
    for (; !fmt.empty(); fmt.pop_front()) {
        int bit = 0;
        switch (fmt.front()) {
            case '-': bit = FMT_FLAG_DASH; break;
            case '+': bit = FMT_FLAG_PLUS; break;
            case '#': bit = FMT_FLAG_HASH; break;
            case '@': bit = FMT_FLAG_AT; break;
            case '0': bit = FMT_FLAG_ZERO; break;
            case ' ': bit = FMT_FLAG_SPACE; break;
            /* not a flag, leave it for the caller */
            default: return ret;
        }
        ret |= bit;
    }
    return ret;
}
/* Moves the leading run of decimal digits from `fmt` into `buf`, always
 * null terminating it, and returns how many digits were moved.
 *
 * NOTE(review): no buffer size is passed in, so `buf` has to be able to
 * hold every leading digit plus the terminator - confirm at call sites.
 */
inline size_t read_digits(string_range &fmt, char *buf) {
    size_t ret = 0;
    while (!fmt.empty()) {
        char const c = fmt.front();
        /* isdigit() is undefined for negative values, which a plain char
         * above 0x7F turns into when char is signed; go via unsigned char
         */
        if (!isdigit(static_cast<unsigned char>(c))) {
            break;
        }
        *buf++ = c;
        fmt.pop_front();
        ++ret;
    }
    *buf = '\0';
    return ret;
}
/* Specifier class for each ASCII character from 'A' (65) up to 127; the
 * table is indexed with `character - 65`. The classes are:
 *
 * 0 .. not allowed
 * 1 .. floating point
 * 2 .. character
 * 3 .. binary
 * 4 .. octal
 * 5 .. decimal
 * 6 .. hexadecimal
 * 7 .. string
 * 8 .. custom object
 */
static constexpr unsigned char const fmt_specs[] = {
    /* uppercase spec set */
    1, 3, 8, 8, 1, 1, 1, 8, /* A B C D E F G H */
    8, 8, 8, 8, 8, 8, 8, 8, /* I J K L M N O P */
    8, 8, 8, 8, 8, 8, 8, 6, /* Q R S T U V W X */
    8, 8,                   /* Y Z */
    /* ascii filler (the 6 characters between 'Z' and 'a') */
    0, 0, 0, 0, 0, 0,
    /* lowercase spec set */
    1, 3, 2, 5, 1, 1, 1, 8, /* a b c d e f g h */
    8, 8, 8, 8, 8, 8, 4, 8, /* i j k l m n o p */
    8, 8, 7, 8, 8, 8, 8, 6, /* q r s t u v w x */
    8, 8,                   /* y z */
    /* ascii filler (the 5 characters after 'z') */
    0, 0, 0, 0, 0
};
/* numeric base for each specifier class; 0 where the class is not
 * an integer format
 */
static constexpr int const fmt_bases[] = {
    0,  /* 0 .. not allowed */
    0,  /* 1 .. floating point */
    0,  /* 2 .. character */
    2,  /* 3 .. binary */
    8,  /* 4 .. octal */
    10, /* 5 .. decimal */
    16, /* 6 .. hexadecimal */
    0   /* 7 .. string */
};
/* escape sequences indexed by character code: every non-printable
 * character below 0x20 (space) has one, followed by the 0x20 - 0x27
 * range in which only the two quote characters do
 */
static constexpr char const *fmt_escapes[] = {
    /* 0x00 - 0x07 */
    "\\0", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a",
    /* 0x08 - 0x0F */
    "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "\\x0E", "\\x0F",
    /* 0x10 - 0x17 */
    "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17",
    /* 0x18 - 0x1F */
    "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F",
    /* 0x20 - 0x27, double quote is 0x22 and single quote is 0x27 */
    nullptr, nullptr, "\\\"", nullptr, nullptr, nullptr, nullptr, "\\\'"
};

/* Returns the escape sequence to write instead of `v`, or a null pointer
 * when `v` can be written as it is. Control characters and DEL are always
 * escaped, a quote character only when it is equal to `quote`.
 */
inline char const *escape_fmt_char(char v, char quote) {
    bool const is_control = (v >= 0) && (v < 0x20);
    if (is_control || (v == quote)) {
        return fmt_escapes[size_t(v)];
    }
    return (v == 0x7F) ? "\\x7F" : nullptr;
}
/* retrieve width/precision */
/* Returns the `idx`-th (0-based) argument of the pack as an `int` to be
 * used as a width or precision.
 *
 * Throws format_error when `idx` is past the end of the pack, when the
 * selected argument is not of an integral type, when it is negative or
 * when it does not fit into an `int`.
 */
template<typename T, typename ...A>
inline int get_arg_param(size_t idx, T const &val, A const &...args) {
    if (idx) {
        /* not there yet, drop the first argument and recurse */
        if constexpr(!sizeof...(A)) {
            throw format_error{"not enough format args"};
        } else {
            return get_arg_param(idx - 1, args...);
        }
    } else {
        if constexpr(!std::is_integral_v<T>) {
            throw format_error{"invalid argument for width/precision"};
        } else {
            if constexpr(std::is_signed_v<T>) {
                if (val < 0) {
                    throw format_error{
                        "width/precision cannot be negative"
                    };
                }
            }
            /* the conversion below would silently wrap around for values
             * above INT_MAX; only types actually able to hold such values
             * are checked so that the comparison is never trivially false
             */
            if constexpr(
                (sizeof(T) > sizeof(int)) ||
                ((sizeof(T) == sizeof(int)) && std::is_unsigned_v<T>)
            ) {
                if (val > static_cast<T>(std::numeric_limits<int>::max())) {
                    throw format_error{
                        "width/precision is too large"
                    };
                }
            }
            return int(val);
        }
    }
}
/* Detects tuple-like types such as tuple itself, pair, array and anything
 * else providing a std::tuple_size specialization. The pointer overload
 * only takes part in overload resolution when `std::tuple_size<T>::type`
 * is valid and is then preferred over the variadic fallback.
 */
template<typename T>
std::true_type tuple_like_test(typename std::tuple_size<T>::type *);

template<typename>
std::false_type tuple_like_test(...);

template<typename T>
constexpr bool is_tuple_like = std::is_same_v<
    decltype(tuple_like_test<T>(nullptr)), std::true_type
>;
/* Detects whether `format_traits<T>::to_format(value, writer, spec)` is
 * a valid call for a writer of type R. As with tuple_like_test the first
 * overload is only viable when the expression is well formed.
 */
template<typename T, typename R>
static std::true_type test_tofmt(
    decltype(format_traits<T>::to_format(
        std::declval<T const &>(),
        std::declval<R &>(),
        std::declval<format_spec const &>()
    )) *
);

template<typename, typename>
static std::false_type test_tofmt(...);

template<typename T, typename R>
constexpr bool fmt_tofmt_test = std::is_same_v<
    decltype(test_tofmt<T, R>(nullptr)), std::true_type
>;
}
/** @brief A structure implementing type safe C-style formatting.
*
* It can be constructed either to represent a specific format specifier or
* with a format string to format an entire string (in which case it will
* parse the string and format it with the individual intermediate markings).
*
* It stores information about the current format specifier (when constructed
* as one or when parsing the format string) as well as the rest of the current
* format string. See read_until_spec() and rest() for more information.
*
* # Regular format specifiers
*
* The formatter is considerably more elaborate than C-style printf. Its
* basic format specifiers superficially look the same:
*
* ~~~
* %[position$][flags][width][.precision]specifier
* ~~~
*
* Position is the optional position of the argument in the pack starting
* with 1. It can be mixed with format specifiers without explicit position,
* unlike what POSIX says; the next specifier without explicit position
* will use the position after the largest explicit position used so far.
* For example, `%3$s %1$s %s` will use position 4 for the last specifier.
*
* ## Flags
*
* * The `-` flag will left-justify within the given width (right by default).
* * The `+` flag applies to numbers and will force sign to always show, even
* for positive numbers (by default, only negative ones get a sign).
* * The ` ` (space) flag applies to numbers and will force a space to be
* written in place of sign for positive numbers (no effect with `+`).
* * The `#` flag applies to integers, floats and ranges. For integers, it
* will add the prefixes `0x`, `0X`, `0b` or `0B` when formatted as hex
* or binary, lowercase or uppercase. For floats, it will force the output
* to always contain a decimal point/comma. For ranges, it will cause
* automatic expansion of values into items if the values are tuples.
* * The `@` flag will escape the value according to the rules.
* * The `0` flag will left-pad numbers with zeroes instead of spaces when
* needed (according to width).
*
* ## Width
*
* Width can be specified either as a number in the format string or as `*`
* in which case it will be an integer argument (any integral type, must be
* greater than or equal to zero, otherwise ostd::format_error is thrown). When
* an argument, the position of the argument is right before the actual
* value to format, unless precision is also an argument, in which case it's
* right before the precision argument. When the position of the value to
* format is explicit in the format string, the position refers to the value
* to format and width/precision are before that.
*
* Width defines the minimum number of characters to be printed. If the value
* ends up being shorter, it's padded with spaces (or zeroes when formatting
* a number and the zero flag is used). The value is not truncated if it's
* actually longer than the width.
*
* ## Precision
*
* Precision can also be specified as a number or as an argument. When both
* width and precision are an argument, width is first. For integers, it
* specifies the minimum number of digits to be written. If the value is
* shorter than the precision, the result is padded with leading zeroes.
* If it's longer, no truncation happens. A precision of 0 means that no
* character is written for the value 0. For floats, it's the number of
* digits to be written after decimal point or comma. When not specified,
* it's 6. For strings, it's the maximum number of characters to be printed.
* By default all characters are printed. When escaping strings, the quotes
* are not counted into the precision and escape sequences count as a single
* character.
*
* # Range formatting
*
* The system also allows advanced formatting for ranges. The specifier
* then looks different:
*
* ~~~
* %[position$][flags](contents%)
* ~~~
*
* The `contents` is a format specifier for each item of the range followed
* by a separator. For example:
*
* ~~~
* %(%s, %)
* ~~~
*
* In this case, `%s` is the specifier and `, ` is the separator. You can
* also explicitly delimit the separator:
*
* ~~~
* %(%s%|, %)
* ~~~
*
* The first part is used to format items and the separator is put between
* each two items.
*
* Two flags are used by this format. Normally, each item of the range is
* formatted as is, using a single specifier, even if the item is a tuple-like
* value. Using the `#` flag you can expand tuple-like items into multiple
* values. So when formatting a range over an associative map, you can do this:
*
* ~~~
* %#(%s: %s%|, %)
* ~~~
*
* to format key and value separately.
*
* You can also use the `@` flag. It will cause the `@` flag to be applied to
* every item of the range, therefore escaping each one. Nested range formats
* are also affected by this. There is no way to unapply the flag once you
* set it.
*
* # Tuple formatting
*
* Additionally, the system also supports advanced formatting for tuples.
* The syntax is similar:
*
* ~~~
* %[position$][flags]<contents%>
* ~~~
*
* There are no delimiters here. The `contents` is simply a regular format
* string, with a format specifier for each tuple item.
*
* You can use the `@` flag just like you can use it with ranges. No other
* flag can be used when formatting tuples.
*
* # Specifiers
*
* Now for the basic specifiers themselves:
*
* * `a`, `A` - hexadecimal float like C printf (lowercase, uppercase).
* * `b`, `B` - binary integers (lowercase, uppercase).
* * `c` - character values.
* * `d` - decimal integers.
* * `e`, `E` - floats in scientific notation (lowercase, uppercase).
* * `f`, `F` - decimal floating point (lowercase, uppercase).
* * `g`, `G` - shortest representation (`e`/`E` or `f`/`F`).
* * `o` - octal integers.
* * `s` - any value with its default format.
* * `x`, `X` - hexadecimal integers (lowercase, uppercase).
*
* You can use the `s` specifier to format any value that can be formatted
* at no extra cost. Because the system is type safe, how a value is meant
* to be formatted is decided from the type that is passed in, not the format
* specifier itself.
*
* Signedness and size of integer types is determined from the value itself,
* which applies universally to all integer formatting with all bases. Also,
* the lowercase/uppercase distinction for binary only applies when a prefix
* is added (using the `#` flag).
*
* All letters (uppercase and lowercase) are available for custom formatting.
*
* # Format order and rules
*
* The rules for formatting values go as follows:
*
* * First it's checked whether the value can be custom formatted using a
* specialization of ostd::format_traits. If it can, it's formatted using
* that, the current `format_spec` is passed in as it is and no extra
* checks are made. Any letter can be used to format custom objects.
* * Then it's checked if the value is convertible to ostd::string_range.
* If it is, it's formatted as a string. Only the `s` specifier is allowed.
* * Then it's checked if the value is a tuple-like object. The value is one
* if `std::tuple_size<T>::value` is valid. If it is, the tuple-like object
* is formatted as `<ITEM, ITEM, ITEM, ...>` by default. You need to use
* the `s` specifier only to format tuples like this. The items are all
* formatted using the `s` specifier.
* * Then ranges are tested in a similar way. The default format for ranges
* is `{ITEM, ITEM, ITEM, ...}`. The `s` specifier must be used. The items
* are all formatted with `s` too.
* * Then bools are formatted. If the `s` specifier is used, the bool is
* formatted as `true` or `false`. Otherwise it's converted to `int` and
* formatted using the specifier (might error depending on the specifier).
* * Then character values are formatted. The `c` and `s` specifiers are
* allowed.
* * Pointers are formatted then. If the `s` specifier is used, the pointer
* will be formatted as hex with the `0x` prefix. Otherwise it's converted
* to `size_t` and formatted with the specifier (might error depending on
* the specifier).
* * Then integers are formatted. Using the `s` specifier is like using the
* `d` specifier.
* * Floats follow. Using `s` is like using `g`.
* * When everything is exhausted, ostd::format_error is thrown.
*
* # Escaping
*
* String and character values are subject to escaping if the `@` flag is
* used. Strings are put into double quotes and any unprintable values in
* them are converted into escape sequences. Quotes (single and double)
* are also escaped. Character values are put into single quotes and
* unprintable characters are converted into escape sequences as well.
* For known escape sequences, simple readable versions are used, particularly
* `a`, `b`, `t`, `n`, `v`, `f`, `r`. For other unprintables, the hexadecimal
* escape format is used.
*
* When printing tuples and ranges with the `s` specifier and the `@` flag
* is used, all of their items are escaped. If the items are tuples or ranges,
* their own items are also escaped. The `@` flag doesn't escape anything else,
* unless you implement support for escaping in your own custom objects.
*
* # Locale awareness
*
* The system also makes use of locales. When formatting integers, thousands
* grouping rules from the locale apply (no matter the base). When formatting
* floats, a locale specific decimal separator is used and thousands grouping
* also applies.
*
* # Errors
*
* If a specifier is not allowed for a value, ostd::format_error is thrown.
*/
struct format_spec {
/** @brief Constructs with a format string and the default locale.
*
* If you use this constructor, there won't be a specific formatting
* specifier set in here so you won't be able to get its properties,
* but you will be able to format into a range with some arguments.
* You can also manually parse the format string, see read_until_spec().
*
* The locale used here is the default (global) locale.
*/
format_spec(string_range fmt): p_fmt(fmt), p_loc() {
    /* nothing else to do, a default constructed std::locale is
     * a copy of the current global locale
     */
}
/** @brief Constructs with a format string and a locale.
*
* Like format_spec(string_range), but with an explicit locale.
*/
format_spec(string_range fmt, std::locale const &loc):
p_fmt(fmt), p_loc(loc)
{}
/** @brief Constructs a specific format specifier.
*
* See ostd::format_flags for flags. The `spec` argument is the format
* specifier (for example `s`). It doesn't support tuple/range formatting
* nor positional arguments.
*
* Uses the default (global) locale. The locale is then potentially used
* for formatting values.
*/
format_spec(char spec, int flags = 0)
    : p_flags{flags}
    , p_spec{spec}
    , p_loc{}
{
    /* no format string here, this represents just the one specifier */
}
/** @brief Constructs a specific format specifier with a locale.
*
* Like format_spec(char, int) but uses an explicit locale.
*/
format_spec(char spec, std::locale const &loc, int flags = 0)
    : p_flags{flags}
    , p_spec{spec}
    , p_loc{loc}
{
    /* no format string here, this represents just the one specifier */
}
/** @brief Parses the format string if constructed with one.
*
* This reads the format string, writing each character of it into
* `writer`, until it encounters a valid format specifier. It then
* stops there and returns `true`. If no format specifier was read,
* it returns `false`. When a format specifier is read, this structure
* then represents it.
*
* It's used by format() to parse the string.
*/
template<typename R>
bool read_until_spec(R &writer) {
    while (!p_fmt.empty()) {
        if (p_fmt.front() == '%') {
            p_fmt.pop_front();
            if (p_fmt.empty()) {
                /* a lone '%' ends the format string: there is nothing
                 * left to parse and the empty range must not be read
                 */
                return false;
            }
            if (p_fmt.front() != '%') {
                /* start of an actual specifier */
                return read_spec();
            }
            /* "%%" escape, the second '%' is written out just below */
        }
        writer.put(p_fmt.front());
        p_fmt.pop_front();
    }
    return false;
}
/** @brief Gets the not yet parsed portion of the format string.
*
* If no read_until_spec() was called, this returns the entire format
* string. Otherwise, it returns the format string from the point
* after the format specifier this structure currently represents.
*/
string_range rest() const
{
    /* a copy of the range, the format state itself is left untouched */
    return p_fmt;
}
/** @brief Overrides the currently set locale.
*
* @returns The old locale.
*/
std::locale imbue(std::locale const &loc) {
    /* keep the outgoing locale around so that it can be handed back */
    std::locale previous = p_loc;
    p_loc = loc;
    return previous;
}
/** @brief Retrieves the currently used locale for the format state. */
std::locale getloc() const
{
    /* returned by value, i.e. as a copy of the stored locale */
    return p_loc;
}
/** @brief Gets the width of the format specifier.
*
* If explicitly specified (say `%5s`) it will return the number that
* was in the format specifier. If explicitly set with set_width(),
* it will return that. If not set at all, it will return 0.
*
* @see has_width(), precision()
*/
int width() const {
    return p_width;
}
/** @brief Gets the precision of the format specifier.
*
* If explicitly specified (say `%.5f`) it will return the number that
* was in the format specifier. If explicitly set with set_precision(),
* it will return that. If not set at all, it will return 0.
*
* @see has_precision(), width()
*/
int precision() const {
    return p_precision;
}
/** @brief Gets whether a width was specified somehow.
*
* If the width was provided directly as part of the format specifier
* or with an explicit argument (see set_width()), this will return
* `true`. Otherwise, it will return `false`.
*
* You can get the actual width using width().
*
* @see has_precision(), arg_width()
*/
bool has_width() const {
    return p_has_width;
}
/** @brief Gets whether a precision was specified somehow.
*
* If the precision was provided directly as part of the format specifier
* or with an explicit argument (see set_precision()), this will return
* `true`. Otherwise, it will return `false`.
*
* You can get the actual precision using precision().
*
* @see has_width(), arg_precision()
*/
bool has_precision() const {
    return p_has_precision;
}
/** @brief Gets whether a width was specified as an explicit argument.
*
* This is true if the width was specified using `*` in the format
* specifier. Also set by set_width_arg().
*
* @see has_width()
*/
bool arg_width() const {
    return p_arg_width;
}
/** @brief Gets whether a precision was specified as an explicit argument.
*
* This is true if the precision was specified using `*` in the format
* specifier. Also set by set_precision_arg().
*
* @see has_precision()
*/
bool arg_precision() const {
    return p_arg_precision;
}
/** @brief Sets the width from an argument pack.
*
* The `idx` parameter specifies the index (starting with 0) of the
* width argument in the followup pack.
*
* The return value of width() will then be the argument's value.
* It will also make has_width() and arg_width() return true (if
* they previously didn't).
*
* @throws ostd::format_error when `idx` is out of bounds or the argument
* has an invalid type.
*
* @see set_width(), set_precision_arg()
*/
template<typename ...A>
void set_width_arg(size_t idx, A const &...args) {
    /* may throw, in which case the current state is left as it was */
    int const value = detail::get_arg_param(idx, args...);
    p_width = value;
    p_arg_width = true;
    p_has_width = true;
}
/** @brief Sets the width to an explicit number.
*
* The return value of width() will then be the given value. It will
* also make has_width() return true and arg_width() return false.
*
* @see set_width_arg(), set_precision()
*/
void set_width(int v)
{
    /* an explicit number, hence not coming from an argument */
    p_arg_width = false;
    p_has_width = true;
    p_width = v;
}
/** @brief Sets the precision from an argument pack.
*
* The `idx` parameter specifies the index (starting with 0) of the
* precision argument in the followup pack.
*
* The return value of precision() will then be the argument's value.
* It will also make has_precision() and arg_precision() return true (if
* they previously didn't).
*
* @throws ostd::format_error when `idx` is out of bounds or the argument
* has an invalid type.
*
* @see set_precision(), set_width_arg()
*/
template<typename ...A>
void set_precision_arg(size_t idx, A const &...args) {
    /* may throw, in which case the current state is left as it was */
    int const value = detail::get_arg_param(idx, args...);
    p_precision = value;
    p_arg_precision = true;
    p_has_precision = true;
}
/** @brief Sets the precision to an explicit number.
*
* The return value of precision() will then be the given value. It will
* also make has_precision() return true and arg_precision() return false.
*
* @see set_precision_arg(), set_width()
*/
void set_precision(int v)
{
    /* an explicit number, hence not coming from an argument */
    p_arg_precision = false;
    p_has_precision = true;
    p_precision = v;
}
/** @brief Gets the combination of flags for the current specifier. */
int flags() const {
    return p_flags;
}
/** @brief Gets the base char for the specifier. */
char spec() const {
    return p_spec;
}
/** @brief Gets the position of the matching argument in the pack.
*
* This applies for when the position in the format specifier was
* explicitly set (for example `%5$s` will have index 5) to refer
* to a specific argument in the pack. Keep in mind that these
* start with 1 (1st argument, 5th argument etc) to match the POSIX
* conventions on this. If the position was not specified, this just
* returns 0.
*/
unsigned char index() const {
    return p_index;
}
/** @brief Gets the inner part of a range or tuple format specifier.
*
* For ranges, this does not include the separator, you need to use
* nested_sep() to get the separator. For example, given the
* `%(%s, %)` specifier, this returns `%s` and for `%(%s%|, %)`
* it returns the same. When formatting tuples, this behaves identically,
* for example for `%<%s, %f%>` this returns `%s, %f`. For simple
* specifiers this returns an empty slice.
*/
string_range nested() const {
    return p_nested;
}
/** @brief Gets the separator of a complex range format specifier.
*
* For example for `%(%s, %)` this returns `, `. With an explicit
* delimiter, for example for `%(%s%|, %)`, this returns the same
* thing as well. For simple specifiers and tuple specifiers this
* returns an empty slice.
*/
string_range nested_sep() const {
    return p_nested_sep;
}
/** @brief Returns true if this specifier is for a tuple. */
bool is_tuple() const {
    return p_is_tuple;
}
/** @brief Returns true if this specifier is for a tuple or a range. */
bool is_nested() const {
    return p_is_nested;
}
/** @brief Formats into a range with the given arguments.
*
* When a valid format string is currently present, this formats
* into the given range using that format string and the provided
* arguments.
*
* @throws ostd::format_error when the format string and args don't match.
*
* @see format_value()
*/
template<typename R, typename ...A>
R &&format(R &&writer, A const &...args)
{
    /* the writer is used as an lvalue for the actual formatting... */
    write_fmt(writer, args...);
    /* ...and handed back with the value category it came in with */
    return static_cast<R &&>(writer);
}
/** @brief Formats a single value into a range.
*
* When this currently represents a valid format specifier, you can
* use this to format a single value with that specifier. This is very
* useful for example when formatting custom objects, see the example
* in ostd::format_traits.
*
* @throws ostd::format_error when the specifier and the value don't match.
*
* @see format()
*/
template<typename R, typename T>
R &&format_value(R &&writer, T const &val) const
{
    /* a pack consisting of the one value, which is at index 0 */
    write_arg(writer, 0, val);
    return static_cast<R &&>(writer);
}
private:
/* inner format of the range/tuple specifier, see nested() */
string_range p_nested;
/* separator of the range specifier, see nested_sep() */
string_range p_nested_sep;
/* combination of format_flags of the current specifier, see flags() */
int p_flags = 0;
/* internal, for initial set of flags; read_spec() starts from these
 * each time and adds whatever flags the specifier itself contains
 */
int p_gflags = 0;
/* width and precision values, see width() and precision() */
int p_width = 0;
int p_precision = 0;
/* whether a width/precision is present at all */
bool p_has_width = false;
bool p_has_precision = false;
/* whether the width/precision is to be taken from an argument (`*`) */
bool p_arg_width = false;
bool p_arg_precision = false;
/* the specifier character, see spec() */
char p_spec = '\0';
/* explicit 1-based argument position, 0 when not given, see index() */
unsigned char p_index = 0;
/* set by read_spec_range() for tuple and range/tuple specifiers */
bool p_is_tuple = false;
bool p_is_nested = false;
/* Like read_until_spec(), except the characters in front of the
 * specifier are only skipped instead of being written anywhere.
 */
bool read_until_dummy() {
    while (!p_fmt.empty()) {
        if (p_fmt.front() == '%') {
            p_fmt.pop_front();
            if (p_fmt.empty()) {
                /* a lone '%' ends the format string: there is nothing
                 * left to parse and the empty range must not be read
                 */
                return false;
            }
            if (p_fmt.front() != '%') {
                /* start of an actual specifier */
                return read_spec();
            }
            /* "%%" escape, the second '%' is skipped just below */
        }
        p_fmt.pop_front();
    }
    return false;
}
bool read_spec_range(bool tuple = false) {
int sflags = p_flags;
p_fmt.pop_front();
string_range begin_inner(p_fmt);
if (!read_until_dummy()) {
p_is_nested = false;
return false;
}
/* skip to the last spec in case multiple specs are present */
string_range curfmt(p_fmt);
while (read_until_dummy()) {
curfmt = p_fmt;
}
p_fmt = curfmt;
/* restore in case the inner spec read changed them */
p_flags = sflags;
/* find delimiter or ending */
string_range begin_delim(p_fmt);
string_range p = find(begin_delim, '%');
char need = tuple ? '>' : ')';
for (; !p.empty(); p = find(p, '%')) {
p.pop_front();
/* escape, skip */
if (p.front() == '%') {
p.pop_front();
continue;
}
/* found end, in that case delimiter is after spec */
if (p.front() == need) {
p_is_tuple = tuple;
if (tuple) {
p_nested = begin_inner.slice(0, &p[0] - &begin_inner[0] - 1);
p_nested_sep = nullptr;
} else {
p_nested = begin_inner.slice(
0, &begin_delim[0] - &begin_inner[0]
);
p_nested_sep = begin_delim.slice(
0, &p[0] - &begin_delim[0] - 1
);
}
p.pop_front();
p_fmt = p;
p_is_nested = true;
return true;
}
/* found actual delimiter start... */
if ((p.front() == '|') && !tuple) {
p_nested = begin_inner.slice(0, &p[0] - &begin_inner[0] - 1);
p.pop_front();
p_nested_sep = p;
for (p = find(p, '%'); !p.empty(); p = find(p, '%')) {
p.pop_front();
if (p.front() == ')') {
p_nested_sep = p_nested_sep.slice(
0, &p[0] - &p_nested_sep[0] - 1
);
p.pop_front();
p_fmt = p;
p_is_nested = true;
return true;
}
}
p_is_nested = false;
return false;
}
}
p_is_nested = false;
return false;
}
bool read_spec() {
size_t ndig = detail::read_digits(p_fmt, p_buf);
bool havepos = false;
p_index = 0;
/* parse index */
if (p_fmt.front() == '$') {
if (ndig <= 0) return false; /* no pos given */
int idx = atoi(p_buf);
if (idx <= 0 || idx > 255) return false; /* bad index */
p_index = static_cast<unsigned char>(idx);
p_fmt.pop_front();
havepos = true;
}
/* parse flags */
p_flags = p_gflags;
size_t skipd = 0;
if (havepos || !ndig) {
p_flags |= detail::parse_fmt_flags(p_fmt, 0);
} else {
for (size_t i = 0; i < ndig; ++i) {
if (p_buf[i] != '0') {
break;
}
++skipd;
}
if (skipd) {
p_flags |= FMT_FLAG_ZERO;
}
if (skipd == ndig) {
p_flags |= detail::parse_fmt_flags(p_fmt, p_flags);
}
}
/* range/array/tuple formatting */
if (
((p_fmt.front() == '(') || (p_fmt.front() == '<')) &&
(havepos || !(ndig - skipd))
) {
return read_spec_range(p_fmt.front() == '<');
}
/* parse width */
p_width = 0;
p_has_width = false;
p_arg_width = false;
if (!havepos && ndig && (ndig - skipd)) {
p_width = atoi(p_buf + skipd);
p_has_width = true;
} else if (detail::read_digits(p_fmt, p_buf)) {
p_width = atoi(p_buf);
p_has_width = true;
} else if (p_fmt.front() == '*') {
p_arg_width = p_has_width = true;
p_fmt.pop_front();
}
/* parse precision */
p_precision = 0;
p_has_precision = false;
p_arg_precision = false;
if (p_fmt.front() != '.') {
goto fmtchar;
}
p_fmt.pop_front();
if (detail::read_digits(p_fmt, p_buf)) {
p_precision = atoi(p_buf);
p_has_precision = true;
} else if (p_fmt.front() == '*') {
p_arg_precision = p_has_precision = true;
p_fmt.pop_front();
} else {
return false;
}
fmtchar:
p_spec = p_fmt.front();
p_fmt.pop_front();
return ((p_spec | 32) >= 'a') && ((p_spec | 32) <= 'z');
}
/* Writes the padding needed to bring a value of `n` characters up to the
 * current width. It's called once with `left` set and once without; the
 * call with `left` set only pads when the dash flag is unset, the other
 * one only when it's set.
 */
template<typename R>
void write_spaces(R &writer, size_t n, bool left, char c = ' ') const {
    bool const dash = bool(p_flags & FMT_FLAG_DASH);
    if (left == dash) {
        return;
    }
    /* zero or negative when the value already fills the width */
    int const pad = p_width - int(n);
    for (int i = 0; i < pad; ++i) {
        writer.put(c);
    }
}
/* string base writer */
template<typename R>
void write_str(R &writer, bool escape, string_range val) const {
    /* number of characters of `val` to write; the padding is based on
     * it as well, i.e. quotes and escape sequences are not accounted for
     */
    size_t limit = val.size();
    if (has_precision()) {
        limit = std::min(limit, size_t(precision()));
    }
    write_spaces(writer, limit, true);
    if (!escape) {
        range_put_all(writer, val.slice(0, limit));
    } else {
        writer.put('"');
        for (
            size_t todo = limit; todo && !val.empty();
            --todo, val.pop_front()
        ) {
            char const ch = val.front();
            if (char const *seq = detail::escape_fmt_char(ch, '"')) {
                range_put_all(writer, string_range{seq});
            } else {
                writer.put(ch);
            }
        }
        writer.put('"');
    }
    write_spaces(writer, limit, false);
}
/* char values */
/* Writes a single character padded to the current width. When `escape`
 * is set the character is put into single quotes and replaced with its
 * escape sequence if it has one.
 */
template<typename R>
void write_char(R &writer, bool escape, char val) const {
    if (escape) {
        char const *esc = detail::escape_fmt_char(val, '\'');
        if (esc) {
            /* quote, the longest escape sequence (4 characters, such
             * as `\x7F`) and quote again; no terminator is stored
             */
            char buf[6];
            buf[0] = '\'';
            size_t elen = strlen(esc);
            memcpy(buf + 1, esc, elen);
            buf[elen + 1] = '\'';
            /* write it as a plain string; this used to go through
             * write_val(), which only lets strings through with the
             * `s` spec and therefore threw for an escaped `%@c`
             */
            write_str(writer, false, ostd::string_range{
                buf, buf + elen + 2
            });
            return;
        }
    }
    /* the character as it is, plus the two quotes when escaping */
    write_spaces(writer, 1 + escape * 2, true);
    if (escape) {
        writer.put('\'');
        writer.put(val);
        writer.put('\'');
    } else {
        writer.put(val);
    }
    write_spaces(writer, 1 + escape * 2, false);
}
/* Writes the integer `val` according to the current specifier.
 *
 * `ptr` selects pointer formatting (hexadecimal with prefix for the `s`
 * spec, no digit grouping), `neg` makes a minus sign be written in front
 * of the digits of `val`.
 *
 * Throws format_error when the specifier is not an integer one.
 */
template<typename R, typename T>
void write_int(R &writer, bool ptr, bool neg, T val) const {
    /* binary representation is the biggest, assume grouping */
    char buf[sizeof(T) * CHAR_BIT * 2];
    size_t n = 0;
    char isp = spec();
    if (isp == 's') {
        isp = (ptr ? 'x' : 'd');
    }
    /* the table only covers 'A' and up; the spec may be an arbitrary
     * character when set through the constructor, so anything outside
     * of the table is treated as "not allowed" instead of being read
     */
    constexpr size_t nspecs =
        sizeof(detail::fmt_specs) / sizeof(detail::fmt_specs[0]);
    unsigned char specn = 0;
    if ((isp >= 'A') && (size_t(isp - 'A') < nspecs)) {
        specn = detail::fmt_specs[isp - 'A'];
    }
    if (specn <= 2 || specn > 7) {
        throw format_error{"cannot format integers with the given spec"};
    }
    /* 32 for lowercase variants, 0 for uppercase */
    int cmask = ((isp >= 'a') << 5);
    int base = detail::fmt_bases[specn];
    bool zval = !val;
    if (zval) {
        buf[n++] = '0';
    }
    auto const &fac = std::use_facet<std::numpunct<char>>(p_loc);
    auto const &grp = fac.grouping();
    char tsep = fac.thousands_sep();
    auto grpp = reinterpret_cast<unsigned char const *>(grp.data());
    unsigned char grpn = *grpp;
    /* digits are generated least significant first, i.e. reversed */
    for (; val; val /= base) {
        if (!ptr && *grpp) {
            if (!grpn) {
                /* current group is full, separate and move on to the
                 * next group size; the last one repeats
                 */
                buf[n++] = tsep;
                if (*(grpp + 1)) {
                    ++grpp;
                }
                grpn = *grpp;
            }
            if (grpn) {
                --grpn;
            }
        }
        T vb = val % base;
        /* '0' based for 0-9, 'A' based ('7' + 10) for the rest */
        buf[n++] = (vb + "70"[vb < 10]) | cmask;
    }
    /* total number of digit characters including precision padding */
    size_t tn = n;
    if (has_precision()) {
        int prec = precision();
        if (size_t(prec) > tn) {
            tn = size_t(prec);
        } else if (!prec && zval) {
            /* zero with a zero precision writes nothing at all */
            tn = 0;
        }
    }
    int fl = flags();
    bool lsgn = fl & FMT_FLAG_PLUS;
    bool lsp = fl & FMT_FLAG_SPACE;
    bool zero = fl & FMT_FLAG_ZERO;
    bool sign = neg + lsgn + lsp;
    char pfx = '\0';
    if (((fl & FMT_FLAG_HASH) || ptr) && ((specn == 3) || (specn == 6))) {
        pfx = ("XB"[(specn == 3)]) | cmask;
    }
    /* space padding goes in front of the sign and the prefix... */
    if (!zero) {
        write_spaces(writer, tn + (!!pfx * 2) + sign, true, ' ');
    }
    if (sign) {
        writer.put(neg ? '-' : *((" \0+") + lsgn * 2));
    }
    if (pfx) {
        writer.put('0');
        writer.put(pfx);
    }
    /* ...while zero padding goes right in front of the digits */
    if (zero) {
        write_spaces(writer, tn + (!!pfx * 2) + sign, true, '0');
    }
    if (tn) {
        for (size_t i = 0; i < (tn - n); ++i) {
            writer.put('0');
        }
        for (size_t i = 0; i < n; ++i) {
            writer.put(buf[n - i - 1]);
        }
    }
    write_spaces(writer, tn + sign + (!!pfx * 2), false);
}
/* floating point */
/* Writes the floating point value `val` according to the current
 * specifier, with the number formatting itself done by a num_put facet
 * driven by a stream state object set up from the specifier.
 *
 * Throws format_error when the specifier is not a floating point one.
 */
template<typename R, typename T>
void write_float(R &writer, T val) const {
    char isp = spec();
    /* the table only covers 'A' and up; the spec may be an arbitrary
     * character when set through the constructor, so anything outside
     * of the table is treated as "not allowed" instead of being read
     */
    constexpr size_t nspecs =
        sizeof(detail::fmt_specs) / sizeof(detail::fmt_specs[0]);
    unsigned char specn = 0;
    if ((isp >= 'A') && (size_t(isp - 'A') < nspecs)) {
        specn = detail::fmt_specs[isp - 'A'];
    }
    if (specn != 1 && specn != 7) {
        throw format_error{"cannot format floats with the given spec"};
    }
    /* null streambuf because it's only used to read flags etc */
    std::ios st{nullptr};
    st.imbue(p_loc);
    st.width(width());
    st.precision(has_precision() ? precision() : 6);
    /* value initialized rather than assigned 0, as fmtflags
     * is not necessarily an integer type
     */
    std::ios_base::fmtflags fl{};
    if (!(isp & 32)) {
        fl |= std::ios_base::uppercase;
    }
    /* equivalent of printf 'g' or 'G' by default */
    if ((isp | 32) == 'f') {
        fl |= std::ios_base::fixed;
    } else if ((isp | 32) == 'e') {
        fl |= std::ios_base::scientific;
    } else if ((isp | 32) == 'a') {
        fl |= std::ios_base::fixed | std::ios_base::scientific;
    }
    /* the dash flag left-justifies; zero padding is only done when
     * right-justifying (trailing zeroes would alter the number) and
     * goes between the sign and the digits, like it does for integers
     */
    bool const ljust = (p_flags & FMT_FLAG_DASH) != 0;
    bool const zpad = (p_flags & FMT_FLAG_ZERO) && !ljust;
    if (ljust) {
        fl |= std::ios_base::left;
    } else if (zpad) {
        fl |= std::ios_base::internal;
    }
    if (p_flags & FMT_FLAG_PLUS) {
        fl |= std::ios_base::showpos;
    } else if ((p_flags & FMT_FLAG_SPACE) && !std::signbit(val)) {
        /* only if no sign is shown... num_put does not
         * support this so we have to do it on our own
         */
        writer.put(' ');
    }
    if (p_flags & FMT_FLAG_HASH) {
        fl |= std::ios_base::showpoint;
    }
    st.flags(fl);
    fmt_num_put<R> nump;
    nump.put(fmt_out<R>{&writer}, st, zpad ? '0' : ' ', val);
}
/* Formats a single value `val` into `writer` according to this spec.
 *
 * The kind of formatting is picked at compile time from the type of the
 * value, the first match wins: custom format_traits, anything a
 * string_range can be constructed from, tuple-likes, iterable ranges,
 * bool, char, pointers, integers and finally floating point values.
 * `escape` is handed on to the string, character, tuple and range
 * writers.
 *
 * Throws format_error when the spec does not suit the value (strings,
 * tuples and ranges require `s`, characters `s` or `c`) or when the
 * type matches none of the supported kinds.
 */
template<typename R, typename T>
void write_val(R &writer, bool escape, T const &val) const {
    /* stuff that can be custom-formatted goes first */
    if constexpr(detail::fmt_tofmt_test<T, noop_output_range<char>>) {
        format_traits<T>::to_format(val, writer, *this);
        return;
    }
    /* second best, we can convert to string slice */
    if constexpr(std::is_constructible_v<string_range, T const &>) {
        if (spec() != 's') {
            throw format_error{"strings need the '%s' spec"};
        }
        write_str(writer, escape, val);
        return;
    }
    /* tuples, written as <a, b, c> */
    if constexpr(detail::is_tuple_like<T>) {
        if (spec() != 's') {
            throw format_error{"tuples need the '%s' spec"};
        }
        writer.put('<');
        write_tuple_val<0, std::tuple_size<T>::value>(
            writer, escape, ", ", val
        );
        writer.put('>');
        return;
    }
    /* ranges, written as {a, b, c} */
    if constexpr(detail::iterable_test<T>) {
        if (spec() != 's') {
            throw format_error{"ranges need the '%s' spec"};
        }
        writer.put('{');
        write_range_val(writer, [&writer, escape, this](auto const &rval) {
            /* every item is formatted with a fresh generic spec */
            format_spec sp{'s', p_loc, escape ? FMT_FLAG_AT : 0};
            sp.write_arg(writer, 0, rval);
        }, ", ", val);
        writer.put('}');
        return;
    }
    /* bools, check if printing as string, otherwise convert to int */
    if constexpr(std::is_same_v<T, bool>) {
        if (spec() == 's') {
            /* "true" starts 6 characters into the literal */
            write_val(writer, escape, ("false\0true") + (6 * val));
        } else {
            write_val(writer, escape, int(val));
        }
        return;
    }
    /* character values */
    if constexpr(std::is_same_v<T, char>) {
        if (spec() != 's' && spec() != 'c') {
            throw format_error{"cannot format chars with the given spec"};
        }
        write_char(writer, escape, val);
        return;
    }
    /* pointers, write as pointer with %s and otherwise as unsigned...
     * char pointers are handled by the string case above
     */
    if constexpr(std::is_pointer_v<T>) {
        write_int(writer, (spec() == 's'), false, size_t(val));
        return;
    }
    /* integers */
    if constexpr(std::is_integral_v<T>) {
        if constexpr(std::is_signed_v<T>) {
            /* signed integers: pass the magnitude and the sign separately */
            using UT = std::make_unsigned_t<T>;
            UT mag = static_cast<UT>(val);
            if (val < 0) {
                /* negate in the unsigned domain; negating the signed
                 * value directly overflows for the minimum value
                 */
                mag = static_cast<UT>(UT(0) - mag);
            }
            write_int(writer, false, val < 0, mag);
        } else {
            /* unsigned integers */
            write_int(writer, false, false, val);
        }
        return;
    }
    /* floats */
    if constexpr(std::is_floating_point_v<T>) {
        write_float(writer, val);
        return;
    }
    /* we ran out of options, failure */
    throw format_error{"the value cannot be formatted"};
}
/* actual writer */
/* Writes the argument at zero-based position `idx` of the pack made of
 * `val` followed by `args`. The pack is peeled one element per recursion
 * step until the index reaches zero.
 *
 * Throws format_error when the pack runs out before the index does.
 */
template<typename R, typename T, typename ...A>
void write_arg(
    R &writer, size_t idx, T const &val, A const &...args
) const {
    if (!idx) {
        /* this is the one: the at flag of the spec requests escaping */
        write_val(writer, p_flags & FMT_FLAG_AT, val);
        return;
    }
    if constexpr(sizeof...(A) == 0) {
        throw format_error{"not enough format arguments"};
    } else {
        write_arg(writer, idx - 1, args...);
    }
}
template<typename R, typename T>
inline void write_range_item(
R &writer, bool escape, bool expandval, string_range fmt, T const &item
) const {
if constexpr(detail::is_tuple_like<T>) {
if (expandval) {
std::apply([&writer, escape, &fmt, this](
auto const &...args
) mutable {
format_spec sp{fmt, p_loc};
if (escape) {
sp.p_gflags |= FMT_FLAG_AT;
}
sp.write_fmt(writer, args...);
}, item);
return;
}
}
format_spec sp{fmt, p_loc};
if (escape) {
sp.p_gflags |= FMT_FLAG_AT;
}
sp.write_fmt(writer, item);
}
/* Walks over `val` as a range, calling `func` on each element and
 * writing `sep` into `writer` between consecutive elements (never
 * before the first or after the last one).
 *
 * Throws format_error when `T` is not iterable.
 */
template<typename R, typename F, typename T>
void write_range_val(
    R &writer, F &&func, string_range sep, T const &val
) const {
    if constexpr(!detail::iterable_test<T>) {
        throw format_error{"invalid value for ranged format"};
    } else {
        bool first = true;
        for (auto items = ostd::iter(val); !items.empty(); items.pop_front()) {
            if (!first) {
                range_put_all(writer, sep);
            }
            first = false;
            func(items.front());
        }
    }
}
/* range writer */
/* Picks the argument at zero-based position `idx` of the pack made of
 * `val` followed by `args` and formats it as a range: each element goes
 * through write_range_item with the remainder of this spec's format
 * string, `sep` is written between elements.
 *
 * Throws format_error when the pack runs out before the index does.
 */
template<typename R, typename T, typename ...A>
void write_range(
    R &writer, size_t idx, bool expandval, string_range sep,
    T const &val, A const &...args
) const {
    if (!idx) {
        /* escaping is inherited through the global flags */
        bool const escape = p_gflags & FMT_FLAG_AT;
        auto const item_fmt = rest();
        write_range_val(writer, [&](auto const &elem) {
            this->write_range_item(
                writer, escape, expandval, item_fmt, elem
            );
        }, sep, val);
        return;
    }
    if constexpr(sizeof...(A) == 0) {
        throw format_error{"not enough format arguments"};
    } else {
        write_range(writer, idx - 1, expandval, sep, args...);
    }
}
template<size_t I, size_t N, typename R, typename T>
void write_tuple_val(
R &writer, bool escape, string_range sep, T const &tup
) const {
format_spec sp{'s', p_loc, escape ? FMT_FLAG_AT : 0};
sp.write_arg(writer, 0, std::get<I>(tup));
if constexpr(I < (N - 1)) {
range_put_all(writer, sep);
write_tuple_val<I + 1, N>(writer, escape, sep, tup);
}
}
/* Picks the argument at zero-based position `idx` of the pack made of
 * `val` followed by `args` and formats it as a tuple: its members are
 * unpacked and used as the arguments of this spec's own format string.
 *
 * Throws format_error when the pack runs out before the index does or
 * when the selected argument is not tuple-like.
 */
template<typename R, typename T, typename ...A>
void write_tuple(
    R &writer, size_t idx, T const &val, A const &...args
) {
    if (!idx) {
        if constexpr(!detail::is_tuple_like<T>) {
            throw format_error{"invalid value for tuple format"};
        } else {
            std::apply([this, &writer](auto const &...members) {
                this->write_fmt(writer, members...);
            }, val);
        }
        return;
    }
    if constexpr(sizeof...(A) == 0) {
        throw format_error{"not enough format arguments"};
    } else {
        write_tuple(writer, idx - 1, args...);
    }
}
/* Main formatting loop: processes the format string one spec at a time
 * and writes the matching argument(s) from `args` into `writer`.
 *
 * Argument positions are 1-based in here and converted to zero-based
 * indices (position - 1) only when handed to the argument accessors.
 * A spec without an explicit position takes the next sequential
 * argument; width and precision taken from arguments consume positions
 * of their own.
 *
 * Throws format_error when a positional spec leaves no room for its
 * width or precision argument; the helpers called from here throw the
 * same type for missing or unsuitable arguments.
 */
template<typename R, typename ...A>
void write_fmt(R &writer, A const &...args) {
/* position of the next sequentially consumed argument */
size_t argidx = 1;
/* NOTE(review): read_until_spec is defined outside this view; presumably
 * it copies literal text to the writer, parses the next spec into this
 * object and returns false at the end of the format string - confirm
 */
while (read_until_spec(writer)) {
/* explicit position of this spec, zero when none was given */
size_t argpos = index();
if (is_nested()) {
if (!argpos) {
argpos = argidx++;
} else if (argpos > argidx) {
/* an explicit position past the cursor moves the cursor behind it */
argidx = argpos + 1;
}
/* the nested format gets its own spec sharing our locale and
 * inheriting the at flag through the global flags
 */
format_spec nspec(nested(), p_loc);
nspec.p_gflags |= (p_flags & FMT_FLAG_AT);
if (is_tuple()) {
nspec.write_tuple(writer, argpos - 1, args...);
} else {
/* the hash flag is passed on as the expandval parameter */
nspec.write_range(
writer, argpos - 1, (flags() & FMT_FLAG_HASH),
nested_sep(), args...
);
}
continue;
}
if (!argpos) {
/* sequential: width first, then precision, then the value itself,
 * each taking the next argument in order
 */
argpos = argidx++;
if (arg_width()) {
set_width_arg(argpos - 1, args...);
argpos = argidx++;
}
if (arg_precision()) {
set_precision_arg(argpos - 1, args...);
argpos = argidx++;
}
} else {
/* positional: precision sits right before the value and width
 * right before that, so both are addressed relative to argpos
 */
bool argprec = arg_precision();
if (argprec) {
if (argpos <= 1) {
throw format_error{"argument precision not given"};
}
set_precision_arg(argpos - 2, args...);
}
if (arg_width()) {
if (argpos <= (size_t(argprec) + 1)) {
throw format_error{"argument width not given"};
}
set_width_arg(argpos - 2 - argprec, args...);
}
/* NOTE(review): the cursor only advances when argpos is strictly
 * greater than it, so a position equal to the cursor leaves the
 * cursor in place - confirm this is intended
 */
if (argpos > argidx) {
argidx = argpos + 1;
}
}
write_arg(writer, argpos - 1, args...);
}
}
/* Overload taking no format arguments: whatever read_until_spec does
 * with the writer still happens, but finding a spec is an error as
 * there is nothing to format with it.
 *
 * Throws format_error when read_until_spec reports a spec.
 */
template<typename R, typename ...A>
void write_fmt(R &writer) {
    bool const found_spec = read_until_spec(writer);
    if (!found_spec) {
        return;
    }
    throw format_error{"format spec without format arguments"};
}
/* Output iterator adapter over an output range: assigning a character
 * forwards it to the range's put(), dereferencing and incrementing are
 * no-ops that return the iterator itself.
 */
template<typename R>
struct fmt_out {
    /* the range being written to */
    R *p_out;

    using difference_type = std::char_traits<char>::off_type;
    using iterator_category = std::output_iterator_tag;
    using reference = char &;
    using pointer = char *;
    using value_type = char;

    fmt_out &operator*() { return *this; }
    fmt_out &operator++() { return *this; }
    fmt_out &operator++(int) { return *this; }

    /* the only operation with an effect: write one character */
    fmt_out &operator=(char c) {
        p_out->put(c);
        return *this;
    }
};
/* std::num_put specialization that writes through fmt_out<R>. It adds
 * nothing but a public destructor, which is what allows write_float to
 * create it as a plain local object.
 */
template<typename R>
struct fmt_num_put final: std::num_put<char, fmt_out<R>> {
    fmt_num_put(size_t refs = 0): std::num_put<char, fmt_out<R>>{refs} {}
    ~fmt_num_put() = default;
};
string_range p_fmt;
std::locale p_loc;
char p_buf[32];
};
/** @brief Formats into an output range using a format string and arguments.
*
* Uses the default constructed std::locale (the current global locale)
* for locale specific formatting. There is also a version that takes an
* explicit locale.
*
* This is just a simple wrapper, equivalent to:
*
* ~~~{.cc}
* return ostd::format_spec{fmt}.format(std::forward<R>(writer), args...);
* ~~~
*/
template<typename R, typename ...A>
inline R &&format(R &&writer, string_range fmt, A const &...args) {
return format_spec{fmt}.format(std::forward<R>(writer), args...);
}
/** @brief Formats into an output range using a format string and arguments.
*
* This version uses `loc` as a locale. There is also a version that uses
* the global locale by default.
*
* This is just a simple wrapper, equivalent to:
*
* ~~~{.cc}
* return ostd::format_spec{fmt, loc}.format(std::forward<R>(writer), args...);
* ~~~
*/
template<typename R, typename ...A>
inline R &&format(
R &&writer, std::locale const &loc, string_range fmt, A const &...args
) {
return format_spec{fmt, loc}.format(std::forward<R>(writer), args...);
}
/** @} */
} /* namespace ostd */
#endif
/** @} */