From d4edd425961ea427739421cadb37213006ee264b Mon Sep 17 00:00:00 2001 From: q66 Date: Sun, 2 Apr 2017 18:24:10 +0200 Subject: [PATCH] detailed docs for format module --- ostd/format.hh | 514 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 500 insertions(+), 14 deletions(-) diff --git a/ostd/format.hh b/ostd/format.hh index bac99d9..3ecbc30 100644 --- a/ostd/format.hh +++ b/ostd/format.hh @@ -1,6 +1,17 @@ -/* Format strings for OctaSTD. Inspired by D's std.format module. +/** @addtogroup Strings + * @{ + */ + +/** @file format.hh * - * This file is part of OctaSTD. See COPYING.md for futher information. + * @brief APIs for type safe formatting using C-style format strings. + * + * OctaSTD provides a powerful formatting system that lets you format into + * arbitrary output ranges using C-style format strings. It's type safe + * and supports custom object formatting without heap allocations as well + * as formatting of ranges, tuples and more. + * + * @copyright See COPYING.md in the project tree for further information. */ #ifndef OSTD_FORMAT_HH @@ -21,22 +32,64 @@ namespace ostd { +/** @addtogroup Strings + * @{ + */ + +/** @brief An enumeration defining flags for C-style formatting marks. + * + * Used inside ostd::format_spec. The C-style formatting mark has a flags + * section and each of these enum items represents one. They can be combined + * using the standard bitwise operators. + */ enum format_flags { - FMT_FLAG_DASH = 1 << 0, - FMT_FLAG_ZERO = 1 << 1, - FMT_FLAG_SPACE = 1 << 2, - FMT_FLAG_PLUS = 1 << 3, - FMT_FLAG_HASH = 1 << 4, - FMT_FLAG_AT = 1 << 5 + FMT_FLAG_DASH = 1 << 0, ///< The dash (`-`) flag. + FMT_FLAG_ZERO = 1 << 1, ///< The zero (`0`) flag. + FMT_FLAG_SPACE = 1 << 2, ///< The space (` `) flag. + FMT_FLAG_PLUS = 1 << 3, ///< The plus (`+`) flag. + FMT_FLAG_HASH = 1 << 4, ///< The hash (`#`) flag. + FMT_FLAG_AT = 1 << 5 ///< The at (`@`) flag. }; +/** @brief Thrown when format string does not properly match the arguments. 
*/ struct format_error: std::runtime_error { using std::runtime_error::runtime_error; }; struct format_spec; -/* empty by default, SFINAE friendly */ +/** @brief Specialize this to format custom objects. + * + * The formatting system provides a way to format arbitrary objects. By default + * it's empty as all the formatting logic is builtin. To specialize for your + * own object, you simply do this: + * + * ~~~{.cc} + * template<> + * struct format_traits<foo> { + * template<typename R> + * static void to_format(foo const &v, R &writer, ostd::format_spec const &fs) { + * // custom formatting here + * // writer is just an output range (see ostd::output_range) + * } + * }; + * ~~~ + * + * Obviously, you can pass through the formatting, for example when your type + * contains a member and you want to format your type exactly as if it was + * the member, you just put this in your `to_format`: + * + * ~~~{.cc} + * fs.format_value(writer, v.my_member); + * ~~~ + * + * Anything that writes into the output range will do. The output range is + * exactly the same output range the outer format call is formatting into, + * so for example when someone is formatting into an ostd::appender_range, + * it will be just that. + * + * This may be specialized in other OctaSTD modules as well. + */ template struct format_traits {}; @@ -181,27 +234,270 @@ namespace detail { constexpr bool fmt_tofmt_test = decltype(test_tofmt(0))::value; } +/** @brief A structure implementing type safe C-style formatting. + * + * It can be constructed either to represent a specific format specifier or + * with a format string to format an entire string (in which case it will + * parse the string and format it with the individual intermediate markings). + * + * It stores information about the current format specifier (when constructed + * as one or when parsing the format string) as well as the rest of the current + * format string. See read_until_spec() and rest() for more information.
+ * + * # Regular format specifiers + * + * The formatter is considerably more elaborate than C-style printf. Its + * basic format specifiers superficially look the same: + * + * ~~~ + * %[position$][flags][width][.precision]specifier + * ~~~ + * + * Position is the optional position of the argument in the pack starting + * with 1. It can be mixed with format specifiers without explicit position, + * unlike what POSIX says; the next specifier without explicit position + * will use the position after the largest explicit position used so far. + * For example, `%3$s %1$s %s` will use position 4 for the last specifier. + * + * ## Flags + * + * * The `-` flag will left-justify within the given width (right by default). + * * The `+` flag applies to numbers and will force sign to always show, even + * for positive numbers (by default, only negative ones get a sign). + * * The ` ` (space) flag applies to numbers and will force a space to be + * written in place of sign for positive numbers (no effect with `+`). + * * The `#` flag applies to integers, floats and ranges. For integers, it + * will add the prefixes `0x`, `0X`, `0b` or `0B` when formatted as hex + * or binary, lowercase or uppercase. For floats, it will force the output + * to always contain a decimal point/comma. For ranges, it will cause + * automatic expansion of values into items if the values are tuples. + * * The `@` flag will escape the value according to the rules. + * * The '0' flag will left-pad numbers with zeroes instead of spaces when + * needed (according to width). + * + * ## Width + * + * Width can be specified either as a number in the format string or as `*` + * in which case it will be an integer argument (any integral type, must be + * equal or larger than zero, otherwise ostd::format_error is thrown). When + * an argument, the position of the argument is where the actual value should + * have been if no argument was used, and the actual value follows. 
The same + * applies with explicit positions. + * + * Width defines the minimum number of characters to be printed. If the value + * ends up being shorter, it's padded with spaces (or zeroes when formatting + * a number and the zero flag is used). The value is not truncated if it's + * actually longer than the width. + * + * ## Precision + * + * Precision can also be specified as a number or as an argument. When both + * width and precision are arguments, width is first. For integers, it + * specifies the minimum number of digits to be written. If the value is + * shorter than the precision, the result is padded with leading zeroes. + * If it's longer, no truncation happens. A precision of 0 means that no + * character is written for the value 0. For floats, it's the number of + * digits to be written after the decimal point or comma. When not specified, + * it's 6. For strings, it's the maximum number of characters to be printed. + * By default all characters are printed. When escaping strings, the quotes + * are not counted into the precision and escape sequences count as a single + * character. + * + * # Range formatting + * + * The system also allows advanced formatting for ranges. The specifier + * then looks different: + * + * ~~~ + * %[flags](contents%) + * ~~~ + * + * The `contents` is a format specifier for each item of the range followed + * by a separator. For example: + * + * ~~~ + * %(%s, %) + * ~~~ + * + * In this case, `%s` is the specifier and `, ` is the separator. You can + * also explicitly delimit the separator: + * + * ~~~ + * %(%s%|, %) + * ~~~ + * + * The first part is used to format items and the separator is put between + * every two adjacent items. + * + * Two flags are used by this format. Normally, each item of the range is + * formatted as is, using a single specifier, even if the item is a tuple-like + * value. Using the `#` flag you can expand tuple-like items into multiple + * values.
So when formatting a range over an associative map, you can do this: + * + * ~~~ + * %#(%s: %s%|, %) + * ~~~ + * + * to format key and value separately. + * + * You can also use the `@` flag. It will cause the `@` flag to be applied to + * every item of the range, therefore escaping each one. Nested range formats + * are also affected by this. There is no way to unapply the flag once you + * set it. + * + * # Tuple formatting + * + * Additionally, the system also supports advanced formatting for tuples. + * The syntax is similar: + * + * ~~~ + * %[flags]<contents%> + * ~~~ + * + * There are no delimiters here. The `contents` is simply a regular format + * string, with a format specifier for each tuple item. + * + * You can use the `@` flag just like you can use it with ranges. No other + * flag can be used when formatting tuples. + * + * # Specifiers + * + * Now for the basic specifiers themselves: + * + * * `a`, `A` - hexadecimal float like C printf (lowercase, uppercase). + * * `b`, `B` - binary integers (lowercase, uppercase). + * * `c` - character values. + * * `d` - decimal integers (signed or unsigned). + * * `e`, `E` - floats in scientific notation (lowercase, uppercase). + * * `f`, `F` - decimal floating point (lowercase, uppercase). + * * `g`, `G` - shortest representation (`e`/`E` or `f`/`F`). + * * `o` - octal integers (signed or unsigned). + * * `s` - any value with its default format. + * * `x`, `X` - hexadecimal integers (lowercase, uppercase, signed or unsigned). + * + * You can use the `s` specifier to format any value that can be formatted + * at no extra cost. Because the system is type safe, how a value is meant + * to be formatted is decided from the type that is passed in, not the format + * specifier itself. + * + * All letters (uppercase and lowercase) are available for custom formatting.
+ * + * # Format order and rules + * + * The rules for formatting values go as follows: + * + * * First it's checked whether the value can be custom formatted using a + * specialization of ostd::format_traits. If it can, it's formatted using + * that, the current `format_spec` is passed in as it is and no extra + * checks are made. Any letter can be used to format custom objects. + * * Then it's checked if the value is convertible to ostd::string_range. + * If it is, it's formatted as a string. Only the `s` specifier is allowed. + * * Then it's checked if the value is a tuple-like object. The value is one + * if `std::tuple_size<T>::value` is valid. If it is, the tuple-like object + * is formatted as `<ITEM, ITEM, ITEM, ...>` by default. The `s` specifier + * must be used to format tuples like this. The items are all + * formatted using the `s` specifier. + * * Then ranges are tested in a similar way. The default format for ranges + * is `{ITEM, ITEM, ITEM, ...}`. The `s` specifier must be used. The items + * are all formatted with `s` too. + * * Then bools are formatted. If the `s` specifier is used, the bool is + * formatted as `true` or `false`. Otherwise it's converted to `int` and + * formatted using the specifier (might error depending on the specifier). + * * Then character values are formatted. The `c` and `s` specifiers are + * allowed. + * * Pointers are formatted then. If the `s` specifier is used, the pointer + * will be formatted as hex with the `0x` prefix. Otherwise it's converted + * to `size_t` and formatted with the specifier (might error depending on + * the specifier). + * * Then integers are formatted. Using the `s` specifier is like using the + * `d` specifier. + * * Floats follow. Using `s` is like using `g`. + * * When everything is exhausted, ostd::format_error is thrown. + * + * # Escaping + * + * String and character values are subject to escaping if the `@` flag is + * used.
Strings are put into double quotes and any unprintable values in + * them are converted into escape sequences. Quotes (single and double) + * are also escaped. Character values are put into single quotes and + * unprintable characters are converted into escape sequences as well. + * For known escape sequences, simple readable versions are used, particularly + * `a`, `b`, `t`, `n`, `v`, `f`, `r`. For other unprintables, the hexadecimal + * escape format is used. + * + * When tuples and ranges are printed with the `s` specifier and the `@` flag + * is used, all of their items are escaped. If the items are tuples or ranges, + * their own items are also escaped. The `@` flag doesn't escape anything else, + * unless you implement support for escaping in your own custom objects. + * + * # Locale awareness + * + * The system also makes use of locales. When formatting integers, thousands + * grouping rules from the locale apply (no matter the base). When formatting + * floats, a locale specific decimal separator is used and thousands grouping + * also applies. + * + * # Errors and other remarks + * + * Because the system is type safe, there is no need to explicitly specify + * type lengths or any such thing. Any integral type and any floating point + * type can be formatted using the right specifiers. + * + * If a specifier is not allowed for a value, ostd::format_error is thrown. + */ struct format_spec { - format_spec(string_range fmt = nullptr): + /** @brief Constructs with a format string and the default locale. + * + * If you use this constructor, there won't be a specific formatting + * specifier set in here so you won't be able to get its properties, + * but you will be able to format into a range with some arguments. + * You can also manually parse the format string, see read_until_spec(). + * + * The locale used here is the default (global) locale. + */ + format_spec(string_range fmt): p_fmt(fmt), p_loc() {} + /** @brief Constructs with a format string and a locale.
+ * + * Like format_spec(string_range), but with an explicit locale. + */ format_spec(string_range fmt, std::locale const &loc): p_fmt(fmt), p_loc(loc) {} - format_spec(std::locale const &loc): - p_fmt(), p_loc(loc) - {} - + /** @brief Constructs a specific format specifier. + * + * See ostd::format_flags for flags. The `spec` argument is the format + * specifier (for example `s`). It doesn't support tuple/range formatting + * nor positional arguments. + * + * Uses the default (global) locale. The locale is then potentially used + * for formatting values. + */ format_spec(char spec, int flags = 0): p_flags(flags), p_spec(spec), p_loc() {} + /** @brief Constructs a specific format specifier with a locale. + * + * Like format_spec(char, int) but uses an explicit locale. + */ format_spec(char spec, std::locale const &loc, int flags = 0): p_flags(flags), p_spec(spec), p_loc(loc) {} + /** @brief Parses the format string if constructed with one. + * + * This reads the format string, writing each character of it into + * `writer`, until it encounters a valid format specifier. It then + * stops there and returns `true`. If no format specifier was read, + * it returns `false`. When a format specifier is read, this structure + * then represents it. + * + * It's used by format() to parse the string. + */ template bool read_until_spec(R &writer) { if (p_fmt.empty()) { @@ -222,71 +518,229 @@ struct format_spec { return false; } + /** @brief Gets the yet not parsed portion of the format string. + * + * If no read_until_spec() was called, this returns the entire format + * string. Otherwise, it returns the format string from the point + * after the format specifier this structure currently represents. + */ string_range rest() const { return p_fmt; } + /** @brief Overrides the currently set locale. + * + * @returns The old locale. 
+ */ std::locale imbue(std::locale const &loc) { std::locale ret{p_loc}; p_loc = loc; return ret; } + /** @brief Retrieves the currently used locale for the format state. */ std::locale getloc() const { return p_loc; } + /** @brief Gets the width of the format specifier. + * + * If explicitly specified (say `%5s`) it will return the number that + * was in the format specifier. If explicitly set with set_width(), + * it will return that. If not set at all, it will return 0. + * + * @see has_width(), precision() + */ int width() const { return p_width; } + + /** @brief Gets the precision of the format specifier. + * + * If explicitly specified (say `%.5f`) it will return the number that + * was in the format specifier. If explicitly set with set_precision(), + * it will return that. If not set at all, it will return 0. + * + * @see has_precision(), width() + */ int precision() const { return p_precision; } + /** @brief Gets whether a width was specified somehow. + * + * If the width was provided directly as part of the format specifier + * or with an explicit argument (see set_width()), this will return + * `true`. Otherwise, it will return `false`. + * + * You can get the actual width using width(). + * + * @see has_precision(), arg_width() + */ bool has_width() const { return p_has_width; } + + /** @brief Gets whether a precision was specified somehow. + * + * If the precision was provided directly as part of the format specifier + * or with an explicit argument (see set_precision()), this will return + * `true`. Otherwise, it will return `false`. + * + * You can get the actual precision using precision(). + * + * @see has_width(), arg_precision() + */ bool has_precision() const { return p_has_precision; } + /** @brief Gets whether a width was specified as an explicit argument. + * + * This is true if the width was specified using `*` in the format + * specifier. Also set by set_width(size_t, A const &...).
+ * + * @see has_width() + */ bool arg_width() const { return p_arg_width; } + + /** @brief Gets whether a precision was specified as an explicit argument. + * + * This is true if the precision was specified using `*` in the format + * specifier. Also set by set_precision(size_t, A const &...). + * + * @see has_precision() + */ bool arg_precision() const { return p_arg_precision; } + /** @brief Sets the width from an argument pack. + * + * The `idx` parameter specifies the index (starting with 0) of the + * width argument in the followup pack. + * + * The return value of width() will then be the argument's value. + * It will also make has_width() and arg_width() return true (if + * they previously didn't). + * + * @throws ostd::format_error when `idx` is out of bounds or the argument + * has an invalid type. + * + * @see set_width(int), set_precision(size_t, A const &...) + */ template void set_width(size_t idx, A const &...args) { p_width = detail::get_arg_param(idx, args...); p_has_width = p_arg_width = true; } + /** @brief Sets the width to an explicit number. + * + * The return value of width() will then be the given value. It will + * also make has_width() return true and arg_width() return false. + * + * @see set_width(size_t, A const &...), set_precision(int) + */ void set_width(int v) { p_width = v; p_has_width = true; p_arg_width = false; } + /** @brief Sets the precision from an argument pack. + * + * The `idx` parameter specifies the index (starting with 0) of the + * precision argument in the followup pack. + * + * The return value of precision() will then be the argument's value. + * It will also make has_precision() and arg_precision() return true (if + * they previously didn't). + * + * @throws ostd::format_error when `idx` is out of bounds or the argument + * has an invalid type.
+ * + * @see set_precision(int), set_width(size_t, A const &...) + */ template void set_precision(size_t idx, A const &...args) { p_precision = detail::get_arg_param(idx, args...); p_has_precision = p_arg_precision = true; } + /** @brief Sets the precision to an explicit number. + * + * The return value of precision() will then be the given value. It will + * also make has_precision() return true and arg_precision() return false. + * + * @see set_precision(size_t, A const &...), set_width(int) + */ void set_precision(int v) { p_precision = v; p_has_precision = true; p_arg_precision = false; } + /** @brief Gets the combination of flags for the current specifier. */ int flags() const { return p_flags; } + /** @brief Gets the base char for the specifier. */ char spec() const { return p_spec; } + /** @brief Gets the position of the matching argument in the pack. + * + * This applies for when the position in the format specifier was + * explicitly set (for example `%5$s` will have index 5) to refer + * to a specific argument in the pack. Keep in mind that these + * start with 1 (1st argument, 5th argument etc) to match the POSIX + * conventions on this. If the position was not specified, this just + * returns 0. + */ byte index() const { return p_index; } + /** @brief Gets the inner part of a range or tuple format specifier. + * + * For ranges, this does not include the separator, you need to use + * nested_sep() to get the separator. For example, given the + * `%(%s, %)` specifier, this returns `%s` and for `%(%s%|, %)` + * it returns the same. When formatting tuples, this behaves identically, + * for example for `%<%s, %f%>` this returns `%s, %f`. For simple + * specifiers this returns an empty slice. + */ string_range nested() const { return p_nested; } + + /** @brief Gets the separator of a complex range format specifier. + * + * For example for `%(%s, %)` this returns `, `.
With an explicit + * delimiter, for example for `%(%s%|, %)`, this returns the same + * thing as well. For simple specifiers and tuple specifiers this + * returns an empty slice. + */ string_range nested_sep() const { return p_nested_sep; } + /** @brief Returns true if this specifier is for a tuple. */ bool is_tuple() const { return p_is_tuple; } + + /** @brief Returns true if this specifier is for a tuple or a range. */ bool is_nested() const { return p_is_nested; } + /** @brief Formats into a range with the given arguments. + * + * When a valid format string is currently present, this formats + * into the given range using that format string and the provided + * arguments. + * + * @throws ostd::format_error when the format string and args don't match. + * + * @see format_value() + */ template R &&format(R &&writer, A const &...args) { write_fmt(writer, args...); return std::forward(writer); } + /** @brief Formats a single value into a range. + * + * When this currently represents a valid format specifier, you can + * use this to format a single value with that specifier. This is very + * useful for example when formatting custom objects, see the example + * in ostd::format_traits. + * + * @throws ostd::format_error when the specifier and the value don't match. + * + * @see format() + */ template R &&format_value(R &&writer, T const &val) const { write_arg(writer, 0, val); @@ -907,6 +1361,8 @@ private: if (is_nested()) { if (!argpos) { argpos = argidx++; + } else if (argpos > argidx) { + argidx = argpos + 1; } format_spec nspec(nested(), p_loc); nspec.p_gflags |= (p_flags & FMT_FLAG_AT); @@ -944,6 +1400,9 @@ private: } set_width(argpos - 2 - argprec, args...); } + if (argpos > argidx) { + argidx = argpos + 1; + } } write_arg(writer, argpos - 1, args...); } @@ -987,11 +1446,34 @@ private: char p_buf[32]; }; +/** @brief Formats into an output range using a format string and arguments. 
+ * + * Uses the default constructed std::locale (the current global locale) + * for locale specific formatting. There is also a version that takes an + * explicit locale. + * + * This is just a simple wrapper, equivalent to: + * + * ~~~{.cc} + * return ostd::format_spec{fmt}.format(std::forward<R>(writer), args...); + * ~~~ + */ template inline R &&format(R &&writer, string_range fmt, A const &...args) { return format_spec{fmt}.format(std::forward(writer), args...); } +/** @brief Formats into an output range using a format string and arguments. + * + * This version uses `loc` as a locale. There is also a version that uses + * the global locale by default. + * + * This is just a simple wrapper, equivalent to: + * + * ~~~{.cc} + * return ostd::format_spec{fmt, loc}.format(std::forward<R>(writer), args...); + * ~~~ + */ template inline R &&format( R &&writer, std::locale const &loc, string_range fmt, A const &...args @@ -999,6 +1481,10 @@ inline R &&format( return format_spec{fmt, loc}.format(std::forward(writer), args...); } +/** @} */ + } /* namespace ostd */ #endif + +/** @} */