detailed docs for format module

master
Daniel Kolesa 2017-04-02 18:24:10 +02:00
parent 6d0c0a91f4
commit d4edd42596
1 changed files with 500 additions and 14 deletions

View File

@ -1,6 +1,17 @@
/* Format strings for OctaSTD. Inspired by D's std.format module.
/** @addtogroup Strings
* @{
*/
/** @file format.hh
*
* This file is part of OctaSTD. See COPYING.md for futher information.
* @brief APIs for type safe formatting using C-style format strings.
*
* OctaSTD provides a powerful formatting system that lets you format into
* arbitrary output ranges using C-style format strings. It's type safe
* and supports custom object formatting without heap allocations as well
* as formatting of ranges, tuples and more.
*
* @copyright See COPYING.md in the project tree for further information.
*/
#ifndef OSTD_FORMAT_HH
@ -21,22 +32,64 @@
namespace ostd {
/** @addtogroup Strings
* @{
*/
/** @brief An enumeration defining flags for C-style formatting marks.
*
* Used inside ostd::format_spec. The C-style formatting mark has a flags
* section and each of these enum items represents one. They can be combined
* using the standard bitwise operators.
*/
enum format_flags {
FMT_FLAG_DASH = 1 << 0,
FMT_FLAG_ZERO = 1 << 1,
FMT_FLAG_SPACE = 1 << 2,
FMT_FLAG_PLUS = 1 << 3,
FMT_FLAG_HASH = 1 << 4,
FMT_FLAG_AT = 1 << 5
FMT_FLAG_DASH = 1 << 0, ///< The dash (`-`) flag.
FMT_FLAG_ZERO = 1 << 1, ///< The zero (`0`) flag.
FMT_FLAG_SPACE = 1 << 2, ///< The space (` `) flag.
FMT_FLAG_PLUS = 1 << 3, ///< The plus (`+`) flag.
FMT_FLAG_HASH = 1 << 4, ///< The hash (`#`) flag.
FMT_FLAG_AT = 1 << 5 ///< The at (`@`) flag.
};
/** @brief Thrown when format string does not properly match the arguments. */
struct format_error: std::runtime_error {
using std::runtime_error::runtime_error;
};
struct format_spec;
/* empty by default, SFINAE friendly */
/** @brief Specialize this to format custom objects.
*
* The formatting system provides a way to format arbitrary objects. By default
* it's empty as all the formatting logic is builtin. To specialize for your
* own object, you simply do this:
*
* ~~~{.cc}
* template<>
* struct format_traits<foo> {
* template<typename R>
* static void to_format(foo const &v, R &writer, ostd::format_spec const &fs) {
* // custom formatting here
* // writer is just an output range (see ostd::output_range)
* }
* };
* ~~~
*
* Obviously, you can passthrough the formatting, for example when your type
* contains a member and you want to format your type exactly as if it was
* the member, you just put this in your `to_format`:
*
* ~~~{.cc}
* fs.format_value(writer, v.my_member);
* ~~~
*
* Anything that writes into the output range will do. The output range is
* exactly the same output range the outer format call is formatting into,
* so for example when someone is formatting into an ostd::appender_range,
* it will be just that.
*
* This may be specialized in other OctaSTD modules as well.
*/
template<typename>
struct format_traits {};
@ -181,27 +234,270 @@ namespace detail {
constexpr bool fmt_tofmt_test = decltype(test_tofmt<T, R>(0))::value;
}
/** @brief A structure implementing type safe C-style formatting.
*
* It can be constructed either to represent a specific format specifier or
* with a format string to format an entire string (in which case it will
* parse the string and format it with the individual intermediate markings).
*
* It stores information about the current format specifier (when constructed
* as one or when parsing the format string) as well as the rest of the current
* format string. See read_until_spec() and rest() for more information.
*
* # Regular format specifiers
*
* The formatter is considerably more elaborate than C-style printf. Its
* basic format specifiers superficially look the same:
*
* ~~~
* %[position$][flags][width][.precision]specifier
* ~~~
*
* Position is the optional position of the argument in the pack starting
* with 1. It can be mixed with format specifiers without explicit position,
* unlike what POSIX says; the next specifier without explicit position
* will use the position after the largest explicit position used so far.
* For example, `%3$s %1$s %s` will use position 4 for the last specifier.
*
* ## Flags
*
* * The `-` flag will left-justify within the given width (right by default).
* * The `+` flag applies to numbers and will force sign to always show, even
* for positive numbers (by default, only negative ones get a sign).
* * The ` ` (space) flag applies to numbers and will force a space to be
* written in place of sign for positive numbers (no effect with `+`).
* * The `#` flag applies to integers, floats and ranges. For integers, it
* will add the prefixes `0x`, `0X`, `0b` or `0B` when formatted as hex
* or binary, lowercase or uppercase. For floats, it will force the output
* to always contain a decimal point/comma. For ranges, it will cause
* automatic expansion of values into items if the values are tuples.
* * The `@` flag will escape the value according to the rules.
* * The '0' flag will left-pad numbers with zeroes instead of spaces when
* needed (according to width).
*
* ## Width
*
* Width can be specified either as a number in the format string or as `*`
* in which case it will be an integer argument (any integral type, must be
* equal or larger than zero, otherwise ostd::format_error is thrown). When
* an argument, the position of the argument is where the actual value should
* have been if no argument was used, and the actual value follows. The same
* applies with explicit positions.
*
* Width defines the minimum number of characters to be printed. If the value
* ends up being shorter, it's padded with spaces (or zeroes when formatting
* a number and the zero flag is used). The value is not truncated if it's
* actually longer than the width.
*
* ## Precision
*
* Precision can also be specified as a number or as an argument. When both
* width and precision are an argument, width is first. For integers, it
* specifies the default number of digits to be written. If the value is
* shorter than the precision, the result is padded with leading zeroes.
* If it's longer, no truncation happens. A precision of 0 means that no
* character is written for the value 0. For floats, it's the number of
* digits to be written after decimal point or comma. When not specified,
* it's 6. For strings, it's the maximum number of characters to be printed.
* By default all characters are printed. When escaping strings, the quotes
* are not counted into the precision and escape sequences count as a single
* character.
*
* # Range formatting
*
* The system also allows advanced formatting for ranges. The specifier
* then looks different:
*
* ~~~
* %[flags](contents%)
* ~~~
*
* The `contents` is a format specifier for each item of the range followed
* by a separator. For example:
*
* ~~~
* %(%s, %)
* ~~~
*
* In this case, `%s` is the specifier and `, ` is the separator. You can
* also explicitly delimit the separator:
*
* ~~~
* %(%s%|, %)
* ~~~
*
* The first part is used to format items and the separator is put between
* each two items.
*
* Two flags are used by this format. Normally, each item of the range is
* formatted as is, using a single specifier, even if the item is a tuple-like
* value. Using the `#` flag you can expand tuple-like items into multiple
* values. So when formatting a range over an associative map, you can do this:
*
* ~~~
* %#(%s: %s%|, %)
* ~~~
*
* to format key and value separately.
*
* You can also use the `@` flag. It will cause the `@` flag to be applied to
* every item of the range, therefore escaping each one. Nested range formats
* are also affected by this. There is no way to unapply the flag once you
* set it.
*
* # Tuple formatting
*
* Additionally, the system also supports advanced formatting for tuples.
* The syntax is similar:
*
* ~~~
* %[flags]<contents%>
* ~~~
*
* There are no delimiters here. The `contents` is simply a regular format
* string, with a format specifier for each tuple item.
*
* You can use the `@` flag just like you can use it with ranges. No other
* flag can be used when formatting tuples.
*
* # Specifiers
*
* Now for the basic specifiers themselves:
*
* * `a`, `A` - hexadecimal float like C printf (lowercase, uppercase).
* * `b`, `B` - binary integers (lowercase, uppercase).
* * `c` - character values.
* * `d` - decimal integers (signed or unsigned).
* * `e`, `E` - floats in scientific notation (lowercase, uppercase).
* * `f`, `F` - decimal floating point (lowercase, uppercase).
* * `g`, `G` - shortest representation (`e`/`E` or `f`/`F`).
* * `o` - octal octal (signed or unsigned)
* * `s` - any value with its default format
* * `x`, `X` - hexadecimal integers (lowercase, uppercase, signed or unsigned).
*
* You can use the `s` specifier to format any value that can be formatted
* at no extra cost. Because the system is type safe, how a value is meant
* to be formatted is decided from the type that is passed in, not the format
* specifier itself.
*
* All letters (uppercase and lowercase) are available for custom formatting.
*
* # Format order and rules
*
* The rules for formatting values go as follows:
*
* * First it's checked whether the value can be custom formatted using a
* specialization of ostd::format_traits. If it can, it's formatted using
* that, the current `format_spec` is passed in as it is and no extra
* checks are made. Any letter can be used to format custom objects.
* * Then it's checked if the value is convertible to ostd::string_range.
* If it is, it's formatted as a string. Only the `s` specifier is allowed.
* * Then it's checked if the value is a tuple-like object. The value is one
* if `std::tuple_size<T>::value` is valid. If it is, the tuple-like object
* is formatted as `<ITEM, ITEM, ITEM, ...>` by default. You need to use
* the `s` specifier only to format tuples like this. The items are all
* formatted using the `s` specifier.
* * Then ranges are tested in a similar way. The default format for ranges
* is `{ITEM, ITEM, ITEM, ...}`. The `s` specifier must be used. The items
* are all formatted with `s` too.
* * Then bools are formatted. If the `s` specifier is used, the bool is
* formatted as `true` or `false`. Otherwise it's converted to `int` and
* formatted using the specifier (might error depending on the specifier).
* * Then character values are formatted. The `c` and `s` specifiers are
* allowed.
* * Pointers are formatted then. If the `s` specifier is used, the pointer
* will be formatted as hex with the `0x` prefix. Otherwise it's converted
* to `size_t` and formatted with the specifier (might error depending on
* the specifier).
* * Then integers are formatted. Using the `s` specifier is like using the
* `d` specifier.
* * Floats follow. Using `s` is like using `g`.
* * When everything is exhausted, ostd::format_error is thrown.
*
* # Escaping
*
* String and character values are subject to escaping if the `@` flag is
* used. Strings are put into double quotes and any unprintable values in
* them are converted into escape sequences. Quotes (single and double)
* are also escaped. Character values are put into single quotes and
* unprintable characters are converted into escape sequences as well.
* For known escape sequences, simple readable versions are used, particularly
* `a`, `b`, `t`, `n`, `v`, `f`, `r`. For other unprintables, the hexadecimal
* escape format is used.
*
* When printing tuples and ranges with the `s` specifier and the `@` flag
* is used, all of their items are escaped. If the items are tuples or ranges,
* their own items are also escaped. The `@` flag doesn't escape anything else,
* unless you implement support for escaping in your own custom objects.
*
* # Locale awareness
*
* The system also makes use of locales. When formatting integers, thousands
* grouping rules from the locale apply (no matter the base). When formatting
* floats, a locale specific decimal separator is used and thousands grouping
* also applies.
*
* # Errors and other remarks
*
* Bceause the system is type safe, there is no need to explicitly specify
* type lengths or any such thing. Any integral type and any floating point
* type can be formatted using the right specifiers.
*
* If a specifier is not allowed for a value, ostd::format_error is thrown.
*/
struct format_spec {
format_spec(string_range fmt = nullptr):
/** @brief Constructs with a format string and the default locale.
*
* If you use this constructor, there won't be a specific formatting
* specifier set in here so you won't be able to get its properties,
* but you will be able to format into a range with some arguments.
* You can also manually parse the format string, see read_until_spec().
*
* The locale used here is the default (global) locale.
*/
format_spec(string_range fmt):
p_fmt(fmt), p_loc()
{}
/** @brief Constructs with a format string and a locale.
*
* Like format_spec(string_range), but with an explicit locale.
*/
format_spec(string_range fmt, std::locale const &loc):
p_fmt(fmt), p_loc(loc)
{}
format_spec(std::locale const &loc):
p_fmt(), p_loc(loc)
{}
/** @brief Constructs a specific format specifier.
*
* See ostd::format_flags for flags. The `spec` argument is the format
* specifier (for example `s`). It doesn't support tuple/range formatting
* nor positional arguments.
*
* Uses the default (global) locale. The locale is then potentially used
* for formatting values.
*/
format_spec(char spec, int flags = 0):
p_flags(flags), p_spec(spec), p_loc()
{}
/** @brief Constructs a specific format specifier with a locale.
*
* Like format_spec(char, int) but uses an explicit locale.
*/
format_spec(char spec, std::locale const &loc, int flags = 0):
p_flags(flags), p_spec(spec), p_loc(loc)
{}
/** @brief Parses the format string if constructed with one.
*
* This reads the format string, writing each character of it into
* `writer`, until it encounters a valid format specifier. It then
* stops there and returns `true`. If no format specifier was read,
* it returns `false`. When a format specifier is read, this structure
* then represents it.
*
* It's used by format() to parse the string.
*/
template<typename R>
bool read_until_spec(R &writer) {
if (p_fmt.empty()) {
@ -222,71 +518,229 @@ struct format_spec {
return false;
}
/** @brief Gets the yet not parsed portion of the format string.
*
* If no read_until_spec() was called, this returns the entire format
* string. Otherwise, it returns the format string from the point
* after the format specifier this structure currently represents.
*/
string_range rest() const {
return p_fmt;
}
/** @brief Overrides the currently set locale.
*
* @returns The old locale.
*/
std::locale imbue(std::locale const &loc) {
std::locale ret{p_loc};
p_loc = loc;
return ret;
}
/** @brief Retrieves the currently used locale for the format state. */
std::locale getloc() const {
return p_loc;
}
/** @brief Gets the width of the format specifier.
*
* If explicitly specified (say `%5s`) it will return the number that
* was in the format specifier. If explicitly set with set_width(),
* it will return that. If not set at all, it will return 0.
*
* @see has_width(), precision()
*/
int width() const { return p_width; }
/** @brief Gets the precision of the format specifier.
*
* If explicitly specified (say `%.5f`) it will return the number that
* was in the format specifier. If explicitly set with set_precision(),
* it will return that. If not set at all, it will return 0.
*
* @see has_precision(), width()
*/
int precision() const { return p_precision; }
/** @brief Gets whether a width was specified somehow.
*
* If the width was provided direclty as part of the format specifier
* or with an explicit argument (see set_width()), this will return
* `true`. Otherwise, it will return `false`.
*
* You can get the actual width using width().
*
* @see has_precision(), arg_width()
*/
bool has_width() const { return p_has_width; }
/** @brief Gets whether a precision was specified somehow.
*
* If the precision was provided direclty as part of the format specifier
* or with an explicit argument (see set_precision()), this will return
* `true`. Otherwise, it will return `false`.
*
* You can get the actual width using precision().
*
* @see has_width(), arg_precision()
*/
bool has_precision() const { return p_has_precision; }
/** @brief Gets whether a width was specified as an explicit argument.
*
* This is true if the width was specified using `*` in the format
* specifier. Also set by set_width(size_t, A const &...).
*
* @see has_width()
*/
bool arg_width() const { return p_arg_width; }
/** @brief Gets whether a precision was specified as an explicit argument.
*
* This is true if the precision was specified using `*` in the format
* specifier. Also set by set_precision(size_t, A const &...).
*
* @see has_width()
*/
bool arg_precision() const { return p_arg_precision; }
/** @brief Sets the width from an argument pack.
*
* The `idx` parameter specifies the index (starting with 0) of the
* width argument in the followup pack.
*
* The return value of width() will then be the argument's value.
* It will also make has_width() and arg_width() return true (if
* they previously didn't).
*
* @throws ostd::format_error when `idx` is out of bounds or the argument
* has an invalid type.
*
* @see set_width(int), set_precision(size_t, A const &...);
*/
template<typename ...A>
void set_width(size_t idx, A const &...args) {
p_width = detail::get_arg_param(idx, args...);
p_has_width = p_arg_width = true;
}
/** @brief Sets the width to an explicit number.
*
* The return value of width() will then be the given value. It will
* also make has_width() return true and arg_width() return false.
*
* @see set_width(size_t, A const &...) set_precision(int)
*/
void set_width(int v) {
p_width = v;
p_has_width = true;
p_arg_width = false;
}
/** @brief Sets the precision from an argument pack.
*
* The `idx` parameter specifies the index (starting with 0) of the
* precision argument in the followup pack.
*
* The return value of precision() will then be the argument's value.
* It will also make has_precision() and arg_precision() return true (if
* they previously didn't).
*
* @throws ostd::format_error when `idx` is out of bounds or the argument
* has an invalid type.
*
* @see set_precision(int), set_width(size_t, A const &...);
*/
template<typename ...A>
void set_precision(size_t idx, A const &...args) {
p_precision = detail::get_arg_param(idx, args...);
p_has_precision = p_arg_precision = true;
}
/** @brief Sets the precision to an explicit number.
*
* The return value of precision() will then be the given value. It will
* also make has_precision() return true and arg_precision() return false.
*
* @see set_precision(size_t, A const &...) set_with(int)
*/
void set_precision(int v) {
p_precision = v;
p_has_precision = true;
p_arg_precision = false;
}
/** @brief Gets the combination of flags for the current specifier. */
int flags() const { return p_flags; }
/** @brief Gets the base char for the specifier. */
char spec() const { return p_spec; }
/** @brief Gets the position of the matching argument in the pack.
*
* This applies for when the position in the format specifier was
* explicitly set (for example `%5$s` will have index 5) to refer
* to a specific argument in the pack. Keep in mind that these
* start with 1 (1st argument, 5th argument etc) to match the POSIX
* conventions on this. If the position was not specified, this just
* returns 0.
*/
byte index() const { return p_index; }
/** @brief Gets the inner part of a range or tuple format specifier.
*
* For ranges, this does not include the separator, you need to use
* nested_sep() to get the separator. For example, given the
* `%(%s, %)` specifier, this returns `%s` and for `%(%s%|, %)`
* it returns the same. When formatting tuples, this behaves identically,
* for example for `%<%s, %f%>` this returns `%s, %f`. For simple
* specifiers this returns an empty slice.
*/
string_range nested() const { return p_nested; }
/** @brief Gets the separator of a complex range format specifier.
*
* For example for `%(%s, %)` this returns `, `. With an explicit
* delimiter, for example for `%(%s%|, %)`, this returns the same
* thing as well. For simple specifiers and tuple specifiers this
* returns an empty slice.
*/
string_range nested_sep() const { return p_nested_sep; }
/** @brief Returns true if this specifier is for a tuple. */
bool is_tuple() const { return p_is_tuple; }
/** @brief Returns true if this specifier is for a tuple or a range. */
bool is_nested() const { return p_is_nested; }
/** @brief Formats into a range with the given arguments.
*
* When a valid format string is currently present, this formats
* into the given range using that format string and the provided
* arguments.
*
* @throws ostd::format_error when the format string and args don't match.
*
* @see format_value()
*/
template<typename R, typename ...A>
R &&format(R &&writer, A const &...args) {
write_fmt(writer, args...);
return std::forward<R>(writer);
}
/** @brief Formats a single value into a range.
*
* When this currently represents a valid format specifier, you can
* use this to format a single value with that specifier. This is very
* useful for example when formatting custom objects, see the example
* in ostd::format_traits.
*
* @throws ostd::format_error when the specifier and the value don't match.
*
* @see format()
*/
template<typename R, typename T>
R &&format_value(R &&writer, T const &val) const {
write_arg(writer, 0, val);
@ -907,6 +1361,8 @@ private:
if (is_nested()) {
if (!argpos) {
argpos = argidx++;
} else if (argpos > argidx) {
argidx = argpos + 1;
}
format_spec nspec(nested(), p_loc);
nspec.p_gflags |= (p_flags & FMT_FLAG_AT);
@ -944,6 +1400,9 @@ private:
}
set_width(argpos - 2 - argprec, args...);
}
if (argpos > argidx) {
argidx = argpos + 1;
}
}
write_arg(writer, argpos - 1, args...);
}
@ -987,11 +1446,34 @@ private:
char p_buf[32];
};
/** @brief Formats into an output range using a format string and arguments.
*
* Uses the default constructed std::locale (the current global locale)
* for locale specific formatting. There is also a version that takes an
* explicit locale.
*
* This is just a simple wrapper, equivalent to:
*
* ~~~{.cc}
* return ostd::format_spec{fmt}.format(std::forward<R>(writer), args...);
* ~~~
*/
template<typename R, typename ...A>
inline R &&format(R &&writer, string_range fmt, A const &...args) {
return format_spec{fmt}.format(std::forward<R>(writer), args...);
}
/** @brief Formats into an output range using a format string and arguments.
*
* This version uses `loc` as a locale. There is also a version that uses
* the global locale by default.
*
* This is just a simple wrapper, equivalent to:
*
* ~~~{.cc}
* return ostd::format_spec{fmt, loc}.format(std::forward<R>(writer), args...);
* ~~~
*/
template<typename R, typename ...A>
inline R &&format(
R &&writer, std::locale const &loc, string_range fmt, A const &...args
@ -999,6 +1481,10 @@ inline R &&format(
return format_spec{fmt, loc}.format(std::forward<R>(writer), args...);
}
/** @} */
} /* namespace ostd */
#endif
/** @} */