/* Format strings for OctaSTD. Inspired by D's std.format module. * * This file is part of OctaSTD. See COPYING.md for futher information. */ #ifndef OSTD_FORMAT_HH #define OSTD_FORMAT_HH #include #include #include #include #include #include "ostd/algorithm.hh" #include "ostd/string.hh" namespace ostd { enum FormatFlags { FMT_FLAG_DASH = 1 << 0, FMT_FLAG_ZERO = 1 << 1, FMT_FLAG_SPACE = 1 << 2, FMT_FLAG_PLUS = 1 << 3, FMT_FLAG_HASH = 1 << 4 }; struct format_error: std::runtime_error { using std::runtime_error::runtime_error; }; namespace detail { inline int parse_fmt_flags(string_range &fmt, int ret) { while (!fmt.empty()) { switch (fmt.front()) { case '-': ret |= FMT_FLAG_DASH; fmt.pop_front(); break; case '+': ret |= FMT_FLAG_PLUS; fmt.pop_front(); break; case '#': ret |= FMT_FLAG_HASH; fmt.pop_front(); break; case '0': ret |= FMT_FLAG_ZERO; fmt.pop_front(); break; case ' ': ret |= FMT_FLAG_SPACE; fmt.pop_front(); break; default: goto retflags; } } retflags: return ret; } inline size_t read_digits(string_range &fmt, char *buf) { size_t ret = 0; for (; !fmt.empty() && isdigit(fmt.front()); ++ret) { *buf++ = fmt.front(); fmt.pop_front(); } *buf = '\0'; return ret; } /* 0 .. not allowed * 1 .. floating point * 2 .. character * 3 .. binary * 4 .. octal * 5 .. decimal * 6 .. hexadecimal * 7 .. string * 8 .. custom object */ static constexpr byte const fmt_specs[] = { /* uppercase spec set */ 1, 3, 8, 8, /* A B C D */ 1, 1, 1, 8, /* E F G H */ 8, 8, 8, 8, /* I J K L */ 8, 8, 8, 8, /* M N O P */ 8, 8, 8, 8, /* Q R S T */ 8, 8, 8, 6, /* U V W X */ 8, 8, /* Y Z */ /* ascii filler */ 0, 0, 0, 0, 0, 0, /* lowercase spec set */ 1, 3, 2, 5, /* a b c d */ 1, 1, 1, 8, /* e f g h */ 8, 8, 8, 8, /* i j k l */ 8, 8, 4, 8, /* m n o p */ 8, 8, 7, 8, /* q r s t */ 8, 8, 8, 6, /* u v w x */ 8, 8, /* y z */ /* ascii filler */ 0, 0, 0, 0, 0 }; static constexpr int const fmt_bases[] = { 0, 0, 0, 2, 8, 10, 16, 0 }; static constexpr char fmt_digits[2][16] = { { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }, { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' } }; static constexpr char const *fmt_intpfx[2][4] = { { "0B", "0", "", "0X" }, { "0b", "0", "", "0x" } }; /* retrieve width/precision */ template int get_arg_param(size_t idx, T const &val, A const &...args) { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format args"}; } else { return get_arg_param(idx - 1, args...); } } else { if constexpr(!std::is_integral_v) { throw format_error{"invalid argument for width/precision"}; } else { return int(val); } } } } struct format_spec { format_spec(): p_nested_escape(false), p_fmt() {} format_spec(string_range fmt, bool escape = false): p_nested_escape(escape), p_fmt(fmt) {} format_spec(char spec, int width = -1, int prec = -1, int flags = 0): p_flags(flags), p_width((width >= 0) ? width : 0), p_precision((prec >= 0) ? prec : 0), p_has_width(width >= 0), p_has_precision(prec >= 0), p_spec(spec) {} template bool read_until_spec(R &writer, size_t *wret) { size_t written = 0; if (wret) { *wret = 0; } if (p_fmt.empty()) { return false; } while (!p_fmt.empty()) { if (p_fmt.front() == '%') { p_fmt.pop_front(); if (p_fmt.front() == '%') { goto plain; } bool r = read_spec(); if (wret) { *wret = written; } return r; } plain: ++written; writer.put(p_fmt.front()); p_fmt.pop_front(); } if (wret) { *wret = written; } return false; } template size_t write_spaces(R &writer, size_t n, bool left, char c = ' ') const { if (left == bool(p_flags & FMT_FLAG_DASH)) { return 0; } int r = p_width - int(n); for (int w = p_width - int(n); --w >= 0; writer.put(c)); if (r < 0) { return 0; } return r; } string_range rest() const { return p_fmt; } template size_t build_spec(R &&out, string_range spec) const { size_t ret = out.put('%'); if (p_flags & FMT_FLAG_DASH ) { ret += out.put('-'); } if (p_flags & FMT_FLAG_ZERO ) { ret += out.put('0'); } if (p_flags & FMT_FLAG_SPACE) { ret += out.put(' '); } if (p_flags & FMT_FLAG_PLUS ) { ret += out.put('+'); } if (p_flags & FMT_FLAG_HASH ) { ret += out.put('#'); } ret += range_put_n(out, "*.*", 3); ret += range_put_n(out, &spec[0], spec.size()); return ret; } int width() const { return p_width; } int precision() const { return p_precision; } bool has_width() const { return p_has_width; } bool has_precision() const { return p_has_precision; } bool arg_width() const { return p_arg_width; } bool arg_precision() const { return p_arg_precision; } template void set_width(size_t idx, A const &...args) { p_width = detail::get_arg_param(idx, args...); } template void set_precision(size_t idx, A const &...args) { p_precision = detail::get_arg_param(idx, args...); } int flags() const { return p_flags; } char spec() const { return p_spec; } byte index() const { return p_index; } string_range nested() const { return p_nested; } string_range nested_sep() const { return p_nested_sep; } bool is_nested() const { return p_is_nested; } bool nested_escape() const { return p_nested_escape; } protected: string_range p_nested; string_range p_nested_sep; int p_flags = 0; int p_width = 0; int p_precision = 0; bool p_has_width = false; bool p_has_precision = false; bool p_arg_width = false; bool p_arg_precision = false; char p_spec = '\0'; byte p_index = 0; bool p_is_nested = false; bool p_nested_escape = false; bool read_until_dummy() { while (!p_fmt.empty()) { if (p_fmt.front() == '%') { p_fmt.pop_front(); if (p_fmt.front() == '%') { goto plain; } return read_spec(); } plain: p_fmt.pop_front(); } return false; } bool read_spec_range() { int sflags = p_flags; p_nested_escape = !(sflags & FMT_FLAG_DASH); p_fmt.pop_front(); string_range begin_inner(p_fmt); if (!read_until_dummy()) { p_is_nested = false; return false; } /* skip to the last spec in case multiple specs are present */ string_range curfmt(p_fmt); while (read_until_dummy()) { curfmt = p_fmt; } p_fmt = curfmt; p_flags = sflags; /* find delimiter or ending */ string_range begin_delim(p_fmt); string_range p = find(begin_delim, '%'); for (; !p.empty(); p = find(p, '%')) { p.pop_front(); /* escape, skip */ if (p.front() == '%') { p.pop_front(); continue; } /* found end, in that case delimiter is after spec */ if (p.front() == ')') { p_nested = begin_inner.slice(0, &begin_delim[0] - &begin_inner[0]); p_nested_sep = begin_delim.slice(0, &p[0] - &begin_delim[0] - 1); p.pop_front(); p_fmt = p; p_is_nested = true; return true; } /* found actual delimiter start... */ if (p.front() == '|') { p_nested = begin_inner.slice(0, &p[0] - &begin_inner[0] - 1); p.pop_front(); p_nested_sep = p; for (p = find(p, '%'); !p.empty(); p = find(p, '%')) { p.pop_front(); if (p.front() == ')') { p_nested_sep = p_nested_sep.slice(0, &p[0] - &p_nested_sep[0] - 1); p.pop_front(); p_fmt = p; p_is_nested = true; return true; } } p_is_nested = false; return false; } } p_is_nested = false; return false; } bool read_spec() { size_t ndig = detail::read_digits(p_fmt, p_buf); bool havepos = false; p_index = 0; /* parse index */ if (p_fmt.front() == '$') { if (ndig <= 0) return false; /* no pos given */ int idx = atoi(p_buf); if (idx <= 0 || idx > 255) return false; /* bad index */ p_index = byte(idx); p_fmt.pop_front(); havepos = true; } /* parse flags */ p_flags = 0; size_t skipd = 0; if (havepos || !ndig) { p_flags = detail::parse_fmt_flags(p_fmt, 0); } else { for (size_t i = 0; i < ndig; ++i) { if (p_buf[i] != '0') { break; } ++skipd; } if (skipd) { p_flags = FMT_FLAG_ZERO; } if (skipd == ndig) { p_flags = detail::parse_fmt_flags(p_fmt, p_flags); } } /* range/array formatting */ if ((p_fmt.front() == '(') && (havepos || !(ndig - skipd))) { return read_spec_range(); } /* parse width */ p_width = 0; p_has_width = false; p_arg_width = false; if (!havepos && ndig && (ndig - skipd)) { p_width = atoi(p_buf + skipd); p_has_width = true; } else if (detail::read_digits(p_fmt, p_buf)) { p_width = atoi(p_buf); p_has_width = true; } else if (p_fmt.front() == '*') { p_arg_width = p_has_width = true; p_fmt.pop_front(); } /* parse precision */ p_precision = 0; p_has_precision = false; p_arg_precision = false; if (p_fmt.front() != '.') { goto fmtchar; } p_fmt.pop_front(); if (detail::read_digits(p_fmt, p_buf)) { p_precision = atoi(p_buf); p_has_precision = true; } else if (p_fmt.front() == '*') { p_arg_precision = p_has_precision = true; p_fmt.pop_front(); } else { return false; } fmtchar: p_spec = p_fmt.front(); p_fmt.pop_front(); /* make sure we're testing on a signed byte - our mapping only * tests values up to 127 */ sbyte sp = p_spec; return (sp >= 65) && (detail::fmt_specs[sp - 65] != 0); } string_range p_fmt; char p_buf[32]; }; /* for custom container formatting */ template< typename T, typename R, typename = std::enable_if_t< std::is_same_v().to_format( std::declval(), std::declval() )), void> > > inline void to_format(T const &v, R &writer, format_spec const &fs) { v.to_format(writer, fs); } namespace detail { template inline size_t write_u(R &writer, format_spec const *fl, bool neg, T val) { char buf[20]; size_t r = 0, n = 0; char spec = fl->spec(); if (spec == 's') spec = 'd'; byte specn = detail::fmt_specs[spec - 65]; if (specn <= 2 || specn > 7) { throw format_error{"cannot format integers with the given spec"}; } int base = detail::fmt_bases[specn]; if (!val) { buf[n++] = '0'; } for (; val; val /= base) { buf[n++] = detail::fmt_digits[spec >= 'a'][val % base]; } r = n; int flags = fl->flags(); bool lsgn = flags & FMT_FLAG_PLUS; bool lsp = flags & FMT_FLAG_SPACE; bool zero = flags & FMT_FLAG_ZERO; bool sign = neg + lsgn + lsp; r += sign; char const *pfx = nullptr; size_t pfxlen = 0; if (flags & FMT_FLAG_HASH && spec != 'd') { pfx = detail::fmt_intpfx[spec >= 'a'][specn - 3]; pfxlen = !!pfx[1] + 1; r += pfxlen; } if (!zero) { r += fl->write_spaces(writer, n + pfxlen + sign, true, ' '); } if (sign) { writer.put(neg ? '-' : *((" \0+") + lsgn * 2)); } range_put_n(writer, pfx, pfxlen); if (zero) { r += fl->write_spaces(writer, n + pfxlen + sign, true, '0'); } for (int i = int(n - 1); i >= 0; --i) { writer.put(buf[i]); } r += fl->write_spaces(writer, n + sign + pfxlen, false); return r; } template static size_t format_impl( R &writer, bool escape, string_range fmt, A const &...args ); template struct FmtTupleUnpacker { template static inline size_t unpack( R &writer, bool esc, string_range fmt, T const &item, A const &...args ) { return FmtTupleUnpacker::unpack( writer, esc, fmt, item, std::get(item), args... ); } }; template<> struct FmtTupleUnpacker<0> { template static inline size_t unpack( R &writer, bool esc, string_range fmt, T const &, A const &...args ) { return format_impl(writer, esc, fmt, args...); } }; /* ugly ass check for whether a type is tuple-like, like tuple itself, * pair, array, possibly other types added later or overridden... */ template std::true_type tuple_like_test(typename std::tuple_size::type *); template std::false_type tuple_like_test(...); template constexpr bool is_tuple_like = decltype(tuple_like_test(0))::value; template inline size_t format_ritem( R &writer, bool esc, bool, string_range fmt, T const &item, std::enable_if_t, bool> = true ) { return format_impl(writer, esc, fmt, item); } template inline size_t format_ritem( R &writer, bool esc, bool expandval, string_range fmt, T const &item, std::enable_if_t, bool> = true ) { if (expandval) { return FmtTupleUnpacker::value>::unpack( writer, esc, fmt, item ); } return format_impl(writer, esc, fmt, item); } template inline size_t write_range( R &writer, format_spec const *fl, bool escape, bool expandval, string_range sep, T const &val, std::enable_if_t, bool> = true ) { /* XXX: maybe handle error cases? */ auto range = ostd::iter(val); if (range.empty()) { return 0; } size_t ret = 0; /* test first item */ ret += format_ritem( writer, escape, expandval, fl->rest(), range.front() ); range.pop_front(); /* write the rest (if any) */ for (; !range.empty(); range.pop_front()) { ret += range_put_n(writer, &sep[0], sep.size()); ret += format_ritem( writer, escape, expandval, fl->rest(), range.front() ); } return ret; } template inline size_t write_range( R &, format_spec const *, bool, bool, string_range, T const &, std::enable_if_t, bool> = true ) { throw format_error{"invalid value for ranged format"}; } template static std::true_type test_fmt_tostr( decltype(ostd::to_string{}(std::declval())) * ); template static std::false_type test_fmt_tostr(...); template constexpr bool fmt_tostr_test = decltype(test_fmt_tostr(0))::value; /* non-printable escapes up to 0x20 (space) */ static constexpr char const *fmt_escapes[] = { "\\0" , "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\a" , "\\b" , "\\t" , "\\n" , "\\v" , "\\f" , "\\r" , "\\x0E", "\\x0F", "\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1A", "\\x1B", "\\x1C", "\\x1D", "\\x1E", "\\x1F", /* we want to escape double quotes... */ nullptr, nullptr, "\\\"", nullptr, nullptr, nullptr, nullptr, "\\\'" }; inline char const *escape_fmt_char(char v, char quote) { if ((v >= 0 && v < 0x20) || (v == quote)) { return fmt_escapes[size_t(v)]; } else if (v == 0x7F) { return "\\x7F"; } return nullptr; } inline std::string escape_fmt_str(string_range val) { std::string ret; ret.push_back('"'); while (!val.empty()) { char const *esc = escape_fmt_char(val.front(), '"'); if (esc) { ret.append(esc); } else { ret.push_back(val.front()); } val.pop_front(); } ret.push_back('"'); return ret; } template static std::true_type test_tofmt(decltype(to_format( std::declval(), std::declval(), std::declval() )) *); template static std::false_type test_tofmt(...); template constexpr bool fmt_tofmt_test = decltype(test_tofmt(0))::value; struct write_spec: format_spec { using format_spec::format_spec; /* string base writer */ template size_t write_str(R &writer, bool escape, string_range val) const { if (escape) { return write_str(writer, false, escape_fmt_str(val)); } size_t n = val.size(); if (this->precision()) { n = this->precision(); } size_t r = n; r += this->write_spaces(writer, n, true); range_put_n(writer, &val[0], n); r += this->write_spaces(writer, n, false); return r; } /* char values */ template size_t write_char(R &writer, bool escape, char val) const { if (escape) { char const *esc = escape_fmt_char(val, '\''); if (esc) { char buf[6]; buf[0] = '\''; size_t elen = strlen(esc); memcpy(buf + 1, esc, elen); buf[elen + 1] = '\''; return write_val(writer, false, ostd::string_range{ buf, buf + elen + 2 }); } } size_t r = 1 + escape * 2; r += this->write_spaces(writer, 1 + escape * 2, true); if (escape) { writer.put('\''); writer.put(val); writer.put('\''); } else { writer.put(val); } r += this->write_spaces(writer, 1 + escape * 2, false); return r; } /* floating point */ template> size_t write_float(R &writer, bool, T val) const { char buf[16], rbuf[128]; char fmtspec[Long + 1]; fmtspec[Long] = this->spec(); byte specn = detail::fmt_specs[this->spec() - 65]; if (specn != 1 && specn != 7) { throw format_error{"cannot format floats with the given spec"}; } if (specn == 7) { fmtspec[Long] = 'g'; } if (Long) { fmtspec[0] = 'L'; } buf[this->build_spec(iter(buf), fmtspec)] = '\0'; int ret = snprintf( rbuf, sizeof(rbuf), buf, this->width(), this->has_precision() ? this->precision() : 6, val ); if (ret < 0) { /* typically unreachable, build_spec creates valid format */ throw format_error{"invalid float format"}; } char *dbuf = nullptr; if (size_t(ret) >= sizeof(rbuf)) { /* this should typically never happen */ dbuf = new char[ret + 1]; ret = snprintf( dbuf, ret + 1, buf, this->width(), this->has_precision() ? this->precision() : 6, val ); if (ret < 0) { /* see above */ throw format_error{"invalid float format"}; } range_put_n(writer, dbuf, ret); delete[] dbuf; } else { range_put_n(writer, rbuf, ret); } return ret; } template size_t write_val(R &writer, bool escape, T const &val) const { /* stuff fhat can be custom-formatted goes first */ if constexpr(fmt_tofmt_test>) { tostr_range sink(writer); to_format(val, sink, *this); return sink.get_written(); } /* second best, we can convert to string slice */ if constexpr(std::is_constructible_v) { if (this->spec() != 's') { throw format_error{"strings need the '%s' spec"}; } return write_str(writer, escape, val); } /* bools, check if printing as string, otherwise convert to int */ if constexpr(std::is_same_v) { if (this->spec() == 's') { return write_val(writer, ("false\0true") + (6 * val)); } else { return write_val(writer, int(val)); } } /* character values */ if constexpr(std::is_same_v) { if (this->spec() != 's' && this->spec() != 'c') { throw format_error{"cannot format chars with the given spec"}; } return write_char(writer, escape, val); } /* pointers, write as pointer with %s and otherwise as unsigned... * char pointers are handled by the string case above */ if constexpr(std::is_pointer_v) { format_spec sp{ (spec() == 's') ? 'x' : spec(), has_width() ? width() : -1, has_precision() ? precision() : -1, (spec() == 's') ? (flags() | FMT_FLAG_HASH) : flags() }; return detail::write_u(writer, &sp, false, size_t(val)); } /* integers */ if constexpr(std::is_integral_v) { if constexpr(std::is_signed_v) { /* signed integers */ using UT = std::make_unsigned_t; return detail::write_u( writer, this, val < 0, (val < 0) ? static_cast(-val) : static_cast(val) ); } else { /* unsigned integers */ return detail::write_u(writer, this, false, val); } } /* floats */ if constexpr(std::is_floating_point_v) { return write_float(writer, escape, val); } /* stuff that can be to_string'd, worst reliable case, allocates */ if constexpr(fmt_tostr_test) { if (this->spec() != 's') { throw format_error{"custom objects need the '%s' spec"}; } return write_val(writer, false, ostd::to_string{}(val)); } /* we ran out of options, failure */ throw format_error{"the value cannot be formatted"}; } /* actual writer */ template size_t write_arg( R &writer, size_t idx, T const &val, A const &...args ) const { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { return write_arg(writer, idx - 1, args...); } } else { return write_val(writer, this->p_nested_escape, val); } } /* range writer */ template size_t write_range( R &writer, size_t idx, bool expandval, string_range sep, T const &val, A const &...args ) const { if (idx) { if constexpr(!sizeof...(A)) { throw format_error{"not enough format arguments"}; } else { return write_range(writer, idx - 1, expandval, sep, args...); } } else { return detail::write_range( writer, this, this->p_nested_escape, expandval, sep, val ); } } }; template inline size_t format_impl( R &writer, bool escape, string_range fmt, A const &...args ) { size_t argidx = 1, twr = 0, written = 0; detail::write_spec spec(fmt, escape); while (spec.read_until_spec(writer, &twr)) { written += twr; size_t argpos = spec.index(); if (spec.is_nested()) { if (!argpos) { argpos = argidx++; } /* FIXME: figure out a better way */ detail::write_spec nspec(spec.nested(), spec.nested_escape()); written += nspec.write_range( writer, argpos - 1, (spec.flags() & FMT_FLAG_HASH), spec.nested_sep(), args... ); continue; } if (!argpos) { argpos = argidx++; if (spec.arg_width()) { spec.set_width(argpos - 1, args...); argpos = argidx++; } if (spec.arg_precision()) { spec.set_precision(argpos - 1, args...); argpos = argidx++; } } else { bool argprec = spec.arg_precision(); if (argprec) { if (argpos <= 1) { throw format_error{"argument precision not given"}; } spec.set_precision(argpos - 2, args...); } if (spec.arg_width()) { if (argpos <= (size_t(argprec) + 1)) { throw format_error{"argument width not given"}; } spec.set_width(argpos - 2 - argprec, args...); } } written += spec.write_arg(writer, argpos - 1, args...); } written += twr; return written; } template inline ptrdiff_t format_impl(R &writer, bool, string_range fmt) { size_t written = 0; detail::write_spec spec(fmt, false); if (spec.read_until_spec(writer, &written)) { throw format_error{"format spec without format arguments"}; } return written; } } /* namespace detail */ template inline size_t format(R &&writer, string_range fmt, A const &...args) { return detail::format_impl(writer, false, fmt, args...); } template inline size_t format(R &&writer, format_spec const &fmt, T const &val) { /* we can do this as there are no members added... but ugly, FIXME later */ detail::write_spec const &wsp = static_cast(fmt); return wsp.write_arg(writer, 0, val); } } /* namespace ostd */ #endif