remove the encode_u* wrappers (just use the generic type-based version)

2018-01-07 19:07:50 +01:00 · 2018-01-07 19:07:50 +01:00 · fb91f77eb0
parent 4a2e5cd557
commit fb91f77eb0
2 changed files with 29 additions and 86 deletions
--- a/ostd/format.hh
+++ b/ostd/format.hh
@ -1008,7 +1008,7 @@ private:

    template<typename R>
    void write_char_raw(R &writer, char32_t val) const {
-        if (!utf::encode_u8(writer, val)) {
+        if (!utf::encode<char>(writer, val)) {
            write_replacement(writer);
        }
    }
@ -1047,7 +1047,7 @@ private:
                        write_char_raw(writer, c);
                    }
                    val.pop_front();
-                } else if (!utf::encode_u8(writer, val)) {
+                } else if (!utf::encode<char>(writer, val)) {
                    write_replacement(writer);
                    val.pop_front();
                }
--- a/ostd/string.hh
+++ b/ostd/string.hh
@ -782,6 +782,18 @@ namespace utf {
        ) noexcept;
    }

+    /* @brief Encode a Unicode code point in the given encoding.
+     *
+     * The encoding is specified by the template parameter `C`, which
+     * can be any of the character types (utf::is_character); the
+     * encoding used is picked based on utf::max_units.
+     *
+     * The return value is the number of values written into `sink`.
+     * If none were written, the encoding failed.
+     *
+     * If your input is a string and you want to advance it, use the
+     * utf::encode(R, basic_char_range) variant.
+     */
    template<typename C, typename R>
    inline std::size_t encode(R &sink, char32_t ch) {
        std::size_t ret;
@ -798,6 +810,21 @@ namespace utf {
        return ret;
    }

+    /* @brief Encode a Unicode code point from a string in the given encoding.
+     *
+     * The encoding is specified by the template parameter `C`, which
+     * can be any of the character types (utf::is_character); the
+     * encoding used is picked based on utf::max_units.
+     *
+     * Unlike utf::encode(R, char32_t), this takes a string as a second
+     * input and the string can be in any UTF encoding and use any of the
+     * available character types. The function advances the string by one
+     * code point, which may mean multiple values.
+     *
+     * The return value is the number of values written into `sink`.
+     * If none were written, the encoding failed and the string is not
+     * advanced.
+     */
    template<typename C, typename R, typename IC>
    inline std::size_t encode(R &sink, basic_char_range<IC const> &r) {
        if constexpr(max_units<IC> == 1) {
@ -825,90 +852,6 @@ namespace utf {
        return 0;
    }

-    /* @brief Encode a UTF-32 code point into UTF-8 code units.
-     *
-     * The units are written in `sink` which is an ostd::output_range_tag.
-     * The written values are of type `char` and up to 4 are written. The
-     * number of bytes written is returned from the function. In case of
-     * failure, `0` is returned.
-     *
-     * This function is allowed to fail only in two cases, when a surrogate
-     * code point is provided or when the code point is out of bounds as
-     * defined by Unicode (i.e. 0x10FFFF). It does not throw exceptions
-     * other than those thrown by `sink`.
-     */
-    template<typename R>
-    inline std::size_t encode_u8(R &sink, char32_t ch) {
-        return encode<char>(sink, ch);
-    }
-
-    template<typename R, typename C>
-    inline std::size_t encode_u8(R &sink, basic_char_range<C const> &r) {
-        return encode<char>(sink, r);
-    }
-
-    /* @brief Encode a UTF-32 code point into UTF-16.
-     *
-     * The values are written in `sink` which is an ostd::output_range_tag.
-     * The written values are of type `char16_t` and up to 2 are written.
-     * The number of values written is returned from the function. In case
-     * of failure, `0` is returned.
-     *
-     * This function is allowed to fail only in two cases, when a surrogate
-     * code point is provided or when the code point is out of bounds as
-     * defined by Unicode (i.e. 0x10FFFF). It does not throw exceptions
-     * other than those thrown by `sink`.
-     */
-    template<typename R>
-    inline std::size_t encode_u16(R &sink, char32_t ch) {
-        return encode<char16_t>(sink, ch);
-    }
-
-    template<typename R, typename C>
-    inline std::size_t encode_u16(R &sink, basic_char_range<C const> &r) {
-        return encode<char16_t>(sink, r);
-    }
-
-    template<typename R>
-    inline std::size_t encode_u32(R &sink, char32_t ch) {
-        return encode<char32_t>(sink, ch);
-    }
-
-    template<typename R, typename C>
-    inline std::size_t encode_u32(R &sink, basic_char_range<C const> &r) {
-        return encode<char32_t>(sink, r);
-    }
-
-    /* @brief Encode a UTF-32 code point into a wide Unicode char/sequence.
-     *
-     * The value(s) are written in `sink` which is an ostd::output_range_tag.
-     * The written values are of type `wchar_t` and the amount written depends
-     * on the size of `wchar_t`.
-     *
-     * If `wchar_t` has equal size to `char32_t`, the input is simply type
-     * cast and written into the sink, treating `wchar_t` as UTF-32. If it
-     * has equal size to `char16_t` instead, `wchar_t` is treated as UTF-16
-     * and the input code point is encoded into one or two UTF-16 values.
-     * If neither of these happens, `wchar_t` is treated the same as `char`
-     * and the encoding is UTF-8, writing up to 4 code units.
-     *
-     * This function does not throw exceptions other than those thrown by
-     * `sink`. As for errors, with UTF-32 `wchar_t` it isn't allowed to
-     * fail; with UTF-8 or UTF-16, the failure points are the usual ones
-     * (surrogate code point as input or input greater than 0x10FFFF).
-     *
-     * The return value is the number of values written into the sink.
-     */
-    template<typename R>
-    inline std::size_t encode_uw(R &sink, char32_t ch) {
-        return encode<wchar_t>(sink, ch);
-    }
-
-    template<typename R, typename C>
-    inline std::size_t encode_uw(R &sink, basic_char_range<C const> &r) {
-        return encode<wchar_t>(sink, r);
-    }
-
    /* @brief Get the number of Unicode code points in a string.
     *
     * This function keeps reading Unicode code points while it can and