add utf::unit_bits and encode/iter_u taking number of bits (8/16/32)

2018-01-07 19:26:35 +01:00 · 2018-01-07 19:26:35 +01:00 · 818fd1e8e8
parent 798fcec6c8
commit 818fd1e8e8
1 changed files with 37 additions and 4 deletions
--- a/ostd/string.hh
+++ b/ostd/string.hh
@ -354,6 +354,16 @@ public:
    template<typename C>
    inline auto iter_u() const;

+    /** @brief Iterate over the Unicode code units of the given bit width.
+     *
+     * `N` is the code unit size in bits and selects the encoding: 8 maps
+     * to `char` (UTF-8), 16 to `char16_t` (UTF-16) and 32 to `char32_t`
+     * (UTF-32).
+     *
+     * Equivalent to calling utf::iter_u<N>() on this slice.
+     */
+    template<std::size_t N>
+    inline auto iter_u() const;
+
    /** @brief Implicitly converts a string slice to std::basic_string_view.
     *
     * String views represent more or less the same thing but they're always
@ -622,16 +632,19 @@ namespace utf {
        template<>
        struct max_units_base<char32_t> {
            static constexpr std::size_t const value = 1;
+            /* code unit size in bits, exposed as utf::unit_bits */
+            static constexpr std::size_t const bits  = 32;
        };

        template<>
        struct max_units_base<char16_t> {
            static constexpr std::size_t const value = 2;
+            /* code unit size in bits, exposed as utf::unit_bits */
+            static constexpr std::size_t const bits  = 16;
        };

        template<>
        struct max_units_base<char> {
            static constexpr std::size_t const value = 4;
+            /* code unit size in bits, exposed as utf::unit_bits */
+            static constexpr std::size_t const bits  = 8;
        };

        template<>
@ -642,22 +655,26 @@ namespace utf {
    static inline constexpr std::size_t const max_units =
        detail::max_units_base<C>::value;

+    /** @brief The size of one code unit of the character type `C` in bits.
+     *
+     * This is 8 for `char`, 16 for `char16_t` and 32 for `char32_t`.
+     */
+    template<typename C>
+    static inline constexpr std::size_t const unit_bits =
+        detail::max_units_base<C>::bits;
+
    namespace detail {
+        /* Maps a code unit size in bits (N) to the character type of
+         * that size: 32 is char32_t, 16 is char16_t and 8 is char.
+         */
        template<std::size_t N>
        struct unicode_t_base;

        template<>
-        struct unicode_t_base<1> {
+        struct unicode_t_base<32> {
            using type = char32_t;
        };

        template<>
-        struct unicode_t_base<2> {
+        struct unicode_t_base<16> {
            using type = char16_t;
        };

        template<>
-        struct unicode_t_base<4> {
+        struct unicode_t_base<8> {
            using type = char;
        };
    }
@ -666,7 +683,7 @@ namespace utf {
    using unicode_t = typename detail::unicode_t_base<N>::type;

+    /* The unicode_t character type whose code unit size in bits equals
+     * unit_bits<T>.
+     */
    template<typename T>
-    using unicode_base_t = unicode_t<max_units<T>>;
+    using unicode_base_t = unicode_t<unit_bits<T>>;

    static inline constexpr bool const is_wchar_u32 =
        std::is_same_v<wchar_fixed_t, char32_t>;
@ -828,6 +845,11 @@ namespace utf {
        return 0;
    }

+    /** @brief Like encode() taking a character type, but selected by bits.
+     *
+     * `N` is the code unit size in bits (8, 16 or 32). It is translated to
+     * the character type utf::unicode_t<N> and the call is forwarded to
+     * `encode<utf::unicode_t<N>>(sink, r)`, whose result is returned
+     * unchanged.
+     */
+    template<std::size_t N, typename R, typename IC>
+    inline std::size_t encode(R &sink, basic_char_range<IC const> &r) {
+        return encode<unicode_t<N>>(sink, r);
+    }
+
    /** @brief Get the number of Unicode code points in a string.
     *
     * This function keeps reading Unicode code points while it can and
@ -922,6 +944,11 @@ namespace utf {
        >(std::forward<R>(str));
    }

+    /** @brief Like iter_u() taking a character type, but selected by bits.
+     *
+     * `N` is the code unit size in bits (8, 16 or 32). It is translated to
+     * the character type utf::unicode_t<N> and `str` is perfectly forwarded
+     * to `iter_u<utf::unicode_t<N>>(str)`, whose result is returned.
+     */
+    template<std::size_t N, typename R>
+    inline auto iter_u(R &&str) {
+        return iter_u<unicode_t<N>>(std::forward<R>(str));
+    }
+
    bool isalnum(char32_t c) noexcept;
    bool isalpha(char32_t c) noexcept;
    bool isblank(char32_t c) noexcept;
@ -978,6 +1005,12 @@ inline auto basic_char_range<T>::iter_u() const {
    return utf::iter_u<C>(*this);
 }

+/* Out-of-class definition of the overload taking the code unit size in
+ * bits; forwards to utf::iter_u<N>() with this slice.
+ */
+template<typename T>
+template<std::size_t N>
+inline auto basic_char_range<T>::iter_u() const {
+    return utf::iter_u<N>(*this);
+}
+
 template<typename T>
 inline int basic_char_range<T>::case_compare(
    basic_char_range<T const> s