From 1a07db8bacb8d773fbfdf41f566a78e0724debe3 Mon Sep 17 00:00:00 2001 From: q66 Date: Sun, 31 Dec 2017 19:16:16 +0100 Subject: [PATCH] merge utf impl bits into one place --- ostd/string.hh | 82 +++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/ostd/string.hh b/ostd/string.hh index a6d5e90..ae237b3 100644 --- a/ostd/string.hh +++ b/ostd/string.hh @@ -83,38 +83,6 @@ using char_range = basic_char_range; */ using string_range = basic_char_range; -namespace utf { - /* @brief Get the Unicode code point for a multibyte sequence. - * - * The string is advanced past the UTF-8 character in the front. - * If the decoding fails, `false` is returned, otherwise it's `true`. - */ - bool codepoint(string_range &r, char32_t &ret) noexcept; - - /* @brief Get the number of Unicode code points in a string. - * - * This function keeps reading Unicode code points while it can and - * once it can't it returns the number of valid ones with the rest - * of the input string range being in `cont`. That means if the entire - * string is a valid UTF-8 string, `cont` will be empty, otherwise it - * will begin at the first invalid UTF-8 code point. - * - * If you're sure the string is valid or you don't need to handle the - * error, you can use the more convenient overload below. - */ - std::size_t length(string_range r, string_range &cont) noexcept; - - /* @brief Get the number of Unicode code points in a valid UTF-8 string. - * - * If an invalid UTF-8 sequence is encountered, it returns the length - * until that sequence. - * - * If you need to get the continuation string, use the general - * error-handling overload of the function. - */ - std::size_t length(string_range r) noexcept; -} /* namespace utf */ - /** @brief A string slice type. * * This is a contiguous range over a character type. The character type @@ -283,17 +251,13 @@ public: * * Effectively the same as utf::length(). */ - size_type length() const noexcept { - return utf::length(*this); - } + inline size_type length() const noexcept; /** @brief Gets the number of code points in the slice. * * Effectively the same as utf::length(). */ - size_type length(basic_char_range &cont) const noexcept { - return utf::length(*this, cont); - } + inline size_type length(basic_char_range &cont) const noexcept; /** @brief Creates a sub-slice of the slice. * @@ -748,6 +712,36 @@ namespace utf { using std::runtime_error::runtime_error; }; + /* @brief Get the Unicode code point for a multibyte sequence. + * + * The string is advanced past the UTF-8 character in the front. + * If the decoding fails, `false` is returned, otherwise it's `true`. + */ + bool codepoint(string_range &r, char32_t &ret) noexcept; + + /* @brief Get the number of Unicode code points in a string. + * + * This function keeps reading Unicode code points while it can and + * once it can't it returns the number of valid ones with the rest + * of the input string range being in `cont`. That means if the entire + * string is a valid UTF-8 string, `cont` will be empty, otherwise it + * will begin at the first invalid UTF-8 code point. + * + * If you're sure the string is valid or you don't need to handle the + * error, you can use the more convenient overload below. + */ + std::size_t length(string_range r, string_range &cont) noexcept; + + /* @brief Get the number of Unicode code points in a valid UTF-8 string. + * + * If an invalid UTF-8 sequence is encountered, it returns the length + * until that sequence. + * + * If you need to get the continuation string, use the general + * error-handling overload of the function. + */ + std::size_t length(string_range r) noexcept; + namespace detail { struct codepoint_range: input_range { using range_category = forward_range_tag; @@ -809,6 +803,18 @@ namespace utf { } /* namespace utf */ +template +inline std::size_t basic_char_range::length() const noexcept { + return utf::length(*this); +} + +template +inline std::size_t basic_char_range::length( + basic_char_range &cont +) const noexcept { + return utf::length(*this, cont); +} + template inline auto basic_char_range::iter_codes() const { return utf::iter_codes(*this);