merge utf impl bits into one place

master
Daniel Kolesa 2017-12-31 19:16:16 +01:00
parent d3cdbe2fcf
commit 1a07db8bac
1 changed files with 44 additions and 38 deletions

View File

@ -83,38 +83,6 @@ using char_range = basic_char_range<char>;
*/
using string_range = basic_char_range<char const>;
namespace utf {
/** @brief Get the Unicode code point for a multibyte sequence.
 *
 * The string is advanced past the UTF-8 character in the front.
 * If the decoding fails, `false` is returned, otherwise it's `true`.
 *
 * NOTE(review): the state of `r` and `ret` after a failed decode is not
 * stated by this declaration -- confirm against the implementation.
 *
 * @param[in,out] r The UTF-8 input; on success its front is moved past
 *                  the decoded character.
 * @param[out] ret Presumably receives the decoded code point on success.
 *
 * @returns `true` if a code point was decoded, `false` otherwise.
 */
bool codepoint(string_range &r, char32_t &ret) noexcept;
/** @brief Get the number of Unicode code points in a string.
 *
 * This function keeps reading Unicode code points while it can and
 * once it can't it returns the number of valid ones with the rest
 * of the input string range being in `cont`. That means if the entire
 * string is a valid UTF-8 string, `cont` will be empty, otherwise it
 * will begin at the first invalid UTF-8 code point.
 *
 * If you're sure the string is valid or you don't need to handle the
 * error, you can use the more convenient overload below.
 *
 * @param[in] r The string to count code points in (taken by value).
 * @param[out] cont The unread remainder of `r`; empty when all of `r`
 *                  was valid UTF-8.
 *
 * @returns The number of valid code points read before stopping.
 */
std::size_t length(string_range r, string_range &cont) noexcept;
/** @brief Get the number of Unicode code points in a valid UTF-8 string.
 *
 * If an invalid UTF-8 sequence is encountered, it returns the length
 * until that sequence.
 *
 * If you need to get the continuation string, use the general
 * error-handling overload of the function.
 *
 * @param[in] r The string to count code points in (taken by value).
 *
 * @returns The number of code points before the end of the string or
 *          the first invalid sequence, whichever comes first.
 */
std::size_t length(string_range r) noexcept;
} /* namespace utf */
/** @brief A string slice type.
*
* This is a contiguous range over a character type. The character type
@ -283,17 +251,13 @@ public:
*
* Effectively the same as utf::length().
*/
size_type length() const noexcept {
    /* forward this slice unchanged to the single-argument utf::length() */
    auto const &self = *this;
    return utf::length(self);
}
inline size_type length() const noexcept; /* body is defined out of class, after namespace utf */
/** @brief Gets the number of code points in the slice.
 *
 * Forwards this slice together with `cont` to the two-argument
 * utf::length() and returns its result, so it is effectively the
 * same as calling utf::length() directly.
 */
size_type length(basic_char_range &cont) const noexcept {
    auto const &self = *this;
    return utf::length(self, cont);
}
inline size_type length(basic_char_range &cont) const noexcept; /* body is defined out of class, after namespace utf */
/** @brief Creates a sub-slice of the slice.
*
@ -748,6 +712,36 @@ namespace utf {
using std::runtime_error::runtime_error;
};
/** @brief Get the Unicode code point for a multibyte sequence.
 *
 * The string is advanced past the UTF-8 character in the front.
 * If the decoding fails, `false` is returned, otherwise it's `true`.
 *
 * NOTE(review): the state of `r` and `ret` after a failed decode is not
 * stated by this declaration -- confirm against the implementation.
 *
 * @param[in,out] r The UTF-8 input; on success its front is moved past
 *                  the decoded character.
 * @param[out] ret Presumably receives the decoded code point on success.
 *
 * @returns `true` if a code point was decoded, `false` otherwise.
 */
bool codepoint(string_range &r, char32_t &ret) noexcept;
/** @brief Get the number of Unicode code points in a string.
 *
 * This function keeps reading Unicode code points while it can and
 * once it can't it returns the number of valid ones with the rest
 * of the input string range being in `cont`. That means if the entire
 * string is a valid UTF-8 string, `cont` will be empty, otherwise it
 * will begin at the first invalid UTF-8 code point.
 *
 * If you're sure the string is valid or you don't need to handle the
 * error, you can use the more convenient overload below.
 *
 * @param[in] r The string to count code points in (taken by value).
 * @param[out] cont The unread remainder of `r`; empty when all of `r`
 *                  was valid UTF-8.
 *
 * @returns The number of valid code points read before stopping.
 */
std::size_t length(string_range r, string_range &cont) noexcept;
/** @brief Get the number of Unicode code points in a valid UTF-8 string.
 *
 * If an invalid UTF-8 sequence is encountered, it returns the length
 * until that sequence.
 *
 * If you need to get the continuation string, use the general
 * error-handling overload of the function.
 *
 * @param[in] r The string to count code points in (taken by value).
 *
 * @returns The number of code points before the end of the string or
 *          the first invalid sequence, whichever comes first.
 */
std::size_t length(string_range r) noexcept;
namespace detail {
struct codepoint_range: input_range<codepoint_range> {
using range_category = forward_range_tag;
@ -809,6 +803,18 @@ namespace utf {
} /* namespace utf */
/* Out-of-class body of basic_char_range<T>::length().
 *
 * Hands this slice to the single-argument utf::length() and returns
 * whatever that returns.
 */
template<typename T>
inline std::size_t basic_char_range<T>::length() const noexcept {
    basic_char_range<T> const &self = *this;
    return utf::length(self);
}
/* Out-of-class body of the continuation-reporting length() overload.
 *
 * Hands this slice and `cont` to the two-argument utf::length() and
 * returns whatever that returns.
 *
 * NOTE(review): utf::length() is declared with a `string_range &` second
 * parameter, so this looks instantiable only when `basic_char_range<T>`
 * is `string_range` itself -- confirm.
 */
template<typename T>
inline std::size_t basic_char_range<T>::length(
    basic_char_range<T> &cont
) const noexcept {
    basic_char_range<T> const &self = *this;
    return utf::length(self, cont);
}
template<typename T>
inline auto basic_char_range<T>::iter_codes() const {
return utf::iter_codes(*this);