merge utf impl bits into one place
parent
d3cdbe2fcf
commit
1a07db8bac
|
@ -83,38 +83,6 @@ using char_range = basic_char_range<char>;
|
||||||
*/
|
*/
|
||||||
using string_range = basic_char_range<char const>;
|
using string_range = basic_char_range<char const>;
|
||||||
|
|
||||||
namespace utf {
|
|
||||||
/* @brief Get the Unicode code point for a multibyte sequence.
|
|
||||||
*
|
|
||||||
* The string is advanced past the UTF-8 character in the front.
|
|
||||||
* If the decoding fails, `false` is returned, otherwise it's `true`.
|
|
||||||
*/
|
|
||||||
bool codepoint(string_range &r, char32_t &ret) noexcept;
|
|
||||||
|
|
||||||
/* @brief Get the number of Unicode code points in a string.
|
|
||||||
*
|
|
||||||
* This function keeps reading Unicode code points while it can and
|
|
||||||
* once it can't it returns the number of valid ones with the rest
|
|
||||||
* of the input string range being in `cont`. That means if the entire
|
|
||||||
* string is a valid UTF-8 string, `cont` will be empty, otherwise it
|
|
||||||
* will begin at the first invalid UTF-8 code point.
|
|
||||||
*
|
|
||||||
* If you're sure the string is valid or you don't need to handle the
|
|
||||||
* error, you can use the more convenient overload below.
|
|
||||||
*/
|
|
||||||
std::size_t length(string_range r, string_range &cont) noexcept;
|
|
||||||
|
|
||||||
/* @brief Get the number of Unicode code points in a valid UTF-8 string.
|
|
||||||
*
|
|
||||||
* If an invalid UTF-8 sequence is encountered, it returns the length
|
|
||||||
* until that sequence.
|
|
||||||
*
|
|
||||||
* If you need to get the continuation string, use the general
|
|
||||||
* error-handling overload of the function.
|
|
||||||
*/
|
|
||||||
std::size_t length(string_range r) noexcept;
|
|
||||||
} /* namespace utf */
|
|
||||||
|
|
||||||
/** @brief A string slice type.
|
/** @brief A string slice type.
|
||||||
*
|
*
|
||||||
* This is a contiguous range over a character type. The character type
|
* This is a contiguous range over a character type. The character type
|
||||||
|
@ -283,17 +251,13 @@ public:
|
||||||
*
|
*
|
||||||
* Effectively the same as utf::length().
|
* Effectively the same as utf::length().
|
||||||
*/
|
*/
|
||||||
size_type length() const noexcept {
|
inline size_type length() const noexcept;
|
||||||
return utf::length(*this);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief Gets the number of code points in the slice.
|
/** @brief Gets the number of code points in the slice.
|
||||||
*
|
*
|
||||||
* Effectively the same as utf::length().
|
* Effectively the same as utf::length().
|
||||||
*/
|
*/
|
||||||
size_type length(basic_char_range &cont) const noexcept {
|
inline size_type length(basic_char_range &cont) const noexcept;
|
||||||
return utf::length(*this, cont);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief Creates a sub-slice of the slice.
|
/** @brief Creates a sub-slice of the slice.
|
||||||
*
|
*
|
||||||
|
@ -748,6 +712,36 @@ namespace utf {
|
||||||
using std::runtime_error::runtime_error;
|
using std::runtime_error::runtime_error;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* @brief Get the Unicode code point for a multibyte sequence.
|
||||||
|
*
|
||||||
|
* The string is advanced past the UTF-8 character in the front.
|
||||||
|
* If the decoding fails, `false` is returned, otherwise it's `true`.
|
||||||
|
*/
|
||||||
|
bool codepoint(string_range &r, char32_t &ret) noexcept;
|
||||||
|
|
||||||
|
/* @brief Get the number of Unicode code points in a string.
|
||||||
|
*
|
||||||
|
* This function keeps reading Unicode code points while it can and
|
||||||
|
* once it can't it returns the number of valid ones with the rest
|
||||||
|
* of the input string range being in `cont`. That means if the entire
|
||||||
|
* string is a valid UTF-8 string, `cont` will be empty, otherwise it
|
||||||
|
* will begin at the first invalid UTF-8 code point.
|
||||||
|
*
|
||||||
|
* If you're sure the string is valid or you don't need to handle the
|
||||||
|
* error, you can use the more convenient overload below.
|
||||||
|
*/
|
||||||
|
std::size_t length(string_range r, string_range &cont) noexcept;
|
||||||
|
|
||||||
|
/* @brief Get the number of Unicode code points in a valid UTF-8 string.
|
||||||
|
*
|
||||||
|
* If an invalid UTF-8 sequence is encountered, it returns the length
|
||||||
|
* until that sequence.
|
||||||
|
*
|
||||||
|
* If you need to get the continuation string, use the general
|
||||||
|
* error-handling overload of the function.
|
||||||
|
*/
|
||||||
|
std::size_t length(string_range r) noexcept;
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
struct codepoint_range: input_range<codepoint_range> {
|
struct codepoint_range: input_range<codepoint_range> {
|
||||||
using range_category = forward_range_tag;
|
using range_category = forward_range_tag;
|
||||||
|
@ -809,6 +803,18 @@ namespace utf {
|
||||||
|
|
||||||
} /* namespace utf */
|
} /* namespace utf */
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
inline std::size_t basic_char_range<T>::length() const noexcept {
|
||||||
|
return utf::length(*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
inline std::size_t basic_char_range<T>::length(
|
||||||
|
basic_char_range<T> &cont
|
||||||
|
) const noexcept {
|
||||||
|
return utf::length(*this, cont);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline auto basic_char_range<T>::iter_codes() const {
|
inline auto basic_char_range<T>::iter_codes() const {
|
||||||
return utf::iter_codes(*this);
|
return utf::iter_codes(*this);
|
||||||
|
|
Loading…
Reference in New Issue