unified length handling for all encodings
parent
aeb5023b30
commit
ed82fa0233
|
@ -1102,6 +1102,10 @@ namespace utf {
|
|||
*/
|
||||
std::size_t length(string_range r, string_range &cont) noexcept;
|
||||
|
||||
/* Overloads of the continuation-reporting length() for the other range
 * types. Their definitions forward to one shared helper that counts the
 * code points it can decode from the front of `r` and stores the part of
 * `r` that was not consumed in `cont`.
 */
std::size_t length(u16string_range r, u16string_range &cont) noexcept;
|
||||
std::size_t length(u32string_range r, u32string_range &cont) noexcept;
|
||||
std::size_t length(wstring_range r, wstring_range &cont) noexcept;
|
||||
|
||||
/* @brief Get the number of Unicode code points in a valid UTF-8 string.
|
||||
*
|
||||
* If an invalid UTF-8 sequence is encountered, it's considered
|
||||
|
@ -1114,15 +1118,9 @@ namespace utf {
|
|||
*/
|
||||
std::size_t length(string_range r) noexcept;
|
||||
|
||||
/* @brief Get the number of Unicode code points in a UTF-32 string.
|
||||
*
|
||||
* As a UTF-32 string encodes entire code points, this function
|
||||
* never fails, so there is no need for an error-handling version
|
||||
* and this is equivalent to simply calling `r.size()`.
|
||||
*/
|
||||
inline std::size_t length(u32string_range r) noexcept {
|
||||
return r.size();
|
||||
}
|
||||
/* Non-failing length() overloads for the remaining range types. Their
 * definitions forward to a shared helper: when decoding fails on a
 * non-empty range, one element is dropped and still counted, so the
 * scan always reaches the end of `r`.
 * NOTE(review): presumably each element type maps to the matching
 * UTF-16/UTF-32/wide encoding - confirm against the range typedefs.
 */
std::size_t length(u16string_range r) noexcept;
|
||||
std::size_t length(u32string_range r) noexcept;
|
||||
std::size_t length(wstring_range r) noexcept;
|
||||
|
||||
namespace detail {
|
||||
template<typename C>
|
||||
|
|
|
@ -145,6 +145,39 @@ namespace detail {
|
|||
ret[1] = char16_t(0xDC00 + (ch & 0x3FF));
|
||||
return 2;
|
||||
}
|
||||
|
||||
template<typename C>
|
||||
inline std::size_t length(
|
||||
basic_char_range<C const> &r, basic_char_range<C const> &cont
|
||||
) noexcept {
|
||||
std::size_t ret = 0;
|
||||
for (char32_t ch; utf::decode(r, ch); ++ret) {
|
||||
continue;
|
||||
}
|
||||
cont = r;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename C>
|
||||
inline std::size_t length(basic_char_range<C const> &r) noexcept {
|
||||
std::size_t ret = 0;
|
||||
if constexpr(std::is_same_v<C, char32_t> || (
|
||||
std::is_same_v<C, wchar_t> &&
|
||||
(sizeof(wchar_t) == sizeof(char32_t))
|
||||
)) {
|
||||
ret = r.size();
|
||||
} else {
|
||||
for (;; ++ret) {
|
||||
if (char32_t ch; !utf::decode(r, ch)) {
|
||||
if (r.empty()) {
|
||||
break;
|
||||
}
|
||||
r.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
} /* namespace detail */
|
||||
|
||||
bool decode(string_range &r, char32_t &ret) noexcept {
|
||||
|
@ -190,25 +223,35 @@ bool decode(wstring_range &r, char32_t &ret) noexcept {
|
|||
}
|
||||
|
||||
/* Counts the code points that decode from the front of `r`.
 *
 * Delegates to the shared detail::length helper instead of carrying its
 * own copy of the decode loop, matching the other range overloads. `r`
 * is taken by value, so the caller's range is untouched; `cont` receives
 * whatever part of `r` the helper did not consume.
 */
std::size_t length(string_range r, string_range &cont) noexcept {
    return detail::length(r, cont);
}
|
||||
|
||||
/* u16string_range entry point for the continuation-reporting length().
 * Passes its by-value copy of `r` to the generic helper, which counts the
 * code points it can decode and writes the leftover range into `cont`.
 */
std::size_t length(u16string_range r, u16string_range &cont) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
||||
|
||||
/* u32string_range entry point for the continuation-reporting length().
 * Passes its by-value copy of `r` to the generic helper, which counts the
 * code points it can decode and writes the leftover range into `cont`.
 */
std::size_t length(u32string_range r, u32string_range &cont) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
||||
|
||||
/* wstring_range entry point for the continuation-reporting length().
 * Passes its by-value copy of `r` to the generic helper, which counts the
 * code points it can decode and writes the leftover range into `cont`.
 */
std::size_t length(wstring_range r, wstring_range &cont) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
||||
|
||||
/* Counts the code points in `r` without ever failing.
 *
 * Delegates to the shared detail::length helper instead of carrying its
 * own copy of the scan loop, matching the other range overloads. The
 * helper drops and counts one element whenever decoding fails on a
 * non-empty range. `r` is taken by value, so the caller's range is
 * untouched.
 */
std::size_t length(string_range r) noexcept {
    return detail::length(r);
}
|
||||
|
||||
/* u16string_range entry point for the non-failing length().
 * The generic helper does the counting on this function's by-value copy
 * of `r`, so the caller's range is not modified.
 */
std::size_t length(u16string_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
||||
|
||||
/* u32string_range entry point for the non-failing length().
 * The generic helper does the counting on this function's by-value copy
 * of `r`, so the caller's range is not modified.
 * NOTE(review): presumably the helper's char32_t branch is selected here,
 * making the result `r.size()` - confirm against the range typedef.
 */
std::size_t length(u32string_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
||||
|
||||
/* wstring_range entry point for the non-failing length().
 * The generic helper does the counting on this function's by-value copy
 * of `r`, so the caller's range is not modified. The helper returns
 * `r.size()` directly when wchar_t is as wide as char32_t and decodes
 * element by element otherwise.
 */
std::size_t length(wstring_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
||||
|
||||
/* unicode-aware ctype
|
||||
|
|
Loading…
Reference in New Issue