relax the rules of zero-argument utf::length
parent
e5162233d4
commit
aeb5023b30
|
@ -1104,15 +1104,15 @@ namespace utf {
|
|||
|
||||
/* @brief Get the number of Unicode code points in a UTF-8 string.
|
||||
*
|
||||
* If an invalid UTF-8 sequence is encountered, it returns the length
|
||||
* until that sequence.
|
||||
* If invalid UTF-8 is encountered, each invalid code unit is
|
||||
* counted as 1 character, so the resulting length is the
|
||||
* number of valid code points plus the number of invalid
|
||||
* code units, as if each were replaced with a valid code point.
|
||||
*
|
||||
* If you need to get the continuation string, use the general
|
||||
* error-handling overload of the function.
|
||||
* If you need to stop at an invalid code unit and get the
|
||||
* continuation string, use the overload above.
|
||||
*/
|
||||
/* Zero-extra-argument convenience form: forwards to the two-argument
 * overload and returns its count unchanged.
 *
 * `r` is received by value, so passing it as the continuation
 * out-parameter only overwrites this function's own copy; the caller
 * never sees the continuation range.
 *
 * NOTE(review): this view also shows an out-of-line definition with the
 * same signature further down; only one of the two can exist in a build.
 * Presumably this inline form is the removed side of the diff - confirm.
 */
inline std::size_t length(string_range r) noexcept {
    const std::size_t counted = utf::length(r, r);
    return counted;
}
|
||||
std::size_t length(string_range r) noexcept;
|
||||
|
||||
/* @brief Get the number of Unicode code points in a UTF-32 string.
|
||||
*
|
||||
|
|
|
@ -191,13 +191,26 @@ bool decode(wstring_range &r, char32_t &ret) noexcept {
|
|||
|
||||
/* Count the code points at the front of `r` that decode successfully.
 *
 * utf::decode is called repeatedly on the local copy of `r`; every call
 * that returns true adds one to the result. The first call that returns
 * false ends the count, and whatever remains of the range at that point
 * is stored in `cont` so the caller can inspect it or resume from it.
 *
 * @param r    the range to measure (taken by value, the caller's range
 *             is not modified through this parameter).
 * @param cont receives the remaining range once decoding stops.
 * @return the number of successful utf::decode calls.
 */
std::size_t length(string_range r, string_range &cont) noexcept {
    std::size_t ret = 0;
    /* Single loop only: the scraped view carried both sides of the diff
     * as two loop headers. `ch` is a scratch output for utf::decode and is
     * initialised so it never holds an indeterminate value.
     */
    char32_t ch = U'\0';
    while (utf::decode(r, ch)) {
        ++ret;
    }
    cont = r;
    return ret;
}
|
||||
|
||||
/* Count the characters in `r` without stopping at undecodable input.
 *
 * Each utf::decode call that succeeds counts as one character. When a
 * call fails on a non-empty range, one element is dropped from the front
 * of the range and is itself counted as one character, then decoding
 * carries on from there. A failed call on an empty range ends the count.
 *
 * @param r the range to measure (taken by value and consumed locally).
 * @return successful decodes plus the number of elements skipped.
 */
std::size_t length(string_range r) noexcept {
    std::size_t count = 0;
    char32_t cp; /* scratch output for utf::decode; the value is unused */
    while (true) {
        if (!utf::decode(r, cp)) {
            if (r.empty()) {
                /* nothing left to decode or skip */
                return count;
            }
            /* step over exactly one element and count it below */
            r.pop_front();
        }
        ++count;
    }
}
|
||||
|
||||
/* unicode-aware ctype
|
||||
* the other ones use custom tables for lookups
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue