relax the rules of zero-argument utf::length

master
Daniel Kolesa 2018-01-06 00:27:04 +01:00
parent e5162233d4
commit aeb5023b30
2 changed files with 21 additions and 8 deletions

View File

@ -1104,15 +1104,15 @@ namespace utf {
/* @brief Get the number of Unicode code points in a valid UTF-8 string.
*
* If an invalid UTF-8 sequence is encountered, it returns the length
* until that sequence.
* Each code unit that cannot be decoded as part of a valid UTF-8
* sequence is counted as 1 character, so the resulting length is the
* number of valid code points plus the number of invalid code units,
* as if each of them had been replaced with a valid code point.
*
* If you need to get the continuation string, use the general
* error-handling overload of the function.
* If you need to stop at an invalid code unit and get the
* continuation string, use the overload above.
*/
/* Forwards to the two-argument utf::length overload.
 *
 * r is handed over twice: once as the input (that parameter is taken by
 * value, so the callee works on its own copy) and once as the
 * continuation out-parameter. The continuation is therefore written
 * into this function's local r and dropped on return; only the count
 * is passed back to the caller.
 */
inline std::size_t length(string_range r) noexcept {
return utf::length(r, r);
}
std::size_t length(string_range r) noexcept;
/* @brief Get the number of Unicode code points in a UTF-32 string.
*

View File

@ -191,13 +191,26 @@ bool decode(wstring_range &r, char32_t &ret) noexcept {
/* Counts how many times utf::decode succeeds on r: the counter goes up
 * once per successful call and the loop ends at the first call that
 * returns false. Whatever remains of r at that point is stored into
 * cont, and the count is returned.
 *
 * NOTE(review): two `for` headers appear below in this view; they differ
 * only in whether ch is given an initial value before utf::decode is
 * called. ch is never read in this function, so presumably decode is the
 * only writer - confirm against utf::decode.
 */
std::size_t length(string_range r, string_range &cont) noexcept {
std::size_t ret = 0;
for (char32_t ch = U'\0'; utf::decode(r, ch); ++ret) {
for (char32_t ch; utf::decode(r, ch); ++ret) {
continue;
}
/* hand the unconsumed remainder back to the caller */
cont = r;
return ret;
}
/* Lenient code point count for a UTF-8 range.
 *
 * Every successful utf::decode call contributes one to the result. When
 * a decode call fails and the range still has elements, the front
 * element is dropped with pop_front() and also contributes one. The
 * walk finishes when a decode call fails on an empty range.
 */
std::size_t length(string_range r) noexcept {
    std::size_t count = 0;
    for (;;) {
        char32_t decoded;
        bool const ok = utf::decode(r, decoded);
        if (!ok) {
            if (r.empty()) {
                /* nothing left to look at */
                break;
            }
            /* step over the front element and count it like any other */
            r.pop_front();
        }
        ++count;
    }
    return count;
}
/* unicode-aware ctype
* the other ones use custom tables for lookups
*/