expose unicode stuff through string slices

master
Daniel Kolesa 2017-12-31 19:12:51 +01:00
parent fb2f9e3b0e
commit d3cdbe2fcf
3 changed files with 34 additions and 8 deletions

View File

@ -257,7 +257,6 @@ namespace detail {
/* Converts the wide character `c` to its multibyte form using the
 * std::codecvt<wchar_t, char, std::mbstate_t> facet of the locale `loc`.
 *
 * The conversion itself is delegated to ac_to_mb(), which is handed `c`,
 * the facet and `buf`; its return value is passed through unchanged
 * (presumably a byte count or an error indicator -- confirm against
 * ac_to_mb()).
 */
inline int wc_to_mb_loc(wchar_t c, char *buf, std::locale const &loc) {
    auto &f = std::use_facet<std::codecvt<wchar_t, char, std::mbstate_t>>(loc);
    /* no local std::mbstate_t is declared here: ac_to_mb() is not given one */
    return ac_to_mb(c, f, buf);
}
}

View File

@ -89,7 +89,7 @@ namespace utf {
* The string is advanced past the UTF-8 character in the front.
* If the decoding fails, `false` is returned, otherwise it's `true`.
*/
bool codepoint(string_range &r, char32_t &ret);
bool codepoint(string_range &r, char32_t &ret) noexcept;
/* @brief Get the number of Unicode code points in a string.
*
@ -102,7 +102,7 @@ namespace utf {
* If you're sure the string is valid or you don't need to handle the
* error, you can use the more convenient overload below.
*/
std::size_t length(string_range r, string_range &cont);
std::size_t length(string_range r, string_range &cont) noexcept;
/* @brief Get the number of Unicode code points in a valid UTF-8 string.
*
@ -112,7 +112,7 @@ namespace utf {
* If you need to get the continuation string, use the general
* error-handling overload of the function.
*/
std::size_t length(string_range r);
std::size_t length(string_range r) noexcept;
} /* namespace utf */
/** @brief A string slice type.
@ -279,6 +279,22 @@ public:
/** @brief Gets the size of the slice, counted in value_type units.
 *
 * Computed as the difference between the end and the beginning of
 * the slice.
 */
size_type size() const noexcept {
    auto const count = p_end - p_beg;
    return count;
}
/** @brief Gets the number of Unicode code points in the slice.
 *
 * Forwards to the single-argument utf::length(), passing this slice.
 */
size_type length() const noexcept {
    size_type const ncodes = utf::length(*this);
    return ncodes;
}
/** @brief Gets the number of code points in the slice, with continuation.
 *
 * Forwards to the two-argument utf::length(), passing this slice and
 * `cont`; on return `cont` holds whatever that overload stored in its
 * continuation argument.
 */
size_type length(basic_char_range &cont) const noexcept {
    size_type const ncodes = utf::length(*this, cont);
    return ncodes;
}
/** @brief Creates a sub-slice of the slice.
*
* Behavior is undefined if `start` and `end` are not within the
@ -374,6 +390,12 @@ diffsize:
return (s1 < s2) ? -1 : ((s1 > s2) ? 1 : 0);
}
/** @brief Iterates over the code points of the string.
 *
 * Like utf::iter_codes(), applied to this slice. This is only the
 * declaration; the definition is provided out of line.
 */
inline auto iter_codes() const;
/** @brief Implicitly converts a string slice to std::basic_string_view.
*
* String views represent more or less the same thing but they're always
@ -787,6 +809,11 @@ namespace utf {
} /* namespace utf */
/* Out-of-line definition of basic_char_range<T>::iter_codes(): hands this
 * slice to utf::iter_codes() and returns its result.
 *
 * NOTE(review): presumably placed after the utf namespace so that
 * utf::iter_codes() is already declared at this point -- confirm.
 */
template<typename T>
inline auto basic_char_range<T>::iter_codes() const {
return utf::iter_codes(*this);
}
/* string literals */
inline namespace literals {

View File

@ -11,7 +11,7 @@ namespace utf {
constexpr std::uint32_t MaxCodepoint = 0x10FFFF;
static inline bool codepoint_dec(string_range &r, char32_t &cret) {
static inline bool codepoint_dec(string_range &r, char32_t &cret) noexcept {
static const std::uint32_t ulim[] = { 0xFF, 0x7F, 0x7FF, 0xFFFF };
if (r.empty()) {
return false;
@ -61,11 +61,11 @@ static inline bool codepoint_dec(string_range &r, char32_t &cret) {
return true;
}
bool codepoint(string_range &r, char32_t &ret) {
bool codepoint(string_range &r, char32_t &ret) noexcept {
return codepoint_dec(r, ret);
}
std::size_t length(string_range r, string_range &cont) {
std::size_t length(string_range r, string_range &cont) noexcept {
std::size_t ret = 0;
for (char32_t ch = U'\0'; codepoint_dec(r, ch); ++ret) {
continue;
@ -74,7 +74,7 @@ std::size_t length(string_range r, string_range &cont) {
return ret;
}
std::size_t length(string_range r) {
std::size_t length(string_range r) noexcept {
return length(r, r);
}