2017-04-10 17:40:28 +00:00
|
|
|
/** @defgroup Strings
|
2015-05-27 20:43:13 +00:00
|
|
|
*
|
2017-04-10 17:40:28 +00:00
|
|
|
* @brief Provides string processing extensions.
|
|
|
|
*
|
|
|
|
* As libostd provides a range system, it represents string slices as
|
|
|
|
* contiguous ranges of characters. This has many advantages, such as
|
|
|
|
* being able to use them with generic algorithms. The string slices are
|
|
|
|
* not zero terminated, which means creating subslices is very fast, it's
|
|
|
|
* basically just pointer arithmetic.
|
|
|
|
*
|
|
|
|
* Integration with existing string handling facilities is ensured, so you
|
|
|
|
* can incorporate libostd into any existing project and still benefit from
|
|
|
|
* the new features.
|
|
|
|
*
|
|
|
|
* A simple example:
|
|
|
|
*
|
|
|
|
* ~~~{.cc}
|
|
|
|
* #include <ostd/string.hh>
|
|
|
|
* #include <ostd/io.hh>
|
|
|
|
*
|
|
|
|
* int main() {
|
|
|
|
* ostd::string_range x = "hello world";
|
|
|
|
* auto p1 = x.slice(0, 5);
|
|
|
|
* auto p2 = x.slice(6);
|
|
|
|
* ostd::writeln(p1); // hello
|
|
|
|
* ostd::writeln(p2); // world
|
|
|
|
* }
|
|
|
|
* ~~~
|
|
|
|
*
|
2017-05-03 00:14:27 +00:00
|
|
|
* An example of using libostd string formatting:
|
|
|
|
*
|
|
|
|
* @include format.cc
|
|
|
|
*
|
2017-04-10 17:40:28 +00:00
|
|
|
* See the examples provided with the library for further information.
|
|
|
|
*
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @file string.hh
|
|
|
|
*
|
|
|
|
* @brief String slice implementation as well as other utilities.
|
|
|
|
*
|
|
|
|
* This file implements string slices, their comparisons, utilities,
|
|
|
|
* standard C++ string range integration, range literals, std::hash
|
|
|
|
* support for string slices and others.
|
|
|
|
*
|
|
|
|
* @copyright See COPYING.md in the project tree for further information.
|
2015-05-27 20:43:13 +00:00
|
|
|
*/
|
|
|
|
|
2015-07-13 19:08:55 +00:00
|
|
|
#ifndef OSTD_STRING_HH
|
|
|
|
#define OSTD_STRING_HH
|
2015-05-27 20:43:13 +00:00
|
|
|
|
2017-12-31 13:50:48 +00:00
|
|
|
#include <cstdint>
|
2017-04-09 14:44:45 +00:00
|
|
|
#include <cstddef>
|
2018-01-06 00:08:19 +00:00
|
|
|
#include <cstring>
|
2017-01-29 20:22:40 +00:00
|
|
|
#include <string>
|
|
|
|
#include <string_view>
|
2017-02-01 19:56:19 +00:00
|
|
|
#include <type_traits>
|
2017-02-08 00:06:50 +00:00
|
|
|
#include <functional>
|
2017-02-16 19:39:05 +00:00
|
|
|
#include <utility>
|
2017-04-23 13:30:51 +00:00
|
|
|
#include <vector>
|
2017-12-31 13:50:48 +00:00
|
|
|
#include <stdexcept>
|
2017-01-29 20:22:40 +00:00
|
|
|
|
2017-06-19 14:59:36 +00:00
|
|
|
#include <ostd/range.hh>
|
|
|
|
#include <ostd/algorithm.hh>
|
2015-05-27 20:43:13 +00:00
|
|
|
|
2015-07-13 19:07:14 +00:00
|
|
|
namespace ostd {
|
2015-06-09 17:59:25 +00:00
|
|
|
|
2018-01-05 02:05:17 +00:00
|
|
|
/* wide strings are treated as UTF-8, UTF-16 or UTF-32 depending on the
 * size of wchar_t, so it has to match one of the fixed character types
 */
static_assert(
    (sizeof(wchar_t) == sizeof(char))
        || (sizeof(wchar_t) == sizeof(char16_t))
        || (sizeof(wchar_t) == sizeof(char32_t)),
    "wchar_t must correspond to either char, char16_t or char32_t"
);
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @addtogroup Strings
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @brief A string slice type.
|
|
|
|
*
|
|
|
|
* This is a contiguous range over a character type. The character type
|
|
|
|
* can be any of the standard character types, of any size - for example
|
2018-01-06 00:08:19 +00:00
|
|
|
* you would use `char32_t` to represent UTF-32 slices.
|
2017-04-10 17:40:28 +00:00
|
|
|
*
|
|
|
|
* The range is mutable, i.e. it implements the output range interface.
|
|
|
|
*/
|
2017-12-15 22:32:06 +00:00
|
|
|
template<typename T>
|
2017-02-16 19:02:55 +00:00
|
|
|
struct basic_char_range: input_range<basic_char_range<T>> {
|
2017-04-16 15:23:09 +00:00
|
|
|
using range_category = contiguous_range_tag;
|
|
|
|
using value_type = T;
|
|
|
|
using reference = T &;
|
|
|
|
using size_type = std::size_t;
|
2017-02-13 22:04:02 +00:00
|
|
|
|
2015-07-21 20:16:38 +00:00
|
|
|
private:
|
2017-02-17 16:50:44 +00:00
|
|
|
struct nat {};
|
2015-07-21 20:16:38 +00:00
|
|
|
|
|
|
|
public:
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Constructs an empty slice. */
|
2018-01-03 16:09:28 +00:00
|
|
|
basic_char_range() noexcept: p_beg(nullptr), p_end(nullptr) {}
|
2017-04-10 17:40:28 +00:00
|
|
|
|
|
|
|
/** @brief Constructs a slice from two pointers.
|
|
|
|
*
|
|
|
|
* The first pointer is the beginning of the slice
|
|
|
|
* and the second pointer is just past the end.
|
|
|
|
*/
|
|
|
|
basic_char_range(value_type *beg, value_type *end) noexcept:
|
|
|
|
p_beg(beg), p_end(end)
|
|
|
|
{}
|
2015-08-06 23:08:59 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Constructs an empty slice. */
|
|
|
|
basic_char_range(std::nullptr_t) noexcept:
|
|
|
|
p_beg(nullptr), p_end(nullptr)
|
|
|
|
{}
|
|
|
|
|
2018-01-03 16:09:28 +00:00
|
|
|
/** @brief Slices are arbitrarily copy constructible. */
|
|
|
|
basic_char_range(basic_char_range const &v) noexcept:
|
|
|
|
p_beg(v.p_beg), p_end(v.p_end)
|
|
|
|
{}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Constructs a slice from a pointer or a static array.
|
|
|
|
*
|
|
|
|
* This constructor handles two cases. The input must be convertible
|
|
|
|
* to `T *`, if it's not, this constructor is not enabled. Effectively,
|
|
|
|
* if the input is a static array of `T`, the entire array is used to
|
|
|
|
* create the slice, minus the potential zero at the end. If there is
|
|
|
|
* no zero at the end, nothing is removed and the array is used whole.
|
2017-12-15 22:32:06 +00:00
|
|
|
* If the input is not an array, the size is checked at runtime.
|
2017-04-10 17:40:28 +00:00
|
|
|
*/
|
2017-02-17 16:50:44 +00:00
|
|
|
template<typename U>
|
|
|
|
basic_char_range(U &&beg, std::enable_if_t<
|
2017-04-10 17:40:28 +00:00
|
|
|
std::is_convertible_v<U, value_type *>, nat
|
|
|
|
> = nat{}) noexcept: p_beg(beg) {
|
2017-02-17 16:50:44 +00:00
|
|
|
if constexpr(std::is_array_v<std::remove_reference_t<U>>) {
|
2017-04-09 14:44:45 +00:00
|
|
|
std::size_t N = std::extent_v<std::remove_reference_t<U>>;
|
2017-02-17 16:50:44 +00:00
|
|
|
p_end = beg + N - (beg[N - 1] == '\0');
|
|
|
|
} else {
|
2018-01-06 00:08:19 +00:00
|
|
|
p_end = beg + (beg ? std::strlen(beg) : 0);
|
2017-02-17 16:50:44 +00:00
|
|
|
}
|
|
|
|
}
|
2015-07-01 19:09:02 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Constructs a slice from an std::basic_string.
|
|
|
|
*
|
|
|
|
* This uses the string's data to construct a matching slice.
|
|
|
|
*/
|
2017-02-09 20:39:03 +00:00
|
|
|
template<typename STR, typename A>
|
2017-04-10 17:40:28 +00:00
|
|
|
basic_char_range(
|
|
|
|
std::basic_string<std::remove_const_t<value_type>, STR, A> const &s
|
|
|
|
) noexcept:
|
2016-07-31 19:40:25 +00:00
|
|
|
p_beg(s.data()), p_end(s.data() + s.size())
|
|
|
|
{}
|
2015-06-09 17:59:25 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Constructs a slice from a different but compatible slice.
|
|
|
|
*
|
2018-01-06 00:08:19 +00:00
|
|
|
* A pointer to the other slice's value type must be convertible to
|
|
|
|
* a pointer to the new slice's value type, otherwise the constructor
|
|
|
|
* will not be enabled.
|
2017-04-10 17:40:28 +00:00
|
|
|
*/
|
2017-12-15 22:32:06 +00:00
|
|
|
template<typename U, typename = std::enable_if_t<
|
2017-04-10 17:40:28 +00:00
|
|
|
std::is_convertible_v<U *, value_type *>
|
2017-02-09 19:56:15 +00:00
|
|
|
>>
|
2017-12-15 22:32:06 +00:00
|
|
|
basic_char_range(basic_char_range<U> const &v) noexcept:
|
2016-07-31 19:40:25 +00:00
|
|
|
p_beg(&v[0]), p_end(&v[v.size()])
|
|
|
|
{}
|
2015-06-17 01:00:39 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Slices are arbitrarily copy constructible. */
|
|
|
|
basic_char_range &operator=(basic_char_range const &v) noexcept {
|
2015-06-09 17:59:25 +00:00
|
|
|
p_beg = v.p_beg; p_end = v.p_end; return *this;
|
|
|
|
}
|
2015-07-01 19:09:02 +00:00
|
|
|
|
2018-01-06 00:08:19 +00:00
|
|
|
/** @brief Assigns the slice's data from a matching std::basic_string. */
|
2017-02-09 20:39:03 +00:00
|
|
|
template<typename STR, typename A>
|
2017-04-10 17:40:28 +00:00
|
|
|
basic_char_range &operator=(
|
|
|
|
std::basic_string<value_type, STR, A> const &s
|
|
|
|
) noexcept {
|
2015-06-09 17:59:25 +00:00
|
|
|
p_beg = s.data(); p_end = s.data() + s.size(); return *this;
|
|
|
|
}
|
2017-02-09 20:39:03 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Assigns the slice's data from a pointer.
|
|
|
|
*
|
|
|
|
* The data pointed to by the argument must be zero terminated.
|
|
|
|
*/
|
|
|
|
basic_char_range &operator=(value_type *s) noexcept {
|
2018-01-06 00:08:19 +00:00
|
|
|
p_beg = s; p_end = s + (s ? std::strlen(s) : 0); return *this;
|
2015-06-09 17:59:25 +00:00
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Checks if the slice is empty. */
|
|
|
|
bool empty() const noexcept { return p_beg == p_end; }
|
|
|
|
|
|
|
|
/** @brief Pops the first character out of the slice.
|
|
|
|
*
|
|
|
|
* This is bounds checked, std::out_of_range is thrown when
|
|
|
|
* slice was already empty before popping out the character.
|
|
|
|
* No changes are done to the slice if it throws.
|
|
|
|
*
|
|
|
|
* @throws std::out_of_range when empty.
|
|
|
|
*
|
|
|
|
* @see front(), pop_back()
|
|
|
|
*/
|
2017-02-19 15:45:06 +00:00
|
|
|
void pop_front() {
|
2017-04-10 17:40:28 +00:00
|
|
|
if (p_beg == p_end) {
|
2017-02-19 15:45:06 +00:00
|
|
|
throw std::out_of_range{"pop_front on empty range"};
|
|
|
|
}
|
2017-04-10 17:40:28 +00:00
|
|
|
++p_beg;
|
2017-02-19 15:45:06 +00:00
|
|
|
}
|
2015-06-09 17:59:25 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Gets a reference to the first character.
|
|
|
|
*
|
|
|
|
* The behavior is undefined when the slice is empty.
|
|
|
|
*
|
|
|
|
* @see back(), pop_front()
|
|
|
|
*/
|
|
|
|
reference front() const noexcept { return *p_beg; }
|
|
|
|
|
|
|
|
/** @brief Pops the last character out of the slice.
|
|
|
|
*
|
|
|
|
* This is bounds checked, std::out_of_range is thrown when
|
|
|
|
* slice was already empty before popping out the character.
|
|
|
|
* No changes are done to the slice if it throws.
|
|
|
|
*
|
|
|
|
* @throws std::out_of_range when empty.
|
|
|
|
*
|
|
|
|
* @see back(), pop_front()
|
|
|
|
*/
|
2017-02-19 15:45:06 +00:00
|
|
|
void pop_back() {
|
2017-04-10 17:40:28 +00:00
|
|
|
if (p_beg == p_end) {
|
|
|
|
throw std::out_of_range{"pop_back on empty range"};
|
2016-07-31 19:40:25 +00:00
|
|
|
}
|
2015-06-09 17:59:25 +00:00
|
|
|
--p_end;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Gets a reference to the last character.
|
|
|
|
*
|
|
|
|
* The behavior is undefined when the slice is empty.
|
|
|
|
*
|
|
|
|
* @see front(), pop_back()
|
|
|
|
*/
|
|
|
|
reference back() const noexcept { return *(p_end - 1); }
|
|
|
|
|
|
|
|
/** @brief Gets the number of value_type in the slice. */
|
2018-01-03 16:09:28 +00:00
|
|
|
size_type size() const noexcept { return size_type(p_end - p_beg); }
|
2017-04-10 17:40:28 +00:00
|
|
|
|
2017-12-31 18:12:51 +00:00
|
|
|
/** @brief Gets the number of code points in the slice.
|
|
|
|
*
|
|
|
|
* Effectively the same as utf::length().
|
|
|
|
*/
|
2017-12-31 18:16:16 +00:00
|
|
|
inline size_type length() const noexcept;
|
2017-12-31 18:12:51 +00:00
|
|
|
|
|
|
|
/** @brief Gets the number of code points in the slice.
|
|
|
|
*
|
|
|
|
* Effectively the same as utf::length().
|
|
|
|
*/
|
2017-12-31 18:16:16 +00:00
|
|
|
inline size_type length(basic_char_range &cont) const noexcept;
|
2017-12-31 18:12:51 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Creates a sub-slice of the slice.
|
|
|
|
*
|
|
|
|
* Behavior is undefined if `start` and `end` are not within the
|
|
|
|
* slice's bounds. There is no bound checking done in this call.
|
|
|
|
* It's also undefined if the first argument is larger than the
|
|
|
|
* second argument.
|
|
|
|
*/
|
|
|
|
basic_char_range slice(size_type start, size_type end) const noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return basic_char_range(p_beg + start, p_beg + end);
|
2015-06-09 17:59:25 +00:00
|
|
|
}
|
2017-04-10 17:40:28 +00:00
|
|
|
|
|
|
|
/** @brief Creates a sub-slice of the slice until the end.
|
|
|
|
*
|
|
|
|
* Equivalent to slice(size_type, size_type) with `size()` as
|
|
|
|
* the second argument. The first argument must be within the
|
|
|
|
* slice's boundaries otherwis the behavior is undefined.
|
|
|
|
*/
|
|
|
|
basic_char_range slice(size_type start) const noexcept {
|
2017-04-01 14:49:38 +00:00
|
|
|
return slice(start, size());
|
|
|
|
}
|
2015-06-09 17:59:25 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Gets a reference to a character within the slice.
|
|
|
|
*
|
|
|
|
* The behavior is undefined if the index is not within the bounds.
|
|
|
|
*/
|
|
|
|
reference operator[](size_type i) const noexcept { return p_beg[i]; }
|
|
|
|
|
|
|
|
/** @brief Writes a character at the beginning and pops it out.
|
|
|
|
*
|
|
|
|
* @throws std::out_of_range when empty.
|
|
|
|
*/
|
|
|
|
void put(value_type v) {
|
2017-02-19 17:31:08 +00:00
|
|
|
if (p_beg == p_end) {
|
|
|
|
throw std::out_of_range{"put into an empty range"};
|
|
|
|
}
|
2015-06-09 17:59:25 +00:00
|
|
|
*(p_beg++) = v;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Gets the pointer to the beginning. */
|
|
|
|
value_type *data() noexcept { return p_beg; }
|
|
|
|
|
|
|
|
/** @brief Gets the pointer to the beginning. */
|
|
|
|
value_type const *data() const noexcept { return p_beg; }
|
|
|
|
|
|
|
|
/** @brief Compares two slices.
|
|
|
|
*
|
|
|
|
* This works similarly to the C function `strcmp` or the `compare`
|
|
|
|
* method of std::char_traits, but does not depend on the strings
|
|
|
|
* to be terminated.
|
|
|
|
*
|
2018-01-03 00:22:07 +00:00
|
|
|
* It performs an ordinary lexicographical comparison, the values
|
|
|
|
* are compared and the first string to have a lesser value is
|
|
|
|
* considered lexicographically less. If they are equal up to a
|
|
|
|
* point but one of them terminates early, it's also less.
|
|
|
|
*
|
|
|
|
* If the `this` slice is the lesser one, a negative value is
|
|
|
|
* returned. If they are equal (if they're both zero length,
|
|
|
|
* it counts as equal) then `0` is returned. Otherwise, a
|
|
|
|
* positive value is returned.
|
|
|
|
*
|
|
|
|
* This works with the slice's native unit values, i.e. bytes
|
|
|
|
* for UTF-8, `char16_t` for UTF-16 and `char32_t` for UTF-32.
|
|
|
|
* These units are compared by getting the difference between
|
|
|
|
* them (i.e. `this[index] - other[index]`).
|
2017-04-10 17:40:28 +00:00
|
|
|
*
|
|
|
|
* It is not a part of the range interface, just the string slice
|
|
|
|
* interface.
|
|
|
|
*
|
|
|
|
* @see case_compare()
|
|
|
|
*/
|
|
|
|
int compare(basic_char_range<value_type const> s) const noexcept {
|
2017-04-09 14:44:45 +00:00
|
|
|
size_type s1 = size(), s2 = s.size();
|
2018-01-03 00:22:07 +00:00
|
|
|
for (size_type i = 0, ms = std::min(s1, s2); i < ms; ++i) {
|
2018-01-03 16:09:28 +00:00
|
|
|
int d = int(p_beg[i]) - int(s[i]);
|
2018-01-03 00:22:07 +00:00
|
|
|
if (d) {
|
|
|
|
return d;
|
|
|
|
}
|
2016-08-17 23:34:20 +00:00
|
|
|
}
|
|
|
|
return (s1 < s2) ? -1 : ((s1 > s2) ? 1 : 0);
|
2015-07-24 18:43:39 +00:00
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Compares two slices in a case insensitive manner.
|
|
|
|
*
|
2018-01-03 00:22:07 +00:00
|
|
|
* Works exactly the same as compare(), but in a case insensitive
|
|
|
|
* way, i.e. it lowercases the characters and compares them after
|
|
|
|
* that.
|
2017-04-10 17:40:28 +00:00
|
|
|
*
|
2018-01-03 00:22:07 +00:00
|
|
|
* For UTF-8, it decodes the string on the fly, then lowercases the
|
|
|
|
* decoded code points and uses their difference (without encoding
|
|
|
|
* them back). If the decoding fails, the failing code unit is used
|
|
|
|
* as-is, so this function never fails. Identical treatment is given
|
|
|
|
* to UTF-16.
|
2017-04-10 17:40:28 +00:00
|
|
|
*/
|
2018-01-03 00:22:07 +00:00
|
|
|
inline int case_compare(basic_char_range<value_type const> s) const noexcept;
|
2016-09-01 23:06:13 +00:00
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over the UTF-8 code units of the string.
|
|
|
|
*
|
|
|
|
* Like utf::iter_u8().
|
|
|
|
*/
|
|
|
|
inline auto iter_u8() const;
|
|
|
|
|
|
|
|
/** @brief Iterate over the UTF-16 units of the string.
|
|
|
|
*
|
|
|
|
* Like utf::iter_u16().
|
|
|
|
*/
|
|
|
|
inline auto iter_u16() const;
|
|
|
|
|
2017-12-31 18:12:51 +00:00
|
|
|
/** @brief Iterate over the code points of the string.
|
|
|
|
*
|
2018-01-06 01:10:38 +00:00
|
|
|
* Like utf::iter_u32().
|
2017-12-31 18:12:51 +00:00
|
|
|
*/
|
2018-01-06 01:10:38 +00:00
|
|
|
inline auto iter_u32() const;
|
2017-12-31 18:12:51 +00:00
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over the Unicode wide characters of the string.
|
2018-01-06 00:59:13 +00:00
|
|
|
*
|
2018-01-07 01:17:05 +00:00
|
|
|
* Like utf::iter_uw().
|
2018-01-06 00:59:13 +00:00
|
|
|
*/
|
2018-01-07 01:17:05 +00:00
|
|
|
inline auto iter_uw() const;
|
|
|
|
|
|
|
|
/** @brief Iterate over the Unicode units of the given type.
|
|
|
|
*
|
|
|
|
* Like utf::iter_u().
|
|
|
|
*/
|
|
|
|
template<typename C>
|
|
|
|
inline auto iter_u() const;
|
2018-01-06 00:59:13 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Implicitly converts a string slice to std::basic_string_view.
|
|
|
|
*
|
|
|
|
* String views represent more or less the same thing but they're always
|
|
|
|
* immutable. This simple conversion allows usage of string slices on
|
|
|
|
* any API that uses either strings or string view, as well as construct
|
|
|
|
* strings and string views out of slices.
|
|
|
|
*/
|
|
|
|
operator std::basic_string_view<std::remove_cv_t<value_type>>()
|
|
|
|
const noexcept
|
|
|
|
{
|
2017-02-09 19:56:15 +00:00
|
|
|
return std::basic_string_view<std::remove_cv_t<T>>{data(), size()};
|
2017-01-29 20:22:40 +00:00
|
|
|
}
|
|
|
|
|
2015-06-09 17:59:25 +00:00
|
|
|
private:
|
|
|
|
T *p_beg, *p_end;
|
|
|
|
};
|
|
|
|
|
2017-12-31 18:17:02 +00:00
|
|
|
/** @brief A mutable slice over `char`. */
|
|
|
|
using char_range = basic_char_range<char>;
|
|
|
|
|
2017-12-31 22:42:46 +00:00
|
|
|
/** @brief A mutable slice over `wchar_t`. */
|
|
|
|
using wchar_range = basic_char_range<wchar_t>;
|
|
|
|
|
|
|
|
/** @brief A mutable slice over `char16_t`. */
|
|
|
|
using char16_range = basic_char_range<char16_t>;
|
|
|
|
|
|
|
|
/** @brief A mutable slice over `char32_t`. */
|
|
|
|
using char32_range = basic_char_range<char32_t>;
|
|
|
|
|
2017-12-31 18:17:02 +00:00
|
|
|
/** @brief An immutable slice over `char`.
|
|
|
|
*
|
|
|
|
* This is used in most libostd APIs that read strings. More or less
|
|
|
|
* anything is convertible to it, including mutable slices, so it's
|
|
|
|
* a perfect fit as long as modifications are not necessary.
|
|
|
|
*/
|
|
|
|
using string_range = basic_char_range<char const>;
|
|
|
|
|
2017-12-31 22:42:46 +00:00
|
|
|
/** @brief An immutable slice over `wchar_t`.
|
|
|
|
*
|
|
|
|
* Included primarily for compatibility with other APIs.
|
|
|
|
*/
|
|
|
|
using wstring_range = basic_char_range<wchar_t const>;
|
|
|
|
|
|
|
|
/** @brief An immutable slice over `char16_t`.
|
|
|
|
*
|
|
|
|
* Included for basic UTF-16 compatibility.
|
|
|
|
*/
|
|
|
|
using u16string_range = basic_char_range<char16_t const>;
|
|
|
|
|
|
|
|
/** @brief An immutable slice over `char32_t`.
|
|
|
|
*
|
|
|
|
* Can represent UTF-32 strings.
|
|
|
|
*/
|
|
|
|
using u32string_range = basic_char_range<char32_t const>;
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/* comparisons between utf-8 ranges */
|
2017-02-16 17:48:14 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `!lhs.compare(rhs)`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator==(string_range lhs, string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return !lhs.compare(rhs);
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs)`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator!=(string_range lhs, string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs);
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) < 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator<(string_range lhs, string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) < 0;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) > 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator>(string_range lhs, string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) > 0;
|
|
|
|
}
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) <= 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator<=(string_range lhs, string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) <= 0;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) >= 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator>=(string_range lhs, string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) >= 0;
|
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/* comparisons between utf-16 ranges */
|
2017-02-16 17:48:14 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `!lhs.compare(rhs)`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator==(u16string_range lhs, u16string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return !lhs.compare(rhs);
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs)`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator!=(u16string_range lhs, u16string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs);
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) < 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator<(u16string_range lhs, u16string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) < 0;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) > 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator>(u16string_range lhs, u16string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) > 0;
|
|
|
|
}
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) <= 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator<=(u16string_range lhs, u16string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) <= 0;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) >= 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator>=(u16string_range lhs, u16string_range rhs) noexcept {
|
2017-02-16 17:48:14 +00:00
|
|
|
return lhs.compare(rhs) >= 0;
|
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/* comparisons between utf-32 ranges */
|
2017-02-16 17:48:14 +00:00
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Like `!lhs.compare(rhs)`. */
|
|
|
|
inline bool operator==(u32string_range lhs, u32string_range rhs) noexcept {
|
|
|
|
return !lhs.compare(rhs);
|
2017-02-16 17:48:14 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs)`. */
|
|
|
|
inline bool operator!=(u32string_range lhs, u32string_range rhs) noexcept {
|
|
|
|
return lhs.compare(rhs);
|
2017-02-16 17:48:14 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) < 0`. */
|
|
|
|
inline bool operator<(u32string_range lhs, u32string_range rhs) noexcept {
|
|
|
|
return lhs.compare(rhs) < 0;
|
2017-02-16 17:48:14 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) > 0`. */
|
|
|
|
inline bool operator>(u32string_range lhs, u32string_range rhs) noexcept {
|
|
|
|
return lhs.compare(rhs) > 0;
|
|
|
|
}
|
|
|
|
/** @brief Like `lhs.compare(rhs) <= 0`. */
|
|
|
|
inline bool operator<=(u32string_range lhs, u32string_range rhs) noexcept {
|
|
|
|
return lhs.compare(rhs) <= 0;
|
2017-02-16 17:48:14 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Like `lhs.compare(rhs) >= 0`. */
|
|
|
|
inline bool operator>=(u32string_range lhs, u32string_range rhs) noexcept {
|
|
|
|
return lhs.compare(rhs) >= 0;
|
2017-02-16 17:48:14 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/* comparisons between wide ranges */
|
2017-06-01 19:50:31 +00:00
|
|
|
|
|
|
|
/** @brief Like `!lhs.compare(rhs)`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator==(wstring_range lhs, wstring_range rhs) noexcept {
|
2017-06-01 19:50:31 +00:00
|
|
|
return !lhs.compare(rhs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Like `lhs.compare(rhs)`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator!=(wstring_range lhs, wstring_range rhs) noexcept {
|
2017-06-01 19:50:31 +00:00
|
|
|
return lhs.compare(rhs);
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Like `lhs.compare(rhs) < 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator<(wstring_range lhs, wstring_range rhs) noexcept {
|
2017-06-01 19:50:31 +00:00
|
|
|
return lhs.compare(rhs) < 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Like `lhs.compare(rhs) > 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator>(wstring_range lhs, wstring_range rhs) noexcept {
|
2017-06-01 19:50:31 +00:00
|
|
|
return lhs.compare(rhs) > 0;
|
|
|
|
}
|
|
|
|
/** @brief Like `lhs.compare(rhs) <= 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator<=(wstring_range lhs, wstring_range rhs) noexcept {
|
2017-06-01 19:50:31 +00:00
|
|
|
return lhs.compare(rhs) <= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** @brief Like `lhs.compare(rhs) >= 0`. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool operator>=(wstring_range lhs, wstring_range rhs) noexcept {
|
2017-06-01 19:50:31 +00:00
|
|
|
return lhs.compare(rhs) >= 0;
|
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Checks if a string slice starts with another slice. */
|
|
|
|
inline bool starts_with(string_range a, string_range b) noexcept {
|
|
|
|
if (a.size() < b.size()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return a.slice(0, b.size()) == b;
|
2017-06-01 19:50:31 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Checks if a string slice starts with another slice. */
|
|
|
|
inline bool starts_with(u16string_range a, u16string_range b) noexcept {
|
|
|
|
if (a.size() < b.size()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return a.slice(0, b.size()) == b;
|
2017-06-01 19:50:31 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:17:18 +00:00
|
|
|
/** @brief Checks if a string slice starts with another slice. */
|
|
|
|
inline bool starts_with(u32string_range a, u32string_range b) noexcept {
|
|
|
|
if (a.size() < b.size()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return a.slice(0, b.size()) == b;
|
2017-06-01 19:50:31 +00:00
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Checks if a string slice starts with another slice. */
|
2018-01-06 00:17:18 +00:00
|
|
|
inline bool starts_with(wstring_range a, wstring_range b) noexcept {
|
2016-07-31 19:40:25 +00:00
|
|
|
if (a.size() < b.size()) {
|
2015-12-31 15:36:41 +00:00
|
|
|
return false;
|
2016-07-31 19:40:25 +00:00
|
|
|
}
|
2015-12-31 15:36:41 +00:00
|
|
|
return a.slice(0, b.size()) == b;
|
|
|
|
}
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Mutable range integration for std::basic_string.
|
|
|
|
*
|
|
|
|
* The range type used for mutable string references
|
|
|
|
* is an ostd::basic_char_range with mutable values.
|
|
|
|
*/
|
2017-02-01 19:56:19 +00:00
|
|
|
template<typename T, typename TR, typename A>
|
|
|
|
struct ranged_traits<std::basic_string<T, TR, A>> {
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief The range type. */
|
2017-12-15 22:32:06 +00:00
|
|
|
using range = basic_char_range<T>;
|
2017-02-15 18:13:52 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Creates a range. */
|
|
|
|
static range iter(std::basic_string<T, TR, A> &v) noexcept {
|
2017-02-15 18:13:52 +00:00
|
|
|
return range{v.data(), v.data() + v.size()};
|
2017-01-29 20:22:40 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Immutable range integration for std::basic_string.
|
|
|
|
*
|
|
|
|
* The range type used for immutable string references
|
|
|
|
* is an ostd::basic_char_range with immutable values.
|
|
|
|
*/
|
2017-02-01 19:56:19 +00:00
|
|
|
template<typename T, typename TR, typename A>
|
|
|
|
struct ranged_traits<std::basic_string<T, TR, A> const> {
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief The range type. */
|
2017-12-15 22:32:06 +00:00
|
|
|
using range = basic_char_range<T const>;
|
2017-02-15 18:13:52 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @brief Creates a range. */
|
|
|
|
static range iter(std::basic_string<T, TR, A> const &v) noexcept {
|
2017-02-15 18:13:52 +00:00
|
|
|
return range{v.data(), v.data() + v.size()};
|
2017-01-29 20:22:40 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-12-31 13:50:48 +00:00
|
|
|
/* more UTF utilities beyond basic API */
|
|
|
|
|
|
|
|
namespace utf {
|
|
|
|
|
|
|
|
/** @addtogroup Strings
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @brief The exception type for UTF-8 decoding failures.
 *
 * It provides the same constructors as std::runtime_error.
 */
struct utf_error: public std::runtime_error {
    using std::runtime_error::runtime_error;

    /* only declared here; the (empty) out-of-line definition
     * exists for vtable placement
     */
    ~utf_error() override;
};
|
|
|
|
|
2018-01-05 01:18:36 +00:00
|
|
|
/* @brief Get the Unicode code point for a UTF-8 sequence.
|
2017-12-31 18:16:16 +00:00
|
|
|
*
|
2018-01-01 23:30:58 +00:00
|
|
|
* The string is advanced past the Unicode character in the front.
|
2017-12-31 18:16:16 +00:00
|
|
|
* If the decoding fails, `false` is returned, otherwise it's `true`.
|
|
|
|
*/
|
2017-12-31 19:06:36 +00:00
|
|
|
bool decode(string_range &r, char32_t &ret) noexcept;
|
2017-12-31 18:16:16 +00:00
|
|
|
|
2018-01-05 01:18:36 +00:00
|
|
|
/* @brief Get the Unicode code point for a UTF-16 sequence.
|
|
|
|
*
|
|
|
|
* The string is advanced past the Unicode character in the front.
|
|
|
|
* If the decoding fails, `false` is returned, otherwise it's `true`.
|
|
|
|
*/
|
|
|
|
bool decode(u16string_range &r, char32_t &ret) noexcept;
|
|
|
|
|
2018-01-01 23:30:58 +00:00
|
|
|
/* @brief Get the Unicode code point from a UTF-32 string.
|
|
|
|
*
|
2018-01-05 23:52:50 +00:00
|
|
|
* The string is advanced by one. It can also fail if utf::isvalid()
|
|
|
|
* returns `false` for the front character, in which case the string
|
|
|
|
* will not be advanced.
|
2018-01-01 23:30:58 +00:00
|
|
|
*/
|
2018-01-05 23:52:50 +00:00
|
|
|
bool decode(u32string_range &r, char32_t &ret) noexcept;
|
2018-01-01 23:30:58 +00:00
|
|
|
|
2018-01-05 18:25:20 +00:00
|
|
|
/* @brief Get the Unicode code point for a wide Unicode char/sequence.
|
|
|
|
*
|
|
|
|
* The input is treated as either UTF-8, UTF-16 or UTF-32 depending
|
|
|
|
* on the size of the wide character. Typically, it will be UTF-16
|
|
|
|
* on Windows and UTF-32 on Unix-like systems, with UTF-32 taking
|
|
|
|
* priority (on systems where two or more of the types are the same
|
|
|
|
* size).
|
|
|
|
*/
|
|
|
|
bool decode(wstring_range &r, char32_t &ret) noexcept;
|
|
|
|
|
2018-01-05 21:16:18 +00:00
|
|
|
template<typename R, typename C>
|
2018-01-05 23:17:47 +00:00
|
|
|
inline bool decode(R &sink, basic_char_range<C const> &r) {
|
2018-01-05 21:16:18 +00:00
|
|
|
if (char32_t ch; utf::decode(r, ch)) {
|
|
|
|
sink.put(ch);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-01-01 00:02:49 +00:00
|
|
|
namespace detail {
    /* writes the UTF-8 code units for `ch` into `ret`; utf::encode_u8()
     * treats the result as the number of units to take from `ret`, with
     * zero meaning failure
     */
    std::size_t u8_encode(char (&ret)[4], char32_t ch) noexcept;

    /* presumably the UTF-16 counterpart of the above, writing up to
     * two units - TODO confirm against the definition
     */
    std::size_t u16_encode(char16_t (&ret)[2], char32_t ch) noexcept;
}
|
|
|
|
|
|
|
|
/** @brief Encode a UTF-32 code point into UTF-8 code units.
|
|
|
|
*
|
|
|
|
* The units are written in `sink` which is an ostd::output_range_tag.
|
|
|
|
* The written values are of type `char` and up to 4 are written. The
|
|
|
|
* number of bytes written is returned from the function. In case of
|
|
|
|
* failure, `0` is returned.
|
|
|
|
*
|
|
|
|
* This function is allowed to fail only in two cases, when a surrogate
|
|
|
|
* code point is provided or when the code point is out of bounds as
|
|
|
|
 * defined by Unicode (i.e. greater than 0x10FFFF). It does not throw exceptions
|
|
|
|
* other than those thrown by `sink`.
|
|
|
|
*/
|
|
|
|
template<typename R>
|
2018-01-05 17:55:34 +00:00
|
|
|
inline std::size_t encode_u8(R &sink, char32_t ch) {
|
|
|
|
char buf[4];
|
|
|
|
std::size_t n = detail::u8_encode(buf, ch);
|
|
|
|
for (std::size_t i = 0; i < n; ++i) {
|
2018-01-01 00:02:49 +00:00
|
|
|
sink.put(buf[i]);
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:16:46 +00:00
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u8(R &sink, u32string_range &r) {
|
2018-01-05 20:49:00 +00:00
|
|
|
/* just a wrapper; does the same thing but advances */
|
2018-01-05 20:16:46 +00:00
|
|
|
std::size_t n = 0;
|
|
|
|
if (!r.empty() && (n = utf::encode_u8(sink, r.front()))) {
|
|
|
|
r.pop_front();
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:49:00 +00:00
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u8(R &sink, u16string_range &r) {
|
|
|
|
/* decodes to code point and encodes */
|
|
|
|
auto rr = r;
|
|
|
|
if (char32_t ch; utf::decode(rr, ch)) {
|
|
|
|
if (std::size_t ret; (ret = utf::encode_u8(sink, ch))) {
|
|
|
|
r = rr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u8(R &sink, string_range &r) {
|
|
|
|
/* identity match, advances */
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(r.front());
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u8(R &sink, wstring_range &r) {
|
|
|
|
/* for utf-32, decode is just a swapper, for utf-16 it
|
|
|
|
* actually decodes; in both cases it encodes to utf-8,
|
|
|
|
* for utf-8 the whole thing is just an advancing wrapper
|
|
|
|
*/
|
|
|
|
if constexpr(
|
|
|
|
(sizeof(wchar_t) == sizeof(char32_t)) ||
|
|
|
|
(sizeof(wchar_t) == sizeof(char16_t))
|
|
|
|
) {
|
|
|
|
auto rr = r;
|
|
|
|
if (char32_t ch; utf::decode(rr, ch)) {
|
|
|
|
if (std::size_t ret; (ret = utf::encode_u8(sink, ch))) {
|
|
|
|
r = rr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(char(r.front()));
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-01-05 01:18:36 +00:00
|
|
|
/** @brief Encode a UTF-32 code point into UTF-16.
|
|
|
|
*
|
|
|
|
* The values are written in `sink` which is an ostd::output_range_tag.
|
|
|
|
* The written values are of type `char16_t` and up to 2 are written.
|
|
|
|
* The number of values written is returned from the function. In case
|
|
|
|
* of failure, `0` is returned.
|
|
|
|
*
|
|
|
|
* This function is allowed to fail only in two cases, when a surrogate
|
|
|
|
* code point is provided or when the code point is out of bounds as
|
|
|
|
 * defined by Unicode (i.e. greater than 0x10FFFF). It does not throw exceptions
|
|
|
|
* other than those thrown by `sink`.
|
|
|
|
*/
|
|
|
|
template<typename R>
|
2018-01-05 17:55:34 +00:00
|
|
|
inline std::size_t encode_u16(R &sink, char32_t ch) {
|
|
|
|
char16_t buf[2];
|
|
|
|
std::size_t n = detail::u16_encode(buf, ch);
|
|
|
|
for (std::size_t i = 0; i < n; ++i) {
|
2018-01-05 01:18:36 +00:00
|
|
|
sink.put(buf[i]);
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:16:46 +00:00
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u16(R &sink, u32string_range &r) {
|
2018-01-05 20:49:00 +00:00
|
|
|
/* just a wrapper; does the same thing but advances */
|
2018-01-05 20:16:46 +00:00
|
|
|
std::size_t n = 0;
|
|
|
|
if (!r.empty() && (n = utf::encode_u16(sink, r.front()))) {
|
|
|
|
r.pop_front();
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:49:00 +00:00
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u16(R &sink, u16string_range &r) {
|
|
|
|
/* identity match, advances */
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(r.front());
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u16(R &sink, string_range &r) {
|
|
|
|
/* has to decode and encode */
|
|
|
|
auto rr = r;
|
|
|
|
if (char32_t ch; utf::decode(rr, ch)) {
|
|
|
|
if (std::size_t ret; (ret = utf::encode_u16(sink, ch))) {
|
|
|
|
r = rr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_u16(R &sink, wstring_range &r) {
|
|
|
|
/* when wchar_t is guaranteed utf-16, we have an identity
|
|
|
|
* match so we just advance; otherwise decode and encode
|
|
|
|
*/
|
|
|
|
if constexpr(
|
|
|
|
(sizeof(wchar_t) != sizeof(char32_t)) &&
|
|
|
|
(sizeof(wchar_t) == sizeof(char16_t))
|
|
|
|
) {
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(char16_t(r.front()));
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
auto rr = r;
|
|
|
|
if (char32_t ch; utf::decode(rr, ch)) {
|
|
|
|
if (std::size_t ret; (ret = utf::encode_u16(sink, ch))) {
|
|
|
|
r = rr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Decode the front of a string into a UTF-32 value in a sink.
 *
 * Calls `decode` on `r`; when it succeeds, the decoded code point is
 * written into `sink` and `1` is returned. Otherwise nothing is written
 * and `0` is returned.
 */
template<typename OR, typename IR>
inline std::size_t encode_u32(OR &sink, IR &r) {
    char32_t cp;
    if (!decode(r, cp)) {
        return 0;
    }
    sink.put(cp);
    return 1;
}
|
|
|
|
|
2018-01-05 18:25:20 +00:00
|
|
|
/** @brief Encode a UTF-32 code point into a wide Unicode char/sequence.
|
|
|
|
*
|
|
|
|
* The value(s) are written in `sink` which is an ostd::output_range_tag.
|
|
|
|
* The written values are of type `wchar_t` and the amount written depends
|
|
|
|
* on the size of `wchar_t`.
|
|
|
|
*
|
|
|
|
* If `wchar_t` has equal size to `char32_t`, the input is simply type
|
|
|
|
* cast and written into the sink, treating `wchar_t` as UTF-32. If it
|
|
|
|
* has equal size to `char16_t` instead, `wchar_t` is treated as UTF-16
|
|
|
|
* and the input code point is encoded into one or two UTF-16 values.
|
|
|
|
* If neither of these happens, `wchar_t` is treated the same as `char`
|
|
|
|
* and the encoding is UTF-8, writing up to 4 code units.
|
|
|
|
*
|
|
|
|
* This function does not throw exceptions other than those thrown by
|
|
|
|
* `sink`. As for errors, with UTF-32 `wchar_t` it isn't allowed to
|
|
|
|
* fail; with UTF-8 or UTF-16, the failure points are the usual ones
|
|
|
|
* (surrogate code point as input or input greater than 0x10FFFF).
|
|
|
|
*
|
|
|
|
* The return value is the number of values written into the sink.
|
|
|
|
*/
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_uw(R &sink, char32_t ch) {
|
|
|
|
std::size_t n;
|
|
|
|
if constexpr(sizeof(wchar_t) == sizeof(char32_t)) {
|
|
|
|
n = 1;
|
|
|
|
sink.put(wchar_t(ch));
|
|
|
|
} else if constexpr(sizeof(wchar_t) == sizeof(char16_t)) {
|
|
|
|
char16_t buf[2];
|
|
|
|
n = detail::u16_encode(buf, ch);
|
|
|
|
for (std::size_t i = 0; i < n; ++i) {
|
|
|
|
sink.put(wchar_t(buf[i]));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
char buf[4];
|
|
|
|
n = detail::u8_encode(buf, ch);
|
|
|
|
for (std::size_t i = 0; i < n; ++i) {
|
|
|
|
sink.put(wchar_t(buf[i]));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:16:46 +00:00
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_uw(R &sink, u32string_range &r) {
|
2018-01-05 20:49:00 +00:00
|
|
|
/* just a wrapper; does the same thing but advances */
|
2018-01-05 20:16:46 +00:00
|
|
|
std::size_t n = 0;
|
|
|
|
if (!r.empty() && (n = utf::encode_uw(sink, r.front()))) {
|
|
|
|
r.pop_front();
|
|
|
|
}
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2018-01-05 20:49:00 +00:00
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_uw(R &sink, u16string_range &r) {
|
|
|
|
/* when wchar_t is guaranteed utf-16, we have an identity
|
|
|
|
* match much like encode_u16 with wstring, otherwise
|
|
|
|
* decode and encode
|
|
|
|
*/
|
|
|
|
if constexpr(
|
|
|
|
(sizeof(wchar_t) != sizeof(char32_t)) &&
|
|
|
|
(sizeof(wchar_t) == sizeof(char16_t))
|
|
|
|
) {
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(wchar_t(r.front()));
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
auto rr = r;
|
|
|
|
if (char32_t ch; utf::decode(rr, ch)) {
|
|
|
|
if (std::size_t ret; (ret = utf::encode_uw(sink, ch))) {
|
|
|
|
r = rr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_uw(R &sink, string_range &r) {
|
|
|
|
/* when wchar_t is guaranteed utf-8, we have an identity
|
|
|
|
* match so there is no reencoding, otherwise decode and
|
|
|
|
* encode...
|
|
|
|
*/
|
|
|
|
if constexpr(
|
|
|
|
(sizeof(wchar_t) != sizeof(char32_t)) &&
|
|
|
|
(sizeof(wchar_t) != sizeof(char16_t))
|
|
|
|
) {
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(wchar_t(r.front()));
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
auto rr = r;
|
|
|
|
if (char32_t ch; utf::decode(rr, ch)) {
|
|
|
|
if (std::size_t ret; (ret = utf::encode_uw(sink, ch))) {
|
|
|
|
r = rr;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename R>
|
|
|
|
inline std::size_t encode_uw(R &sink, wstring_range &r) {
|
|
|
|
/* identity match, advances */
|
|
|
|
if (!r.empty()) {
|
|
|
|
sink.put(wchar_t(r.front()));
|
|
|
|
r.pop_front();
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Encode the front of a string into the encoding given by `C`.
 *
 * Dispatches at compile time on the output character type `C`:
 * `char` selects encode_u8(), `char16_t` selects encode_u16(),
 * `char32_t` selects encode_u32() and `wchar_t` selects encode_uw().
 * Any other `C` is rejected at compile time.
 *
 * The return value is whatever the selected function returns.
 */
template<typename C, typename OR, typename IR>
inline std::size_t encode(
    [[maybe_unused]] OR &sink, [[maybe_unused]] IR &r
) {
    constexpr bool to_u8 = std::is_same_v<C, char>;
    constexpr bool to_u16 = std::is_same_v<C, char16_t>;
    constexpr bool to_u32 = std::is_same_v<C, char32_t>;
    constexpr bool to_uw = std::is_same_v<C, wchar_t>;
    static_assert(
        to_u32 || to_u16 || to_u8 || to_uw, "Invalid input type"
    );
    /* the four character types are distinct, so at most one branch fits */
    if constexpr(to_u8) {
        return encode_u8(sink, r);
    } else if constexpr(to_u16) {
        return encode_u16(sink, r);
    } else if constexpr(to_u32) {
        return encode_u32(sink, r);
    } else if constexpr(to_uw) {
        return encode_uw(sink, r);
    }
    return 0;
}
|
|
|
|
|
2017-12-31 18:16:16 +00:00
|
|
|
/** @brief Get the number of Unicode code points in a string.
|
|
|
|
*
|
|
|
|
* This function keeps reading Unicode code points while it can and
|
|
|
|
* once it can't it returns the number of valid ones with the rest
|
|
|
|
* of the input string range being in `cont`. That means if the entire
|
|
|
|
* string is a valid UTF-8 string, `cont` will be empty, otherwise it
|
|
|
|
* will begin at the first invalid UTF-8 code point.
|
|
|
|
*
|
|
|
|
* If you're sure the string is valid or you don't need to handle the
|
|
|
|
* error, you can use the more convenient overload below.
|
|
|
|
*/
|
|
|
|
std::size_t length(string_range r, string_range &cont) noexcept;
|
|
|
|
|
2018-01-05 23:42:14 +00:00
|
|
|
/* UTF-16, UTF-32 and wide string overloads of the two-argument length();
 * NOTE(review): presumably they follow the same contract as the UTF-8
 * overload, with `cont` receiving the unprocessed rest of the input;
 * confirm against the out-of-line implementation
 */
std::size_t length(u16string_range r, u16string_range &cont) noexcept;
|
|
|
|
std::size_t length(u32string_range r, u32string_range &cont) noexcept;
|
|
|
|
std::size_t length(wstring_range r, wstring_range &cont) noexcept;
|
|
|
|
|
2017-12-31 18:16:16 +00:00
|
|
|
/** @brief Get the number of Unicode code points in a valid UTF-8 string.
|
|
|
|
*
|
2018-01-05 23:27:04 +00:00
|
|
|
* If an invalid UTF-8 sequence is encountered, it's considered
|
|
|
|
* 1 character and therefore the resulting length will be the
|
|
|
|
* number of valid code points plus the number of invalid
|
|
|
|
* code units as if they were replaced with valid code points.
|
2017-12-31 18:16:16 +00:00
|
|
|
*
|
2018-01-05 23:27:04 +00:00
|
|
|
* If you need to stop at an invalid code unit and get the
|
|
|
|
* continuation string, use the overload above.
|
2017-12-31 18:16:16 +00:00
|
|
|
*/
|
2018-01-05 23:27:04 +00:00
|
|
|
std::size_t length(string_range r) noexcept;
|
2017-12-31 18:16:16 +00:00
|
|
|
|
2018-01-05 23:42:14 +00:00
|
|
|
/* UTF-16, UTF-32 and wide string overloads of the single-argument
 * length(); NOTE(review): presumably invalid input is counted the same
 * way as in the UTF-8 overload; confirm against the out-of-line
 * implementation
 */
std::size_t length(u16string_range r) noexcept;
|
|
|
|
std::size_t length(u32string_range r) noexcept;
|
|
|
|
std::size_t length(wstring_range r) noexcept;
|
2018-01-01 19:59:39 +00:00
|
|
|
|
2017-12-31 13:50:48 +00:00
|
|
|
namespace detail {
|
2018-01-07 01:17:05 +00:00
|
|
|
template<typename IC, typename OC>
|
|
|
|
struct unicode_range: input_range<unicode_range<IC, OC>> {
|
2017-12-31 13:50:48 +00:00
|
|
|
using range_category = forward_range_tag;
|
2018-01-07 01:17:05 +00:00
|
|
|
using value_type = OC;
|
|
|
|
using reference = OC;
|
2017-12-31 13:50:48 +00:00
|
|
|
using size_type = std::size_t;
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
unicode_range() = delete;
|
|
|
|
unicode_range(basic_char_range<IC const> r): p_range(r) {
|
2018-01-06 00:59:13 +00:00
|
|
|
if (!r.empty()) {
|
|
|
|
advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool empty() const { return p_left.empty(); }
|
|
|
|
|
|
|
|
void pop_front() {
|
|
|
|
std::size_t n = p_left.size();
|
|
|
|
if (n > 1) {
|
|
|
|
p_left.pop_front();
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if ((n == 1) && p_range.empty()) {
|
2018-01-07 01:17:05 +00:00
|
|
|
p_left = basic_char_range<OC>{};
|
2018-01-06 00:59:13 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
advance();
|
|
|
|
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
OC front() const {
|
2018-01-06 00:59:13 +00:00
|
|
|
return p_left.front();
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
void advance() {
|
2018-01-07 01:17:05 +00:00
|
|
|
auto r = basic_char_range<OC>(p_buf, p_buf + sizeof(p_buf));
|
|
|
|
if (std::size_t n; !(n = utf::encode<OC>(r, p_range))) {
|
2018-01-06 00:59:13 +00:00
|
|
|
/* range is unchanged */
|
2018-01-07 01:17:05 +00:00
|
|
|
p_left = basic_char_range<OC>{};
|
|
|
|
throw utf_error{"Unicode encoding failed"};
|
2018-01-06 00:59:13 +00:00
|
|
|
} else {
|
2018-01-07 01:17:05 +00:00
|
|
|
p_left = basic_char_range<OC>{p_buf, p_buf + n};
|
2018-01-06 00:59:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
basic_char_range<IC const> p_range;
|
|
|
|
basic_char_range<OC> p_left{};
|
|
|
|
OC p_buf[4];
|
2017-12-31 13:50:48 +00:00
|
|
|
};
|
|
|
|
} /* namespace detail */
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over a UTF-8 string as UTF-8 code units.
 *
 * The result is a range of `char` values produced one code point at
 * a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u8(string_range r) {
    using range_type = detail::unicode_range<char, char>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a UTF-16 string as UTF-8 code units.
 *
 * The result is a range of `char` values produced one code point at
 * a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u8(u16string_range r) {
    using range_type = detail::unicode_range<char16_t, char>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a UTF-32 string as UTF-8 code units.
 *
 * The result is a range of `char` values produced one code point at
 * a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u8(u32string_range r) {
    using range_type = detail::unicode_range<char32_t, char>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a wide string as UTF-8 code units.
 *
 * The result is a range of `char` values produced one code point at
 * a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u8(wstring_range r) {
    using range_type = detail::unicode_range<wchar_t, char>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a UTF-8 string as UTF-16 code units.
 *
 * The result is a range of `char16_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u16(string_range r) {
    using range_type = detail::unicode_range<char, char16_t>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a UTF-16 string as UTF-16 code units.
 *
 * The result is a range of `char16_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u16(u16string_range r) {
    using range_type = detail::unicode_range<char16_t, char16_t>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a UTF-32 string as UTF-16 code units.
 *
 * The result is a range of `char16_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u16(u32string_range r) {
    using range_type = detail::unicode_range<char32_t, char16_t>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
/** @brief Iterate over a wide string as UTF-16 code units.
 *
 * The result is a range of `char16_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_u16(wstring_range r) {
    using range_type = detail::unicode_range<wchar_t, char16_t>;
    return range_type(r);
}
|
|
|
|
|
2018-01-01 23:30:58 +00:00
|
|
|
/** @brief Iterate over the code points of a UTF-8 string.
|
2017-12-31 13:50:48 +00:00
|
|
|
*
|
|
|
|
* The resulting range is ostd::forward_range_tag. The range will
|
|
|
|
* contain the code points of the given string. On error, which may
|
|
|
|
* be during any string advancement (the constructor or `pop_front()`),
|
|
|
|
* an ostd::utf_error is raised.
|
|
|
|
*/
|
2018-01-06 01:10:38 +00:00
|
|
|
inline auto iter_u32(string_range r) {
    /* UTF-8 code units in, code points out */
    using range_type = detail::unicode_range<char, char32_t>;
    return range_type{r};
}
|
|
|
|
|
2018-01-06 00:59:13 +00:00
|
|
|
/** @brief Iterate over the code points of a UTF-16 string.
|
|
|
|
*
|
|
|
|
* The resulting range is ostd::forward_range_tag. The range will
|
|
|
|
* contain the code points of the given string. On error, which may
|
|
|
|
* be during any string advancement (the constructor or `pop_front()`),
|
|
|
|
* an ostd::utf_error is raised.
|
|
|
|
*/
|
2018-01-06 01:10:38 +00:00
|
|
|
inline auto iter_u32(u16string_range r) noexcept {
|
2018-01-07 01:17:05 +00:00
|
|
|
return detail::unicode_range<char16_t, char32_t>{r};
|
2018-01-06 00:59:13 +00:00
|
|
|
}
|
|
|
|
|
2018-01-01 23:30:58 +00:00
|
|
|
/** @brief Iterate over the code points of a UTF-32 string.
|
|
|
|
*
|
2018-01-06 00:59:13 +00:00
|
|
|
* The resulting range is ostd::forward_range_tag. This can actually
|
2018-01-06 01:10:38 +00:00
|
|
|
* fail just like the other ostd::iter_u32() variants if the string
|
2018-01-06 00:59:13 +00:00
|
|
|
* contains surrogates or code points that are out of bounds. If that
|
|
|
|
* happens, an ostd::utf_error is raised.
|
2018-01-01 23:30:58 +00:00
|
|
|
*/
|
2018-01-06 01:10:38 +00:00
|
|
|
inline auto iter_u32(u32string_range r) noexcept {
|
2018-01-07 01:17:05 +00:00
|
|
|
return detail::unicode_range<char32_t, char32_t>{r};
|
2017-12-31 13:50:48 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:59:13 +00:00
|
|
|
/** @brief Iterate over the code points of a wide Unicode string.
|
|
|
|
*
|
|
|
|
* The resulting range is ostd::forward_range_tag. The range will
|
|
|
|
* contain the code points of the given string. On error, which may
|
|
|
|
* be during any string advancement (the constructor or `pop_front()`),
|
|
|
|
* an ostd::utf_error is raised.
|
|
|
|
*/
|
2018-01-06 01:10:38 +00:00
|
|
|
inline auto iter_u32(wstring_range r) noexcept {
|
2018-01-07 01:17:05 +00:00
|
|
|
return detail::unicode_range<wchar_t, char32_t>{r};
|
2018-01-06 00:59:13 +00:00
|
|
|
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over a UTF-8 string as wide characters.
 *
 * The result is a range of `wchar_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_uw(string_range r) {
    using range_type = detail::unicode_range<char, wchar_t>;
    return range_type(r);
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over a UTF-16 string as wide characters.
 *
 * The result is a range of `wchar_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_uw(u16string_range r) {
    using range_type = detail::unicode_range<char16_t, wchar_t>;
    return range_type(r);
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over a UTF-32 string as wide characters.
 *
 * The result is a range of `wchar_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_uw(u32string_range r) {
    using range_type = detail::unicode_range<char32_t, wchar_t>;
    return range_type(r);
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
/** @brief Iterate over a wide string as wide characters.
 *
 * The result is a range of `wchar_t` values produced one code point
 * at a time; an ostd::utf_error is thrown when encoding fails.
 */
inline auto iter_uw(wstring_range r) {
    using range_type = detail::unicode_range<wchar_t, wchar_t>;
    return range_type(r);
}
|
|
|
|
|
|
|
|
template<typename C>
|
|
|
|
inline auto iter_u(string_range r) {
|
|
|
|
return detail::unicode_range<char, C>(r);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename C>
|
|
|
|
inline auto iter_u(u16string_range r) {
|
|
|
|
return detail::unicode_range<char16_t, C>(r);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename C>
|
|
|
|
inline auto iter_u(u32string_range r) {
|
|
|
|
return detail::unicode_range<char32_t, C>(r);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename C>
|
|
|
|
inline auto iter_u(wstring_range r) {
|
|
|
|
return detail::unicode_range<wchar_t, C>(r);
|
2018-01-06 00:59:13 +00:00
|
|
|
}
|
|
|
|
|
2018-01-03 01:12:23 +00:00
|
|
|
/* character classification and case conversion on single code points;
 * all of these take a `char32_t`, never throw and are implemented
 * out of line. NOTE(review): the names mirror the <cctype> functions,
 * presumably with matching meaning extended to Unicode; confirm
 * against the implementation
 */
bool isalnum(char32_t c) noexcept;
|
|
|
|
bool isalpha(char32_t c) noexcept;
|
|
|
|
bool isblank(char32_t c) noexcept;
|
|
|
|
bool iscntrl(char32_t c) noexcept;
|
|
|
|
bool isdigit(char32_t c) noexcept;
|
|
|
|
bool isgraph(char32_t c) noexcept;
|
|
|
|
bool islower(char32_t c) noexcept;
|
|
|
|
bool isprint(char32_t c) noexcept;
|
|
|
|
bool ispunct(char32_t c) noexcept;
|
|
|
|
bool isspace(char32_t c) noexcept;
|
|
|
|
bool istitle(char32_t c) noexcept;
|
|
|
|
bool isupper(char32_t c) noexcept;
|
|
|
|
bool isvalid(char32_t c) noexcept;
|
|
|
|
bool isxdigit(char32_t c) noexcept;
|
|
|
|
char32_t tolower(char32_t c) noexcept;
|
|
|
|
char32_t toupper(char32_t c) noexcept;
|
2018-01-02 21:23:03 +00:00
|
|
|
|
2018-01-03 00:22:07 +00:00
|
|
|
/** @brief Compare two UTF-8 strings.
 *
 * Free function form of `s1.compare(s2)`; the result is whatever the
 * member comparison returns.
 */
inline int compare(string_range s1, string_range s2) noexcept {
    int const order = s1.compare(s2);
    return order;
}
|
2018-01-07 00:15:17 +00:00
|
|
|
/** @brief Compare two UTF-16 strings.
 *
 * Free function form of `s1.compare(s2)`; the result is whatever the
 * member comparison returns.
 */
inline int compare(u16string_range s1, u16string_range s2) noexcept {
    int const order = s1.compare(s2);
    return order;
}
|
2018-01-03 00:22:07 +00:00
|
|
|
/** @brief Compare two UTF-32 strings.
 *
 * Free function form of `s1.compare(s2)`; the result is whatever the
 * member comparison returns.
 */
inline int compare(u32string_range s1, u32string_range s2) noexcept {
    int const order = s1.compare(s2);
    return order;
}
|
2018-01-07 00:15:17 +00:00
|
|
|
/** @brief Compare two wide strings.
 *
 * Free function form of `s1.compare(s2)`; the result is whatever the
 * member comparison returns.
 */
inline int compare(wstring_range s1, wstring_range s2) noexcept {
    int const order = s1.compare(s2);
    return order;
}
|
2018-01-03 00:22:07 +00:00
|
|
|
|
|
|
|
/* out-of-line comparisons for each string type, used to implement
 * basic_char_range::case_compare(). NOTE(review): presumably these
 * compare case-insensitively and return a negative/zero/positive
 * ordering like compare(); confirm against the implementation
 */
int case_compare(string_range s1, string_range s2) noexcept;
|
2018-01-07 00:15:17 +00:00
|
|
|
int case_compare(u16string_range s1, u16string_range s2) noexcept;
|
2018-01-03 00:22:07 +00:00
|
|
|
int case_compare(u32string_range s1, u32string_range s2) noexcept;
|
2018-01-07 00:15:17 +00:00
|
|
|
int case_compare(wstring_range s1, wstring_range s2) noexcept;
|
2017-12-31 13:50:48 +00:00
|
|
|
/** @} */
|
|
|
|
|
|
|
|
} /* namespace utf */
|
|
|
|
|
2017-12-31 18:16:16 +00:00
|
|
|
template<typename T>
|
|
|
|
inline std::size_t basic_char_range<T>::length() const noexcept {
|
|
|
|
return utf::length(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
inline std::size_t basic_char_range<T>::length(
|
|
|
|
basic_char_range<T> &cont
|
|
|
|
) const noexcept {
|
|
|
|
return utf::length(*this, cont);
|
|
|
|
}
|
|
|
|
|
2018-01-07 01:17:05 +00:00
|
|
|
template<typename T>
|
|
|
|
inline auto basic_char_range<T>::iter_u8() const {
|
|
|
|
return utf::iter_u8(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
inline auto basic_char_range<T>::iter_u16() const {
|
|
|
|
return utf::iter_u16(*this);
|
|
|
|
}
|
|
|
|
|
2017-12-31 18:12:51 +00:00
|
|
|
template<typename T>
|
2018-01-06 01:10:38 +00:00
|
|
|
inline auto basic_char_range<T>::iter_u32() const {
|
|
|
|
return utf::iter_u32(*this);
|
2017-12-31 18:12:51 +00:00
|
|
|
}
|
|
|
|
|
2018-01-06 00:59:13 +00:00
|
|
|
template<typename T>
|
2018-01-07 01:17:05 +00:00
|
|
|
inline auto basic_char_range<T>::iter_uw() const {
|
|
|
|
return utf::iter_uw(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
template<typename C>
|
|
|
|
inline auto basic_char_range<T>::iter_u() const {
|
|
|
|
return utf::iter_u<C>(*this);
|
2018-01-06 00:59:13 +00:00
|
|
|
}
|
|
|
|
|
2018-01-03 00:22:07 +00:00
|
|
|
template<typename T>
|
|
|
|
inline int basic_char_range<T>::case_compare(
|
|
|
|
basic_char_range<T const> s
|
|
|
|
) const noexcept {
|
|
|
|
return utf::case_compare(*this, s);
|
|
|
|
}
|
|
|
|
|
2015-07-18 00:02:13 +00:00
|
|
|
/* string literals */
|
|
|
|
|
2016-07-31 19:40:25 +00:00
|
|
|
inline namespace literals {
|
|
|
|
inline namespace string_literals {
|
2017-04-10 17:40:28 +00:00
|
|
|
|
|
|
|
/** @addtogroup Strings
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @brief A custom literal for string ranges.
|
|
|
|
*
|
|
|
|
* You need to enable this explicitly by using this namespace. It's
|
|
|
|
* not enabled by default to ensure compatibility with existing code.
|
|
|
|
*/
|
|
|
|
/* spelled without whitespace between the quotes and the suffix; the
 * form with whitespace is deprecated in newer language revisions and
 * triggers warnings on current compilers
 */
inline string_range operator""_sr(char const *str, std::size_t len)
    noexcept
{
    /* the slice covers exactly the `len` characters of the literal */
    return string_range(str, str + len);
}
|
2017-04-10 17:40:28 +00:00
|
|
|
|
|
|
|
/** @} */
|
|
|
|
|
2016-07-31 19:40:25 +00:00
|
|
|
}
|
|
|
|
}
|
2015-07-18 00:02:13 +00:00
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @} */
|
|
|
|
|
2015-07-13 19:07:14 +00:00
|
|
|
} /* namespace ostd */
|
2015-06-03 22:07:57 +00:00
|
|
|
|
2017-01-29 20:22:40 +00:00
|
|
|
namespace std {
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @addtogroup Strings
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @brief Standard std::hash integration for string slices.
|
|
|
|
*
|
|
|
|
* This integrates all possible slice types with standard hashing.
|
|
|
|
* It uses the hashing used for matching std::basic_string_view,
|
|
|
|
* so the algorithm (and thus result) will always match standard strings.
|
|
|
|
*/
|
2017-12-15 22:32:06 +00:00
|
|
|
template<typename T>
|
|
|
|
struct hash<ostd::basic_char_range<T>> {
|
|
|
|
std::size_t operator()(ostd::basic_char_range<T> const &v)
|
2017-04-10 17:40:28 +00:00
|
|
|
const noexcept
|
|
|
|
{
|
2017-12-15 22:32:06 +00:00
|
|
|
return hash<std::basic_string_view<std::remove_const_t<T>>>{}(v);
|
2017-01-29 20:22:40 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2017-04-10 17:40:28 +00:00
|
|
|
/** @} */
|
|
|
|
|
2017-01-29 20:22:40 +00:00
|
|
|
}
|
|
|
|
|
2016-02-07 21:17:15 +00:00
|
|
|
#endif
|
2017-04-10 17:40:28 +00:00
|
|
|
|
|
|
|
/** @} */
|