libostd/ostd/string.hh

986 lines
30 KiB
C++
Raw Normal View History

2017-04-10 19:40:28 +02:00
/** @defgroup Strings
2015-05-27 22:43:13 +02:00
*
2017-04-10 19:40:28 +02:00
* @brief Provides string processing extensions.
*
* As libostd provides a range system, it represents string slices as
* contiguous ranges of characters. This has many advantages, such as
* being able to use them with generic algorithms. The string slices are
* not zero terminated, which makes creating subslices very fast: it is
* basically just pointer arithmetic.
*
* Integration with existing string handling facilities is ensured, so you
* can incorporate libostd into any existing project and still benefit from
* the new features.
*
* A simple example:
*
* ~~~{.cc}
* #include <ostd/string.hh>
* #include <ostd/io.hh>
*
* int main() {
* ostd::string_range x = "hello world";
* auto p1 = x.slice(0, 5);
* auto p2 = x.slice(6);
* ostd::writeln(p1); // hello
* ostd::writeln(p2); // world
* }
* ~~~
*
2017-05-03 02:14:27 +02:00
* An example of using libostd string formatting:
*
* @include format.cc
*
2017-04-10 19:40:28 +02:00
* See the examples provided with the library for further information.
*
* @{
*/
/** @file string.hh
*
* @brief String slice implementation as well as other utilities.
*
* This file implements string slices, their comparisons, utilities,
* standard C++ string range integration, range literals, std::hash
* support for string slices and others.
*
* @copyright See COPYING.md in the project tree for further information.
2015-05-27 22:43:13 +02:00
*/
2015-07-13 21:08:55 +02:00
#ifndef OSTD_STRING_HH
#define OSTD_STRING_HH
2015-05-27 22:43:13 +02:00
#include <cstdint>
2017-04-09 16:44:45 +02:00
#include <cstddef>
#include <cctype>
2017-01-29 21:22:40 +01:00
#include <string>
#include <string_view>
#include <type_traits>
2017-02-08 01:06:50 +01:00
#include <functional>
2017-02-16 20:39:05 +01:00
#include <utility>
#include <vector>
#include <stdexcept>
2017-01-29 21:22:40 +01:00
2017-06-19 16:59:36 +02:00
#include <ostd/range.hh>
#include <ostd/algorithm.hh>
2015-05-27 22:43:13 +02:00
2015-07-13 21:07:14 +02:00
namespace ostd {
2017-04-10 19:40:28 +02:00
/** @addtogroup Strings
* @{
*/
/** @brief A string slice type.
*
* This is a contiguous range over a character type. The character type
* can be any of the standard character types, of any size - for example
* you would use `char32_t` to represent UTF-32 slices. The std::char_traits
2017-12-15 23:32:06 +01:00
* structure is used for the basic string operations where possible.
2017-04-10 19:40:28 +02:00
*
* The range is mutable, i.e. it implements the output range interface.
*/
2017-12-15 23:32:06 +01:00
template<typename T>
struct basic_char_range: input_range<basic_char_range<T>> {
2017-04-16 17:23:09 +02:00
using range_category = contiguous_range_tag;
using value_type = T;
using reference = T &;
using size_type = std::size_t;
2015-07-21 22:16:38 +02:00
private:
2017-12-15 23:32:06 +01:00
using TR = std::char_traits<std::remove_const_t<T>>;
struct nat {};
2015-07-21 22:16:38 +02:00
public:
2017-04-10 19:40:28 +02:00
/** @brief Constructs an empty slice. */
basic_char_range() noexcept: p_beg(nullptr), p_end(nullptr) {};
/** @brief Constructs a slice from two pointers.
*
* The first pointer is the beginning of the slice
* and the second pointer is just past the end.
*/
basic_char_range(value_type *beg, value_type *end) noexcept:
p_beg(beg), p_end(end)
{}
2017-04-10 19:40:28 +02:00
/** @brief Constructs an empty slice. */
basic_char_range(std::nullptr_t) noexcept:
p_beg(nullptr), p_end(nullptr)
{}
/** @brief Constructs a slice from a pointer or a static array.
*
* This constructor handles two cases. The input must be convertible
* to `T *`, if it's not, this constructor is not enabled. Effectively,
* if the input is a static array of `T`, the entire array is used to
* create the slice, minus the potential zero at the end. If there is
* no zero at the end, nothing is removed and the array is used whole.
2017-12-15 23:32:06 +01:00
* If the input is not an array, the size is checked at runtime.
2017-04-10 19:40:28 +02:00
*/
template<typename U>
basic_char_range(U &&beg, std::enable_if_t<
2017-04-10 19:40:28 +02:00
std::is_convertible_v<U, value_type *>, nat
> = nat{}) noexcept: p_beg(beg) {
if constexpr(std::is_array_v<std::remove_reference_t<U>>) {
2017-04-09 16:44:45 +02:00
std::size_t N = std::extent_v<std::remove_reference_t<U>>;
p_end = beg + N - (beg[N - 1] == '\0');
} else {
p_end = beg + (beg ? TR::length(beg) : 0);
}
}
2017-04-10 19:40:28 +02:00
/** @brief Constructs a slice from an std::basic_string.
*
* This uses the string's data to construct a matching slice.
*/
2017-02-09 21:39:03 +01:00
template<typename STR, typename A>
2017-04-10 19:40:28 +02:00
basic_char_range(
std::basic_string<std::remove_const_t<value_type>, STR, A> const &s
) noexcept:
p_beg(s.data()), p_end(s.data() + s.size())
{}
2017-04-10 19:40:28 +02:00
/** @brief Constructs a slice from a different but compatible slice.
*
* The other slice can use any traits type, but a pointer to the
* other slice's value type must be convertible to a pointer to
* the new slice's value type, otherwise the constructor will not
* be enabled.
*/
2017-12-15 23:32:06 +01:00
template<typename U, typename = std::enable_if_t<
2017-04-10 19:40:28 +02:00
std::is_convertible_v<U *, value_type *>
2017-02-09 20:56:15 +01:00
>>
2017-12-15 23:32:06 +01:00
basic_char_range(basic_char_range<U> const &v) noexcept:
p_beg(&v[0]), p_end(&v[v.size()])
{}
2015-06-17 03:00:39 +02:00
2017-04-10 19:40:28 +02:00
/** @brief Slices are arbitrarily copy constructible. */
basic_char_range &operator=(basic_char_range const &v) noexcept {
p_beg = v.p_beg; p_end = v.p_end; return *this;
}
2017-04-10 19:40:28 +02:00
/** @brief Assigns the slice's data from a matching std::basic_string.
*
* The string does not have to be using a matching traits type.
*/
2017-02-09 21:39:03 +01:00
template<typename STR, typename A>
2017-04-10 19:40:28 +02:00
basic_char_range &operator=(
std::basic_string<value_type, STR, A> const &s
) noexcept {
p_beg = s.data(); p_end = s.data() + s.size(); return *this;
}
2017-02-09 21:39:03 +01:00
2017-04-10 19:40:28 +02:00
/** @brief Assigns the slice's data from a pointer.
*
* The data pointed to by the argument must be zero terminated.
*/
basic_char_range &operator=(value_type *s) noexcept {
2017-02-09 21:39:03 +01:00
p_beg = s; p_end = s + (s ? TR::length(s) : 0); return *this;
}
2017-04-10 19:40:28 +02:00
/** @brief Checks if the slice is empty. */
bool empty() const noexcept { return p_beg == p_end; }
/** @brief Pops the first character out of the slice.
*
* This is bounds checked, std::out_of_range is thrown when
* slice was already empty before popping out the character.
* No changes are done to the slice if it throws.
*
* @throws std::out_of_range when empty.
*
* @see front(), pop_back()
*/
void pop_front() {
2017-04-10 19:40:28 +02:00
if (p_beg == p_end) {
throw std::out_of_range{"pop_front on empty range"};
}
2017-04-10 19:40:28 +02:00
++p_beg;
}
2017-04-10 19:40:28 +02:00
/** @brief Gets a reference to the first character.
*
* The behavior is undefined when the slice is empty.
*
* @see back(), pop_front()
*/
reference front() const noexcept { return *p_beg; }
/** @brief Pops the last character out of the slice.
*
* This is bounds checked, std::out_of_range is thrown when
* slice was already empty before popping out the character.
* No changes are done to the slice if it throws.
*
* @throws std::out_of_range when empty.
*
* @see back(), pop_front()
*/
void pop_back() {
2017-04-10 19:40:28 +02:00
if (p_beg == p_end) {
throw std::out_of_range{"pop_back on empty range"};
}
--p_end;
}
2017-04-10 19:40:28 +02:00
/** @brief Gets a reference to the last character.
*
* The behavior is undefined when the slice is empty.
*
* @see front(), pop_back()
*/
reference back() const noexcept { return *(p_end - 1); }
/** @brief Gets the number of value_type in the slice. */
size_type size() const noexcept { return p_end - p_beg; }
/** @brief Gets the number of code points in the slice.
*
* Effectively the same as utf::length().
*/
2017-12-31 19:16:16 +01:00
inline size_type length() const noexcept;
/** @brief Gets the number of code points in the slice.
*
* Effectively the same as utf::length().
*/
2017-12-31 19:16:16 +01:00
inline size_type length(basic_char_range &cont) const noexcept;
2017-04-10 19:40:28 +02:00
/** @brief Creates a sub-slice of the slice.
*
* Behavior is undefined if `start` and `end` are not within the
* slice's bounds. There is no bound checking done in this call.
* It's also undefined if the first argument is larger than the
* second argument.
*/
basic_char_range slice(size_type start, size_type end) const noexcept {
2017-02-16 18:48:14 +01:00
return basic_char_range(p_beg + start, p_beg + end);
}
2017-04-10 19:40:28 +02:00
/** @brief Creates a sub-slice of the slice until the end.
*
* Equivalent to slice(size_type, size_type) with `size()` as
* the second argument. The first argument must be within the
* slice's boundaries otherwis the behavior is undefined.
*/
basic_char_range slice(size_type start) const noexcept {
2017-04-01 16:49:38 +02:00
return slice(start, size());
}
2017-04-10 19:40:28 +02:00
/** @brief Gets a reference to a character within the slice.
*
* The behavior is undefined if the index is not within the bounds.
*/
reference operator[](size_type i) const noexcept { return p_beg[i]; }
/** @brief Writes a character at the beginning and pops it out.
*
* @throws std::out_of_range when empty.
*/
void put(value_type v) {
2017-02-19 18:31:08 +01:00
if (p_beg == p_end) {
throw std::out_of_range{"put into an empty range"};
}
*(p_beg++) = v;
}
2017-04-10 19:40:28 +02:00
/** @brief Gets the pointer to the beginning. */
value_type *data() noexcept { return p_beg; }
/** @brief Gets the pointer to the beginning. */
value_type const *data() const noexcept { return p_beg; }
/** @brief Compares two slices.
*
* This works similarly to the C function `strcmp` or the `compare`
* method of std::char_traits, but does not depend on the strings
* to be terminated.
*
* If this slice is empty and the other is not, this method returns
* -1. If it's the other way around, it returns 1. If both are empty,
2017-12-15 23:32:06 +01:00
* 0 is returned. Otherwise, the `compare` method of std::char_traits
* is used to compare the data, using the smaller of the lengths as the
2017-04-10 19:40:28 +02:00
* count.
*
* It is not a part of the range interface, just the string slice
* interface.
*
* @see case_compare()
*/
int compare(basic_char_range<value_type const> s) const noexcept {
2017-04-09 16:44:45 +02:00
size_type s1 = size(), s2 = s.size();
2016-08-18 01:34:20 +02:00
int ret;
2016-08-17 19:18:12 +02:00
if (!s1 || !s2) {
2016-08-18 01:34:20 +02:00
goto diffsize;
2016-08-17 19:18:12 +02:00
}
2017-02-18 17:54:51 +01:00
if ((ret = TR::compare(data(), s.data(), std::min(s1, s2)))) {
2016-08-18 01:34:20 +02:00
return ret;
}
diffsize:
return (s1 < s2) ? -1 : ((s1 > s2) ? 1 : 0);
}
2017-04-10 19:40:28 +02:00
/** @brief Compares two slices in a case insensitive manner.
*
* Lexicographically compares the strings like compare(), but in
* a case insensitive way. The std::toupper() function is used to
* convert the characters to uppercase when comparing.
*
* Returns a negative value when this slice is less than the other
* slice and a positive value when the other way around. Zero is
2017-12-15 23:32:06 +01:00
* returned when they're equal.
2017-04-10 19:40:28 +02:00
*/
int case_compare(basic_char_range<value_type const> s) const noexcept {
2017-04-09 16:44:45 +02:00
size_type s1 = size(), s2 = s.size();
for (size_type i = 0, ms = std::min(s1, s2); i < ms; ++i) {
int d = std::toupper(p_beg[i]) - std::toupper(s[i]);
2016-09-02 01:06:13 +02:00
if (d) {
return d;
}
}
return (s1 < s2) ? -1 : ((s1 > s2) ? 1 : 0);
}
/** @brief Iterate over the code points of the string.
*
* Like utf::iter_codes().
*/
inline auto iter_codes() const;
2017-04-10 19:40:28 +02:00
/** @brief Implicitly converts a string slice to std::basic_string_view.
*
* String views represent more or less the same thing but they're always
* immutable. This simple conversion allows usage of string slices on
* any API that uses either strings or string view, as well as construct
* strings and string views out of slices.
*/
operator std::basic_string_view<std::remove_cv_t<value_type>>()
const noexcept
{
2017-02-09 20:56:15 +01:00
return std::basic_string_view<std::remove_cv_t<T>>{data(), size()};
2017-01-29 21:22:40 +01:00
}
private:
T *p_beg, *p_end;
};
2017-12-31 19:17:02 +01:00
/** @brief A mutable slice over `char`; characters can be written through it. */
using char_range = basic_char_range<char>;
/** @brief A mutable slice over `wchar_t`; characters can be written through it. */
using wchar_range = basic_char_range<wchar_t>;
/** @brief A mutable slice over `char16_t`; characters can be written through it. */
using char16_range = basic_char_range<char16_t>;
/** @brief A mutable slice over `char32_t`; characters can be written through it. */
using char32_range = basic_char_range<char32_t>;
2017-12-31 19:17:02 +01:00
/** @brief An immutable slice over `char`.
 *
 * This is used in most libostd APIs that read strings. More or less
 * anything is convertible to it, including mutable slices, so it's
 * a perfect fit as long as modifications are not necessary.
 *
 * The element type is `char const`, so the characters cannot be
 * modified through the slice.
 */
using string_range = basic_char_range<char const>;
/** @brief An immutable slice over `wchar_t`.
 *
 * Included primarily for compatibility with other APIs.
 */
using wstring_range = basic_char_range<wchar_t const>;
/** @brief An immutable slice over `char16_t`.
 *
 * Included for basic UTF-16 compatibility.
 */
using u16string_range = basic_char_range<char16_t const>;
/** @brief An immutable slice over `char32_t`.
 *
 * Can represent UTF-32 strings.
 */
using u32string_range = basic_char_range<char32_t const>;
2017-02-16 18:48:14 +01:00
/* comparisons between ranges */
2017-04-10 19:40:28 +02:00
/** @brief Like `!lhs.compare(rhs)`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator==(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, basic_char_range<U> rhs
2017-04-10 19:40:28 +02:00
) noexcept {
2017-02-16 18:48:14 +01:00
return !lhs.compare(rhs);
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs)`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator!=(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, basic_char_range<U> rhs
2017-04-10 19:40:28 +02:00
) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs);
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) < 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator<(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, basic_char_range<U> rhs
2017-04-10 19:40:28 +02:00
) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) < 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) > 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator>(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, basic_char_range<U> rhs
2017-04-10 19:40:28 +02:00
) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) > 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) <= 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator<=(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, basic_char_range<U> rhs
2017-04-10 19:40:28 +02:00
) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) <= 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) >= 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator>=(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, basic_char_range<U> rhs
2017-04-10 19:40:28 +02:00
) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) >= 0;
}
2017-06-01 21:44:09 +02:00
/* comparisons between ranges and char arrays */
2017-02-16 18:48:14 +01:00
2017-04-10 19:40:28 +02:00
/** @brief Like `!lhs.compare(rhs)`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator==(basic_char_range<T> lhs, U *rhs) noexcept {
2017-02-16 18:48:14 +01:00
return !lhs.compare(rhs);
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs)`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator!=(basic_char_range<T> lhs, U *rhs) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs);
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) < 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator<(basic_char_range<T> lhs, U *rhs) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) < 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) > 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator>(basic_char_range<T> lhs, U *rhs) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) > 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) <= 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator<=(basic_char_range<T> lhs, U *rhs) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) <= 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `lhs.compare(rhs) >= 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator>=(basic_char_range<T> lhs, U *rhs) noexcept {
2017-02-16 18:48:14 +01:00
return lhs.compare(rhs) >= 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `!rhs.compare(lhs)`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator==(U *lhs, basic_char_range<T> rhs) noexcept {
2017-02-16 18:48:14 +01:00
return !rhs.compare(lhs);
}
2017-04-10 19:40:28 +02:00
/** @brief Like `rhs.compare(lhs)`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator!=(U *lhs, basic_char_range<T> rhs) noexcept {
2017-02-16 18:48:14 +01:00
return rhs.compare(lhs);
}
2017-04-10 19:40:28 +02:00
/** @brief Like `rhs.compare(lhs) > 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator<(U *lhs, basic_char_range<T> rhs) noexcept {
2017-02-16 18:48:14 +01:00
return rhs.compare(lhs) > 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `rhs.compare(lhs) < 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator>(U *lhs, basic_char_range<T> rhs) noexcept {
2017-02-16 18:48:14 +01:00
return rhs.compare(lhs) < 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `rhs.compare(lhs) >= 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator<=(U *lhs, basic_char_range<T> rhs) noexcept {
2017-02-16 18:48:14 +01:00
return rhs.compare(lhs) >= 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Like `rhs.compare(lhs) <= 0`. */
2017-12-15 23:32:06 +01:00
template<typename T, typename U>
2017-06-01 21:44:09 +02:00
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
2017-12-15 23:32:06 +01:00
> operator>=(U *lhs, basic_char_range<T> rhs) noexcept {
2017-02-16 18:48:14 +01:00
return rhs.compare(lhs) <= 0;
}
/* comparisons between ranges and stdlib strings */
/** @brief Like `!lhs.compare(rhs)`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator==(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, std::basic_string<U, TR, A> const &rhs
) noexcept {
return !lhs.compare(rhs);
}
/** @brief Like `lhs.compare(rhs)`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator!=(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, std::basic_string<U, TR, A> const &rhs
) noexcept {
return lhs.compare(rhs);
}
/** @brief Like `lhs.compare(rhs) < 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator<(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, std::basic_string<U, TR, A> const &rhs
) noexcept {
return lhs.compare(rhs) < 0;
}
/** @brief Like `lhs.compare(rhs) > 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator>(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, std::basic_string<U, TR, A> const &rhs
) noexcept {
return lhs.compare(rhs) > 0;
}
/** @brief Like `lhs.compare(rhs) <= 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator<=(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, std::basic_string<U, TR, A> const &rhs
) noexcept {
return lhs.compare(rhs) <= 0;
}
/** @brief Like `lhs.compare(rhs) >= 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator>=(
2017-12-15 23:32:06 +01:00
basic_char_range<T> lhs, std::basic_string<U, TR, A> const &rhs
) noexcept {
return lhs.compare(rhs) >= 0;
}
/** @brief Like `!rhs.compare(lhs)`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator==(
2017-12-15 23:32:06 +01:00
std::basic_string<U, TR, A> const &lhs, basic_char_range<T> rhs
) noexcept {
return !rhs.compare(lhs);
}
/** @brief Like `rhs.compare(lhs)`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator!=(
2017-12-15 23:32:06 +01:00
std::basic_string<U, TR, A> const &lhs, basic_char_range<T> rhs
) noexcept {
return rhs.compare(lhs);
}
/** @brief Like `rhs.compare(lhs) > 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator<(
2017-12-15 23:32:06 +01:00
std::basic_string<U, TR, A> const &lhs, basic_char_range<T> rhs
) noexcept {
return rhs.compare(lhs) > 0;
}
/** @brief Like `rhs.compare(lhs) < 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator>(
2017-12-15 23:32:06 +01:00
std::basic_string<U, TR, A> const &lhs, basic_char_range<T> rhs
) noexcept {
return rhs.compare(lhs) < 0;
}
/** @brief Like `rhs.compare(lhs) >= 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator<=(
2017-12-15 23:32:06 +01:00
std::basic_string<U, TR, A> const &lhs, basic_char_range<T> rhs
) noexcept {
return rhs.compare(lhs) >= 0;
}
/** @brief Like `rhs.compare(lhs) <= 0`. */
template<typename T, typename TR, typename U, typename A>
inline std::enable_if_t<
std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>, bool
> operator>=(
2017-12-15 23:32:06 +01:00
std::basic_string<U, TR, A> const &lhs, basic_char_range<T> rhs
) noexcept {
return rhs.compare(lhs) <= 0;
}
2017-04-10 19:40:28 +02:00
/** @brief Checks whether slice `a` begins with slice `b`.
 *
 * A slice shorter than `b` never matches; otherwise the leading
 * `b.size()` characters of `a` are compared against `b`.
 */
inline bool starts_with(string_range a, string_range b) noexcept {
    auto const prefix_len = b.size();
    return !(a.size() < prefix_len) && (a.slice(0, prefix_len) == b);
}
2017-04-10 19:40:28 +02:00
/** @brief Mutable range integration for std::basic_string.
*
* The range type used for mutable string references
* is an ostd::basic_char_range with mutable values.
*/
template<typename T, typename TR, typename A>
struct ranged_traits<std::basic_string<T, TR, A>> {
2017-04-10 19:40:28 +02:00
/** @brief The range type. */
2017-12-15 23:32:06 +01:00
using range = basic_char_range<T>;
2017-04-10 19:40:28 +02:00
/** @brief Creates a range. */
static range iter(std::basic_string<T, TR, A> &v) noexcept {
return range{v.data(), v.data() + v.size()};
2017-01-29 21:22:40 +01:00
}
};
2017-04-10 19:40:28 +02:00
/** @brief Immutable range integration for std::basic_string.
*
* The range type used for immutable string references
* is an ostd::basic_char_range with immutable values.
*/
template<typename T, typename TR, typename A>
struct ranged_traits<std::basic_string<T, TR, A> const> {
2017-04-10 19:40:28 +02:00
/** @brief The range type. */
2017-12-15 23:32:06 +01:00
using range = basic_char_range<T const>;
2017-04-10 19:40:28 +02:00
/** @brief Creates a range. */
static range iter(std::basic_string<T, TR, A> const &v) noexcept {
return range{v.data(), v.data() + v.size()};
2017-01-29 21:22:40 +01:00
}
};
/* more UTF utilities beyond basic API */
namespace utf {
/** @addtogroup Strings
* @{
*/
/** @brief Thrown on UTF-8 decoding failure.
 *
 * Derives from std::runtime_error and inherits all of its constructors,
 * so it is constructed with a message like the base class is.
 */
struct utf_error: std::runtime_error {
using std::runtime_error::runtime_error;
};
2017-12-31 19:16:16 +01:00
/** @brief Get the Unicode code point for a multibyte sequence.
 *
 * The string is advanced past the Unicode character in the front.
 * If the decoding fails, `false` is returned, otherwise it's `true`.
 */
2017-12-31 20:06:36 +01:00
bool decode(string_range &r, char32_t &ret) noexcept;
2017-12-31 19:16:16 +01:00
/** @brief Get the Unicode code point from a UTF-32 string.
 *
 * On success the front element is stored in `ret`, the string is
 * advanced by one and `true` is returned. The only way this can fail
 * is an empty string; `false` is returned then and neither argument
 * is modified.
 */
inline bool decode(u32string_range &r, char32_t &ret) noexcept {
    bool const has_front = !r.empty();
    if (has_front) {
        ret = r.front();
        r.pop_front();
    }
    return has_front;
}
namespace detail {
    /* Only declared here; the definition is not part of this header.
     * As used by encode_u8() below, it fills `ret` and returns how
     * many of its elements are to be written out.
     */
    std::uint8_t u8_encode(
        std::uint8_t (&ret)[4], std::uint32_t ch
    ) noexcept;
}

/** @brief Encode a UTF-32 code point into UTF-8 code units.
 *
 * The units are written in `sink` which is an ostd::output_range_tag.
 * The written values are of type `char` and up to 4 are written. The
 * number of bytes written is returned from the function. In case of
 * failure, `0` is returned.
 *
 * This function is allowed to fail only in two cases, when a surrogate
 * code point is provided or when the code point is out of bounds as
 * defined by Unicode (i.e. greater than 0x10FFFF). It does not throw
 * exceptions other than those thrown by `sink`.
 */
template<typename R>
std::uint8_t encode_u8(R &sink, char32_t ch) {
    std::uint8_t buf[4];
    std::uint8_t const n = detail::u8_encode(buf, ch);
    for (std::uint8_t i = 0; i < n; ++i) {
        /* hand the sink `char` values, as documented, rather than
         * std::uint8_t, which sinks with overloaded or templated `put`
         * could treat as a different type
         */
        sink.put(static_cast<char>(buf[i]));
    }
    return n;
}
2017-12-31 19:16:16 +01:00
/** @brief Get the number of Unicode code points in a string.
 *
 * This function keeps reading Unicode code points while it can and
 * once it can't it returns the number of valid ones with the rest
 * of the input string range being in `cont`. That means if the entire
 * string is a valid UTF-8 string, `cont` will be empty, otherwise it
 * will begin at the first invalid UTF-8 code point.
 *
 * If you're sure the string is valid or you don't need to handle the
 * error, you can use the more convenient overload below.
 */
std::size_t length(string_range r, string_range &cont) noexcept;
/* @brief Get the number of Unicode code points in a valid UTF-8 string.
*
* If an invalid UTF-8 sequence is encountered, it returns the length
* until that sequence.
*
* If you need to get the continuation string, use the general
* error-handling overload of the function.
*/
2017-12-31 19:18:08 +01:00
inline std::size_t length(string_range r) noexcept {
return utf::length(r, r);
}
2017-12-31 19:16:16 +01:00
/** @brief Get the number of Unicode code points in a UTF-32 string.
 *
 * Every element of a UTF-32 string is an entire code point, so this
 * cannot fail and needs no error-handling version; the result is
 * simply `r.size()`.
 */
inline std::size_t length(u32string_range r) noexcept {
    std::size_t const code_points = r.size();
    return code_points;
}
namespace detail {
    /* A forward range over the code points of a character slice.
     *
     * The code point at the front is decoded eagerly (in the constructor
     * and in pop_front()) and cached in `p_current`; a negative value
     * marks the range as empty. Decoding failures are reported by
     * throwing utf_error.
     */
    template<typename C>
    struct codepoint_range: input_range<codepoint_range<C>> {
        using range_category = forward_range_tag;
        using value_type = char32_t;
        using reference = char32_t;
        using size_type = std::size_t;

        codepoint_range() = delete;

        /* decodes the first code point unless the input is empty */
        codepoint_range(basic_char_range<C const> r): p_range(r) {
            if (r.empty()) {
                p_current = -1;
            } else {
                advance();
            }
        }

        bool empty() const { return (p_current < 0); }

        void pop_front() {
            /* the cached code point was the last one; the check has to
             * accept 0 as U+0000 is a valid code point too, otherwise
             * a trailing NUL would make this decode an empty range
             * and throw instead of finishing the iteration
             */
            if (p_current >= 0 && p_range.empty()) {
                p_current = -1;
                return;
            }
            advance();
        }

        char32_t front() const {
            return p_current;
        }

    private:
        /* decodes the next code point into p_current, throws on failure */
        void advance() {
            if (char32_t ret; !decode(p_range, ret)) {
                /* range is unchanged */
                p_current = -1;
                throw utf_error{"UTF-8 decoding failed"};
            } else {
                p_current = ret;
            }
        }

        basic_char_range<C const> p_range;
        /* the cached front code point, negative when empty */
        std::int32_t p_current;
    };
} /* namespace detail */
/** @brief Iterate over the code points of a UTF-8 string.
 *
 * The resulting range is ostd::forward_range_tag and yields the code
 * points of the given string. Decoding happens whenever the range is
 * advanced (in its constructor or in `pop_front()`), and an
 * ostd::utf::utf_error is raised at that point when it fails.
 */
inline auto iter_codes(string_range r) {
    using code_range = detail::codepoint_range<char>;
    return code_range{r};
}
/** @brief Iterate over the code points of a UTF-32 string.
 *
 * The resulting range is ostd::forward_range_tag. This cannot fail
 * as it's essentially an identity range.
 */
inline auto iter_codes(u32string_range r) noexcept {
    using code_range = detail::codepoint_range<char32_t>;
    return code_range{r};
}
/* Code point classification and case conversion.
 *
 * These are only declared in this header; the definitions are elsewhere.
 *
 * NOTE(review): most of the names mirror the <cctype> functions, so these
 * presumably provide the same predicates and conversions for Unicode code
 * points; `istitle` and `isvalid` have no <cctype> counterpart. Confirm
 * the exact semantics against the implementation.
 */
bool isalnum(char32_t c);
bool isalpha(char32_t c);
bool isblank(char32_t c);
bool iscntrl(char32_t c);
bool isdigit(char32_t c);
bool isgraph(char32_t c);
bool islower(char32_t c);
bool isprint(char32_t c);
bool ispunct(char32_t c);
bool isspace(char32_t c);
bool istitle(char32_t c);
bool isupper(char32_t c);
bool isvalid(char32_t c);
bool isxdigit(char32_t c);
/* case conversion of a single code point */
char32_t tolower(char32_t c);
char32_t toupper(char32_t c);
/** @} */
} /* namespace utf */
2017-12-31 19:16:16 +01:00
/* Defined out of class as it needs the utf::length() overloads. */
template<typename T>
inline auto basic_char_range<T>::length() const noexcept -> size_type {
    return utf::length(*this);
}
/* Defined out of class as it needs the utf::length() overloads.
 *
 * NOTE(review): the only two-argument utf::length() declared in this
 * header takes `string_range &` as the continuation, so this appears
 * to be usable only when T is `char const` - confirm.
 */
template<typename T>
inline auto basic_char_range<T>::length(
    basic_char_range<T> &cont
) const noexcept -> size_type {
    return utf::length(*this, cont);
}
/* Defined out of class as it needs the utf::iter_codes() overloads. */
template<typename T>
inline auto basic_char_range<T>::iter_codes() const {
    basic_char_range<T> const &self = *this;
    return utf::iter_codes(self);
}
2015-07-18 02:02:13 +02:00
/* string literals */
inline namespace literals {
inline namespace string_literals {
2017-04-10 19:40:28 +02:00
/** @addtogroup Strings
* @{
*/
/** @brief A custom literal for string ranges.
 *
 * You need to enable this explicitly by using this namespace. It's
 * not enabled by default to ensure compatibility with existing code.
 *
 * The resulting slice covers exactly `len` characters starting at
 * `str`, so embedded zeros in the literal are preserved.
 */
inline string_range operator""_sr(char const *str, std::size_t len)
    noexcept
{
    /* written without whitespace between "" and the suffix, as the
     * whitespace-separated form of literal operators is deprecated
     */
    return string_range(str, str + len);
}
2017-04-10 19:40:28 +02:00
/** @} */
}
}
2015-07-18 02:02:13 +02:00
2017-04-10 19:40:28 +02:00
/** @} */
2015-07-13 21:07:14 +02:00
} /* namespace ostd */
2015-06-04 00:07:57 +02:00
2017-01-29 21:22:40 +01:00
namespace std {
2017-04-10 19:40:28 +02:00
/** @addtogroup Strings
* @{
*/
/** @brief Standard std::hash integration for string slices.
*
* This integrates all possible slice types with standard hashing.
* It uses the hashing used for matching std::basic_string_view,
* so the algorithm (and thus result) will always match standard strings.
*/
2017-12-15 23:32:06 +01:00
template<typename T>
struct hash<ostd::basic_char_range<T>> {
std::size_t operator()(ostd::basic_char_range<T> const &v)
2017-04-10 19:40:28 +02:00
const noexcept
{
2017-12-15 23:32:06 +01:00
return hash<std::basic_string_view<std::remove_const_t<T>>>{}(v);
2017-01-29 21:22:40 +01:00
}
};
2017-04-10 19:40:28 +02:00
/** @} */
2017-01-29 21:22:40 +01:00
}
2016-02-07 22:17:15 +01:00
#endif
2017-04-10 19:40:28 +02:00
/** @} */