2017-12-31 02:00:23 +00:00
|
|
|
/* String implementation details, mainly regarding Unicode support.
|
|
|
|
*
|
|
|
|
* This file is part of libostd. See COPYING.md for further information.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <cstdint>
|
2018-01-02 21:23:03 +00:00
|
|
|
#include <cstdlib>
|
2018-01-11 22:27:45 +00:00
|
|
|
#include <cstring>
|
|
|
|
#include <limits>
|
|
|
|
#include <type_traits>
|
2018-01-11 22:38:03 +00:00
|
|
|
|
|
|
|
#include "ostd/platform.hh"
|
2017-12-31 02:00:23 +00:00
|
|
|
#include "ostd/string.hh"
|
2018-01-05 21:31:04 +00:00
|
|
|
#include "ostd/format.hh"
|
2017-12-31 02:00:23 +00:00
|
|
|
|
2018-01-11 22:27:45 +00:00
|
|
|
namespace ostd {
|
|
|
|
namespace detail {
|
|
|
|
|
|
|
|
/* Counts the units that precede the terminating zero of the string at `p`.
 *
 * When a unit is at least as wide as std::size_t the string is walked unit
 * by unit. Otherwise `p` is first advanced unit by unit until its address
 * is a multiple of sizeof(std::size_t), then the string is examined one
 * std::size_t at a time until a word containing a zero unit is found, and
 * finally the exact position of the zero is located unit by unit.
 */
template<typename C>
inline std::size_t tstrlen_impl(C const *p) noexcept {
    using word_t = std::size_t;
    using word_lim = std::numeric_limits<word_t>;
    using unit_lim = std::numeric_limits<std::make_unsigned_t<C>>;

    /* lowest bit of every unit held by a word (0000000100000001... etc) */
    constexpr word_t low_mask = word_lim::max() / unit_lim::max();
    /* highest bit of every unit held by a word (1000000010000000... etc) */
    constexpr word_t high_mask = low_mask << (unit_lim::digits - 1);

    C const *const start = p;

    /* a word holds one unit at most, nothing to gain from word reads */
    if constexpr(sizeof(C) >= sizeof(word_t)) {
        while (*p) {
            ++p;
        }
        return (p - start);
    }

    /* step by unit until the address is a multiple of the word size */
    while (std::uintptr_t(p) % sizeof(word_t)) {
        if (!*p) {
            return (p - start);
        }
        ++p;
    }

    /* e.g. x86_64 => sizeof(size_t) == 8 * sizeof(char) */
    auto *wp = reinterpret_cast<word_t const *>(p);
    /* a word contains a zero unit when, in binary (4-bit unit example):
     *
     * XXX1 - 0001 => XXX0; XXX0 & YYY0 => 0000; 0000 & 1000 => 0000
     * XX10 - 0001 => XX01; XX01 & YY01 => 0001; 0001 & 1000 => 0000
     * 0000 - 0001 => 1111; 1111 & 1111 => 1111; 1111 & 1000 => 1000
     *
     * i.e. the masked result is non-zero; skip words until that happens
     */
    while (!(((*wp - low_mask) & ~*wp) & high_mask)) {
        ++wp;
    }
    p = reinterpret_cast<C const *>(wp);

    /* the zero is somewhere in this word, find it by unit */
    while (*p) {
        ++p;
    }
    return (p - start);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Length in units of the zero-terminated `char` string at `p`. */
OSTD_EXPORT std::size_t tstrlen(char const *p) noexcept {
    return tstrlen_impl<char>(p);
}
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Length in units of the zero-terminated `char16_t` string at `p`. */
OSTD_EXPORT std::size_t tstrlen(char16_t const *p) noexcept {
    return tstrlen_impl<char16_t>(p);
}
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Length in units of the zero-terminated `char32_t` string at `p`. */
OSTD_EXPORT std::size_t tstrlen(char32_t const *p) noexcept {
    return tstrlen_impl<char32_t>(p);
}
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Length in units of the zero-terminated `wchar_t` string at `p`. */
OSTD_EXPORT std::size_t tstrlen(wchar_t const *p) noexcept {
    return tstrlen_impl<wchar_t>(p);
}
|
|
|
|
|
|
|
|
} /* namespace detail */
|
|
|
|
} /* namespace ostd */
|
|
|
|
|
2017-12-31 02:00:23 +00:00
|
|
|
namespace ostd {
|
|
|
|
namespace utf {
|
|
|
|
|
2018-01-05 21:31:04 +00:00
|
|
|
/* place the vtable in here */
|
|
|
|
utf_error::~utf_error() {}
|
|
|
|
|
2018-01-01 00:02:49 +00:00
|
|
|
namespace detail {
|
2018-01-06 00:03:43 +00:00
|
|
|
inline bool is_invalid_u32(char32_t c) {
|
2018-01-07 16:13:53 +00:00
|
|
|
return (((c >= 0xD800) && (c <= 0xDFFF)) || (c > utf::max_unicode));
|
2018-01-06 00:03:43 +00:00
|
|
|
}
|
|
|
|
|
2018-01-05 17:55:34 +00:00
|
|
|
static inline std::size_t u8_decode(
|
|
|
|
unsigned char const *beg, unsigned char const *end, char32_t &cret
|
|
|
|
) noexcept {
|
|
|
|
static char32_t const ulim[] = { 0xFF, 0x7F, 0x7FF, 0xFFFF };
|
|
|
|
if (beg == end) {
|
|
|
|
return 0;
|
2017-12-31 02:00:23 +00:00
|
|
|
}
|
2018-01-05 17:55:34 +00:00
|
|
|
char32_t ch = *beg;
|
2018-01-01 00:02:49 +00:00
|
|
|
if (ch <= 0x7F) {
|
|
|
|
/* ASCII */
|
|
|
|
cret = ch;
|
2018-01-05 17:55:34 +00:00
|
|
|
return 1;
|
2018-01-01 00:02:49 +00:00
|
|
|
}
|
2018-01-05 17:55:34 +00:00
|
|
|
char32_t ret = 0;
|
|
|
|
unsigned char const *sbeg = beg + 1;
|
2018-01-01 00:02:49 +00:00
|
|
|
/* continuation bytes */
|
|
|
|
for (; ch & 0x40; ch <<= 1) {
|
|
|
|
/* need a continuation byte but nothing left in the string */
|
2018-01-05 17:55:34 +00:00
|
|
|
if (sbeg == end) {
|
|
|
|
return 0;
|
2018-01-01 00:02:49 +00:00
|
|
|
}
|
|
|
|
/* the continuation byte */
|
2018-01-05 17:55:34 +00:00
|
|
|
char32_t nch = *sbeg++;
|
2018-01-01 00:02:49 +00:00
|
|
|
/* lower 6 bits */
|
2018-01-05 17:55:34 +00:00
|
|
|
char32_t bch = nch & 0x3F;
|
2018-01-01 00:02:49 +00:00
|
|
|
/* not a continuation byte */
|
|
|
|
if ((nch ^ bch) != 0x80) {
|
2018-01-07 00:15:17 +00:00
|
|
|
return 0;
|
2018-01-01 00:02:49 +00:00
|
|
|
}
|
|
|
|
/* the 6 bits go in the result */
|
|
|
|
ret = (ret << 6) | bch;
|
|
|
|
}
|
2018-01-05 17:55:34 +00:00
|
|
|
/* by how many bytes we advance (continuation bytes + 1) */
|
|
|
|
auto adv = std::size_t(sbeg - beg);
|
2018-01-01 00:02:49 +00:00
|
|
|
/* invalid sequence - too many continuation bits */
|
2018-01-05 17:55:34 +00:00
|
|
|
if (adv > 4) {
|
|
|
|
return 0;
|
2017-12-31 02:00:23 +00:00
|
|
|
}
|
2018-01-01 00:02:49 +00:00
|
|
|
/* add the up to 7 bits from the first byte, already shifted left by n */
|
2018-01-05 17:55:34 +00:00
|
|
|
ret |= (ch & 0x7F) << ((adv - 1) * 5);
|
2018-01-06 00:03:43 +00:00
|
|
|
/* invalid sequence */
|
|
|
|
if (is_invalid_u32(ret) || (ret <= ulim[adv - 1])) {
|
2018-01-05 17:55:34 +00:00
|
|
|
return 0;
|
2018-01-01 01:36:05 +00:00
|
|
|
}
|
2018-01-01 00:02:49 +00:00
|
|
|
cret = ret;
|
2018-01-05 17:55:34 +00:00
|
|
|
return adv;
|
2017-12-31 02:00:23 +00:00
|
|
|
}
|
2018-01-01 00:02:49 +00:00
|
|
|
|
2018-01-05 17:55:34 +00:00
|
|
|
/* Decodes a single UTF-16 sequence found at the start of [beg, end).
 *
 * On success the code point is written to `cret` and the number of units
 * consumed (1 or 2) is returned. Zero is returned, with `cret` left alone,
 * when the input is empty or starts with an unpaired surrogate: a lead
 * surrogate that is not followed by a trail surrogate, or a trail
 * surrogate that has no lead surrogate in front of it.
 */
static inline std::size_t u16_decode(
    char16_t const *beg, char16_t const *end, char32_t &cret
) noexcept {
    if (beg == end) {
        return 0;
    }
    char32_t const ch = *beg;
    /* not a surrogate, the unit is the code point */
    if ((ch < 0xD800) || (ch > 0xDFFF)) {
        cret = ch;
        return 1;
    }
    /* trail surrogate in lead position; surrogates are not valid code
     * points on their own, so this must not be passed through
     */
    if (ch > 0xDBFF) {
        return 0;
    }
    /* lead surrogate with nothing after it */
    if ((end - beg) < 2) {
        return 0;
    }
    char32_t const nch = beg[1];
    /* lead surrogate followed by something else than a trail surrogate */
    if ((nch < 0xDC00) || (nch > 0xDFFF)) {
        return 0;
    }
    cret = 0x10000 + (((ch - 0xD800) << 10) | (nch - 0xDC00));
    return 2;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Encodes the code point `ch` as UTF-8 into `ret`.
 *
 * Returns how many units were written (1 to 4). Zero is returned and
 * nothing is written when `ch` is a surrogate code point or is greater
 * than utf::max_unicode.
 */
OSTD_EXPORT std::size_t encode(
    char (&ret)[4], char32_t ch
) noexcept {
    /* builds a continuation byte out of the low 6 bits of its argument */
    auto trail = [](char32_t bits) noexcept {
        return char(0x80 | (bits & 0x3F));
    };
    if (ch <= 0x7F) {
        ret[0] = char(ch);
        return 1;
    }
    if (ch <= 0x7FF) {
        ret[0] = char(0xC0 | (ch >> 6));
        ret[1] = trail(ch);
        return 2;
    }
    if (ch <= 0xFFFF) {
        /* TODO: optional WTF-8 semantics
         * for now simply reject surrogate code points
         */
        bool const surrogate = ((ch >= 0xD800) && (ch <= 0xDFFF));
        if (surrogate) {
            return 0;
        }
        ret[0] = char(0xE0 | (ch >> 12));
        ret[1] = trail(ch >> 6);
        ret[2] = trail(ch);
        return 3;
    }
    /* out of range */
    if (ch > utf::max_unicode) {
        return 0;
    }
    ret[0] = char(0xF0 | (ch >> 18));
    ret[1] = trail(ch >> 12);
    ret[2] = trail(ch >> 6);
    ret[3] = trail(ch);
    return 4;
}
|
2018-01-05 01:18:36 +00:00
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Encodes the code point `ch` as UTF-16 into `ret`.
 *
 * Returns how many units were written (1, or 2 for a surrogate pair).
 * Zero is returned and nothing is written when `ch` is a surrogate code
 * point or is greater than utf::max_unicode.
 */
OSTD_EXPORT std::size_t encode(
    char16_t (&ret)[2], char32_t ch
) noexcept {
    bool const surrogate = ((ch >= 0xD800) && (ch <= 0xDFFF));
    /* surrogate code point or out of bounds */
    if (surrogate || (ch > utf::max_unicode)) {
        return 0;
    }
    /* fits in a single unit */
    if (ch <= 0xFFFF) {
        ret[0] = char16_t(ch);
        return 1;
    }
    /* 20-bit number, top 10 bits go in the lead, low 10 in the trail */
    char32_t const offset = ch - 0x10000;
    ret[0] = char16_t(0xD800 + (offset >> 10));
    ret[1] = char16_t(0xDC00 + (offset & 0x3FF));
    return 2;
}
|
2018-01-05 23:42:14 +00:00
|
|
|
|
|
|
|
template<typename C>
|
|
|
|
inline std::size_t length(
|
|
|
|
basic_char_range<C const> &r, basic_char_range<C const> &cont
|
|
|
|
) noexcept {
|
|
|
|
std::size_t ret = 0;
|
|
|
|
for (char32_t ch; utf::decode(r, ch); ++ret) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
cont = r;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename C>
|
|
|
|
inline std::size_t length(basic_char_range<C const> &r) noexcept {
|
|
|
|
std::size_t ret = 0;
|
2018-01-07 16:13:53 +00:00
|
|
|
if constexpr(utf::max_units<C> == 1) {
|
2018-01-05 23:42:14 +00:00
|
|
|
ret = r.size();
|
|
|
|
} else {
|
|
|
|
for (;; ++ret) {
|
|
|
|
if (char32_t ch; !utf::decode(r, ch)) {
|
|
|
|
if (r.empty()) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
r.pop_front();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
2018-01-01 00:02:49 +00:00
|
|
|
} /* namespace detail */
|
2017-12-31 02:00:23 +00:00
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Decodes one code point from the front of the UTF-8 range `r`.
 *
 * On success the code point is stored in `ret`, the units it occupied are
 * removed from the front of `r` and true is returned. Otherwise false is
 * returned and `r` is not modified.
 */
OSTD_EXPORT bool decode(string_range &r, char32_t &ret) noexcept {
    auto const total = r.size();
    auto *first = reinterpret_cast<unsigned char const *>(r.data());
    std::size_t const used = detail::u8_decode(first, first + total, ret);
    if (!used) {
        return false;
    }
    r = r.slice(used, total);
    return true;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Decodes one code point from the front of the UTF-16 range `r`.
 *
 * On success the code point is stored in `ret`, the units it occupied are
 * removed from the front of `r` and true is returned. Otherwise false is
 * returned and `r` is not modified.
 */
OSTD_EXPORT bool decode(u16string_range &r, char32_t &ret) noexcept {
    auto const total = r.size();
    auto *first = r.data();
    std::size_t const used = detail::u16_decode(first, first + total, ret);
    if (!used) {
        return false;
    }
    r = r.slice(used, total);
    return true;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Takes one code point off the front of the UTF-32 range `r`.
 *
 * Returns false without touching `r` when it is empty or when its first
 * unit is rejected by detail::is_invalid_u32. Otherwise the unit is
 * stored in `ret`, removed from `r`, and true is returned.
 */
OSTD_EXPORT bool decode(u32string_range &r, char32_t &ret) noexcept {
    if (r.empty()) {
        return false;
    }
    auto const cp = r.front();
    bool const usable = !detail::is_invalid_u32(cp);
    if (usable) {
        ret = cp;
        r.pop_front();
    }
    return usable;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Decodes one code point from the front of the wide character range `r`.
 *
 * The units are treated as UTF-32 when is_wchar_u32 is set, as UTF-16 when
 * is_wchar_u16 is set and as UTF-8 otherwise. On success the code point is
 * stored in `ret`, the consumed units are removed from the front of `r`
 * and true is returned. Otherwise false is returned and `r` is not
 * modified.
 */
OSTD_EXPORT bool decode(wstring_range &r, char32_t &ret) noexcept {
    std::size_t const total = r.size();
    if constexpr(is_wchar_u32) {
        /* one unit per code point, only needs validating */
        if (total == 0) {
            return false;
        }
        auto const cp = char32_t(r.front());
        if (detail::is_invalid_u32(cp)) {
            return false;
        }
        ret = cp;
        r.pop_front();
        return true;
    } else {
        std::size_t used;
        if constexpr(is_wchar_u16) {
            auto *first = reinterpret_cast<char16_t const *>(r.data());
            used = detail::u16_decode(first, first + total, ret);
        } else {
            auto *first = reinterpret_cast<unsigned char const *>(r.data());
            used = detail::u8_decode(first, first + total, ret);
        }
        if (!used) {
            return false;
        }
        r = r.slice(used, total);
        return true;
    }
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points decoded from the front of `r` before decoding
 * first fails; the part of `r` that was not decoded is stored in `cont`.
 */
OSTD_EXPORT std::size_t length(string_range r, string_range &cont) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points decoded from the front of `r` before decoding
 * first fails; the part of `r` that was not decoded is stored in `cont`.
 */
OSTD_EXPORT std::size_t length(
    u16string_range r, u16string_range &cont
) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points decoded from the front of `r` before decoding
 * first fails; the part of `r` that was not decoded is stored in `cont`.
 */
OSTD_EXPORT std::size_t length(
    u32string_range r, u32string_range &cont
) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points decoded from the front of `r` before decoding
 * first fails; the part of `r` that was not decoded is stored in `cont`.
 */
OSTD_EXPORT std::size_t length(wstring_range r, wstring_range &cont) noexcept {
    std::size_t const count = detail::length(r, cont);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points in `r`; a unit that cannot be decoded is counted
 * as one code point.
 */
OSTD_EXPORT std::size_t length(string_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points in `r`; a unit that cannot be decoded is counted
 * as one code point.
 */
OSTD_EXPORT std::size_t length(u16string_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points in `r`, as computed by detail::length. */
OSTD_EXPORT std::size_t length(u32string_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Number of code points in `r`, as computed by detail::length. */
OSTD_EXPORT std::size_t length(wstring_range r) noexcept {
    std::size_t const count = detail::length(r);
    return count;
}
|
|
|
|
|
2018-01-02 21:23:03 +00:00
|
|
|
/* unicode-aware ctype
|
|
|
|
* the other ones use custom tables for lookups
|
|
|
|
*/
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` satisfies utf::isalpha or utf::isdigit. */
OSTD_EXPORT bool isalnum(char32_t c) noexcept {
    if (utf::isalpha(c)) {
        return true;
    }
    return utf::isdigit(c);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` is a space or a horizontal tab. */
OSTD_EXPORT bool isblank(char32_t c) noexcept {
    switch (c) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` satisfies utf::isprint but not utf::isspace. */
OSTD_EXPORT bool isgraph(char32_t c) noexcept {
    if (utf::isspace(c)) {
        return false;
    }
    return utf::isprint(c);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` is neither one of U+2028, U+2029, U+FFF9, U+FFFA, U+FFFB
 * nor something utf::iscntrl accepts.
 */
OSTD_EXPORT bool isprint(char32_t c) noexcept {
    bool const separator = ((c == 0x2028) || (c == 0x2029));
    bool const annotation = ((c >= 0xFFF9) && (c <= 0xFFFB));
    if (separator || annotation) {
        return false;
    }
    return !utf::iscntrl(c);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` satisfies utf::isgraph but not utf::isalnum. */
OSTD_EXPORT bool ispunct(char32_t c) noexcept {
    if (!utf::isgraph(c)) {
        return false;
    }
    return !utf::isalnum(c);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` is no greater than utf::max_unicode and is neither a
 * surrogate code point (0xD800 to 0xDFFF), nor in 0xFDD0 to 0xFDEF, nor
 * one of the last two values of a plane (low 16 bits 0xFFFE or 0xFFFF).
 */
OSTD_EXPORT bool isvalid(char32_t c) noexcept {
    bool const surrogate = ((c >= 0xD800) && (c <= 0xDFFF));
    /* non-characters */
    bool const nonchar = ((c >= 0xFDD0) && (c <= 0xFDEF));
    /* end of plane */
    bool const plane_end = ((c & 0xFFFE) == 0xFFFE);
    if (surrogate || nonchar || plane_end) {
        return false;
    }
    /* must be within range */
    return (c <= utf::max_unicode);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* True when `c` is one of 0-9, a-f or A-F. */
OSTD_EXPORT bool isxdigit(char32_t c) noexcept {
    bool const decimal = ((c >= '0') && (c <= '9'));
    /* setting bit 5 maps the ASCII upper case letters onto lower case */
    auto const folded = c | 32;
    bool const letter = ((folded >= 'a') && (folded <= 'f'));
    return (decimal || letter);
}
|
|
|
|
|
2018-01-03 01:12:23 +00:00
|
|
|
/* Three-way comparison of the two char32_t values `a` and `b` point to,
 * in the form std::bsearch expects: negative, zero or positive when the
 * first is less than, equal to or greater than the second.
 *
 * The values are compared as unsigned quantities rather than subtracted
 * as ints, which would overflow for keys that do not fit in an int.
 */
inline int codepoint_cmp1(void const *a, void const *b) noexcept {
    char32_t const c1 = *static_cast<char32_t const *>(a);
    char32_t const c2 = *static_cast<char32_t const *>(b);
    return (int(c1 > c2) - int(c1 < c2));
}
|
|
|
|
|
2018-01-03 01:12:23 +00:00
|
|
|
/* Comparison of a key against a range, in the form std::bsearch expects.
 *
 * `a` points to the char32_t key and `b` to an array entry whose first
 * two elements are the inclusive bounds of a range. Zero is returned when
 * the key lies within the bounds; otherwise the result is negative or
 * positive depending on whether the key is below or above the lower bound.
 *
 * The values are compared as unsigned quantities rather than subtracted
 * as ints, which would overflow for keys that do not fit in an int.
 */
inline int codepoint_cmp2(void const *a, void const *b) noexcept {
    char32_t const c = *static_cast<char32_t const *>(a);
    char32_t const *p = static_cast<char32_t const *>(b);
    if ((c >= p[0]) && (c <= p[1])) {
        return 0;
    }
    return (int(c > p[0]) - int(c < p[0]));
}
|
|
|
|
|
|
|
|
template<
|
2018-01-02 22:28:37 +00:00
|
|
|
std::size_t RN, std::size_t RS,
|
|
|
|
std::size_t L1N, std::size_t L1S,
|
|
|
|
std::size_t L2N, std::size_t L2S,
|
|
|
|
std::size_t SN, std::size_t SS
|
2018-01-02 21:23:03 +00:00
|
|
|
>
|
|
|
|
struct uctype_func {
|
2018-01-02 22:28:37 +00:00
|
|
|
template<std::size_t N, std::size_t S>
|
|
|
|
static char32_t *search(
|
|
|
|
char32_t c, void const *arr, int (*cmp)(void const *, void const *)
|
2018-01-03 01:12:23 +00:00
|
|
|
) noexcept {
|
2018-01-02 22:28:37 +00:00
|
|
|
return static_cast<char32_t *>(std::bsearch(&c, arr, N / S, S, cmp));
|
|
|
|
}
|
|
|
|
|
2018-01-02 21:23:03 +00:00
|
|
|
static bool do_is(
|
|
|
|
char32_t c,
|
|
|
|
void const *ranges [[maybe_unused]],
|
|
|
|
void const *laces1 [[maybe_unused]],
|
|
|
|
void const *laces2 [[maybe_unused]],
|
|
|
|
void const *singles [[maybe_unused]]
|
2018-01-03 01:12:23 +00:00
|
|
|
) noexcept {
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(RN != 0) {
|
|
|
|
char32_t *found = search<RN, RS>(c, ranges, codepoint_cmp2);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(L1N != 0) {
|
|
|
|
char32_t *found = search<L1N, L1S>(c, laces1, codepoint_cmp2);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
return !((c - found[0]) % 2);
|
|
|
|
}
|
|
|
|
}
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(L2N != 0) {
|
|
|
|
char32_t *found = search<L2N, L2S>(c, laces2, codepoint_cmp2);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
return !((c - found[0]) % 2);
|
|
|
|
}
|
|
|
|
}
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(SN != 0) {
|
|
|
|
char32_t *found = search<SN, SS>(c, singles, codepoint_cmp1);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
static char32_t do_to(
|
|
|
|
char32_t c,
|
|
|
|
void const *ranges [[maybe_unused]],
|
|
|
|
void const *laces1 [[maybe_unused]],
|
|
|
|
void const *laces2 [[maybe_unused]],
|
|
|
|
void const *singles [[maybe_unused]]
|
2018-01-03 01:12:23 +00:00
|
|
|
) noexcept {
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(RN != 0) {
|
|
|
|
char32_t *found = search<RN, RS>(c, ranges, codepoint_cmp2);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
return (found[2] + (c - found[0]));
|
|
|
|
}
|
|
|
|
}
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(L1N != 0) {
|
|
|
|
char32_t *found = search<L1N, L1S>(c, laces1, codepoint_cmp2);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
if ((c - found[0]) % 2) {
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
return c + 1;
|
|
|
|
}
|
|
|
|
}
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(L2N != 0) {
|
|
|
|
char32_t *found = search<L2N, L2S>(c, laces2, codepoint_cmp2);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
if ((c - found[0]) % 2) {
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
return c - 1;
|
|
|
|
}
|
|
|
|
}
|
2018-01-02 22:28:37 +00:00
|
|
|
if constexpr(SN != 0) {
|
|
|
|
char32_t *found = search<SN, SS>(c, singles, codepoint_cmp1);
|
2018-01-02 21:23:03 +00:00
|
|
|
if (found) {
|
|
|
|
return found[1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/* these are generated */
|
2018-01-11 22:38:03 +00:00
|
|
|
OSTD_EXPORT bool isalpha(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT bool iscntrl(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT bool isdigit(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT bool islower(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT bool isspace(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT bool istitle(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT bool isupper(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT char32_t tolower(char32_t c) noexcept;
|
|
|
|
OSTD_EXPORT char32_t toupper(char32_t c) noexcept;
|
2018-01-02 21:23:03 +00:00
|
|
|
|
2018-01-02 23:37:31 +00:00
|
|
|
#if __has_include("string_utf.hh")
|
2018-01-02 21:23:03 +00:00
|
|
|
#include "string_utf.hh"
|
2018-01-02 23:37:31 +00:00
|
|
|
#else
|
|
|
|
|
|
|
|
/* break the cycle (build system and generator use libostd, but string_utf.hh
|
|
|
|
* is generated during build) by providing a bunch of ASCII only fallbacks
|
|
|
|
*/
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: a letter is whatever utf::isupper or utf::islower accepts */
OSTD_EXPORT bool isalpha(char32_t c) noexcept {
    if (utf::isupper(c)) {
        return true;
    }
    return utf::islower(c);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: 0x00 to 0x1F and 0x7F only */
OSTD_EXPORT bool iscntrl(char32_t c) noexcept {
    if (c == 0x7F) {
        return true;
    }
    return (c <= 0x1F);
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: '0' to '9' only */
OSTD_EXPORT bool isdigit(char32_t c) noexcept {
    if (c < '0') {
        return false;
    }
    return (c <= '9');
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: 'a' to 'z' only */
OSTD_EXPORT bool islower(char32_t c) noexcept {
    if (c < 'a') {
        return false;
    }
    return (c <= 'z');
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: the space character and 0x09 to 0x0D only */
OSTD_EXPORT bool isspace(char32_t c) noexcept {
    if (c == ' ') {
        return true;
    }
    return ((c >= 0x09) && (c <= 0x0D));
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: never true, whatever the argument */
OSTD_EXPORT bool istitle(char32_t) noexcept {
    bool const titlecase = false;
    return titlecase;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: 'A' to 'Z' only */
OSTD_EXPORT bool isupper(char32_t c) noexcept {
    if (c < 'A') {
        return false;
    }
    return (c <= 'Z');
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: sets bit 5 of anything utf::isupper accepts, which turns the
 * ASCII upper case letters into lower case; other values are unchanged
 */
OSTD_EXPORT char32_t tolower(char32_t c) noexcept {
    return utf::isupper(c) ? (c | 32) : c;
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* fallback: flips bit 5 of anything utf::islower accepts, which turns the
 * ASCII lower case letters into upper case; other values are unchanged
 */
OSTD_EXPORT char32_t toupper(char32_t c) noexcept {
    return utf::islower(c) ? (c ^ 32) : c;
}
|
|
|
|
|
|
|
|
#endif /* __has_include("string_utf.hh") */
|
2018-01-02 21:23:03 +00:00
|
|
|
|
2018-01-07 00:15:17 +00:00
|
|
|
namespace detail {
|
|
|
|
template<typename C>
|
|
|
|
inline int case_compare(
|
|
|
|
C const *beg1, C const *end1,
|
|
|
|
C const *beg2, C const *end2
|
|
|
|
) {
|
|
|
|
auto s1l = std::size_t(end1 - beg1);
|
|
|
|
auto s2l = std::size_t(end2 - beg2);
|
|
|
|
|
|
|
|
auto ms = std::min(s1l, s2l);
|
|
|
|
end1 = beg1 + ms;
|
|
|
|
end2 = beg2 + ms;
|
|
|
|
|
|
|
|
while (beg1 != end1) {
|
|
|
|
auto ldec = char32_t(*beg1);
|
|
|
|
auto rdec = char32_t(*beg2);
|
2018-01-07 01:17:05 +00:00
|
|
|
if constexpr(std::is_same_v<C, char32_t>) {
|
|
|
|
++beg1;
|
|
|
|
++beg2;
|
|
|
|
} else if constexpr(std::is_same_v<C, char16_t>) {
|
2018-01-07 00:15:17 +00:00
|
|
|
std::size_t ndec;
|
|
|
|
if ((ldec <= 0x7F) || !(ndec = u16_decode(beg1, end1, ldec))) {
|
|
|
|
++beg1;
|
|
|
|
} else {
|
|
|
|
beg1 += ndec;
|
|
|
|
}
|
|
|
|
if ((rdec <= 0x7F) || !(ndec = u16_decode(beg2, end2, rdec))) {
|
|
|
|
++beg2;
|
|
|
|
} else {
|
|
|
|
beg2 += ndec;
|
|
|
|
}
|
|
|
|
} else if constexpr(std::is_same_v<C, unsigned char>) {
|
|
|
|
std::size_t ndec;
|
|
|
|
if ((ldec <= 0x7F) || !(ndec = u8_decode(beg1, end1, ldec))) {
|
|
|
|
++beg1;
|
|
|
|
} else {
|
|
|
|
beg1 += ndec;
|
|
|
|
}
|
|
|
|
if ((rdec <= 0x7F) || !(ndec = u8_decode(beg2, end2, rdec))) {
|
|
|
|
++beg2;
|
|
|
|
} else {
|
|
|
|
beg2 += ndec;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
int d = int(utf::tolower(ldec)) - int(utf::tolower(rdec));
|
|
|
|
if (d) {
|
|
|
|
return d;
|
|
|
|
}
|
2018-01-03 16:09:28 +00:00
|
|
|
}
|
2018-01-07 00:15:17 +00:00
|
|
|
return (s1l < s2l) ? -1 : ((s1l > s2l) ? 1 : 0);
|
2018-01-03 00:22:07 +00:00
|
|
|
}
|
2018-01-07 00:15:17 +00:00
|
|
|
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Case insensitive comparison of two UTF-8 ranges; the units are handed
 * to detail::case_compare as unsigned char.
 */
OSTD_EXPORT int case_compare(string_range s1, string_range s2) noexcept {
    using unit = unsigned char;
    unit const *lhs = reinterpret_cast<unit const *>(s1.data());
    unit const *rhs = reinterpret_cast<unit const *>(s2.data());
    return detail::case_compare(lhs, lhs + s1.size(), rhs, rhs + s2.size());
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Case insensitive comparison of two UTF-16 ranges, carried out by
 * detail::case_compare.
 */
OSTD_EXPORT int case_compare(u16string_range s1, u16string_range s2) noexcept {
    auto *lhs = s1.data();
    auto *rhs = s2.data();
    return detail::case_compare(lhs, lhs + s1.size(), rhs, rhs + s2.size());
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Case insensitive comparison of two UTF-32 ranges, carried out by
 * detail::case_compare.
 */
OSTD_EXPORT int case_compare(u32string_range s1, u32string_range s2) noexcept {
    auto *lhs = s1.data();
    auto *rhs = s2.data();
    return detail::case_compare(lhs, lhs + s1.size(), rhs, rhs + s2.size());
}
|
|
|
|
|
2018-01-11 22:38:03 +00:00
|
|
|
/* Case insensitive comparison of two wide character ranges; the units are
 * handed to detail::case_compare as unsigned char when is_wchar_u8 is set
 * and as wchar_fixed_t otherwise.
 */
OSTD_EXPORT int case_compare(wstring_range s1, wstring_range s2) noexcept {
    using unit = std::conditional_t<is_wchar_u8, unsigned char, wchar_fixed_t>;
    unit const *lhs = reinterpret_cast<unit const *>(s1.data());
    unit const *rhs = reinterpret_cast<unit const *>(s2.data());
    return detail::case_compare(lhs, lhs + s1.size(), rhs, rhs + s2.size());
}
|
|
|
|
|
2017-12-31 02:00:23 +00:00
|
|
|
} /* namespace utf */
|
2018-01-05 21:31:04 +00:00
|
|
|
|
|
|
|
/* place the vtable in here */
|
|
|
|
format_error::~format_error() {}
|
|
|
|
|
2017-12-31 02:00:23 +00:00
|
|
|
} /* namespace ostd */
|