unicode fixes/cleanups
parent
8aace1e65a
commit
af635dc77a
|
@ -59,7 +59,7 @@ namespace detail {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
/* invalid sequence - surrogate code point */
|
/* invalid sequence - surrogate code point */
|
||||||
if ((ret & 0xD800) == 0xD800) {
|
if ((ret >= 0xD800) && (ret <= 0xDFFF)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
cret = ret;
|
cret = ret;
|
||||||
|
@ -83,7 +83,7 @@ namespace detail {
|
||||||
/* TODO: optional WTF-8 semantics
|
/* TODO: optional WTF-8 semantics
|
||||||
* for now simply reject surrogate code points
|
* for now simply reject surrogate code points
|
||||||
*/
|
*/
|
||||||
if ((ch & 0xD800) == 0xD800) {
|
if ((ch >= 0xD800) && (ch <= 0xDFFF)) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
ret[0] = 0xE0 | (ch >> 12);
|
ret[0] = 0xE0 | (ch >> 12);
|
||||||
|
@ -189,12 +189,19 @@ inline int codepoint_cmp2(void const *a, void const *b) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template<
|
template<
|
||||||
std::size_t RangesN, std::size_t RangesS,
|
std::size_t RN, std::size_t RS,
|
||||||
std::size_t Laces1N, std::size_t Laces1S,
|
std::size_t L1N, std::size_t L1S,
|
||||||
std::size_t Laces2N, std::size_t Laces2S,
|
std::size_t L2N, std::size_t L2S,
|
||||||
std::size_t SinglesN, std::size_t SinglesS
|
std::size_t SN, std::size_t SS
|
||||||
>
|
>
|
||||||
struct uctype_func {
|
struct uctype_func {
|
||||||
|
template<std::size_t N, std::size_t S>
|
||||||
|
static char32_t *search(
|
||||||
|
char32_t c, void const *arr, int (*cmp)(void const *, void const *)
|
||||||
|
) {
|
||||||
|
return static_cast<char32_t *>(std::bsearch(&c, arr, N / S, S, cmp));
|
||||||
|
}
|
||||||
|
|
||||||
static bool do_is(
|
static bool do_is(
|
||||||
char32_t c,
|
char32_t c,
|
||||||
void const *ranges [[maybe_unused]],
|
void const *ranges [[maybe_unused]],
|
||||||
|
@ -202,38 +209,26 @@ struct uctype_func {
|
||||||
void const *laces2 [[maybe_unused]],
|
void const *laces2 [[maybe_unused]],
|
||||||
void const *singles [[maybe_unused]]
|
void const *singles [[maybe_unused]]
|
||||||
) {
|
) {
|
||||||
if constexpr(RangesN != 0) {
|
if constexpr(RN != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<RN, RS>(c, ranges, codepoint_cmp2);
|
||||||
&c, ranges, RangesN / RangesS, RangesS / sizeof(char32_t),
|
|
||||||
codepoint_cmp2
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(Laces1N != 0) {
|
if constexpr(L1N != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<L1N, L1S>(c, laces1, codepoint_cmp2);
|
||||||
&c, laces1, Laces1N / Laces1S, Laces1S / sizeof(char32_t),
|
|
||||||
codepoint_cmp2
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
return !((c - found[0]) % 2);
|
return !((c - found[0]) % 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(Laces2N != 0) {
|
if constexpr(L2N != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<L2N, L2S>(c, laces2, codepoint_cmp2);
|
||||||
&c, laces2, Laces2N / Laces2S, Laces2S / sizeof(char32_t),
|
|
||||||
codepoint_cmp2
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
return !((c - found[0]) % 2);
|
return !((c - found[0]) % 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(SinglesN != 0) {
|
if constexpr(SN != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<SN, SS>(c, singles, codepoint_cmp1);
|
||||||
&c, singles, SinglesN / SinglesS, SinglesS / sizeof(char32_t),
|
|
||||||
codepoint_cmp1
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -248,18 +243,14 @@ struct uctype_func {
|
||||||
void const *laces2 [[maybe_unused]],
|
void const *laces2 [[maybe_unused]],
|
||||||
void const *singles [[maybe_unused]]
|
void const *singles [[maybe_unused]]
|
||||||
) {
|
) {
|
||||||
if constexpr(RangesN != 0) {
|
if constexpr(RN != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<RN, RS>(c, ranges, codepoint_cmp2);
|
||||||
&c, ranges, RangesN >> 4, RangesN & 0xF, codepoint_cmp2
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
return (found[2] + (c - found[0]));
|
return (found[2] + (c - found[0]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(Laces1N != 0) {
|
if constexpr(L1N != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<L1N, L1S>(c, laces1, codepoint_cmp2);
|
||||||
&c, laces1, Laces1N >> 4, Laces1N & 0xF, codepoint_cmp2
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
if ((c - found[0]) % 2) {
|
if ((c - found[0]) % 2) {
|
||||||
return c;
|
return c;
|
||||||
|
@ -267,10 +258,8 @@ struct uctype_func {
|
||||||
return c + 1;
|
return c + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(Laces2N != 0) {
|
if constexpr(L2N != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<L2N, L2S>(c, laces2, codepoint_cmp2);
|
||||||
&c, laces2, Laces2N >> 4, Laces2N & 0xF, codepoint_cmp2
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
if ((c - found[0]) % 2) {
|
if ((c - found[0]) % 2) {
|
||||||
return c;
|
return c;
|
||||||
|
@ -278,10 +267,8 @@ struct uctype_func {
|
||||||
return c - 1;
|
return c - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr(SinglesN != 0) {
|
if constexpr(SN != 0) {
|
||||||
char32_t *found = static_cast<char32_t *>(std::bsearch(
|
char32_t *found = search<SN, SS>(c, singles, codepoint_cmp1);
|
||||||
&c, singles, SinglesN >> 4, SinglesN & 0xF, codepoint_cmp1
|
|
||||||
));
|
|
||||||
if (found) {
|
if (found) {
|
||||||
return found[1];
|
return found[1];
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue