direct encoding funcs to u8/u16/uw from any other UTF
parent
daea42666e
commit
58ccfbe276
|
@ -1022,52 +1022,6 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
void write_seq(R &writer, string_range &val) const {
|
||||
writer.put(val.front());
|
||||
val.pop_front();
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
void write_seq(R &writer, u16string_range &val) const {
|
||||
if (char32_t ret; !utf::decode(val, ret)) {
|
||||
write_replacement(writer);
|
||||
val.pop_front();
|
||||
} else {
|
||||
if (!utf::encode_u8(writer, ret)) {
|
||||
write_replacement(writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
void write_seq(R &writer, u32string_range &val) const {
|
||||
if (!utf::encode_u8(writer, val.front())) {
|
||||
write_replacement(writer);
|
||||
}
|
||||
val.pop_front();
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
void write_seq(R &writer, wstring_range &val) const {
|
||||
if constexpr(
|
||||
(sizeof(wchar_t) == sizeof(char32_t)) ||
|
||||
(sizeof(wchar_t) == sizeof(char16_t))
|
||||
) {
|
||||
if (char32_t ret; !utf::decode(val, ret)) {
|
||||
write_replacement(writer);
|
||||
val.pop_front();
|
||||
} else {
|
||||
if (!utf::encode_u8(writer, ret)) {
|
||||
write_replacement(writer);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
writer.put(char(val.front()));
|
||||
val.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
/* string base writer */
|
||||
template<typename C, typename R>
|
||||
void write_str(R &writer, bool escape, basic_char_range<C const> val) const {
|
||||
|
@ -1091,8 +1045,9 @@ private:
|
|||
write_char_raw(writer, c);
|
||||
}
|
||||
val.pop_front();
|
||||
} else {
|
||||
write_seq(writer, val);
|
||||
} else if (!utf::encode_u8(writer, val)) {
|
||||
write_replacement(writer);
|
||||
val.pop_front();
|
||||
}
|
||||
}
|
||||
writer.put('"');
|
||||
|
|
169
ostd/string.hh
169
ostd/string.hh
|
@ -820,6 +820,7 @@ namespace utf {
|
|||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u8(R &sink, u32string_range &r) {
|
||||
/* just a wrapper; does the same thing but advances */
|
||||
std::size_t n = 0;
|
||||
if (!r.empty() && (n = utf::encode_u8(sink, r.front()))) {
|
||||
r.pop_front();
|
||||
|
@ -827,6 +828,57 @@ namespace utf {
|
|||
return n;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u8(R &sink, u16string_range &r) {
|
||||
/* decodes to code point and encodes */
|
||||
auto rr = r;
|
||||
if (char32_t ch; utf::decode(rr, ch)) {
|
||||
if (std::size_t ret; (ret = utf::encode_u8(sink, ch))) {
|
||||
r = rr;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u8(R &sink, string_range &r) {
|
||||
/* identity match, advances */
|
||||
if (!r.empty()) {
|
||||
sink.put(r.front());
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u8(R &sink, wstring_range &r) {
|
||||
/* for utf-32, decode is just a swapper, for utf-16 it
|
||||
* actually decodes; in both cases it encodes to utf-8,
|
||||
* for utf-8 the whole thing is just an advancing wrapper
|
||||
*/
|
||||
if constexpr(
|
||||
(sizeof(wchar_t) == sizeof(char32_t)) ||
|
||||
(sizeof(wchar_t) == sizeof(char16_t))
|
||||
) {
|
||||
auto rr = r;
|
||||
if (char32_t ch; utf::decode(rr, ch)) {
|
||||
if (std::size_t ret; (ret = utf::encode_u8(sink, ch))) {
|
||||
r = rr;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (!r.empty()) {
|
||||
sink.put(char(r.front()));
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* @brief Encode a UTF-32 code point into UTF-16.
|
||||
*
|
||||
* The values are written in `sink` which is an ostd::output_range_tag.
|
||||
|
@ -851,6 +903,7 @@ namespace utf {
|
|||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u16(R &sink, u32string_range &r) {
|
||||
/* just a wrapper; does the same thing but advances */
|
||||
std::size_t n = 0;
|
||||
if (!r.empty() && (n = utf::encode_u16(sink, r.front()))) {
|
||||
r.pop_front();
|
||||
|
@ -858,6 +911,56 @@ namespace utf {
|
|||
return n;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u16(R &sink, u16string_range &r) {
|
||||
/* identity match, advances */
|
||||
if (!r.empty()) {
|
||||
sink.put(r.front());
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u16(R &sink, string_range &r) {
|
||||
/* has to decode and encode */
|
||||
auto rr = r;
|
||||
if (char32_t ch; utf::decode(rr, ch)) {
|
||||
if (std::size_t ret; (ret = utf::encode_u16(sink, ch))) {
|
||||
r = rr;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_u16(R &sink, wstring_range &r) {
|
||||
/* when wchar_t is guaranteed utf-16, we have an identity
|
||||
* match so we just advance; otherwise decode and encode
|
||||
*/
|
||||
if constexpr(
|
||||
(sizeof(wchar_t) != sizeof(char32_t)) &&
|
||||
(sizeof(wchar_t) == sizeof(char16_t))
|
||||
) {
|
||||
if (!r.empty()) {
|
||||
sink.put(char16_t(r.front()));
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
auto rr = r;
|
||||
if (char32_t ch; utf::decode(rr, ch)) {
|
||||
if (std::size_t ret; (ret = utf::encode_u16(sink, ch))) {
|
||||
r = rr;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* @brief Encode a UTF-32 code point into a wide Unicode char/sequence.
|
||||
*
|
||||
* The value(s) are written in `sink` which is an ostd::output_range_tag.
|
||||
|
@ -902,6 +1005,7 @@ namespace utf {
|
|||
|
||||
template<typename R>
|
||||
inline std::size_t encode_uw(R &sink, u32string_range &r) {
|
||||
/* just a wrapper; does the same thing but advances */
|
||||
std::size_t n = 0;
|
||||
if (!r.empty() && (n = utf::encode_uw(sink, r.front()))) {
|
||||
r.pop_front();
|
||||
|
@ -909,6 +1013,71 @@ namespace utf {
|
|||
return n;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_uw(R &sink, u16string_range &r) {
|
||||
/* when wchar_t is guaranteed utf-16, we have an identity
|
||||
* match much like encode_u16 with wstring, otherwise
|
||||
* decode and encode
|
||||
*/
|
||||
if constexpr(
|
||||
(sizeof(wchar_t) != sizeof(char32_t)) &&
|
||||
(sizeof(wchar_t) == sizeof(char16_t))
|
||||
) {
|
||||
if (!r.empty()) {
|
||||
sink.put(wchar_t(r.front()));
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
auto rr = r;
|
||||
if (char32_t ch; utf::decode(rr, ch)) {
|
||||
if (std::size_t ret; (ret = utf::encode_uw(sink, ch))) {
|
||||
r = rr;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_uw(R &sink, string_range &r) {
|
||||
/* when wchar_t is guaranteed utf-8, we have an identity
|
||||
* match so there is no reencoding, otherwise decode and
|
||||
* encode...
|
||||
*/
|
||||
if constexpr(
|
||||
(sizeof(wchar_t) != sizeof(char32_t)) &&
|
||||
(sizeof(wchar_t) != sizeof(char16_t))
|
||||
) {
|
||||
if (!r.empty()) {
|
||||
sink.put(wchar_t(r.front()));
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
auto rr = r;
|
||||
if (char32_t ch; utf::decode(rr, ch)) {
|
||||
if (std::size_t ret; (ret = utf::encode_uw(sink, ch))) {
|
||||
r = rr;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename R>
|
||||
inline std::size_t encode_uw(R &sink, wstring_range &r) {
|
||||
/* identity match, advances */
|
||||
if (!r.empty()) {
|
||||
sink.put(wchar_t(r.front()));
|
||||
r.pop_front();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* @brief Get the number of Unicode code points in a string.
|
||||
*
|
||||
* This function keeps reading Unicode code points while it can and
|
||||
|
|
Loading…
Reference in New Issue