completely unify encode funcs

master
Daniel Kolesa 2018-01-07 18:44:05 +01:00
parent 541fa43cbb
commit 4a2e5cd557
2 changed files with 81 additions and 127 deletions

View File

@ -666,6 +666,32 @@ namespace utf {
static inline constexpr std::size_t const max_units =
detail::max_units_base<C>::value;
namespace detail {
    /* Lookup from a code unit count `N` to a character type, exposed as
     * the nested `type` alias. The primary template is only declared, so
     * asking for `type` with an `N` other than 1, 2 or 4 fails to compile.
     */
    template<std::size_t N>
    struct unicode_t_base;

    /* 4 units -> char */
    template<> struct unicode_t_base<4> { using type = char; };

    /* 2 units -> char16_t */
    template<> struct unicode_t_base<2> { using type = char16_t; };

    /* 1 unit -> char32_t */
    template<> struct unicode_t_base<1> { using type = char32_t; };
}
/* The character type that detail::unicode_t_base associates with `N`:
 * char32_t for 1, char16_t for 2 and char for 4. Any other `N` has no
 * specialization and does not compile.
 */
template<std::size_t N>
using unicode_t = typename detail::unicode_t_base<N>::type;
/* The unicode_t selected by max_units<T>, i.e. the character type looked
 * up from the maximum unit count of `T`.
 */
template<typename T>
using unicode_base_t = unicode_t<max_units<T>>;
/* True when wchar_fixed_t is the same type as char32_t. */
static inline constexpr bool const is_wchar_u32 =
std::is_same_v<wchar_fixed_t, char32_t>;
@ -748,14 +774,57 @@ namespace utf {
bool decode(wstring_range &r, char32_t &ret) noexcept;
namespace detail {
std::size_t u8_encode(
std::size_t encode(
char (&ret)[4], char32_t ch
) noexcept;
std::size_t u16_encode(
std::size_t encode(
char16_t (&ret)[2], char32_t ch
) noexcept;
}
template<typename C, typename R>
inline std::size_t encode(R &sink, char32_t ch) {
std::size_t ret;
if constexpr(max_units<C> == 1) {
sink.put(C(ch));
ret = 1;
} else {
unicode_base_t<C> buf[max_units<C>];
ret = detail::encode(buf, ch);
for (std::size_t i = 0; i < ret; ++i) {
sink.put(C(buf[i]));
}
}
return ret;
}
/* Take input from the front of `r` and write it into `sink` as code
 * units of type `C`. Returns the number of units written, or 0 when
 * nothing was written; `r` is only advanced when something was written.
 *
 * Three cases are selected at compile time:
 *
 *  - `max_units<IC>` is 1: the front unit is converted to char32_t and
 *    encoded with the code point overload of `utf::encode<C>`.
 *  - input and output share the same `max_units`: the front unit is
 *    converted to `C` and copied through unchanged.
 *  - anything else: one code point is decoded from a copy of `r` and
 *    encoded; `r` is replaced by the advanced copy only on success.
 */
template<typename C, typename R, typename IC>
inline std::size_t encode(R &sink, basic_char_range<IC const> &r) {
    if constexpr(max_units<IC> == 1) {
        if (r.empty()) {
            return 0;
        }
        std::size_t const written = utf::encode<C>(
            sink, char32_t(r.front())
        );
        if (written) {
            r.pop_front();
        }
        return written;
    } else if constexpr(max_units<IC> == max_units<C>) {
        /* FIXME: advance by a whole character always */
        if (r.empty()) {
            return 0;
        }
        sink.put(C(r.front()));
        r.pop_front();
        return 1;
    } else {
        /* work on a copy so a failed decode/encode leaves `r` intact */
        auto probe = r;
        char32_t cp;
        if (!utf::decode(probe, cp)) {
            return 0;
        }
        std::size_t const written = utf::encode<C>(sink, cp);
        if (!written) {
            return 0;
        }
        r = probe;
        return written;
    }
}
/* @brief Encode a UTF-32 code point into UTF-8 code units.
*
* The units are written to `sink`, which is an output range (ostd::output_range_tag).
@ -770,39 +839,12 @@ namespace utf {
*/
template<typename R>
inline std::size_t encode_u8(R &sink, char32_t ch) {
char buf[4];
std::size_t n = detail::u8_encode(buf, ch);
for (std::size_t i = 0; i < n; ++i) {
sink.put(buf[i]);
}
return n;
return encode<char>(sink, ch);
}
template<typename R, typename C>
inline std::size_t encode_u8(R &sink, basic_char_range<C const> &r) {
if constexpr(max_units<C> == 1) {
std::size_t n = 0;
if (!r.empty() && (n = utf::encode_u8(sink, char32_t(r.front())))) {
r.pop_front();
}
return n;
} else if constexpr(max_units<C> == 2) {
auto rr = r;
if (char32_t ch; utf::decode(rr, ch)) {
if (std::size_t n; (n = utf::encode_u8(sink, ch))) {
r = rr;
return n;
}
}
} else {
/* FIXME: advance by a whole character */
if (!r.empty()) {
sink.put(char(r.front()));
r.pop_front();
return 1;
}
}
return 0;
return encode<char>(sink, r);
}
/* @brief Encode a UTF-32 code point into UTF-16.
@ -819,54 +861,22 @@ namespace utf {
*/
template<typename R>
inline std::size_t encode_u16(R &sink, char32_t ch) {
char16_t buf[2];
std::size_t n = detail::u16_encode(buf, ch);
for (std::size_t i = 0; i < n; ++i) {
sink.put(buf[i]);
}
return n;
return encode<char16_t>(sink, ch);
}
template<typename R, typename C>
inline std::size_t encode_u16(R &sink, basic_char_range<C const> &r) {
if constexpr(max_units<C> == 1) {
std::size_t n = 0;
if (!r.empty() && (n = utf::encode_u16(sink, char32_t(r.front())))) {
r.pop_front();
}
return n;
} else if constexpr(max_units<C> == 2) {
/* FIXME: advance by a whole character */
if (!r.empty()) {
sink.put(char16_t(r.front()));
r.pop_front();
return 1;
}
} else {
auto rr = r;
if (char32_t ch; utf::decode(rr, ch)) {
if (std::size_t n; (n = utf::encode_u16(sink, ch))) {
r = rr;
return n;
}
}
}
return 0;
return encode<char16_t>(sink, r);
}
template<typename R>
inline std::size_t encode_u32(R &sink, char32_t ch) {
sink.put(ch);
return 1;
return encode<char32_t>(sink, ch);
}
template<typename R, typename C>
inline std::size_t encode_u32(R &sink, basic_char_range<C const> &r) {
if (char32_t ret; decode(r, ret)) {
sink.put(ret);
return 1;
}
return 0;
return encode<char32_t>(sink, r);
}
/* @brief Encode a UTF-32 code point into a wide Unicode char/sequence.
@ -891,68 +901,12 @@ namespace utf {
*/
template<typename R>
inline std::size_t encode_uw(R &sink, char32_t ch) {
std::size_t n;
if constexpr(is_wchar_u32) {
n = 1;
sink.put(wchar_t(ch));
} else if constexpr(is_wchar_u16) {
char16_t buf[2];
n = detail::u16_encode(buf, ch);
for (std::size_t i = 0; i < n; ++i) {
sink.put(wchar_t(buf[i]));
}
} else {
char buf[4];
n = detail::u8_encode(buf, ch);
for (std::size_t i = 0; i < n; ++i) {
sink.put(wchar_t(buf[i]));
}
}
return n;
return encode<wchar_t>(sink, ch);
}
template<typename R, typename C>
inline std::size_t encode_uw(R &sink, basic_char_range<C const> &r) {
if constexpr(max_units<C> == 1) {
std::size_t n = 0;
if (!r.empty() && (n = utf::encode_uw(sink, char32_t(r.front())))) {
r.pop_front();
}
return n;
} else if constexpr(max_units<C> == max_units<wchar_t>) {
/* FIXME: advance by a whole character */
if (!r.empty()) {
sink.put(wchar_t(r.front()));
r.pop_front();
return 1;
}
} else {
auto rr = r;
if (char32_t ch; utf::decode(rr, ch)) {
if (std::size_t n; (n = utf::encode_uw(sink, ch))) {
r = rr;
return n;
}
}
}
return 0;
}
template<typename C, typename OR, typename IR>
inline std::size_t encode(
[[maybe_unused]] OR &sink, [[maybe_unused]] IR &r
) {
static_assert(is_character<C>, "Invalid input type");
if constexpr(std::is_same_v<C, char32_t>) {
return encode_u32(sink, r);
} else if constexpr(std::is_same_v<C, char16_t>) {
return encode_u16(sink, r);
} else if constexpr(std::is_same_v<C, char>) {
return encode_u8(sink, r);
} else if constexpr(std::is_same_v<C, wchar_t>) {
return encode_uw(sink, r);
}
return 0;
return encode<wchar_t>(sink, r);
}
/* @brief Get the number of Unicode code points in a string.

View File

@ -92,7 +92,7 @@ namespace detail {
return 1;
}
std::size_t u8_encode(
std::size_t encode(
char (&ret)[4], char32_t ch
) noexcept {
if (ch <= 0x7F) {
@ -126,7 +126,7 @@ namespace detail {
return 0;
}
std::size_t u16_encode(
std::size_t encode(
char16_t (&ret)[2], char32_t ch
) noexcept {
/* surrogate code point or out of bounds */