libcubescript/src/cs_util.cc

477 lines
13 KiB
C++
Raw Normal View History

2017-06-20 21:21:39 +02:00
#include <cubescript/cubescript.hh>
#include "cs_util.hh"
#include "cs_vm.hh"
#include <ctype.h>
#include <math.h>
namespace cscript {
2017-02-16 19:07:22 +01:00
/* Advances `v` past any leading whitespace, leaving it positioned at the
 * first non-whitespace character (or empty when there is none).
 */
static inline void p_skip_white(ostd::string_range &v) {
    /* the <ctype.h> classifiers are only defined for values representable
     * as unsigned char (or EOF); plain char may be negative, so convert
     */
    while (!v.empty() && isspace(static_cast<unsigned char>(*v))) {
        ++v;
    }
}
static inline void p_set_end(
2017-02-16 19:07:22 +01:00
const ostd::string_range &v, ostd::string_range *end
) {
if (!end) {
return;
}
*end = v;
}
/* this function assumes the input is definitely a hex digit */
2017-02-13 18:10:40 +01:00
static inline cs_int p_hexd_to_int(char c) {
if (c >= 97) { /* a-f */
2016-08-15 03:44:48 +02:00
return (c - 'a') + 10;
} else if (c >= 65) { /* A-F */
return (c - 'A') + 10;
}
/* 0-9 */
return c - '0';
}
2017-02-16 19:07:22 +01:00
/* Consumes one leading sign character ('-' or '+') if present.
 *
 * Returns true only when the sign was '-'. The first character is read
 * without an emptiness check, so `input` must be non-empty on entry.
 */
static inline bool p_check_neg(ostd::string_range &input) {
    char const sign = *input;
    if ((sign != '-') && (sign != '+')) {
        return false;
    }
    ++input;
    return (sign == '-');
}
2017-02-16 19:07:22 +01:00
/* Parses an integer from the start of `input`.
 *
 * Leading whitespace is skipped, then an optional sign, then either a
 * "0x"/"0X" prefixed hexadecimal number, a "0b"/"0B" prefixed binary
 * number, or a plain decimal number. Overflow is not checked.
 *
 * If `end` is non-null it receives the remainder of the input after the
 * last consumed digit. When no digit was consumed at all (empty input,
 * sign only, prefix without digits, ...) it receives the original,
 * untouched input instead and the result is 0.
 */
cs_int cs_parse_int(ostd::string_range input, ostd::string_range *end) {
    ostd::string_range orig = input;
    p_skip_white(input);
    if (input.empty()) {
        p_set_end(orig, end);
        return cs_int(0);
    }
    bool neg = p_check_neg(input);
    /* detect and strip a radix prefix */
    int base = 10;
    if (input.size() >= 2) {
        ostd::string_range pfx = input.slice(0, 2);
        if ((pfx == "0x") || (pfx == "0X")) {
            base = 16;
        } else if ((pfx == "0b") || (pfx == "0B")) {
            base = 2;
        }
        if (base != 10) {
            input = input.slice(2, input.size());
        }
    }
    cs_int ret = 0;
    ostd::string_range past = input;
    for (; !past.empty(); ++past) {
        /* <ctype.h> classifiers need an unsigned char value; a negative
         * plain char would be undefined behavior
         */
        unsigned char c = static_cast<unsigned char>(*past);
        if (base == 16) {
            if (!isxdigit(c)) {
                break;
            }
            ret = ret * 16 + p_hexd_to_int(*past);
        } else if (base == 2) {
            if ((c != '0') && (c != '1')) {
                break;
            }
            ret = ret * 2 + (c - '0');
        } else {
            if (!isdigit(c)) {
                break;
            }
            ret = ret * 10 + (c - '0');
        }
    }
    /* `past` only ever advances from `input`, so equal sizes mean that
     * no digit was consumed; comparing sizes avoids indexing a range
     * that may be empty at this point
     */
    if (past.size() == input.size()) {
        p_set_end(orig, end);
    } else {
        p_set_end(past, end);
    }
    if (neg) {
        return -ret;
    }
    return ret;
}
2016-08-17 18:08:14 +02:00
template<bool Hex, char e1 = Hex ? 'p' : 'e', char e2 = Hex ? 'P' : 'E'>
2017-02-16 19:07:22 +01:00
static inline bool p_read_exp(ostd::string_range &input, cs_int &fn) {
if (input.empty()) {
return true;
}
2016-08-15 19:55:22 +02:00
if ((*input != e1) && (*input != e2)) {
return true;
}
2016-08-15 19:55:22 +02:00
++input;
if (input.empty()) {
return false;
}
2016-08-15 03:49:24 +02:00
bool neg = p_check_neg(input);
2016-08-15 19:55:22 +02:00
if (input.empty() || !isdigit(*input)) {
return false;
}
2017-02-13 18:10:40 +01:00
cs_int exp = 0;
2016-08-15 19:55:22 +02:00
while (!input.empty() && isdigit(*input)) {
exp = exp * 10 + (*input - '0');
++input;
}
if (neg) {
exp = -exp;
}
fn += exp;
return true;
}
2016-08-17 18:08:14 +02:00
/* Parses the unsigned, prefix-less body of a floating point number:
 * digits, an optional '.' followed by more digits, and an optional
 * exponent. With Hex the digits are hexadecimal and the exponent marker
 * is 'p'/'P', otherwise they are decimal and the marker is 'e'/'E'.
 *
 * Returns false (leaving `ret` and `end` untouched) when there is not a
 * single digit. Otherwise stores the value in `ret`, sets `end` (when
 * non-null) to the input after the number and returns true. A malformed
 * exponent is not an error: the number simply ends before the marker.
 */
template<bool Hex>
static inline bool parse_gen_float(
    ostd::string_range input, ostd::string_range *end, cs_float &ret
) {
    /* accumulates digits into `r`, counting them in `n` */
    auto read_digits = [&input](double r, cs_int &n) {
        while (!input.empty()) {
            /* <ctype.h> classifiers need an unsigned char value */
            unsigned char c = static_cast<unsigned char>(*input);
            if (!(Hex ? isxdigit(c) : isdigit(c))) {
                break;
            }
            if (Hex) {
                r = r * 16.0 + double(p_hexd_to_int(*input));
            } else {
                r = r * 10.0 + double(*input - '0');
            }
            ++n;
            ++input;
        }
        return r;
    };
    /* whole and fractional digit counts */
    cs_int wn = 0, fn = 0;
    double r = read_digits(0.0, wn);
    if (!input.empty() && (*input == '.')) {
        ++input;
        r = read_digits(r, fn);
    }
    if (!wn && !fn) {
        return false;
    }
    p_set_end(input, end); /* we have a valid number until here */
    /* the exponent is kept apart from the fractional digit count: for
     * hex floats each fractional digit scales by 2^-4, while the 'p'
     * exponent is a plain power of two and must not be multiplied by 4
     */
    cs_int exp = 0;
    if (p_read_exp<Hex>(input, exp)) {
        p_set_end(input, end);
    }
    if (Hex) {
        ret = cs_float(ldexp(r, exp - fn * 4));
    } else {
        ret = cs_float(r * pow(10, exp - fn));
    }
    return true;
}
2017-02-16 19:07:22 +01:00
/* Parses a floating point number from the start of `input`.
 *
 * Leading whitespace and an optional sign are consumed first; a
 * "0x"/"0X" prefix selects hexadecimal parsing, anything else is parsed
 * as decimal. When `end` is non-null it receives the input remaining
 * after the number, or the original input if nothing could be parsed
 * (in which case the result is zero).
 */
cs_float cs_parse_float(ostd::string_range input, ostd::string_range *end) {
    ostd::string_range orig = input;
    p_skip_white(input);
    if (input.empty()) {
        p_set_end(orig, end);
        return cs_float(0);
    }
    bool negative = p_check_neg(input);
    /* look for the hex prefix and strip it when present */
    bool hex = false;
    if (input.size() >= 2) {
        ostd::string_range prefix = input.slice(0, 2);
        hex = ((prefix == "0x") || (prefix == "0X"));
        if (hex) {
            input = input.slice(2, input.size());
        }
    }
    cs_float value = cs_float(0);
    bool parsed;
    if (hex) {
        parsed = parse_gen_float<true>(input, end, value);
    } else {
        parsed = parse_gen_float<false>(input, end, value);
    }
    if (!parsed) {
        /* no number here, report that nothing was consumed */
        p_set_end(orig, end);
        return value;
    }
    if (negative) {
        return -value;
    }
    return value;
}
/* string manager */
/* Given a pointer to managed string data, yields the cs_strref_state
 * header located immediately in front of it.
 */
inline cs_strref_state *get_ref_state(char const *ptr) {
    auto const *after = reinterpret_cast<cs_strref_state const *>(ptr);
    return const_cast<cs_strref_state *>(after) - 1;
}
/* Returns a pointer to managed, null-terminated character data equal to
 * `str`, holding one reference on it.
 *
 * If the string is already tracked its refcount is bumped and the
 * existing data is returned. Otherwise a new block is allocated that
 * holds a cs_strref_state header directly followed by the characters.
 * Throws cs_internal_error when the allocation fails.
 */
char const *cs_strman::add(ostd::string_range str) {
    auto it = counts.find(str);
    /* already present: just increment ref */
    if (it != counts.end()) {
        /* having a null pointer is the same as non-existence */
        if (auto *st = it->second) {
            ++st->refcount;
            return reinterpret_cast<char const *>(st + 1);
        }
    }
    /* not present: allocate brand new data */
    auto ss = str.size();
    auto mem = cstate->alloc(nullptr, 0, ss + sizeof(cs_strref_state) + 1);
    if (!mem) {
        throw cs_internal_error{"allocation failed"};
    }
    /* write length and refcount */
    auto *sst = static_cast<cs_strref_state *>(mem);
    sst->length = ss;
    sst->refcount = 1;
    /* write string data */
    auto *strp = reinterpret_cast<char *>(sst + 1);
    memcpy(strp, str.data(), ss);
    /* terminated for best compatibility */
    strp[ss] = '\0';
    /* a leftover entry with a null state has to go first: emplace does
     * not replace an existing key (assuming `counts` is a unique-key map
     * such as std::unordered_map), so the new string would otherwise
     * never be registered
     */
    if (it != counts.end()) {
        counts.erase(it);
    }
    /* store it */
    counts.emplace(ostd::string_range{strp, strp + ss}, sst);
    return strp;
}
/* Takes one more reference on the managed string that `ptr` points to
 * and hands the same pointer back.
 */
char const *cs_strman::ref(char const *ptr) {
    ++get_ref_state(ptr)->refcount;
    return ptr;
}
void cs_strman::unref(char const *ptr) {
auto *ss = get_ref_state(ptr);
if (!--ss->refcount) {
/* refcount zero, so ditch it
* this path is a little slow...
*/
auto sr = ostd::string_range{ptr, ptr + ss->length};
auto it = counts.find(sr);
if (it == counts.end()) {
/* internal error: this should *never* happen */
2021-03-17 02:47:34 +01:00
throw cs_internal_error{"no refcount"};
}
2021-03-17 02:47:34 +01:00
/* we're freeing the key */
counts.erase(it);
/* dealloc */
cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0);
}
}
/* Looks up `str` among the managed strings without touching any
 * refcount. Returns the managed character data, or nullptr when the
 * string is not tracked.
 */
char const *cs_strman::find(ostd::string_range str) const {
    auto it = counts.find(str);
    /* an entry with a null state counts as non-existent, same as in
     * add(); doing pointer arithmetic on it would yield a bogus pointer
     */
    if ((it == counts.end()) || !it->second) {
        return nullptr;
    }
    return reinterpret_cast<char const *>(it->second + 1);
}
/* Builds a range over the managed string at `ptr`, taking the length
 * from the header stored in front of the data.
 */
ostd::string_range cs_strman::get(char const *ptr) const {
    auto const len = get_ref_state(ptr)->length;
    return ostd::string_range{ptr, ptr + len};
}
2016-09-21 21:02:13 +02:00
namespace util {
2017-02-16 19:07:22 +01:00
/* Skips over a double-quoted string at the start of `str`.
 *
 * If `str` does not begin with '"' it is returned unchanged and `nlines`
 * is set to 0. Otherwise the result is the input right after the closing
 * quote and `nlines` is 1 plus the number of escaped line breaks inside
 * the string ('^' or '\' directly followed by CR, LF or CRLF). A '^'
 * followed by any other character skips that character; a '\' followed
 * by anything but a line break stops the scan at that character.
 *
 * Throws cs_error ("unfinished string") when the scan stops anywhere
 * other than on a closing quote; `nlines` has been updated by then.
 */
OSTD_EXPORT ostd::string_range parse_string(
    cs_state &cs, ostd::string_range str, size_t &nlines
) {
    nlines = 0;
    if (str.empty() || (*str != '\"')) {
        return str;
    }
    ostd::string_range start = str;
    size_t lines = 1;
    /* step over the opening quote */
    ++str;
    while (!str.empty()) {
        char const c = *str;
        if ((c == '\r') || (c == '\n') || (c == '\"')) {
            /* closing quote or a raw line break: stop right here */
            break;
        }
        ++str;
        if ((c != '^') && (c != '\\')) {
            /* ordinary character */
            continue;
        }
        if (str.empty()) {
            break;
        }
        char const next = *str;
        if ((next == '\r') || (next == '\n')) {
            /* escaped line break, CRLF is consumed as a single one */
            ++str;
            ++lines;
            if ((next == '\r') && !str.empty() && (*str == '\n')) {
                ++str;
            }
        } else if (c == '\\') {
            /* a backslash is only good for continuing a line */
            break;
        } else {
            /* caret escape: skip the escaped character */
            ++str;
        }
    }
    nlines = lines;
    if (str.empty() || (*str != '\"')) {
        throw cs_error(
            cs, "unfinished string '%s'", start.slice(0, &str[0] - &start[0])
        );
    }
    str.pop_front();
    return str;
}
2017-02-16 19:07:22 +01:00
/* Finds the end of a word at the start of `str`.
 *
 * The result is the input positioned at the character that terminates
 * the word: a double quote, ';', whitespace, an unmatched ']' or ')',
 * or the start of a "//" sequence. It is empty if the word runs to the
 * end of the input. Bracketed and parenthesized groups inside the word
 * are skipped recursively, and a single '/' is part of the word.
 *
 * Throws cs_error when a '[' or '(' group is not closed by its
 * matching bracket.
 */
OSTD_EXPORT ostd::string_range parse_word(
    cs_state &cs, ostd::string_range str
) {
    for (;;) {
        str = ostd::find_one_of(str, ostd::string_range("\"/;()[] \t\r\n"));
        if (str.empty()) {
            return str;
        }
        char const c = *str;
        if (c == '[') {
            /* nested group: skip its contents, then expect the closer */
            str.pop_front();
            str = parse_word(cs, str);
            if (str.empty() || (*str != ']')) {
                throw cs_error(cs, "missing \"]\"");
            }
        } else if (c == '(') {
            str.pop_front();
            str = parse_word(cs, str);
            if (str.empty() || (*str != ')')) {
                throw cs_error(cs, "missing \")\"");
            }
        } else if (c == '/') {
            /* only a double slash ends the word */
            if ((str.size() > 1) && (str[1] == '/')) {
                return str;
            }
        } else if (
            (c == '"') || (c == ';') || (c == ' ') || (c == '\t') ||
            (c == '\r') || (c == '\n') || (c == ']') || (c == ')')
        ) {
            /* word terminator */
            return str;
        }
        /* step over the single slash or the group's closing bracket */
        ++str;
    }
}
2021-03-18 23:53:16 +01:00
} /* namespace util */
2016-09-21 21:02:13 +02:00
2021-03-18 23:53:16 +01:00
/* Extracts the next list item from `ps.input`.
 *
 * Leading whitespace and "//" comments are skipped first. Returns false
 * when the input is exhausted or the next character is a stray ')' or
 * ']'. Otherwise returns true with `ps.item` set to the item contents
 * and `ps.quoted_item` set to the item including its delimiters:
 *
 * - a double-quoted string: item is the text between the quotes
 * - a "(...)" or "[...]" block: item is the text between the brackets;
 *   nested brackets of the same kind are balanced, strings and "//"
 *   comments inside are skipped so brackets within them do not count
 * - anything else: a plain word, item and quoted_item are identical
 *
 * After the item, trailing whitespace/comments and one optional ';'
 * are consumed. If the input ends inside a bracketed block, true is
 * returned right away with item covering everything after the opening
 * bracket. Errors from string/word parsing propagate as cs_error.
 */
OSTD_EXPORT bool list_parse(cs_list_parse_state &ps, cs_state &cs) {
    list_find_item(ps);
    if (ps.input.empty()) {
        return false;
    }
    switch (*ps.input) {
        case '"':
            ps.quoted_item = ps.input;
            ps.input = util::parse_string(cs, ps.input);
            ps.quoted_item = ps.quoted_item.slice(
                0, &ps.input[0] - &ps.quoted_item[0]
            );
            /* strip the surrounding quotes */
            ps.item = ps.quoted_item.slice(1, ps.quoted_item.size() - 1);
            break;
        case '(':
        case '[': {
            ps.quoted_item = ps.input;
            ++ps.input;
            ps.item = ps.input;
            char btype = *ps.quoted_item;
            /* nesting depth, counting only brackets of the opening kind */
            int brak = 1;
            for (;;) {
                ps.input = ostd::find_one_of(
                    ps.input, ostd::string_range("\"/;()[]")
                );
                if (ps.input.empty()) {
                    return true;
                }
                char c = *ps.input;
                if (c == '"') {
                    /* parse_string needs to see the opening quote, or it
                     * returns immediately and the string contents would
                     * be scanned for brackets and comments
                     */
                    ps.input = util::parse_string(cs, ps.input);
                    continue;
                }
                ++ps.input;
                switch (c) {
                    case '/':
                        /* line comment: jump to the end of the line */
                        if (!ps.input.empty() && (*ps.input == '/')) {
                            ps.input = ostd::find(ps.input, '\n');
                        }
                        break;
                    case '(':
                    case '[':
                        brak += (c == btype);
                        break;
                    case ')':
                        if ((btype == '(') && (--brak <= 0)) {
                            goto endblock;
                        }
                        break;
                    case ']':
                        if ((btype == '[') && (--brak <= 0)) {
                            goto endblock;
                        }
                        break;
                }
            }
endblock:
            /* the input is now just past the closing bracket */
            ps.item = ps.item.slice(0, &ps.input[0] - &ps.item[0]);
            ps.item.pop_back();
            ps.quoted_item = ps.quoted_item.slice(
                0, &ps.input[0] - &ps.quoted_item[0]
            );
            break;
        }
        case ')':
        case ']':
            return false;
        default: {
            ostd::string_range e = util::parse_word(cs, ps.input);
            ps.quoted_item = ps.item = ps.input.slice(0, &e[0] - &ps.input[0]);
            ps.input = e;
            break;
        }
    }
    list_find_item(ps);
    if (!ps.input.empty() && (*ps.input == ';')) {
        ++ps.input;
    }
    return true;
}
/* Counts the items remaining in the list by calling list_parse until
 * it reports that there are no more; the parse state is advanced
 * accordingly.
 */
OSTD_EXPORT std::size_t list_count(cs_list_parse_state &ps, cs_state &cs) {
    size_t count = 0;
    for (; list_parse(ps, cs); ++count) {
        /* every successful parse is one item */
    }
    return count;
}
/* Produces a string reference for the most recently parsed item.
 *
 * Items that were written as double-quoted strings are passed through
 * util::unescape_string first; everything else is used verbatim.
 */
OSTD_EXPORT cs_strref list_get_item(cs_list_parse_state &ps, cs_state &cs) {
    bool const quoted = (
        !ps.quoted_item.empty() && (*ps.quoted_item == '"')
    );
    if (!quoted) {
        return cs_strref{cs, ps.item};
    }
    auto buf = ostd::appender<cs_string>();
    util::unescape_string(buf, ps.item);
    return cs_strref{cs, buf.get()};
}
/* Advances `ps.input` to the start of the next item by skipping any
 * mix of whitespace (space, tab, CR, LF) and "//" comments, each of
 * which extends up to the next line feed.
 */
OSTD_EXPORT void list_find_item(cs_list_parse_state &ps) {
    auto is_blank = [](char c) {
        return (c == ' ') || (c == '\t') || (c == '\r') || (c == '\n');
    };
    for (;;) {
        while (!ps.input.empty() && is_blank(*ps.input)) {
            ++ps.input;
        }
        bool const at_comment = (
            (ps.input.size() >= 2) &&
            (ps.input[0] == '/') && (ps.input[1] == '/')
        );
        if (!at_comment) {
            return;
        }
        /* drop the comment, the line feed is eaten as whitespace next */
        ps.input = ostd::find(ps.input, '\n');
    }
}
2016-09-21 21:02:13 +02:00
} /* namespace cscript */