move compileblockstr/compileunescapestr logic into gen

master
Daniel Kolesa 2021-04-09 02:54:36 +02:00
parent c0fc1aa452
commit bac186a0c7
3 changed files with 97 additions and 81 deletions

View File

@ -73,6 +73,97 @@ void gen_state::gen_val_string(std::string_view v) {
code.push_back(u);
}
/* FIXME: figure out how to do without the intermediate buffer */
/* Appends a BC_INST_VAL | BC_RET_STRING instruction to `code`, followed by
 * a string payload produced by `func`, zero-padded to whole 32-bit words.
 *
 * code: the bytecode buffer to append to
 * ts:   supplies the state handed to the scratch buffer allocator
 * v:    the input text; here it is only used to size the scratch buffer
 * func: called with a pointer to the scratch buffer; must return the number
 *       of bytes it wrote, which must not exceed v.size()
 *
 * The byte length is stored in the instruction word from bit 8 upwards.
 * The scratch buffer is released on every path, including when `func` or
 * the append throws (the exception is then rethrown unchanged).
 */
template<typename F>
static void gen_str_filter(
    valbuf<std::uint32_t> &code, thread_state &ts, std::string_view v, F &&func
) {
    constexpr std::size_t word_size = sizeof(std::uint32_t);
    code.push_back(BC_INST_VAL | BC_RET_STRING);
    /* one extra word so there is always room for the zero padding */
    auto nwords = (v.size() / word_size) + 1;
    code.reserve(code.size() + nwords);
    /* character scratch buffer spanning exactly that many words */
    auto al = std_allocator<char>{ts.istate};
    auto nbytes = nwords * word_size;
    auto *buf = al.allocate(nbytes);
    try {
        /* the body */
        auto len = func(&buf[0]);
        /* zero everything from the end of the data to the word boundary;
         * this always writes at least one byte
         */
        std::memset(&buf[len], 0, word_size - len % word_size);
        /* the instruction pushed above is still the last element */
        code.back() |= (len << 8);
        auto *ubuf = reinterpret_cast<std::uint32_t *>(buf);
        code.append(ubuf, ubuf + ((len / word_size) + 1));
    } catch (...) {
        /* do not leak the scratch buffer when the filter raises */
        al.deallocate(buf, nbytes);
        throw;
    }
    al.deallocate(buf, nbytes);
}
/* Emits a string value whose payload is `v` passed through unescape_string.
 * The instruction encoding and padding are handled by gen_str_filter.
 */
void gen_state::gen_val_string_unescape(std::string_view v) {
    /* writes the unescaped text into `out`, reports how many bytes it took */
    auto unescape = [&v](auto *out) {
        auto *tail = unescape_string(out, v);
        return static_cast<std::size_t>(tail - out);
    };
    gen_str_filter(code, ts, v, unescape);
}
/* Emits a string value holding the text of block `v`, filtered as follows:
 *
 *  - carriage returns are dropped
 *  - quoted strings (as delimited by parse_string) are copied verbatim, so
 *    special characters inside them are left alone
 *  - `//` comments are dropped up to (not including) the next newline,
 *    unless the character right after the `//` is punctuation, in which
 *    case the comment is kept
 *  - everything else, including lone '/', '@' and ']', is copied as is
 *
 * The instruction encoding and padding are handled by gen_str_filter.
 */
void gen_state::gen_val_block(std::string_view v) {
    gen_str_filter(code, ts, v, [&v, this](auto *buf) {
        /* characters at which plain copying has to stop */
        std::string_view const chrs{"\r/\"@]"};
        auto *str = v.data();
        auto *send = v.data() + v.size();
        std::size_t len = 0;
        /* appends the range [from, to) to the output */
        auto copy = [&](char const *from, char const *to) {
            auto n = std::size_t(to - from);
            std::memcpy(&buf[len], from, n);
            len += n;
        };
        while (str < send) {
            auto *orig = str;
            /* find a boundary character */
            str = std::find_first_of(str, send, chrs.begin(), chrs.end());
            /* copy everything up until boundary character */
            copy(orig, str);
            /* found nothing: bail out */
            if (str == send) {
                break;
            }
            /* from here on str != send, so *str is a valid boundary char */
            switch (*str) {
                case '\r': /* filter out */
                    ++str;
                    break;
                case '\"': { /* quoted string */
                    char const *start = str;
                    str = parse_string(
                        *ts.pstate, std::string_view{str, send}
                    );
                    copy(start, str);
                    break;
                }
                case '/':
                    if (((str + 1) != send) && (str[1] == '/')) {
                        /* comment */
                        char const *start = str;
                        str = std::find(str, send, '\n');
                        /* std::ispunct requires a value representable as
                         * unsigned char; a plain char may be negative
                         */
                        if (((start + 2) != send) && std::ispunct(
                            static_cast<unsigned char>(start[2])
                        )) {
                            /* these comments will be preserved */
                            copy(start, str);
                        }
                    } else {
                        /* write and skip */
                        buf[len++] = *str++;
                    }
                    break;
                case '@':
                case ']':
                    /* write and skip */
                    buf[len++] = *str++;
                    break;
            }
        }
        return len;
    });
}
/* Argument-less form: forwards to gen_val_ident(ident &) using the
 * id_dummy ident held by ts.istate.
 */
void gen_state::gen_val_ident() {
    auto &dummy = *ts.istate->id_dummy;
    gen_val_ident(dummy);
}

View File

@ -33,6 +33,8 @@ struct gen_state {
void gen_val_float(std::string_view v);
/* when called without an argument, `v` is an empty view */
void gen_val_string(std::string_view v = std::string_view{});
/* emits a string value whose contents are `str` run through
 * unescape_string
 */
void gen_val_string_unescape(std::string_view str);
/* emits a string value holding the text of a block: carriage returns are
 * removed, quoted strings are copied verbatim, and `//` comments are
 * removed unless the character following the `//` is punctuation
 */
void gen_val_block(std::string_view str);
/* same as gen_val_ident(ident &) with the id_dummy ident of ts.istate */
void gen_val_ident();
void gen_val_ident(ident &i);

View File

@ -403,26 +403,6 @@ static inline int ret_code(int type, int def = 0) {
return type << BC_INST_RET;
}
/* Emits a BC_INST_VAL | BC_RET_STRING instruction whose payload is the
 * string returned by gs.get_str(), passed through unescape_string and
 * zero-padded to whole 32-bit words. The byte length is stored in the
 * instruction word from bit 8 upwards.
 */
static inline void compileunescapestr(parser_state &gs) {
    auto &code = gs.gs.code;
    auto src = gs.get_str();
    code.push_back(BC_INST_VAL | BC_RET_STRING);
    code.reserve(code.size() + src.size() / sizeof(uint32_t) + 1);
    /* scratch size: the spare capacity of the code buffer in bytes, plus one */
    size_t spare = (code.capacity() - code.size()) * sizeof(uint32_t);
    auto al = std_allocator<char>{gs.ts.istate};
    auto *scratch = al.allocate(spare + 1);
    char *tail = unescape_string(&scratch[0], src);
    /* number of bytes produced by the unescape */
    auto const written = tail - scratch;
    /* zero from the end of the data up to the next word boundary */
    memset(
        &scratch[written], 0,
        sizeof(uint32_t) - written % sizeof(uint32_t)
    );
    /* the instruction pushed above is still the last element */
    code.back() |= written << 8;
    uint32_t *words = reinterpret_cast<uint32_t *>(scratch);
    code.append(words, words + (written / sizeof(uint32_t) + 1));
    al.deallocate(scratch, spare + 1);
}
static bool compilearg(
parser_state &gs, int wordtype, charbuf *word = nullptr
);
@ -640,62 +620,6 @@ invalid:
}
}
/* Emits a BC_INST_VAL | BC_RET_STRING instruction whose payload is the
 * block text in [str, send), filtered as follows:
 *
 *  - carriage returns are dropped
 *  - quoted strings (as delimited by parse_string) are copied verbatim
 *  - `//` comments are dropped up to (not including) the next newline
 *  - everything else, including lone '/', '@' and ']', is copied as is
 *
 * The payload is zero-padded to whole 32-bit words and its byte length is
 * stored in the instruction word from bit 8 upwards. Always returns true.
 *
 * The scratch buffer is released on every path, including when an
 * exception propagates out of the filtering (parse_string is handed the
 * state and presumably may raise an error); the exception is rethrown.
 */
static bool compileblockstr(parser_state &gs, char const *str, char const *send) {
    auto &code = gs.gs.code;
    std::size_t startc = code.size();
    code.push_back(BC_INST_VAL | BC_RET_STRING);
    code.reserve(code.size() + (send - str) / sizeof(uint32_t) + 1);
    auto alloc = std_allocator<char>{gs.ts.istate};
    /* whole words, with one extra so the zero padding always fits */
    auto asz = ((send - str) / sizeof(uint32_t) + 1) * sizeof(uint32_t);
    char *buf = alloc.allocate(asz);
    try {
        /* characters at which plain copying has to stop */
        std::string_view const chrs{"\r/\"@]"};
        std::size_t len = 0;
        while (str < send) {
            char const *orig = str;
            str = std::find_first_of(str, send, chrs.begin(), chrs.end());
            /* copy everything up until the boundary character */
            memcpy(&buf[len], orig, str - orig);
            len += (str - orig);
            if (str == send) {
                break;
            }
            /* from here on str != send, so *str is a valid boundary char */
            switch (*str) {
                case '\r': /* filter out */
                    ++str;
                    break;
                case '\"': { /* quoted string */
                    char const *start = str;
                    str = parse_string(
                        *gs.ts.pstate, std::string_view{str, send}
                    );
                    memcpy(&buf[len], start, std::size_t(str - start));
                    len += (str - start);
                    break;
                }
                case '/':
                    if (((str + 1) != send) && str[1] == '/') {
                        /* comment: skip to the end of the line */
                        str = std::find(str, send, '\n');
                    } else {
                        buf[len++] = *str++;
                    }
                    break;
                case '@':
                case ']':
                    buf[len++] = *str++;
                    break;
            }
        }
        /* zero from the end of the data up to the next word boundary */
        memset(&buf[len], '\0', sizeof(uint32_t) - len % sizeof(uint32_t));
        uint32_t *ubuf = reinterpret_cast<uint32_t *>(buf);
        code.append(ubuf, ubuf + (len / sizeof(uint32_t) + 1));
        code[startc] |= len << 8;
    } catch (...) {
        /* do not leak the scratch buffer when filtering raises */
        alloc.deallocate(buf, asz);
        throw;
    }
    alloc.deallocate(buf, asz);
    return true;
}
static bool compileblocksub(parser_state &gs) {
charbuf lookup{gs.ts};
switch (gs.current()) {
@ -790,9 +714,8 @@ static void compileblockmain(parser_state &gs, int wordtype) {
throw error{*gs.ts.pstate, "too many @s"};
return;
}
if (compileblockstr(gs, start, esc)) {
concs++;
}
gs.gs.gen_val_block(std::string_view{start, esc});
concs++;
if (compileblocksub(gs)) {
concs++;
}
@ -829,7 +752,7 @@ static void compileblockmain(parser_state &gs, int wordtype) {
return;
}
}
compileblockstr(gs, start, gs.source - 1);
gs.gs.gen_val_block(std::string_view{start, gs.source - 1});
if (concs > 1) {
concs++;
}
@ -918,7 +841,7 @@ static bool compilearg(
break;
case VAL_ANY:
case VAL_STRING:
compileunescapestr(gs);
gs.gs.gen_val_string_unescape(gs.get_str());
break;
default: {
int line = int(gs.current_line);