diff --git a/src/cs_gen.cc b/src/cs_gen.cc index 09a4067..d0efd05 100644 --- a/src/cs_gen.cc +++ b/src/cs_gen.cc @@ -73,6 +73,97 @@ void gen_state::gen_val_string(std::string_view v) { code.push_back(u); } +/* FIXME: figure out how to do without the intermediate buffer */ +template +static void gen_str_filter( + valbuf &code, thread_state &ts, std::string_view v, F &&func +) { + code.push_back(BC_INST_VAL | BC_RET_STRING); + auto ncode = code.size(); + /* we're reserving a proper number of words */ + auto nwords = (v.size() / sizeof(std::uint32_t)) + 1; + code.reserve(ncode + nwords); + /* allocate a character buffer that's at least that many words */ + auto al = std_allocator{ts.istate}; + auto *buf = al.allocate(nwords * sizeof(std::uint32_t)); + /* the body */ + auto len = func(&buf[0]); + /* fill the leftover bytes with zeroes */ + memset(&buf[len], 0, sizeof(std::uint32_t) - len % sizeof(std::uint32_t)); + /* set the actual length */ + code.back() |= (len << 8); + auto *ubuf = reinterpret_cast(buf); + code.append(ubuf, ubuf + ((len / sizeof(std::uint32_t)) + 1)); + al.deallocate(buf, nwords * sizeof(std::uint32_t)); +} + +void gen_state::gen_val_string_unescape(std::string_view v) { + gen_str_filter(code, ts, v, [&v](auto *buf) { + auto *wbuf = unescape_string(buf, v); + return std::size_t(wbuf - buf); + }); +} + +void gen_state::gen_val_block(std::string_view v) { + gen_str_filter(code, ts, v, [&v, this](auto *buf) { + auto *str = v.data(); + auto *send = v.data() + v.size(); + std::size_t len = 0; + for (std::string_view chrs{"\r/\"@]"}; str < send;) { + auto *orig = str; + /* find a boundary character */ + str = std::find_first_of(str, send, chrs.begin(), chrs.end()); + /* copy everything up until boundary character */ + std::memcpy(&buf[len], orig, str - orig); + len += (str - orig); + /* found nothing: bail out */ + if (str == send) { + return len; + } + switch (*str) { + case '\r': /* filter out */ + ++str; + break; + case '\"': { /* quoted string */ + char const *start = str; + str = parse_string( + *ts.pstate, std::string_view{str, send} + ); + std::memcpy(&buf[len], start, std::size_t(str - start)); + len += (str - start); + break; + } + case '/': + if (((str + 1) != send) && (str[1] == '/')) { + /* comment */ + char const *start = str; + str = std::find(str, send, '\n'); + if (((start + 2) != send) && std::ispunct(start[2])) { + /* these comments will be preserved */ + std::memcpy( + &buf[len], start, std::size_t(str - start) + ); + len += (str - start); + } + } else { + /* write and skip */ + buf[len++] = *str++; + } + break; + case '@': + case ']': + if (str id_dummy); } diff --git a/src/cs_gen.hh b/src/cs_gen.hh index 556378b..821751e 100644 --- a/src/cs_gen.hh +++ b/src/cs_gen.hh @@ -33,6 +33,8 @@ struct gen_state { void gen_val_float(std::string_view v); void gen_val_string(std::string_view v = std::string_view{}); + void gen_val_string_unescape(std::string_view str); + void gen_val_block(std::string_view str); void gen_val_ident(); void gen_val_ident(ident &i); diff --git a/src/cs_parser.cc b/src/cs_parser.cc index a17cd64..a74ca95 100644 --- a/src/cs_parser.cc +++ b/src/cs_parser.cc @@ -403,26 +403,6 @@ static inline int ret_code(int type, int def = 0) { return type << BC_INST_RET; } -static inline void compileunescapestr(parser_state &gs) { - auto str = gs.get_str(); - gs.gs.code.push_back(BC_INST_VAL | BC_RET_STRING); - gs.gs.code.reserve( - gs.gs.code.size() + str.size() / sizeof(uint32_t) + 1 - ); - size_t bufs = (gs.gs.code.capacity() - gs.gs.code.size()) * sizeof(uint32_t); - auto alloc = std_allocator{gs.ts.istate}; - auto *buf = alloc.allocate(bufs + 1); - char *wbuf = unescape_string(&buf[0], str); - memset( - &buf[wbuf - buf], 0, - sizeof(uint32_t) - (wbuf - buf) % sizeof(uint32_t) - ); - gs.gs.code.back() |= (wbuf - buf) << 8; - uint32_t *ubuf = reinterpret_cast(buf); - gs.gs.code.append(ubuf, ubuf + ((wbuf - buf) / sizeof(uint32_t) + 1)); - alloc.deallocate(buf, bufs + 1); -} - static bool compilearg( parser_state &gs, int wordtype, charbuf *word = nullptr ); @@ -640,62 +620,6 @@ invalid: } } -static bool compileblockstr(parser_state &gs, char const *str, char const *send) { - std::size_t startc = gs.gs.code.size(); - gs.gs.code.push_back(BC_INST_VAL | BC_RET_STRING); - gs.gs.code.reserve(gs.gs.code.size() + (send - str) / sizeof(uint32_t) + 1); - auto alloc = std_allocator{gs.ts.istate}; - auto asz = ((send - str) / sizeof(uint32_t) + 1) * sizeof(uint32_t); - char *buf = alloc.allocate(asz); - std::size_t len = 0; - while (str < send) { - std::string_view chrs{"\r/\"@]"}; - char const *orig = str; - str = std::find_first_of(str, send, chrs.begin(), chrs.end()); - memcpy(&buf[len], orig, str - orig); - len += (str - orig); - if (str == send) { - goto done; - } - switch (*str) { - case '\r': - ++str; - break; - case '\"': { - char const *start = str; - str = parse_string( - *gs.ts.pstate, std::string_view{str, send} - ); - memcpy(&buf[len], start, std::size_t(str - start)); - len += (str - start); - break; - } - case '/': - if (((str + 1) != send) && str[1] == '/') { - str = std::find(str, send, '\n'); - } else { - buf[len++] = *str++; - } - break; - case '@': - case ']': - if (str < send) { - buf[len++] = *str++; - } else { - goto done; - } - break; - } - } -done: - memset(&buf[len], '\0', sizeof(uint32_t) - len % sizeof(uint32_t)); - uint32_t *ubuf = reinterpret_cast(buf); - gs.gs.code.append(ubuf, ubuf + (len / sizeof(uint32_t) + 1)); - gs.gs.code[startc] |= len << 8; - alloc.deallocate(buf, asz); - return true; -} - static bool compileblocksub(parser_state &gs) { charbuf lookup{gs.ts}; switch (gs.current()) { @@ -790,9 +714,8 @@ static void compileblockmain(parser_state &gs, int wordtype) { throw error{*gs.ts.pstate, "too many @s"}; return; } - if (compileblockstr(gs, start, esc)) { - concs++; - } + gs.gs.gen_val_block(std::string_view{start, esc}); + concs++; if (compileblocksub(gs)) { concs++; } @@ -829,7 +752,7 @@ static void compileblockmain(parser_state &gs, int wordtype) { return; } } - compileblockstr(gs, start, gs.source - 1); + gs.gs.gen_val_block(std::string_view{start, gs.source - 1}); if (concs > 1) { concs++; } @@ -918,7 +841,7 @@ static bool compilearg( break; case VAL_ANY: case VAL_STRING: - compileunescapestr(gs); + gs.gs.gen_val_string_unescape(gs.get_str()); break; default: { int line = int(gs.current_line);