initial bits for separation of codegen from parser

master
Daniel Kolesa 2021-04-09 00:41:55 +02:00
parent ccb0c09d59
commit 5f67b205df
7 changed files with 428 additions and 383 deletions

View File

@ -1,5 +1,157 @@
#include <cstdlib>
#include <cstring>
#include <cmath>
#include "cs_gen.hh"
#include "cs_bcode.hh"
#include "cs_ident.hh"
#include "cs_parser.hh"
namespace cubescript {
/* Emit a null value: a single word made of the BC_INST_VAL_INT
 * instruction combined with the BC_RET_NULL type tag.
 */
void gen_state::gen_val_null() {
    std::uint32_t const op = BC_INST_VAL_INT | BC_RET_NULL;
    code.push_back(op);
}
/* Emit an integer constant.
 *
 * A value outside of the [-0x800000, 0x7FFFFF] range is written as a
 * BC_INST_VAL word followed by bc_store_size<integer_type> words that
 * hold the raw bytes of the value. Anything inside that range is packed
 * into a single BC_INST_VAL_INT word, shifted past the low 8 bits.
 */
void gen_state::gen_val_integer(integer_type v) {
    bool const too_wide = (v < -0x800000) || (v > 0x7FFFFF);
    if (too_wide) {
        /* zero-filled so that any padding past sizeof(v) is deterministic */
        std::uint32_t words[bc_store_size<integer_type>] = {0};
        std::memcpy(words, &v, sizeof(v));
        code.push_back(BC_INST_VAL | BC_RET_INT);
        code.append(words, words + bc_store_size<integer_type>);
        return;
    }
    code.push_back(BC_INST_VAL_INT | BC_RET_INT | (v << 8));
}
void gen_state::gen_val_integer(std::string_view v) {
gen_val_integer(parse_int(v));
}
/* Emit a floating point constant.
 *
 * A value that has no fractional part and lies within
 * [-0x800000, 0x7FFFFF] is stored as an integer inside a single
 * BC_INST_VAL_INT word tagged with BC_RET_FLOAT. Every other value is
 * written as a BC_INST_VAL word followed by bc_store_size<float_type>
 * words holding the raw bytes of the value.
 */
void gen_state::gen_val_float(float_type v) {
    auto const whole = std::floor(v);
    bool const compact = (whole == v) && (v >= -0x800000) && (v <= 0x7FFFFF);
    if (compact) {
        code.push_back(
            BC_INST_VAL_INT | BC_RET_FLOAT | (integer_type(whole) << 8)
        );
        return;
    }
    /* zero-filled so that any padding past sizeof(v) is deterministic */
    std::uint32_t words[bc_store_size<float_type>] = {0};
    std::memcpy(words, &v, sizeof(v));
    code.push_back(BC_INST_VAL | BC_RET_FLOAT);
    code.append(words, words + bc_store_size<float_type>);
}
void gen_state::gen_val_float(std::string_view v) {
gen_val_float(parse_float(v));
}
/* Emit a string constant.
 *
 * Strings of up to 3 characters are packed into a single BC_INST_VAL_INT
 * word tagged with BC_RET_STRING; character i occupies the byte at bit
 * offset (i + 1) * 8, unused bytes stay zero.
 *
 * Longer strings are written as a BC_INST_VAL word carrying the length
 * (shifted past the low 8 bits), followed by the string bytes copied into
 * consecutive words. A final zero-padded word is always appended, so the
 * data ends with at least one zero byte even when the length is an exact
 * multiple of the word size.
 *
 * NOTE(review): the length is narrowed to 32 bits after the shift, so
 * lengths of 2^24 or more do not fit the header word; no check is made.
 */
void gen_state::gen_val_string(std::string_view v) {
    constexpr std::size_t word_size = sizeof(std::uint32_t);
    auto const len = v.size();
    if (len <= 3) {
        std::uint32_t op = BC_INST_VAL_INT | BC_RET_STRING;
        for (std::size_t i = 0; i < len; ++i) {
            auto const c = static_cast<unsigned char>(v[i]);
            op |= std::uint32_t(c) << ((i + 1) * 8);
        }
        code.push_back(op);
        return;
    }
    code.push_back(BC_INST_VAL | BC_RET_STRING | std::uint32_t(len << 8));
    /* copy whole words through memcpy; the character data carries no
     * alignment guarantee and must not be read via a uint32_t pointer
     */
    char const *src = v.data();
    std::size_t remaining = len;
    while (remaining >= word_size) {
        std::uint32_t word;
        std::memcpy(&word, src, word_size);
        code.push_back(word);
        src += word_size;
        remaining -= word_size;
    }
    /* trailing 0 to (word_size - 1) bytes, zero-padded to a full word */
    std::uint32_t tail = 0;
    std::memcpy(&tail, src, remaining);
    code.push_back(tail);
}
void gen_state::gen_val_ident() {
gen_val_ident(*ts.istate->id_dummy);
}
/* Emit a reference to the given ident: one BC_INST_IDENT word with the
 * ident's index shifted past the low 8 bits.
 */
void gen_state::gen_val_ident(ident &i) {
    auto const index = i.get_index();
    code.push_back(BC_INST_IDENT | (index << 8));
}
void gen_state::gen_val_ident(std::string_view v) {
gen_val_ident(ts.istate->new_ident(*ts.pstate, v, IDENT_FLAG_UNKNOWN));
}
/* Emit a value of the requested kind from its textual form.
 *
 * val_type selects the emitter:
 *   VAL_ANY    - string, or null when v is empty
 *   VAL_STRING - string
 *   VAL_FLOAT  - float parsed from v
 *   VAL_INT    - integer parsed from v
 *   VAL_COND   - code block, or null when v is empty
 *   VAL_CODE   - code block
 *   VAL_IDENT  - ident named by v
 * Any other val_type emits nothing. line is only forwarded to gen_block.
 */
void gen_state::gen_val(
    int val_type, std::string_view v, std::size_t line
) {
    bool const blank = v.empty();
    switch (val_type) {
        case VAL_INT:
            gen_val_integer(v);
            return;
        case VAL_FLOAT:
            gen_val_float(v);
            return;
        case VAL_IDENT:
            gen_val_ident(v);
            return;
        case VAL_ANY:
            if (blank) {
                gen_val_null();
                return;
            }
            [[fallthrough]];
        case VAL_STRING:
            gen_val_string(v);
            return;
        case VAL_COND:
            if (blank) {
                gen_val_null();
                return;
            }
            [[fallthrough]];
        case VAL_CODE:
            gen_block(v, line);
            return;
        default:
            return;
    }
}
/* Emit an empty block: a single BC_INST_EMPTY word. */
void gen_state::gen_block() {
    std::uint32_t const op = BC_INST_EMPTY;
    code.push_back(op);
}
/* Compile the source in v as a nested block and emit it.
 *
 * A BC_INST_BLOCK word and a BC_INST_OFFSET word are written first, then
 * a parser_state bound to this generator parses v (starting at the given
 * line, with term forwarded to parse_block). If that produced any code,
 * a BC_INST_EXIT word tagged with ret_type closes the block and the
 * block size is stored in the BC_INST_BLOCK word. Otherwise both header
 * words are dropped and a single BC_INST_EMPTY word tagged with ret_type
 * is emitted instead.
 *
 * Returns the parser's current line after parsing together with the view
 * delimited by the parser's source/send pointers; when v is empty the
 * input line and v are returned unchanged.
 */
std::pair<std::size_t, std::string_view> gen_state::gen_block(
    std::string_view v, std::size_t line, int ret_type, int term
) {
    auto const start = code.size();
    code.push_back(BC_INST_BLOCK);
    /* the offset from the start of the bytecode block; refcounting
     * subtracts it to get back to the start of the original allocation,
     * i.e. BC_INST_START
     */
    code.push_back(BC_INST_OFFSET | std::uint32_t((start + 2) << 8));
    std::size_t end_line = line;
    if (!v.empty()) {
        parser_state ps{ts, *this};
        ps.source = v.data();
        ps.send = v.data() + v.size();
        ps.current_line = line;
        ps.parse_block(VAL_ANY, term);
        end_line = ps.current_line;
        v = std::string_view{ps.source, ps.send};
    }
    bool const has_body = code.size() > (start + 2);
    if (!has_body) {
        /* nothing past the two header words: collapse to an empty block */
        code.resize(start);
        code.push_back(BC_INST_EMPTY | ret_type);
        return std::make_pair(end_line, v);
    }
    code.push_back(BC_INST_EXIT | ret_type);
    /* the block size goes into the BC_INST_BLOCK word */
    code[start] |= (std::uint32_t(code.size() - start - 1) << 8);
    return std::make_pair(end_line, v);
}
} /* namespace cubescript */

View File

@ -1,10 +1,52 @@
#ifndef LIBCUBESCRIPT_GEN_HH
#define LIBCUBESCRIPT_GEN_HH
#include "cubescript/cubescript.hh"
#include <cstdint>
#include <string_view>
#include <utility>
#include <cubescript/cubescript.hh>
#include "cs_std.hh"
#include "cs_thread.hh"
namespace cubescript {
/* Bytecode generator state: owns the buffer of emitted bytecode words
 * and provides the emitters that append to it.
 */
struct gen_state {
/* the thread state this generator is bound to */
thread_state &ts;
/* emitted bytecode, as a sequence of 32-bit words */
valbuf<std::uint32_t> code;
gen_state() = delete;
/* binds the generator to tsr; the code buffer is constructed from
 * tsr.istate
 * NOTE(review): single-argument constructor is not marked explicit
 */
gen_state(thread_state &tsr):
ts{tsr}, code{tsr.istate}
{}
/* emit a null value */
void gen_val_null();
/* emit an integer value, either given directly or in textual form */
void gen_val_integer(integer_type v = 0);
void gen_val_integer(std::string_view v);
/* emit a float value, either given directly or in textual form */
void gen_val_float(float_type v = 0);
void gen_val_float(std::string_view v);
/* emit a string value; defaults to an empty string */
void gen_val_string(std::string_view v = std::string_view{});
/* emit an ident reference: with no argument, by ident, or by name */
void gen_val_ident();
void gen_val_ident(ident &i);
void gen_val_ident(std::string_view v);
/* emit a value of the kind given by val_type from its textual form */
void gen_val(
int val_type, std::string_view v = std::string_view{},
std::size_t line = 0
);
/* emit an empty block */
void gen_block();
/* emit a block from the source in v; returns a line number paired with
 * a string view
 * NOTE(review): BC_RET_NULL must already be declared at this point; none
 * of the includes visible in this header names its origin - confirm
 */
std::pair<std::size_t, std::string_view> gen_block(
std::string_view v, std::size_t line,
int ret_type = BC_RET_NULL, int term = '\0'
);
};
} /* namespace cubescript */
#endif /* LIBCUBESCRIPT_GEN_HH */

File diff suppressed because it is too large Load Diff

View File

@ -11,6 +11,7 @@
#include "cs_bcode.hh"
#include "cs_ident.hh"
#include "cs_thread.hh"
#include "cs_gen.hh"
namespace cubescript {
@ -21,16 +22,16 @@ bool is_valid_name(std::string_view input);
struct parser_state {
thread_state &ts;
gen_state &gs;
parser_state *prevps;
bool parsing = true;
valbuf<uint32_t> code;
char const *source, *send;
std::size_t current_line;
std::string_view src_name;
parser_state() = delete;
parser_state(thread_state &tsr):
ts{tsr}, prevps{tsr.cstate}, code{tsr.istate},
parser_state(thread_state &tsr, gen_state &gsr):
ts{tsr}, gs{gsr}, prevps{tsr.cstate},
source{}, send{}, current_line{1}, src_name{}
{
tsr.cstate = this;
@ -53,77 +54,7 @@ struct parser_state {
std::string_view get_word();
void gen_str(std::string_view word = std::string_view{}) {
if (word.size() <= 3) {
std::uint32_t op = BC_INST_VAL_INT | BC_RET_STRING;
for (size_t i = 0; i < word.size(); ++i) {
op |= std::uint32_t(
static_cast<unsigned char>(word[i])
) << ((i + 1) * 8);
}
code.push_back(op);
return;
}
code.push_back(
BC_INST_VAL | BC_RET_STRING | std::uint32_t(word.size() << 8)
);
auto it = reinterpret_cast<std::uint32_t const *>(word.data());
code.append(it, it + (word.size() / sizeof(std::uint32_t)));
std::size_t esz = word.size() % sizeof(std::uint32_t);
char c[sizeof(std::uint32_t)] = {0};
std::memcpy(c, word.data() + word.size() - esz, esz);
std::uint32_t u;
std::memcpy(&u, c, sizeof(u));
code.push_back(u);
}
void gen_null() {
code.push_back(BC_INST_VAL_INT | BC_RET_NULL);
}
void gen_int(integer_type i = 0) {
if (i >= -0x800000 && i <= 0x7FFFFF) {
code.push_back(BC_INST_VAL_INT | BC_RET_INT | (i << 8));
} else {
std::uint32_t u[bc_store_size<integer_type>] = {0};
std::memcpy(u, &i, sizeof(i));
code.push_back(BC_INST_VAL | BC_RET_INT);
code.append(u, u + bc_store_size<integer_type>);
}
}
void gen_int(std::string_view word);
void gen_float(float_type f = 0.0f) {
if (integer_type(f) == f && f >= -0x800000 && f <= 0x7FFFFF) {
code.push_back(BC_INST_VAL_INT | BC_RET_FLOAT | (integer_type(f) << 8));
} else {
std::uint32_t u[bc_store_size<float_type>] = {0};
std::memcpy(u, &f, sizeof(f));
code.push_back(BC_INST_VAL | BC_RET_FLOAT);
code.append(u, u + bc_store_size<float_type>);
}
}
void gen_float(std::string_view word);
void gen_ident(ident &id) {
code.push_back(BC_INST_IDENT | (id.get_index() << 8));
}
void gen_ident() {
gen_ident(*ts.istate->id_dummy);
}
void gen_ident(std::string_view word) {
gen_ident(ts.istate->new_ident(*ts.pstate, word, IDENT_FLAG_UNKNOWN));
}
void gen_value(
int wordtype, std::string_view word = std::string_view(),
int line = 0
);
void parse_block(int ret_type, int term = '\0');
void gen_main(std::string_view s, int ret_type = VAL_ANY);
void next_char() {

View File

@ -655,11 +655,12 @@ static any_value do_run(
thread_state &ts, std::string_view file, std::string_view code
) {
any_value ret{*ts.pstate};
parser_state gs{ts};
gs.src_name = file;
gen_state gs{ts};
parser_state ps{ts, gs};
ps.src_name = file;
gs.code.reserve(64);
gs.gen_main(code, VAL_ANY);
gs.done();
ps.gen_main(code, VAL_ANY);
ps.done();
std::uint32_t *cbuf = bcode_alloc(ts.istate, gs.code.size());
std::memcpy(cbuf, gs.code.data(), gs.code.size() * sizeof(std::uint32_t));
bcode_ref cref{reinterpret_cast<bcode *>(cbuf + 1)};

View File

@ -269,10 +269,11 @@ bcode_ref any_value::force_code(state &cs) {
default:
break;
}
parser_state gs{state_p{cs}.ts()};
gen_state gs{state_p{cs}.ts()};
parser_state ps{state_p{cs}.ts(), gs};
gs.code.reserve(64);
gs.gen_main(get_string());
gs.done();
ps.gen_main(get_string());
ps.done();
uint32_t *cbuf = bcode_alloc(state_p{cs}.ts().istate, gs.code.size());
std::memcpy(cbuf, gs.code.data(), gs.code.size() * sizeof(std::uint32_t));
auto *bc = reinterpret_cast<bcode *>(cbuf + 1);

View File

@ -239,9 +239,10 @@ bool exec_alias(
ident_link aliaslink = {a, ts.callstack, uargs};
ts.callstack = &aliaslink;
if (!aast.node->code) {
parser_state gs{ts};
gen_state gs{ts};
parser_state ps{ts, gs};
gs.code.reserve(64);
gs.gen_main(aast.node->val_s.get_string());
ps.gen_main(aast.node->val_s.get_string());
/* i wish i could steal the memory somehow */
uint32_t *code = bcode_alloc(ts.istate, gs.code.size());
memcpy(code, gs.code.data(), gs.code.size() * sizeof(uint32_t));
@ -687,35 +688,37 @@ std::uint32_t *vm_exec(
case BC_INST_COMPILE: {
any_value &arg = args.back();
parser_state gs{ts};
gen_state gs{ts};
switch (arg.get_type()) {
case value_type::INTEGER:
gs.code.reserve(8);
gs.code.push_back(BC_INST_START);
gs.gen_int(arg.get_integer());
gs.gen_val_integer(arg.get_integer());
gs.code.push_back(BC_INST_RESULT);
gs.code.push_back(BC_INST_EXIT);
break;
case value_type::FLOAT:
gs.code.reserve(8);
gs.code.push_back(BC_INST_START);
gs.gen_float(arg.get_float());
gs.gen_val_float(arg.get_float());
gs.code.push_back(BC_INST_RESULT);
gs.code.push_back(BC_INST_EXIT);
break;
case value_type::STRING:
case value_type::STRING: {
parser_state ps{ts, gs};
gs.code.reserve(64);
gs.gen_main(arg.get_string());
ps.gen_main(arg.get_string());
ps.done();
break;
}
default:
gs.code.reserve(8);
gs.code.push_back(BC_INST_START);
gs.gen_null();
gs.gen_val_null();
gs.code.push_back(BC_INST_RESULT);
gs.code.push_back(BC_INST_EXIT);
break;
}
gs.done();
std::uint32_t *cbuf = bcode_alloc(ts.istate, gs.code.size());
std::memcpy(
cbuf, gs.code.data(),
@ -733,10 +736,11 @@ std::uint32_t *vm_exec(
case value_type::STRING: {
std::string_view s = arg.get_string();
if (!s.empty()) {
parser_state gs{ts};
gen_state gs{ts};
parser_state ps{ts, gs};
gs.code.reserve(64);
gs.gen_main(s);
gs.done();
ps.gen_main(s);
ps.done();
std::uint32_t *cbuf = bcode_alloc(
ts.istate, gs.code.size()
);