clean up the main parser path
parent
51f09c3dfb
commit
0ee0ebfb42
480
src/cs_parser.cc
480
src/cs_parser.cc
|
@ -307,6 +307,9 @@ bool is_valid_name(std::string_view s) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* parse out a quoted string; return the raw string, without the quotes
|
||||||
|
* current parser state will be after the final quote
|
||||||
|
*/
|
||||||
std::string_view parser_state::get_str() {
|
std::string_view parser_state::get_str() {
|
||||||
size_t nl;
|
size_t nl;
|
||||||
char const *beg = source;
|
char const *beg = source;
|
||||||
|
@ -318,12 +321,18 @@ std::string_view parser_state::get_str() {
|
||||||
return ret.substr(1, ret.size() - 2);
|
return ret.substr(1, ret.size() - 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* like the above, but unescapes the string and dups it as a buffer */
|
||||||
charbuf parser_state::get_str_dup() {
|
charbuf parser_state::get_str_dup() {
|
||||||
charbuf buf{ts};
|
charbuf buf{ts};
|
||||||
unescape_string(std::back_inserter(buf), get_str());
|
unescape_string(std::back_inserter(buf), get_str());
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* a simple name, used for @foo in macro substitutions
|
||||||
|
*
|
||||||
|
* consists of an alpha character (or '_') followed
|
||||||
|
* by alphanumeric characters (or more '_')
|
||||||
|
*/
|
||||||
std::string_view parser_state::read_macro_name() {
|
std::string_view parser_state::read_macro_name() {
|
||||||
char const *op = source;
|
char const *op = source;
|
||||||
char c = current();
|
char c = current();
|
||||||
|
@ -336,6 +345,7 @@ std::string_view parser_state::read_macro_name() {
|
||||||
return std::string_view{op, std::size_t(source - op)};
|
return std::string_view{op, std::size_t(source - op)};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* advance the parser until we reach any of the given chars, then stop at it */
|
||||||
char parser_state::skip_until(std::string_view chars) {
|
char parser_state::skip_until(std::string_view chars) {
|
||||||
char c = current();
|
char c = current();
|
||||||
while (c && (chars.find(c) == std::string_view::npos)) {
|
while (c && (chars.find(c) == std::string_view::npos)) {
|
||||||
|
@ -345,6 +355,7 @@ char parser_state::skip_until(std::string_view chars) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* advance the parser until we reach the given character, then stop at it */
|
||||||
char parser_state::skip_until(char cf) {
|
char parser_state::skip_until(char cf) {
|
||||||
char c = current();
|
char c = current();
|
||||||
while (c && (c != cf)) {
|
while (c && (c != cf)) {
|
||||||
|
@ -382,8 +393,13 @@ void parser_state::skip_comments() {
|
||||||
if ((current() != '/') || (current(1) != '/')) {
|
if ((current() != '/') || (current(1) != '/')) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (current() != '\n') {
|
for (;;) {
|
||||||
next_char();
|
auto c = current();
|
||||||
|
if (c && (c != '\n')) {
|
||||||
|
next_char();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1234,220 +1250,282 @@ bool parser_state::parse_id_and_or(ident &id, int ltype) {
|
||||||
return more;
|
return more;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parser_state::parse_block(int rettype, int brak) {
|
static bool finish_statement(parser_state &ps, bool more, int term) {
|
||||||
charbuf idname{gs.ts};
|
/* skip through any remaining args in the statement */
|
||||||
for (;;) {
|
if (more) {
|
||||||
skip_comments();
|
while (ps.parse_arg(VAL_POP)) {}
|
||||||
idname.clear();
|
}
|
||||||
size_t curline = current_line;
|
/* handle special characters */
|
||||||
bool more = parse_arg(VAL_WORD, &idname);
|
switch (ps.skip_until(")];/\n")) {
|
||||||
if (!more) {
|
/* EOS */
|
||||||
goto endstatement;
|
case '\0':
|
||||||
|
if (ps.current() != term) {
|
||||||
|
throw error{*ps.ts.pstate, "missing \"%c\"", char(term)};
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
/* terminating parens/brackets */
|
||||||
|
case ')':
|
||||||
|
case ']':
|
||||||
|
/* if the expected terminator, finish normally */
|
||||||
|
if (ps.current() == term) {
|
||||||
|
ps.next_char();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
throw error{*ps.ts.pstate, "unexpected \"%c\"", ps.current()};
|
||||||
|
/* potential comment */
|
||||||
|
case '/':
|
||||||
|
ps.next_char();
|
||||||
|
if (ps.current() == '/') {
|
||||||
|
ps.skip_until('\n');
|
||||||
|
}
|
||||||
|
return finish_statement(ps, false, term);
|
||||||
|
/* next statement */
|
||||||
|
default:
|
||||||
|
ps.next_char();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* advance to next statement */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool parser_state::parse_call_id(ident &id, int ltype) {
|
||||||
|
switch (ident_p{id}.impl().p_type) {
|
||||||
|
case ID_ALIAS:
|
||||||
|
return parse_call_alias(static_cast<alias &>(id));
|
||||||
|
case ID_COMMAND:
|
||||||
|
return parse_call_command(
|
||||||
|
static_cast<command_impl *>(&id), id, ltype
|
||||||
|
);
|
||||||
|
case ID_LOCAL:
|
||||||
|
return parse_id_local();
|
||||||
|
case ID_DO:
|
||||||
|
return parse_id_do(false, ltype);
|
||||||
|
case ID_DOARGS:
|
||||||
|
return parse_id_do(true, ltype);
|
||||||
|
case ID_IF:
|
||||||
|
return parse_id_if(id, ltype);
|
||||||
|
case ID_BREAK:
|
||||||
|
gs.gen_break();
|
||||||
|
return true;
|
||||||
|
case ID_CONTINUE:
|
||||||
|
gs.gen_continue();
|
||||||
|
return true;
|
||||||
|
case ID_RESULT: {
|
||||||
|
bool more = parse_arg(VAL_ANY);
|
||||||
|
if (!more) {
|
||||||
|
gs.gen_result_null(ltype);
|
||||||
|
} else {
|
||||||
|
gs.gen_result(ltype);
|
||||||
|
}
|
||||||
|
return more;
|
||||||
}
|
}
|
||||||
skip_comments();
|
case ID_NOT: {
|
||||||
if (current() == '=') {
|
bool more = parse_arg(VAL_ANY);
|
||||||
switch (current(1)) {
|
if (!more) {
|
||||||
case '/':
|
gs.gen_result_true(ltype);
|
||||||
if (current(2) != '/') {
|
} else {
|
||||||
break;
|
gs.gen_not(ltype);
|
||||||
|
}
|
||||||
|
return more;
|
||||||
|
}
|
||||||
|
case ID_AND:
|
||||||
|
case ID_OR:
|
||||||
|
return parse_id_and_or(id, ltype);
|
||||||
|
case ID_IVAR: {
|
||||||
|
auto *hid = ts.istate->cmd_ivar;
|
||||||
|
return parse_call_command(
|
||||||
|
static_cast<command_impl *>(hid), id, ltype
|
||||||
|
);
|
||||||
|
}
|
||||||
|
case ID_FVAR: {
|
||||||
|
auto *hid = ts.istate->cmd_fvar;
|
||||||
|
return parse_call_command(
|
||||||
|
static_cast<command_impl *>(hid), id, ltype
|
||||||
|
);
|
||||||
|
}
|
||||||
|
case ID_SVAR: {
|
||||||
|
auto *hid = ts.istate->cmd_svar;
|
||||||
|
return parse_call_command(
|
||||||
|
static_cast<command_impl *>(hid), id, ltype
|
||||||
|
);
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
/* unreachable */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* generates a call to an unknown entity on the stack */
|
||||||
|
static bool parse_no_id(parser_state &ps, int term) {
|
||||||
|
std::uint32_t nargs = 0;
|
||||||
|
/* the entity is already on the stack, parse out any arguments to it */
|
||||||
|
while (ps.parse_arg(VAL_ANY)) {
|
||||||
|
++nargs;
|
||||||
|
}
|
||||||
|
ps.gs.gen_call(nargs);
|
||||||
|
return finish_statement(ps, false, term);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool parser_state::parse_assign(
|
||||||
|
charbuf &idname, int ltype, int term, bool &noass
|
||||||
|
) {
|
||||||
|
/* lookahead */
|
||||||
|
switch (current(1)) {
|
||||||
|
/* the = can be followed by a bunch of stuff
|
||||||
|
* some of these result in empty assignments
|
||||||
|
*/
|
||||||
|
case '/': /* a comment maybe? */
|
||||||
|
if (current(2) != '/') {
|
||||||
|
/* not a comment */
|
||||||
|
noass = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
[[fallthrough]];
|
||||||
|
case ' ':
|
||||||
|
case '\t':
|
||||||
|
case '\r':
|
||||||
|
case '\n':
|
||||||
|
case '\0': {
|
||||||
|
/* skip = */
|
||||||
|
next_char();
|
||||||
|
/* we had a name on the left hand side */
|
||||||
|
if (!idname.empty()) {
|
||||||
|
idname.push_back('\0');
|
||||||
|
/* fetch an ident or make up a fresh one (unknown alias) */
|
||||||
|
ident &id = ts.istate->new_ident(
|
||||||
|
*ts.pstate, idname.str_term(), IDENT_FLAG_UNKNOWN
|
||||||
|
);
|
||||||
|
/* check what we're assigning */
|
||||||
|
switch (id.get_type()) {
|
||||||
|
case ident_type::ALIAS: {
|
||||||
|
/* alias assignment: parse out any one argument */
|
||||||
|
bool more = parse_arg(VAL_ANY);
|
||||||
|
if (!more) {
|
||||||
|
gs.gen_val_string();
|
||||||
|
}
|
||||||
|
gs.gen_assign_alias(id);
|
||||||
|
return finish_statement(*this, more, term);
|
||||||
}
|
}
|
||||||
[[fallthrough]];
|
case ident_type::IVAR: {
|
||||||
case ';':
|
auto *hid = ts.istate->cmd_ivar;
|
||||||
case ' ':
|
bool more = parse_call_command(
|
||||||
case '\t':
|
static_cast<command_impl *>(hid),
|
||||||
case '\r':
|
id, ltype, 1
|
||||||
case '\n':
|
|
||||||
case '\0':
|
|
||||||
next_char();
|
|
||||||
if (!idname.empty()) {
|
|
||||||
idname.push_back('\0');
|
|
||||||
ident &id = ts.istate->new_ident(
|
|
||||||
*ts.pstate, idname.str_term(), IDENT_FLAG_UNKNOWN
|
|
||||||
);
|
);
|
||||||
switch (id.get_type()) {
|
return finish_statement(*this, more, term);
|
||||||
case ident_type::ALIAS:
|
|
||||||
more = parse_arg(VAL_ANY);
|
|
||||||
if (!more) {
|
|
||||||
gs.gen_val_string();
|
|
||||||
}
|
|
||||||
gs.gen_assign_alias(id);
|
|
||||||
goto endstatement;
|
|
||||||
case ident_type::IVAR: {
|
|
||||||
auto *hid = ts.istate->cmd_ivar;
|
|
||||||
more = parse_call_command(
|
|
||||||
static_cast<command_impl *>(hid),
|
|
||||||
id, rettype, 1
|
|
||||||
);
|
|
||||||
goto endstatement;
|
|
||||||
}
|
|
||||||
case ident_type::FVAR: {
|
|
||||||
auto *hid = ts.istate->cmd_fvar;
|
|
||||||
more = parse_call_command(
|
|
||||||
static_cast<command_impl *>(hid),
|
|
||||||
id, rettype, 1
|
|
||||||
);
|
|
||||||
goto endstatement;
|
|
||||||
}
|
|
||||||
case ident_type::SVAR: {
|
|
||||||
auto *hid = ts.istate->cmd_svar;
|
|
||||||
more = parse_call_command(
|
|
||||||
static_cast<command_impl *>(hid),
|
|
||||||
id, rettype, 1
|
|
||||||
);
|
|
||||||
goto endstatement;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
gs.gen_val_string(idname.str_term());
|
|
||||||
}
|
}
|
||||||
more = parse_arg(VAL_ANY);
|
case ident_type::FVAR: {
|
||||||
if (!more) {
|
auto *hid = ts.istate->cmd_fvar;
|
||||||
gs.gen_val_string();
|
bool more = parse_call_command(
|
||||||
|
static_cast<command_impl *>(hid),
|
||||||
|
id, ltype, 1
|
||||||
|
);
|
||||||
|
return finish_statement(*this, more, term);
|
||||||
}
|
}
|
||||||
gs.gen_assign();
|
case ident_type::SVAR: {
|
||||||
goto endstatement;
|
auto *hid = ts.istate->cmd_svar;
|
||||||
}
|
bool more = parse_call_command(
|
||||||
}
|
static_cast<command_impl *>(hid),
|
||||||
if (idname.empty()) {
|
id, ltype, 1
|
||||||
noid:
|
);
|
||||||
std::uint32_t numargs = 0;
|
return finish_statement(*this, more, term);
|
||||||
for (;;) {
|
|
||||||
more = parse_arg(VAL_ANY);
|
|
||||||
if (!more) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
++numargs;
|
|
||||||
}
|
|
||||||
gs.gen_call(numargs);
|
|
||||||
} else {
|
|
||||||
idname.push_back('\0');
|
|
||||||
ident *id = ts.pstate->get_ident(idname.str_term());
|
|
||||||
if (!id) {
|
|
||||||
if (is_valid_name(idname.str_term())) {
|
|
||||||
gs.gen_val_string(idname.str_term());
|
|
||||||
goto noid;
|
|
||||||
}
|
|
||||||
switch (rettype) {
|
|
||||||
case VAL_ANY: {
|
|
||||||
std::string_view end = idname.str_term();
|
|
||||||
integer_type val = parse_int(end, &end);
|
|
||||||
if (!end.empty()) {
|
|
||||||
gs.gen_val_string(idname.str_term());
|
|
||||||
} else {
|
|
||||||
gs.gen_val_integer(val);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
gs.gen_val(rettype, idname.str_term(), int(curline));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
gs.gen_result();
|
gs.gen_val_string(idname.str_term());
|
||||||
} else {
|
}
|
||||||
switch (ident_p{*id}.impl().p_type) {
|
/* unknown thing, make it the VM's problem */
|
||||||
case ID_ALIAS:
|
bool more = parse_arg(VAL_ANY);
|
||||||
more = parse_call_alias(static_cast<alias &>(*id));
|
if (!more) {
|
||||||
break;
|
gs.gen_val_string();
|
||||||
case ID_COMMAND:
|
}
|
||||||
more = parse_call_command(
|
gs.gen_assign();
|
||||||
static_cast<command_impl *>(id), *id, rettype
|
return finish_statement(*this, more, term);
|
||||||
);
|
}
|
||||||
break;
|
/* not followed by any of these: not an assignment */
|
||||||
case ID_LOCAL:
|
default:
|
||||||
more = parse_id_local();
|
noass = true;
|
||||||
break;
|
return true;
|
||||||
case ID_DO:
|
}
|
||||||
more = parse_id_do(false, rettype);
|
return true;
|
||||||
break;
|
}
|
||||||
case ID_DOARGS:
|
|
||||||
more = parse_id_do(true, rettype);
|
void parser_state::parse_block(int ltype, int term) {
|
||||||
break;
|
charbuf idname{gs.ts};
|
||||||
case ID_IF:
|
/* the main statement parse loop */
|
||||||
more = parse_id_if(*id, rettype);
|
for (;;) {
|
||||||
break;
|
/* first, skip any comments in the way and prepare the env */
|
||||||
case ID_BREAK:
|
skip_comments();
|
||||||
gs.gen_break();
|
idname.clear();
|
||||||
break;
|
std::size_t curline = current_line;
|
||||||
case ID_CONTINUE:
|
bool more = true;
|
||||||
gs.gen_continue();
|
/* parse the left hand side of the statement */
|
||||||
break;
|
if (!parse_arg(VAL_WORD, &idname)) {
|
||||||
case ID_RESULT:
|
if (!finish_statement(*this, more, term)) {
|
||||||
if (more) {
|
return;
|
||||||
more = parse_arg(VAL_ANY);
|
}
|
||||||
}
|
continue;
|
||||||
if (!more) {
|
}
|
||||||
gs.gen_result_null(rettype);
|
skip_comments();
|
||||||
} else {
|
/* potentially an assignment */
|
||||||
gs.gen_result(rettype);
|
if (current() == '=') {
|
||||||
}
|
bool noass = false;
|
||||||
break;
|
if (!parse_assign(idname, ltype, term, noass)) {
|
||||||
case ID_NOT:
|
/* terminated */
|
||||||
if (more) {
|
return;
|
||||||
more = parse_arg(VAL_ANY);
|
}
|
||||||
}
|
if (!noass) {
|
||||||
if (!more) {
|
/* was actually an assignment */
|
||||||
gs.gen_result_true(rettype);
|
continue;
|
||||||
} else {
|
|
||||||
gs.gen_not(rettype);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ID_AND:
|
|
||||||
case ID_OR:
|
|
||||||
more = parse_id_and_or(*id, rettype);
|
|
||||||
break;
|
|
||||||
case ID_IVAR: {
|
|
||||||
auto *hid = ts.istate->cmd_ivar;
|
|
||||||
more = parse_call_command(
|
|
||||||
static_cast<command_impl *>(hid), *id, rettype
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ID_FVAR: {
|
|
||||||
auto *hid = ts.istate->cmd_fvar;
|
|
||||||
more = parse_call_command(
|
|
||||||
static_cast<command_impl *>(hid), *id, rettype
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ID_SVAR: {
|
|
||||||
auto *hid = ts.istate->cmd_svar;
|
|
||||||
more = parse_call_command(
|
|
||||||
static_cast<command_impl *>(hid), *id, rettype
|
|
||||||
);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
endstatement:
|
/* we didn't get a name to look up: treat as unknown */
|
||||||
if (more) {
|
if (idname.empty()) {
|
||||||
while (parse_arg(VAL_POP));
|
if (!parse_no_id(*this, term)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
switch (skip_until(")];/\n")) {
|
idname.push_back('\0');
|
||||||
case '\0':
|
auto idstr = idname.str_term();
|
||||||
if (current() != brak) {
|
ident *id = ts.pstate->get_ident(idstr);
|
||||||
throw error{*ts.pstate, "missing \"%c\"", char(brak)};
|
if (!id) {
|
||||||
|
/* no such ident exists but the name is valid, which means
|
||||||
|
* it's a syntactically ok call, make it the VM's problem
|
||||||
|
*/
|
||||||
|
if (is_valid_name(idstr)) {
|
||||||
|
/* VAL_WORD does not codegen, put the name on the stack */
|
||||||
|
gs.gen_val_string(idstr);
|
||||||
|
if (!parse_no_id(*this, term)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
return;
|
continue;
|
||||||
case ')':
|
}
|
||||||
case ']':
|
/* not a valid command name: treat like an expression */
|
||||||
if (current() == brak) {
|
switch (ltype) {
|
||||||
next_char();
|
case VAL_ANY: {
|
||||||
return;
|
auto end = idstr;
|
||||||
|
auto val = parse_int(idstr, &end);
|
||||||
|
if (!end.empty()) {
|
||||||
|
gs.gen_val_string(idstr);
|
||||||
|
} else {
|
||||||
|
gs.gen_val_integer(val);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
throw error{*ts.pstate, "unexpected \"%c\"", current()};
|
default:
|
||||||
return;
|
gs.gen_val(ltype, idname.str_term(), int(curline));
|
||||||
case '/':
|
break;
|
||||||
next_char();
|
}
|
||||||
if (current() == '/') {
|
gs.gen_result();
|
||||||
skip_until('\n');
|
continue;
|
||||||
}
|
}
|
||||||
goto endstatement;
|
/* the ident exists; treat like a call according to its type */
|
||||||
default:
|
more = parse_call_id(*id, ltype);
|
||||||
next_char();
|
if (!finish_statement(*this, more, term)) {
|
||||||
break;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ struct parser_state {
|
||||||
|
|
||||||
std::string_view get_word();
|
std::string_view get_word();
|
||||||
|
|
||||||
void parse_block(int ret_type, int term = '\0');
|
void parse_block(int ltype, int term = '\0');
|
||||||
|
|
||||||
void next_char() {
|
void next_char() {
|
||||||
if (source == send) {
|
if (source == send) {
|
||||||
|
@ -77,6 +77,9 @@ struct parser_state {
|
||||||
command_impl *id, ident &self, int rettype, std::uint32_t limit = 0
|
command_impl *id, ident &self, int rettype, std::uint32_t limit = 0
|
||||||
);
|
);
|
||||||
bool parse_call_alias(alias &id);
|
bool parse_call_alias(alias &id);
|
||||||
|
bool parse_call_id(ident &id, int ltype);
|
||||||
|
|
||||||
|
bool parse_assign(charbuf &idname, int ltype, int term, bool &noass);
|
||||||
|
|
||||||
bool parse_id_local();
|
bool parse_id_local();
|
||||||
bool parse_id_do(bool args, int ltype);
|
bool parse_id_do(bool args, int ltype);
|
||||||
|
|
Loading…
Reference in New Issue