clean up the main parser path

master
Daniel Kolesa 2021-04-10 19:32:01 +02:00
parent 51f09c3dfb
commit 0ee0ebfb42
2 changed files with 283 additions and 202 deletions

View File

@ -307,6 +307,9 @@ bool is_valid_name(std::string_view s) {
} }
} }
/* parse out a quoted string and return its raw contents, without the quotes;
* afterwards the parser state is positioned just past the closing quote
*/
std::string_view parser_state::get_str() { std::string_view parser_state::get_str() {
size_t nl; size_t nl;
char const *beg = source; char const *beg = source;
@ -318,12 +321,18 @@ std::string_view parser_state::get_str() {
return ret.substr(1, ret.size() - 2); return ret.substr(1, ret.size() - 2);
} }
/* like the above, but unescapes the string and dups it as a buffer */
charbuf parser_state::get_str_dup() { charbuf parser_state::get_str_dup() {
charbuf buf{ts}; charbuf buf{ts};
unescape_string(std::back_inserter(buf), get_str()); unescape_string(std::back_inserter(buf), get_str());
return buf; return buf;
} }
/* a simple name, used for @foo in macro substitutions
*
* consists of an alpha character (or '_') followed
* by alphanumeric characters (or more '_')
*/
std::string_view parser_state::read_macro_name() { std::string_view parser_state::read_macro_name() {
char const *op = source; char const *op = source;
char c = current(); char c = current();
@ -336,6 +345,7 @@ std::string_view parser_state::read_macro_name() {
return std::string_view{op, std::size_t(source - op)}; return std::string_view{op, std::size_t(source - op)};
} }
/* advance the parser until we reach any of the given chars, then stop at it */
char parser_state::skip_until(std::string_view chars) { char parser_state::skip_until(std::string_view chars) {
char c = current(); char c = current();
while (c && (chars.find(c) == std::string_view::npos)) { while (c && (chars.find(c) == std::string_view::npos)) {
@ -345,6 +355,7 @@ char parser_state::skip_until(std::string_view chars) {
return c; return c;
} }
/* advance the parser until we reach the given character, then stop at it */
char parser_state::skip_until(char cf) { char parser_state::skip_until(char cf) {
char c = current(); char c = current();
while (c && (c != cf)) { while (c && (c != cf)) {
@ -382,8 +393,13 @@ void parser_state::skip_comments() {
if ((current() != '/') || (current(1) != '/')) { if ((current() != '/') || (current(1) != '/')) {
return; return;
} }
while (current() != '\n') { for (;;) {
next_char(); auto c = current();
if (c && (c != '\n')) {
next_char();
} else {
break;
}
} }
} }
} }
@ -1234,220 +1250,282 @@ bool parser_state::parse_id_and_or(ident &id, int ltype) {
return more; return more;
} }
void parser_state::parse_block(int rettype, int brak) { static bool finish_statement(parser_state &ps, bool more, int term) {
charbuf idname{gs.ts}; /* skip through any remaining args in the statement */
for (;;) { if (more) {
skip_comments(); while (ps.parse_arg(VAL_POP)) {}
idname.clear(); }
size_t curline = current_line; /* handle special characters */
bool more = parse_arg(VAL_WORD, &idname); switch (ps.skip_until(")];/\n")) {
if (!more) { /* EOS */
goto endstatement; case '\0':
if (ps.current() != term) {
throw error{*ps.ts.pstate, "missing \"%c\"", char(term)};
}
return false;
/* terminating parens/brackets */
case ')':
case ']':
/* if the expected terminator, finish normally */
if (ps.current() == term) {
ps.next_char();
return false;
}
throw error{*ps.ts.pstate, "unexpected \"%c\"", ps.current()};
/* potential comment */
case '/':
ps.next_char();
if (ps.current() == '/') {
ps.skip_until('\n');
}
return finish_statement(ps, false, term);
/* next statement */
default:
ps.next_char();
break;
}
/* advance to next statement */
return true;
}
bool parser_state::parse_call_id(ident &id, int ltype) {
switch (ident_p{id}.impl().p_type) {
case ID_ALIAS:
return parse_call_alias(static_cast<alias &>(id));
case ID_COMMAND:
return parse_call_command(
static_cast<command_impl *>(&id), id, ltype
);
case ID_LOCAL:
return parse_id_local();
case ID_DO:
return parse_id_do(false, ltype);
case ID_DOARGS:
return parse_id_do(true, ltype);
case ID_IF:
return parse_id_if(id, ltype);
case ID_BREAK:
gs.gen_break();
return true;
case ID_CONTINUE:
gs.gen_continue();
return true;
case ID_RESULT: {
bool more = parse_arg(VAL_ANY);
if (!more) {
gs.gen_result_null(ltype);
} else {
gs.gen_result(ltype);
}
return more;
} }
skip_comments(); case ID_NOT: {
if (current() == '=') { bool more = parse_arg(VAL_ANY);
switch (current(1)) { if (!more) {
case '/': gs.gen_result_true(ltype);
if (current(2) != '/') { } else {
break; gs.gen_not(ltype);
}
return more;
}
case ID_AND:
case ID_OR:
return parse_id_and_or(id, ltype);
case ID_IVAR: {
auto *hid = ts.istate->cmd_ivar;
return parse_call_command(
static_cast<command_impl *>(hid), id, ltype
);
}
case ID_FVAR: {
auto *hid = ts.istate->cmd_fvar;
return parse_call_command(
static_cast<command_impl *>(hid), id, ltype
);
}
case ID_SVAR: {
auto *hid = ts.istate->cmd_svar;
return parse_call_command(
static_cast<command_impl *>(hid), id, ltype
);
}
default:
/* unreachable */
break;
}
return true;
}
/* emits a call to an entity that has already been pushed on the stack
 *
 * every following argument is parsed as VAL_ANY and counted; the count is
 * passed to gen_call, after which the statement is finished with `term` as
 * the expected terminator; the return value is that of finish_statement
 */
static bool parse_no_id(parser_state &ps, int term) {
    std::uint32_t argc = 0;
    /* consume arguments one by one until the parser reports no more */
    for (; ps.parse_arg(VAL_ANY); ++argc) {}
    ps.gs.gen_call(argc);
    /* no leftover args to pop, so pass more = false */
    return finish_statement(ps, false, term);
}
bool parser_state::parse_assign(
charbuf &idname, int ltype, int term, bool &noass
) {
/* lookahead */
switch (current(1)) {
/* the = can be followed by a bunch of stuff
* some of these result in empty assignments
*/
case '/': /* a comment maybe? */
if (current(2) != '/') {
/* not a comment */
noass = true;
return true;
}
[[fallthrough]];
case ' ':
case '\t':
case '\r':
case '\n':
case '\0': {
/* skip = */
next_char();
/* we had a name on the left hand side */
if (!idname.empty()) {
idname.push_back('\0');
/* fetch an ident or make up a fresh one (unknown alias) */
ident &id = ts.istate->new_ident(
*ts.pstate, idname.str_term(), IDENT_FLAG_UNKNOWN
);
/* check what we're assigning */
switch (id.get_type()) {
case ident_type::ALIAS: {
/* alias assignment: parse out any one argument */
bool more = parse_arg(VAL_ANY);
if (!more) {
gs.gen_val_string();
}
gs.gen_assign_alias(id);
return finish_statement(*this, more, term);
} }
[[fallthrough]]; case ident_type::IVAR: {
case ';': auto *hid = ts.istate->cmd_ivar;
case ' ': bool more = parse_call_command(
case '\t': static_cast<command_impl *>(hid),
case '\r': id, ltype, 1
case '\n':
case '\0':
next_char();
if (!idname.empty()) {
idname.push_back('\0');
ident &id = ts.istate->new_ident(
*ts.pstate, idname.str_term(), IDENT_FLAG_UNKNOWN
); );
switch (id.get_type()) { return finish_statement(*this, more, term);
case ident_type::ALIAS:
more = parse_arg(VAL_ANY);
if (!more) {
gs.gen_val_string();
}
gs.gen_assign_alias(id);
goto endstatement;
case ident_type::IVAR: {
auto *hid = ts.istate->cmd_ivar;
more = parse_call_command(
static_cast<command_impl *>(hid),
id, rettype, 1
);
goto endstatement;
}
case ident_type::FVAR: {
auto *hid = ts.istate->cmd_fvar;
more = parse_call_command(
static_cast<command_impl *>(hid),
id, rettype, 1
);
goto endstatement;
}
case ident_type::SVAR: {
auto *hid = ts.istate->cmd_svar;
more = parse_call_command(
static_cast<command_impl *>(hid),
id, rettype, 1
);
goto endstatement;
}
default:
break;
}
gs.gen_val_string(idname.str_term());
} }
more = parse_arg(VAL_ANY); case ident_type::FVAR: {
if (!more) { auto *hid = ts.istate->cmd_fvar;
gs.gen_val_string(); bool more = parse_call_command(
static_cast<command_impl *>(hid),
id, ltype, 1
);
return finish_statement(*this, more, term);
} }
gs.gen_assign(); case ident_type::SVAR: {
goto endstatement; auto *hid = ts.istate->cmd_svar;
} bool more = parse_call_command(
} static_cast<command_impl *>(hid),
if (idname.empty()) { id, ltype, 1
noid: );
std::uint32_t numargs = 0; return finish_statement(*this, more, term);
for (;;) {
more = parse_arg(VAL_ANY);
if (!more) {
break;
}
++numargs;
}
gs.gen_call(numargs);
} else {
idname.push_back('\0');
ident *id = ts.pstate->get_ident(idname.str_term());
if (!id) {
if (is_valid_name(idname.str_term())) {
gs.gen_val_string(idname.str_term());
goto noid;
}
switch (rettype) {
case VAL_ANY: {
std::string_view end = idname.str_term();
integer_type val = parse_int(end, &end);
if (!end.empty()) {
gs.gen_val_string(idname.str_term());
} else {
gs.gen_val_integer(val);
}
break;
} }
default: default:
gs.gen_val(rettype, idname.str_term(), int(curline));
break; break;
} }
gs.gen_result(); gs.gen_val_string(idname.str_term());
} else { }
switch (ident_p{*id}.impl().p_type) { /* unknown thing, make it the VM's problem */
case ID_ALIAS: bool more = parse_arg(VAL_ANY);
more = parse_call_alias(static_cast<alias &>(*id)); if (!more) {
break; gs.gen_val_string();
case ID_COMMAND: }
more = parse_call_command( gs.gen_assign();
static_cast<command_impl *>(id), *id, rettype return finish_statement(*this, more, term);
); }
break; /* not followed by any of these: not an assignment */
case ID_LOCAL: default:
more = parse_id_local(); noass = true;
break; return true;
case ID_DO: }
more = parse_id_do(false, rettype); return true;
break; }
case ID_DOARGS:
more = parse_id_do(true, rettype); void parser_state::parse_block(int ltype, int term) {
break; charbuf idname{gs.ts};
case ID_IF: /* the main statement parse loop */
more = parse_id_if(*id, rettype); for (;;) {
break; /* first, skip any comments in the way and prepare the env */
case ID_BREAK: skip_comments();
gs.gen_break(); idname.clear();
break; std::size_t curline = current_line;
case ID_CONTINUE: bool more = true;
gs.gen_continue(); /* parse the left hand side of the statement */
break; if (!parse_arg(VAL_WORD, &idname)) {
case ID_RESULT: if (!finish_statement(*this, more, term)) {
if (more) { return;
more = parse_arg(VAL_ANY); }
} continue;
if (!more) { }
gs.gen_result_null(rettype); skip_comments();
} else { /* potentially an assignment */
gs.gen_result(rettype); if (current() == '=') {
} bool noass = false;
break; if (!parse_assign(idname, ltype, term, noass)) {
case ID_NOT: /* terminated */
if (more) { return;
more = parse_arg(VAL_ANY); }
} if (!noass) {
if (!more) { /* was actually an assignment */
gs.gen_result_true(rettype); continue;
} else {
gs.gen_not(rettype);
}
break;
case ID_AND:
case ID_OR:
more = parse_id_and_or(*id, rettype);
break;
case ID_IVAR: {
auto *hid = ts.istate->cmd_ivar;
more = parse_call_command(
static_cast<command_impl *>(hid), *id, rettype
);
break;
}
case ID_FVAR: {
auto *hid = ts.istate->cmd_fvar;
more = parse_call_command(
static_cast<command_impl *>(hid), *id, rettype
);
break;
}
case ID_SVAR: {
auto *hid = ts.istate->cmd_svar;
more = parse_call_command(
static_cast<command_impl *>(hid), *id, rettype
);
break;
}
}
} }
} }
endstatement: /* we didn't get a name to look up: treat as unknown */
if (more) { if (idname.empty()) {
while (parse_arg(VAL_POP)); if (!parse_no_id(*this, term)) {
return;
}
continue;
} }
switch (skip_until(")];/\n")) { idname.push_back('\0');
case '\0': auto idstr = idname.str_term();
if (current() != brak) { ident *id = ts.pstate->get_ident(idstr);
throw error{*ts.pstate, "missing \"%c\"", char(brak)}; if (!id) {
/* no such ident exists but the name is valid, which means
* it's a syntactically ok call, make it the VM's problem
*/
if (is_valid_name(idstr)) {
/* VAL_WORD does not codegen, put the name on the stack */
gs.gen_val_string(idstr);
if (!parse_no_id(*this, term)) {
return; return;
} }
return; continue;
case ')': }
case ']': /* not a valid command name: treat like an expression */
if (current() == brak) { switch (ltype) {
next_char(); case VAL_ANY: {
return; auto end = idstr;
auto val = parse_int(idstr, &end);
if (!end.empty()) {
gs.gen_val_string(idstr);
} else {
gs.gen_val_integer(val);
}
break;
} }
throw error{*ts.pstate, "unexpected \"%c\"", current()}; default:
return; gs.gen_val(ltype, idname.str_term(), int(curline));
case '/': break;
next_char(); }
if (current() == '/') { gs.gen_result();
skip_until('\n'); continue;
} }
goto endstatement; /* the ident exists; treat like a call according to its type */
default: more = parse_call_id(*id, ltype);
next_char(); if (!finish_statement(*this, more, term)) {
break; return;
} }
} }
} }

View File

@ -42,7 +42,7 @@ struct parser_state {
std::string_view get_word(); std::string_view get_word();
void parse_block(int ret_type, int term = '\0'); void parse_block(int ltype, int term = '\0');
void next_char() { void next_char() {
if (source == send) { if (source == send) {
@ -77,6 +77,9 @@ struct parser_state {
command_impl *id, ident &self, int rettype, std::uint32_t limit = 0 command_impl *id, ident &self, int rettype, std::uint32_t limit = 0
); );
bool parse_call_alias(alias &id); bool parse_call_alias(alias &id);
bool parse_call_id(ident &id, int ltype);
bool parse_assign(charbuf &idname, int ltype, int term, bool &noass);
bool parse_id_local(); bool parse_id_local();
bool parse_id_do(bool args, int ltype); bool parse_id_do(bool args, int ltype);