libcubescript/include/cubescript/cubescript/util.hh

413 lines
13 KiB
C++

/** @file util.hh
*
* @brief Utility API.
*
* This contains various utilities that don't quite fit within the other
* structures, but provide convenience; this includes things such as parsing
* of lists, strings and numbers.
*
* @copyright See COPYING.md in the project tree for further information.
*/
#ifndef LIBCUBESCRIPT_CUBESCRIPT_UTIL_HH
#define LIBCUBESCRIPT_CUBESCRIPT_UTIL_HH
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <string_view>
#include <algorithm>
#include "ident.hh"
namespace cubescript {
/** @brief A safe alias handler for commands
*
* In general, when dealing with aliases in commands, you do not want to
* set them directly, since this would set the alias globally. Instead, you
* can use this to make aliases local to the command.
*
* Internally, each Cubescript thread has a mapping for alias state within
* the thread. This mapping is stack based - which means you can push an
* alias, and then anything affecting the value of the alias in that thread
* will only be visible until the stack is popped. This structure provides
* a safe means of handling the alias stack; constructing it will push the
* alias, destroying it will pop it.
*
* Therefore, what you can do is something like this:
*
* ```
* {
* alias_local s{my_thread, "test"};
* // the alias is pushed from here until the end of this scope, so
* // setting the alias will only be visible within this scope
* s.set(some_value); // a convenient setter
* my_thread.run(...);
* } // the alias is popped here, when `s` is destroyed
* ```
*
* If the provided input is not an alias, a cubescript::error will be thrown.
* Often you don't have to catch it (since this is primarily intended for use
* within commands, the error will propagate outside your command).
*
* Since the goal is to interact tightly with RAII and ensure consistency at
* all times, it is not possible to copy or move this object. That means you
* should also not be storing it; it should be used purely as a scope based
* alias stack manager.
*/
struct LIBCUBESCRIPT_EXPORT alias_local {
/** @brief Construct the local handler
*
* @param cs the thread
* @param a the ident to make local
*
* @throw cubescript::error if the ident is not an alias
*/
alias_local(state &cs, ident &a);
/** @brief Construct the local handler
*
* The ident will be retrieved using state::new_ident().
*
* @param cs the thread
* @param name the name of the ident to make local
*
* @throw cubescript::error if the ident is not an alias
*/
alias_local(state &cs, std::string_view name);
/** @brief Construct the local handler
*
* The ident will be retrieved from the value. If the contained value
* is not an ident, it will be treated as a name.
*
* @param cs the thread
* @param val the value holding either an ident or a name
*
* @throw cubescript::error if the ident is not an alias
*/
alias_local(state &cs, any_value const &val);
/** @brief Destroy the local handler
*
* This pops the alias that was pushed on construction.
*/
~alias_local();
/** @brief Local handlers are not copyable */
alias_local(alias_local const &) = delete;
/** @brief Local handlers are not movable */
alias_local(alias_local &&) = delete;
/** @brief Local handlers are not copy assignable */
alias_local &operator=(alias_local const &) = delete;
/** @brief Local handlers are not move assignable */
alias_local &operator=(alias_local &&v) = delete;
/** @brief Get the contained alias
*
* @return the alias or `nullptr` if none set
*/
alias *get_alias() noexcept { return p_alias; }
/** @brief Get the contained alias (read-only overload)
*
* @return the alias or `nullptr` if none set
*/
alias const *get_alias() const noexcept { return p_alias; }
/** @brief Set the contained alias's value
*
* @param val the value to assign to the alias
*
* @return `true` if the alias is valid, `false` otherwise
*/
bool set(any_value val);
private:
/* the alias being managed; this is what get_alias() returns */
alias *p_alias;
/* opaque handle used by the out-of-line implementation; presumably
* refers to the pushed alias stack entry - TODO confirm
*/
void *p_sp;
};
/** @brief A list parser
*
* Cubescript does not have data structures and everything is a string.
* However, you can represent lists as strings; there is a standard syntax
* to them.
*
* A list in Cubescript is simply a bunch of items separated by whitespace.
* The items can take the form of any literal value Cubescript has. That means
* they can be number literals, they can be words, and they can be strings.
* Strings can be quoted either with double quotes, square brackets or even
* parenthesis; basically any syntax representing a value.
*
* Comments (anything following two slashes, inclusive) are skipped. As far
* as allowed whitespace consisting an item delimiter goes, this is either
* regular spaces, horizontal tabs, or newlines.
*
* Keep in mind that it does not own the string it is parsing. Therefore,
* you have to make sure to keep it alive for as long as the parser is.
*
* The input string by itself should not be quoted.
*/
struct LIBCUBESCRIPT_EXPORT list_parser {
/** @brief Construct a list parser.
*
* Nothing is done until you actually start parsing.
*
* @param cs the thread
* @param s the string representing the list
*/
list_parser(state &cs, std::string_view s = std::string_view{}):
p_state{&cs}, p_input_beg{s.data()}, p_input_end{s.data() + s.size()}
{}
/** @brief Reset the input string for the list */
void set_input(std::string_view s) {
p_input_beg = s.data();
p_input_end = s.data() + s.size();
}
/** @brief Get the current input string in the parser
*
* The already read items will not be contained in the result.
*/
std::string_view get_input() const {
return std::string_view{
p_input_beg, std::size_t(p_input_end - p_input_beg)
};
}
/** @brief Attempt to parse an item
*
* This will first skip whitespace and then attempt to read an element.
*
* @return `true` if an element was found, `false` otherwise
*/
bool parse();
/** @brief Get the number of items in the current list
*
* This will not contain items that are already parsed out, and will
* parse the list itself, i.e. the final state will be an empty list.
*/
std::size_t count();
/** @brief Get the currently parsed item
*
* If the item was quoted with double quotes, the contents will be run
* through cubescript::unescape_string() first.
*
* @see get_raw_item()
* @see get_quoted_item()
*/
string_ref get_item() const;
/** @brief Get the currently parsed raw item
*
* Unlike get_item(), this will not unescape the string under any
* circumstances and represents simply a slice of the original input.
*
* @see get_item()
* @see get_quoted_item()
*/
std::string_view get_raw_item() const {
return std::string_view{p_ibeg, std::size_t(p_iend - p_ibeg)};
}
/** @brief Get the currently parsed raw item
*
* Like get_raw_item(), but contains the quotes too, if there were any.
* Likewise, the resulting view is just a slice of the original input.
*
* @see get_item()
* @see get_raw_item()
*/
std::string_view get_quoted_item() const {
return std::string_view{p_qbeg, std::size_t(p_qend - p_qbeg)};
}
/** @brief Skip whitespace in the input until a value is reached. */
void skip_until_item();
private:
state *p_state;
char const *p_input_beg, *p_input_end;
char const *p_ibeg{}, *p_iend{};
char const *p_qbeg{}, *p_qend{};
};
/** @brief Parse a double quoted Cubescript string
 *
 * This parses double quoted strings according to the Cubescript syntax. The
 * string has to begin with a double quote; if it does not for any reason,
 * `str.data()` is returned.
 *
 * Escape sequences are not expanded and have the syntax `^X` where X is the
 * specific escape character (e.g. `^n` for newline). It is possible to make
 * the string multiline; the line needs to end with `\\`.
 *
 * Strings must be terminated again with double quotes.
 *
 * @param cs the thread
 * @param str the input string
 * @param[out] nlines the number of lines in the string
 *
 * @return a pointer to the character after the last double quotes
 * @throw cubescript::error if the string is started but not finished
 *
 * @see cubescript::parse_word()
 */
LIBCUBESCRIPT_EXPORT char const *parse_string(
    /* std::size_t rather than ::size_t: <cstddef> only guarantees the
     * former, and it is what the rest of this header uses
     */
    state &cs, std::string_view str, std::size_t &nlines
);
/** @brief Parse a double quoted Cubescript string
 *
 * This overload has the same semantics as the three-argument one, but it
 * does not report the number of lines to the caller.
 *
 * @param cs the thread
 * @param str the input string
 *
 * @return whatever the three-argument overload returns for the same input
 */
inline char const *parse_string(
    state &cs, std::string_view str
) {
    /* the line count is discarded; it is zero-initialized so that the
     * callee never observes an indeterminate value through the reference
     */
    std::size_t nlines = 0;
    return parse_string(cs, str, nlines);
}
/** @brief Parse a Cubescript word.
*
* A Cubescript word is a sequence of any characters that are not whitespace
* (spaces, newlines, tabs) or a comment (two consecutive slashes). It is
* allowed to have parenthesis and square brackets as long as they are
* balanced.
*
* Examples of valid words: `foo`, `test123`, `125.4`, `[foo]`, `hi(bar)`.
*
* If a non-word character is encountered immediately, the resulting pointer
* will be `str.data()`.
*
* Keep in mind that a valid word may not be a valid ident name (e.g. numbers
* are valid words but not valid ident names).
*
* @param cs the thread
* @param str the input string
*
* @return a pointer to the first character after the word
* @throw cubescript::error if there is unbalanced `[` or `(`
*
* @see cubescript::parse_string()
*/
LIBCUBESCRIPT_EXPORT char const *parse_word(
state &cs, std::string_view str
);
/** @brief Concatenate a span of values
*
* The input values are concatenated by `sep`. Non-integer/float/string
* input values are considered empty strings. Integers and floats are
* converted to strings. The input list is not affected, however.
*
* @param cs the thread
* @param vals the values to concatenate
* @param sep the separator to join the values with; empty by default
*
* @return the concatenated string
*/
LIBCUBESCRIPT_EXPORT string_ref concat_values(
state &cs, span_type<any_value> vals,
std::string_view sep = std::string_view{}
);
/** @brief Escape a Cubescript string
 *
 * Reads the input string and writes a double quoted, escaped version of it
 * into `writer`. An opening double quote is written first and a closing one
 * last. In between, newlines become `^n`, horizontal tabs become `^t`, form
 * feeds become `^f`, double quotes get a caret prefix and carets are
 * doubled. Every other character is passed through unchanged.
 *
 * @param writer the output iterator to write into
 * @param str the string to escape
 *
 * @return `writer` after writing into it
 *
 * @see cubescript::unescape_string()
 */
template<typename R>
inline R escape_string(R writer, std::string_view str) {
    *writer++ = '"';
    for (char const ch: str) {
        /* the character to put after the caret; NUL means "no escape" */
        char code = '\0';
        if (ch == '\n') {
            code = 'n';
        } else if (ch == '\t') {
            code = 't';
        } else if (ch == '\f') {
            code = 'f';
        } else if ((ch == '"') || (ch == '^')) {
            code = ch;
        }
        if (code != '\0') {
            *writer++ = '^';
            *writer++ = code;
        } else {
            *writer++ = ch;
        }
    }
    *writer++ = '"';
    return writer;
}
/** @brief Unescape a Cubescript string
 *
 * If a caret is encountered, it is skipped. If the following character is
 * `n`, it is turned into a newline; `t` is turned into a horizontal tab, `f`
 * into a form feed, double quote is written as is, as is a second caret. Any
 * others are written as they are. A caret that is the very last character
 * of the input is dropped.
 *
 * If a backslash is encountered and followed by a newline (`\n`, `\r` or
 * `\r\n`), the whole sequence is skipped. Otherwise the backslash is written
 * out and the character following it (if any) is processed normally, so it
 * is never lost. Any other character is written out as is.
 *
 * @param writer the output iterator to write into
 * @param str the string to unescape; it should not include the quotes
 *
 * @return `writer` after writing into it
 *
 * @see cubescript::escape_string()
 */
template<typename R>
inline R unescape_string(R writer, std::string_view str) {
    for (auto it = str.begin(); it != str.end(); ++it) {
        if (*it == '^') {
            ++it;
            if (it == str.end()) {
                /* dangling caret at the end of input: nothing to escape */
                break;
            }
            switch (*it) {
                case 'n': *writer++ = '\n'; break;
                case 't': *writer++ = '\t'; break;
                case 'f': *writer++ = '\f'; break;
                case '"': *writer++ = '"'; break;
                case '^': *writer++ = '^'; break;
                default: *writer++ = *it; break;
            }
        } else if (*it == '\\') {
            /* only peek at the next character; consuming it here would
             * drop it from the output when it is not a line break
             */
            auto nx = it + 1;
            if ((nx != str.end()) && ((*nx == '\r') || (*nx == '\n'))) {
                /* line continuation: swallow the backslash and the line
                 * break, treating CR LF as a single line break
                 */
                it = nx;
                if (
                    (*it == '\r') && ((it + 1) != str.end()) &&
                    (it[1] == '\n')
                ) {
                    ++it;
                }
                continue;
            }
            *writer++ = '\\';
        } else {
            *writer++ = *it;
        }
    }
    return writer;
}
/** @brief Print a Cubescript stack
 *
 * This prints out the Cubescript stack as stored in cubescript::error, into
 * the `writer`. Each level is written on its own line. The line starts with
 * two spaces. If there is a gap in the stack and we've reached index 1,
 * the two spaces are followed with two periods. Following that is the index
 * followed by a right parenthesis, a space, and the name of the ident.
 *
 * The last line is not terminated with a newline.
 *
 * @param writer the output iterator to write into
 * @param st the stack state to print
 *
 * @return `writer` after writing into it
 */
template<typename R>
inline R print_stack(R writer, stack_state const &st) {
    char buf[32] = {0};
    auto nd = st.get();
    while (nd) {
        auto name = nd->id->get_name();
        *writer++ = ' ';
        *writer++ = ' ';
        if ((nd->index == 1) && st.gap()) {
            *writer++ = '.';
            *writer++ = '.';
        }
        std::snprintf(buf, sizeof(buf), "%d", nd->index);
        char const *p = buf;
        /* std::copy takes the iterator by value; its result has to be
         * stored back, otherwise iterators that carry their own position
         * (e.g. raw pointers) would not advance past the copied text
         */
        writer = std::copy(p, p + std::strlen(p), writer);
        *writer++ = ')';
        *writer++ = ' ';
        writer = std::copy(name.begin(), name.end(), writer);
        nd = nd->next;
        if (nd) {
            *writer++ = '\n';
        }
    }
    return writer;
}
} /* namespace cubescript */
#endif /* LIBCUBESCRIPT_CUBESCRIPT_UTIL_HH */