diff --git a/include/cubescript/cubescript.hh b/include/cubescript/cubescript.hh
index 931a587..ed43f05 100644
--- a/include/cubescript/cubescript.hh
+++ b/include/cubescript/cubescript.hh
@@ -41,6 +41,8 @@ enum {
 };
 
 struct cs_bcode;
+struct cs_value;
+struct cs_shared_state;
 
 struct OSTD_EXPORT cs_bcode_ref {
     cs_bcode_ref():
@@ -68,6 +70,31 @@ private:
 
 OSTD_EXPORT bool cs_code_is_empty(cs_bcode *code);
 
+/* a reference-counted handle to a string interned in a shared state;
+ * copying takes another reference and destruction releases it
+ */
+struct OSTD_EXPORT cs_strref {
+    friend struct cs_value;
+
+    cs_strref() = delete;
+    cs_strref(cs_shared_state &cs, ostd::string_range str);
+
+    cs_strref(cs_strref const &ref);
+
+    ~cs_strref();
+
+    cs_strref &operator=(cs_strref const &ref);
+
+    operator ostd::string_range() const;
+
+private:
+    /* for internal use only */
+    cs_strref(char const *p, cs_shared_state &cs);
+
+    char const *p_str;
+    cs_shared_state *p_state;
+};
+
 enum class cs_value_type {
     Null = 0, Int, Float, String, Cstring, Code, Macro, Ident
 };
@@ -121,7 +148,6 @@ struct cs_ident_stack {
     cs_ident_stack *next;
 };
 
-struct cs_shared_state;
 struct cs_error;
 struct cs_gen_state;
 
@@ -342,6 +368,7 @@ static inline void *cs_default_alloc(void *, void *p, size_t, size_t ns) {
 
 struct OSTD_EXPORT cs_state {
     friend struct cs_error;
+    friend struct cs_strman;
     friend struct cs_gen_state;
 
     cs_shared_state *p_state;
diff --git a/src/cs_util.cc b/src/cs_util.cc
index 3151762..c002e7f 100644
--- a/src/cs_util.cc
+++ b/src/cs_util.cc
@@ -1,5 +1,6 @@
 #include
 #include "cs_util.hh"
+#include "cs_vm.hh"
 
 #include
 #include
@@ -183,6 +184,84 @@ done:
     return ret;
 }
 
+/* string manager */
+
+/* the refcount/length header is stored right before the string data */
+inline cs_strref_state *get_ref_state(char const *ptr) {
+    return const_cast<cs_strref_state *>(
+        reinterpret_cast<cs_strref_state const *>(ptr)
+    ) - 1;
+}
+
+char const *cs_strman::add(ostd::string_range str) {
+    /* already present: just increment ref */
+    auto it = counts.find(str);
+    if (it != counts.end()) {
+        auto *st = it->second;
+        ++st->refcount;
+        return reinterpret_cast<char const *>(st + 1);
+    }
+    /* not present: allocate brand new data */
+    auto ss = str.size();
+    auto mem = cstate->alloc(nullptr, 0, ss + sizeof(cs_strref_state) + 1);
+    /*if (!mem) {
+        cstate->panic();
+    }*/
+    /* write length and refcount */
+    auto *sst = static_cast<cs_strref_state *>(mem);
+    sst->length = ss;
+    sst->refcount = 1;
+    /* write string data */
+    auto *strp = reinterpret_cast<char *>(sst + 1);
+    memcpy(strp, str.data(), ss);
+    /* terminated for best compatibility */
+    strp[ss] = '\0';
+    /* store it; the key references the managed copy, because the
+     * manager does not own the memory behind the input range
+     */
+    counts.emplace(ostd::string_range{strp, strp + ss}, sst);
+    return strp;
+}
+
+char const *cs_strman::ref(char const *ptr) {
+    auto *ss = get_ref_state(ptr);
+    ++ss->refcount;
+    return ptr;
+}
+
+void cs_strman::unref(char const *ptr) {
+    auto *ss = get_ref_state(ptr);
+    if (!--ss->refcount) {
+        /* refcount zero, so ditch it
+         * this path is a little slow...
+         */
+        auto sr = ostd::string_range{ptr, ptr + ss->length};
+        auto it = counts.find(sr);
+        /* a missing entry is an internal error and should *never*
+         * happen, but it must not be dereferenced if it does
+         */
+        if (it != counts.end()) {
+            /* erase before freeing, the key points into this memory */
+            counts.erase(it);
+        } /* else cstate->panic(); */
+        /* dealloc */
+        cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0);
+    }
+}
+
+char const *cs_strman::find(ostd::string_range str) const {
+    auto it = counts.find(str);
+    if (it == counts.end()) {
+        return nullptr;
+    }
+    return reinterpret_cast<char const *>(it->second + 1);
+}
+
+ostd::string_range cs_strman::get(char const *ptr) const {
+    auto *ss = get_ref_state(ptr);
+    return ostd::string_range{ptr, ptr + ss->length};
+}
+
 namespace util {
     OSTD_EXPORT ostd::string_range parse_string(
         cs_state &cs, ostd::string_range str, size_t &nlines
diff --git a/src/cs_util.hh b/src/cs_util.hh
index 01ddd52..ae3a9a7 100644
--- a/src/cs_util.hh
+++ b/src/cs_util.hh
@@ -38,6 +38,74 @@ inline void cs_do_and_cleanup(F1 &&dof, F2 &&clf) {
     dof();
 }
 
+struct cs_shared_state;
+
+/* string manager
+ *
+ * the purpose of this is to handle interning of strings; each string within
+ * a libcs state is represented (and allocated) exactly once, and reference
+ * counted; that both helps save resources, and potentially provide a means
+ * to reliably represent returned strings in places that is compatible with
+ * multiple threads and eliminate the chance of dangling pointers
+ *
+ * strings are allocated in a manner where the refcount and length are stored
+ * as a part of the string's memory, so it can be easily accessed using just
+ * the pointer to the string, but also this is transparent for usage
+ *
+ * this is not thread-safe yet, and later on it should be made that,
+ * for now we don't bother...
+ */
+
+struct cs_strref_state {
+    size_t length;
+    size_t refcount;
+};
+
+struct cs_strman {
+    cs_strman() = delete;
+    cs_strman(cs_shared_state *cs): cstate{cs} {}
+    ~cs_strman() {}
+
+    cs_strman(cs_strman const &) = delete;
+    cs_strman(cs_strman &&) = delete;
+
+    cs_strman &operator=(cs_strman const &) = delete;
+    cs_strman &operator=(cs_strman &&) = delete;
+
+    /* adds a string into the manager using any source, and returns a managed
+     * version; this is "slow" as it has to hash the string and potentially
+     * allocate fresh memory for it, but is perfectly safe at any time
+     */
+    char const *add(ostd::string_range str);
+
+    /* this simply increments the reference count of an existing managed
+     * string, this is only safe when you know the pointer you are passing
+     * is already managed by the system
+     */
+    char const *ref(char const *ptr);
+
+    /* decrements the reference count and removes it from the system if
+     * that reaches zero; likewise, only safe with pointers that are managed
+     */
+    void unref(char const *ptr);
+
+    /* just finds a managed pointer with the same contents
+     * as the input, if not found then a null pointer is returned
+     */
+    char const *find(ostd::string_range str) const;
+
+    /* a quick helper to make a proper ostd string range out of a ptr */
+    ostd::string_range get(char const *ptr) const;
+
+    cs_shared_state *cstate;
+    /* FIXME: use main allocator
+     *
+     * keys point into the managed string memory itself, so an entry
+     * has to be erased before its string is deallocated
+     */
+    std::unordered_map<ostd::string_range, cs_strref_state *> counts{};
+};
+
 } /* namespace cscript */
 
 #endif /* LIBCUBESCRIPT_CS_UTIL_HH */
diff --git a/src/cs_vm.cc b/src/cs_vm.cc
index 1d94459..0947a0c 100644
--- a/src/cs_vm.cc
+++ b/src/cs_vm.cc
@@ -6,6 +6,43 @@
 
 namespace cscript {
 
+cs_strref::cs_strref(cs_shared_state &cs, ostd::string_range str):
+    p_state{&cs}
+{
+    p_str = cs.strman->add(str);
+}
+
+cs_strref::cs_strref(cs_strref const &ref): p_str{ref.p_str}, p_state{ref.p_state}
+{
+    p_state->strman->ref(p_str);
+}
+
+/* this can be used by friends to do quick cs_strref creation */
+cs_strref::cs_strref(char const *p, cs_shared_state &cs):
+    p_state{&cs}
+{
+    p_str = p_state->strman->ref(p);
+}
+
+cs_strref::~cs_strref() {
+    p_state->strman->unref(p_str);
+}
+
+cs_strref &cs_strref::operator=(cs_strref const &ref) {
+    /* take the new reference before dropping the old one, so that
+     * self-assignment cannot free the string we are about to keep
+     */
+    ref.p_state->strman->ref(ref.p_str);
+    p_state->strman->unref(p_str);
+    p_str = ref.p_str;
+    p_state = ref.p_state;
+    return *this;
+}
+
+cs_strref::operator ostd::string_range() const {
+    return p_state->strman->get(p_str);
+}
+
 struct cs_cmd_internal {
     static void call(
         cs_state &cs, cs_command *c, cs_value_r args, cs_value &ret
diff --git a/src/cs_vm.hh b/src/cs_vm.hh
index 194fa01..3d3c979 100644
--- a/src/cs_vm.hh
+++ b/src/cs_vm.hh
@@ -97,6 +97,7 @@ struct cs_shared_state {
     cs_map idents;
     cs_vector identmap;
     cs_alloc_cb allocf;
+    cs_strman *strman;
     void *aptr;
 
     void *alloc(void *ptr, size_t os, size_t ns) {
diff --git a/src/cubescript.cc b/src/cubescript.cc
index 8c75113..e4e3f86 100644
--- a/src/cubescript.cc
+++ b/src/cubescript.cc
@@ -315,6 +315,7 @@ cs_state::cs_state(cs_alloc_cb func, void *data):
     /* set up allocator, from now we can call into alloc() */
     p_state->allocf = func;
     p_state->aptr = data;
+    p_state->strman = p_state->create<cs_strman>(p_state);
 
     for (int i = 0; i < MaxArguments; ++i) {
         char buf[32];
@@ -432,6 +433,7 @@ OSTD_EXPORT void cs_state::destroy() {
         }
         p_state->destroy(i);
     }
+    p_state->destroy(p_state->strman);
     p_state->destroy(p_state);
 }
 