From 6821260c37a9f1ad8790d5160079c63fedf5f65c Mon Sep 17 00:00:00 2001 From: Daniel Kolesa Date: Tue, 23 Mar 2021 01:25:47 +0100 Subject: [PATCH] separate header/impl for strman --- include/cubescript/cubescript.hh | 6 +- src/cs_state.cc | 1 + src/cs_strman.cc | 112 +++++++++++++++++++++++++++++++ src/cs_strman.hh | 92 +++++++++++++++++++++++++ src/cs_util.cc | 103 +--------------------------- src/cs_util.hh | 79 ---------------------- src/lib_str.cc | 1 + src/meson.build | 1 + 8 files changed, 211 insertions(+), 184 deletions(-) create mode 100644 src/cs_strman.cc create mode 100644 src/cs_strman.hh diff --git a/include/cubescript/cubescript.hh b/include/cubescript/cubescript.hh index c525240..a34f117 100644 --- a/include/cubescript/cubescript.hh +++ b/include/cubescript/cubescript.hh @@ -298,10 +298,10 @@ LIBCUBESCRIPT_EXPORT bool cs_code_is_empty(cs_bcode *code); struct LIBCUBESCRIPT_EXPORT cs_strref { friend struct cs_value; /* FIXME: eliminate this */ - friend inline cs_strref cs_make_strref(char const *p, cs_shared_state &cs); + friend inline cs_strref cs_make_strref(char const *p, cs_shared_state *cs); cs_strref() = delete; - cs_strref(cs_shared_state &cs, std::string_view str); + cs_strref(cs_shared_state *cs, std::string_view str); cs_strref(cs_state &cs, std::string_view str); cs_strref(cs_strref const &ref); @@ -327,7 +327,7 @@ struct LIBCUBESCRIPT_EXPORT cs_strref { private: /* for internal use only */ - cs_strref(char const *p, cs_shared_state &cs); + cs_strref(char const *p, cs_shared_state *cs); cs_shared_state *p_state; char const *p_str; diff --git a/src/cs_state.cc b/src/cs_state.cc index 8a3e061..e806631 100644 --- a/src/cs_state.cc +++ b/src/cs_state.cc @@ -3,6 +3,7 @@ #include "cs_util.hh" #include "cs_bcode.hh" #include "cs_state.hh" +#include "cs_strman.hh" namespace cscript { diff --git a/src/cs_strman.cc b/src/cs_strman.cc new file mode 100644 index 0000000..2c49b6e --- /dev/null +++ b/src/cs_strman.cc @@ -0,0 +1,112 @@ +#include + +#include "cs_strman.hh" + +namespace cscript { + +struct cs_strref_state { + std::size_t length; + std::size_t refcount; +}; + +inline cs_strref_state *get_ref_state(char const *ptr) { + return const_cast( + reinterpret_cast(ptr) + ) - 1; +} + +char const *cs_strman::add(std::string_view str) { + auto it = counts.find(str); + /* already present: just increment ref */ + if (it != counts.end()) { + auto *st = it->second; + /* having a null pointer is the same as non-existence */ + if (st) { + ++st->refcount; + return reinterpret_cast(st + 1); + } + } + /* not present: allocate brand new data */ + auto ss = str.size(); + auto strp = alloc_buf(ss); + /* write string data, it's already pre-terminated */ + memcpy(strp, str.data(), ss); + /* store it */ + counts.emplace(std::string_view{strp, ss}, get_ref_state(strp)); + return strp; +} + +char const *cs_strman::ref(char const *ptr) { + auto *ss = get_ref_state(ptr); + ++ss->refcount; + return ptr; +} + +char const *cs_strman::steal(char *ptr) { + auto *ss = get_ref_state(ptr); + auto sr = std::string_view{ptr, ss->length}; + /* much like add(), but we already have memory */ + auto it = counts.find(sr); + if (it != counts.end()) { + auto *st = it->second; + if (st) { + ++st->refcount; + /* the buffer is superfluous now */ + cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0); + return reinterpret_cast(st + 1); + } + } + ss->refcount = 1; + counts.emplace(sr, ss); + return ptr; +} + +void cs_strman::unref(char const *ptr) { + auto *ss = get_ref_state(ptr); + if (!--ss->refcount) { + /* refcount zero, so ditch it + * this path is a little slow... + */ + auto sr = std::string_view{ptr, ss->length}; + auto it = counts.find(sr); + if (it == counts.end()) { + /* internal error: this should *never* happen */ + throw cs_internal_error{"no refcount"}; + } + /* we're freeing the key */ + counts.erase(it); + /* dealloc */ + cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0); + } +} + +char const *cs_strman::find(std::string_view str) const { + auto it = counts.find(str); + if (it == counts.end()) { + return nullptr; + } + return reinterpret_cast(it->second + 1); +} + +std::string_view cs_strman::get(char const *ptr) const { + auto *ss = get_ref_state(ptr); + return std::string_view{ptr, ss->length}; +} + +char *cs_strman::alloc_buf(std::size_t len) const { + auto mem = cstate->alloc(nullptr, 0, len + sizeof(cs_strref_state) + 1); + if (!mem) { + throw cs_internal_error{"allocation failed"}; + } + /* write length and initial refcount */ + auto *sst = static_cast(mem); + sst->length = len; + sst->refcount = 1; + /* pre-terminate */ + auto *strp = reinterpret_cast(sst + 1); + strp[len] = '\0'; + /* now the user can fill it */ + return strp; +}; + +} /* namespace cscript */ diff --git a/src/cs_strman.hh b/src/cs_strman.hh new file mode 100644 index 0000000..21d84db --- /dev/null +++ b/src/cs_strman.hh @@ -0,0 +1,92 @@ +#ifndef LIBCUBESCRIPT_STRMAN_HH +#define LIBCUBESCRIPT_STRMAN_HH + +#include + +#include +#include + +#include "cs_std.hh" +#include "cs_state.hh" + +namespace cscript { + +struct cs_strref_state; + +/* string manager + * + * the purpose of this is to handle interning of strings; each string within + * a libcs state is represented (and allocated) exactly once, and reference + * counted; that both helps save resources, and potentially provide a means + * to reliably represent returned strings in places that is compatible with + * multiple threads and eliminate the chance of dangling pointers + * + * strings are allocated in a manner where the refcount and length are stored + * as a part of the string's memory, so it can be easily accessed using just + * the pointer to the string, but also this is transparent for usage + * + * this is not thread-safe yet, and later on it should be made that, + * for now we don't bother... + */ + +struct cs_strman { + using allocator_type = cs_allocator< + std::pair + >; + cs_strman() = delete; + cs_strman(cs_shared_state *cs): cstate{cs}, counts{allocator_type{cs}} {} + ~cs_strman() {} + + cs_strman(cs_strman const &) = delete; + cs_strman(cs_strman &&) = delete; + + cs_strman &operator=(cs_strman const &) = delete; + cs_strman &operator=(cs_strman &&) = delete; + + /* adds a string into the manager using any source, and returns a managed + * version; this is "slow" as it has to hash the string and potentially + * allocate fresh memory for it, but is perfectly safe at any time + */ + char const *add(std::string_view str); + + /* this simply increments the reference count of an existing managed + * string, this is only safe when you know the pointer you are passing + * is already managed the system + */ + char const *ref(char const *ptr); + + /* this will use the provided memory, assuming it is a fresh string that + * is yet to be added; the memory must be allocated with alloc_buf() + */ + char const *steal(char *ptr); + + /* decrements the reference count and removes it from the system if + * that reaches zero; likewise, only safe with pointers that are managed + */ + void unref(char const *ptr); + + /* just finds a managed pointer with the same contents + * as the input, if not found then a null pointer is returned + */ + char const *find(std::string_view str) const; + + /* a quick helper to make a proper string view out of a ptr */ + std::string_view get(char const *ptr) const; + + /* this will allocate a buffer of the given length (plus one for + * terminating zero) so you can fill it; use steal() to write it + */ + char *alloc_buf(std::size_t len) const; + + cs_shared_state *cstate; + std::unordered_map< + std::string_view, cs_strref_state *, + std::hash, + std::equal_to, + allocator_type + > counts; +}; + +} /* namespace cscript */ + +#endif diff --git a/src/cs_util.cc b/src/cs_util.cc index 3d4b2e5..30290c7 100644 --- a/src/cs_util.cc +++ b/src/cs_util.cc @@ -1,6 +1,7 @@ #include #include "cs_util.hh" #include "cs_vm.hh" +#include "cs_strman.hh" #include #include @@ -182,108 +183,6 @@ done: return ret; } -/* string manager */ - -inline cs_strref_state *get_ref_state(char const *ptr) { - return const_cast( - reinterpret_cast(ptr) - ) - 1; -} - -char const *cs_strman::add(std::string_view str) { - auto it = counts.find(str); - /* already present: just increment ref */ - if (it != counts.end()) { - auto *st = it->second; - /* having a null pointer is the same as non-existence */ - if (st) { - ++st->refcount; - return reinterpret_cast(st + 1); - } - } - /* not present: allocate brand new data */ - auto ss = str.size(); - auto strp = alloc_buf(ss); - /* write string data, it's already pre-terminated */ - memcpy(strp, str.data(), ss); - /* store it */ - counts.emplace(std::string_view{strp, ss}, get_ref_state(strp)); - return strp; -} - -char const *cs_strman::ref(char const *ptr) { - auto *ss = get_ref_state(ptr); - ++ss->refcount; - return ptr; -} - -char const *cs_strman::steal(char *ptr) { - auto *ss = get_ref_state(ptr); - auto sr = std::string_view{ptr, ss->length}; - /* much like add(), but we already have memory */ - auto it = counts.find(sr); - if (it != counts.end()) { - auto *st = it->second; - if (st) { - ++st->refcount; - /* the buffer is superfluous now */ - cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0); - return reinterpret_cast(st + 1); - } - } - ss->refcount = 1; - counts.emplace(sr, ss); - return ptr; -} - -void cs_strman::unref(char const *ptr) { - auto *ss = get_ref_state(ptr); - if (!--ss->refcount) { - /* refcount zero, so ditch it - * this path is a little slow... - */ - auto sr = std::string_view{ptr, ss->length}; - auto it = counts.find(sr); - if (it == counts.end()) { - /* internal error: this should *never* happen */ - throw cs_internal_error{"no refcount"}; - } - /* we're freeing the key */ - counts.erase(it); - /* dealloc */ - cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0); - } -} - -char const *cs_strman::find(std::string_view str) const { - auto it = counts.find(str); - if (it == counts.end()) { - return nullptr; - } - return reinterpret_cast(it->second + 1); -} - -std::string_view cs_strman::get(char const *ptr) const { - auto *ss = get_ref_state(ptr); - return std::string_view{ptr, ss->length}; -} - -char *cs_strman::alloc_buf(std::size_t len) const { - auto mem = cstate->alloc(nullptr, 0, len + sizeof(cs_strref_state) + 1); - if (!mem) { - throw cs_internal_error{"allocation failed"}; - } - /* write length and initial refcount */ - auto *sst = static_cast(mem); - sst->length = len; - sst->refcount = 1; - /* pre-terminate */ - auto *strp = reinterpret_cast(sst + 1); - strp[len] = '\0'; - /* now the user can fill it */ - return strp; -}; - /* strref */ cs_strref::cs_strref(cs_shared_state *cs, std::string_view str): diff --git a/src/cs_util.hh b/src/cs_util.hh index f2b86fb..a437be4 100644 --- a/src/cs_util.hh +++ b/src/cs_util.hh @@ -25,85 +25,6 @@ inline cs_strref cs_make_strref(char const *p, cs_shared_state *cs) { return cs_strref{p, cs}; } -/* string manager - * - * the purpose of this is to handle interning of strings; each string within - * a libcs state is represented (and allocated) exactly once, and reference - * counted; that both helps save resources, and potentially provide a means - * to reliably represent returned strings in places that is compatible with - * multiple threads and eliminate the chance of dangling pointers - * - * strings are allocated in a manner where the refcount and length are stored - * as a part of the string's memory, so it can be easily accessed using just - * the pointer to the string, but also this is transparent for usage - * - * this is not thread-safe yet, and later on it should be made that, - * for now we don't bother... - */ - -struct cs_strref_state { - size_t length; - size_t refcount; -}; - -struct cs_strman { - using allocator_type = cs_allocator< - std::pair - >; - cs_strman() = delete; - cs_strman(cs_shared_state *cs): cstate{cs}, counts{allocator_type{cs}} {} - ~cs_strman() {} - - cs_strman(cs_strman const &) = delete; - cs_strman(cs_strman &&) = delete; - - cs_strman &operator=(cs_strman const &) = delete; - cs_strman &operator=(cs_strman &&) = delete; - - /* adds a string into the manager using any source, and returns a managed - * version; this is "slow" as it has to hash the string and potentially - * allocate fresh memory for it, but is perfectly safe at any time - */ - char const *add(std::string_view str); - - /* this simply increments the reference count of an existing managed - * string, this is only safe when you know the pointer you are passing - * is already managed the system - */ - char const *ref(char const *ptr); - - /* this will use the provided memory, assuming it is a fresh string that - * is yet to be added; the memory must be allocated with alloc_buf() - */ - char const *steal(char *ptr); - - /* decrements the reference count and removes it from the system if - * that reaches zero; likewise, only safe with pointers that are managed - */ - void unref(char const *ptr); - - /* just finds a managed pointer with the same contents - * as the input, if not found then a null pointer is returned - */ - char const *find(std::string_view str) const; - - /* a quick helper to make a proper string view out of a ptr */ - std::string_view get(char const *ptr) const; - - /* this will allocate a buffer of the given length (plus one for - * terminating zero) so you can fill it; use steal() to write it - */ - char *alloc_buf(std::size_t len) const; - - cs_shared_state *cstate; - std::unordered_map< - std::string_view, cs_strref_state *, - std::hash, - std::equal_to, - allocator_type - > counts; -}; - } /* namespace cscript */ #endif /* LIBCUBESCRIPT_CS_UTIL_HH */ diff --git a/src/lib_str.cc b/src/lib_str.cc index a1b6a96..4f7c923 100644 --- a/src/lib_str.cc +++ b/src/lib_str.cc @@ -5,6 +5,7 @@ #include "cs_util.hh" #include "cs_std.hh" +#include "cs_strman.hh" namespace cscript { diff --git a/src/meson.build b/src/meson.build index 6702cf0..6bc8cdd 100644 --- a/src/meson.build +++ b/src/meson.build @@ -9,6 +9,7 @@ libcubescript_src = [ 'cs_gen.cc', 'cs_ident.cc', 'cs_state.cc', + 'cs_strman.cc', 'cs_util.cc', 'cs_val.cc', 'cs_vm.cc',