initial implementation of unique/interned string manager

master
Daniel Kolesa 2021-03-16 00:44:25 +01:00
parent 6018f6b6c2
commit dcae5b30b2
6 changed files with 205 additions and 1 deletions

View File

@ -41,6 +41,8 @@ enum {
};
struct cs_bcode;
struct cs_value;
struct cs_shared_state;
struct OSTD_EXPORT cs_bcode_ref {
cs_bcode_ref():
@ -68,6 +70,28 @@ private:
OSTD_EXPORT bool cs_code_is_empty(cs_bcode *code);
/* a reference-counted handle to a string interned in a shared state's
 * string manager; constructing or copying a handle takes a reference
 * on the managed string and destroying the handle drops one
 */
struct OSTD_EXPORT cs_strref {
friend struct cs_value;
/* there is no empty handle: default construction is disabled */
cs_strref() = delete;
/* interns str in the string manager of cs and refers to the result */
cs_strref(cs_shared_state &cs, ostd::string_range str);
/* shares the string held by ref, taking one more reference to it */
cs_strref(cs_strref const &ref);
/* drops the reference held by this handle */
~cs_strref();
/* makes this handle refer to the same managed string as ref */
cs_strref &operator=(cs_strref const &ref);
/* views the managed string as a range sized from its stored length */
operator ostd::string_range() const;
private:
/* for internal use only; p must already be a string managed by cs,
 * a reference is taken on it
 */
cs_strref(char const *p, cs_shared_state &cs);
/* pointer to the managed string data */
char const *p_str;
/* the shared state whose string manager is used for p_str */
cs_shared_state *p_state;
};
/* value type tags; numbering starts at zero with Null and continues
 * in declaration order
 */
enum class cs_value_type {
    Null = 0,
    Int,
    Float,
    String,
    Cstring,
    Code,
    Macro,
    Ident
};
@ -121,7 +145,6 @@ struct cs_ident_stack {
cs_ident_stack *next;
};
struct cs_shared_state;
struct cs_error;
struct cs_gen_state;
@ -342,6 +365,7 @@ static inline void *cs_default_alloc(void *, void *p, size_t, size_t ns) {
struct OSTD_EXPORT cs_state {
friend struct cs_error;
friend struct cs_strman;
friend struct cs_gen_state;
cs_shared_state *p_state;

View File

@ -1,5 +1,6 @@
#include <cubescript/cubescript.hh>
#include "cs_util.hh"
#include "cs_vm.hh"
#include <ctype.h>
#include <math.h>
@ -183,6 +184,85 @@ done:
return ret;
}
/* string manager */
/* given a pointer to managed string data, yields the bookkeeping header
 * that sits immediately in front of it in the same allocation
 */
inline cs_strref_state *get_ref_state(char const *ptr) {
    auto const *past_header = reinterpret_cast<cs_strref_state const *>(ptr);
    return const_cast<cs_strref_state *>(past_header - 1);
}
/* interns str and returns a pointer to the managed, null-terminated copy
 *
 * if a live entry with the same contents exists, its reference count is
 * incremented and the existing pointer is returned; otherwise a new block
 * holding the header, the characters and a terminator is allocated with
 * a reference count of one
 *
 * the map key always views the managed storage and never the caller's
 * buffer, so it remains valid after the caller's string goes away
 */
char const *cs_strman::add(ostd::string_range str) {
    auto it = counts.find(str);
    if (it != counts.end()) {
        auto *st = it->second;
        /* having a null pointer is the same as non-existence */
        if (st) {
            /* already present: just increment ref */
            ++st->refcount;
            return reinterpret_cast<char const *>(st + 1);
        }
        /* stale entry whose storage is gone already; its key cannot be
         * reused, so drop it and register a fresh entry below
         */
        counts.erase(it);
    }
    /* not present: allocate brand new data */
    auto ss = str.size();
    auto mem = cstate->alloc(nullptr, 0, ss + sizeof(cs_strref_state) + 1);
    /* NOTE: allocation failure is not handled yet */
    /*if (!mem) {
        cstate->panic();
    }*/
    /* write length and refcount */
    auto *sst = static_cast<cs_strref_state *>(mem);
    sst->length = ss;
    sst->refcount = 1;
    /* write string data; skip the copy for empty input, whose data
     * pointer is not required to be valid
     */
    auto *strp = reinterpret_cast<char *>(sst + 1);
    if (ss) {
        memcpy(strp, str.data(), ss);
    }
    /* terminated for best compatibility */
    strp[ss] = '\0';
    /* store it, keyed on the managed copy rather than the input */
    char const *key = strp;
    counts.emplace(ostd::string_range{key, key + ss}, sst);
    return strp;
}
/* takes one more reference on an already managed string and hands the
 * same pointer back
 */
char const *cs_strman::ref(char const *ptr) {
    get_ref_state(ptr)->refcount += 1;
    return ptr;
}
/* drops one reference on a managed string; once the count reaches zero
 * the entry is removed from the map and the storage is released
 *
 * the entry has to go away together with the storage, because its key
 * views the string data and would otherwise dangle
 */
void cs_strman::unref(char const *ptr) {
    auto *ss = get_ref_state(ptr);
    if (--ss->refcount) {
        /* still referenced elsewhere */
        return;
    }
    /* refcount zero, so ditch it
     * this path is a little slow...
     */
    auto len = ss->length;
    auto it = counts.find(ostd::string_range{ptr, ptr + len});
    if (it != counts.end()) {
        /* erase before freeing, while the key contents are still valid */
        counts.erase(it);
    }
    /* a missing entry is an internal error that should *never* happen;
     * there is nothing to erase then, but the storage is still released
     */
    //cstate->panic();
    /* dealloc */
    cstate->alloc(ss, len + sizeof(cs_strref_state) + 1, 0);
}
/* looks up a managed string with the same contents as str without
 * touching any reference count; returns a null pointer when there is
 * no such string
 *
 * an entry holding a null header counts as non-existent, so it yields
 * a null pointer too instead of an address computed from null
 */
char const *cs_strman::find(ostd::string_range str) const {
    auto it = counts.find(str);
    if ((it == counts.end()) || !it->second) {
        return nullptr;
    }
    return reinterpret_cast<char const *>(it->second + 1);
}
/* builds a range over a managed string, using the length recorded in
 * the header in front of the data
 */
ostd::string_range cs_strman::get(char const *ptr) const {
    auto len = get_ref_state(ptr)->length;
    return ostd::string_range{ptr, ptr + len};
}
namespace util {
OSTD_EXPORT ostd::string_range parse_string(
cs_state &cs, ostd::string_range str, size_t &nlines

View File

@ -38,6 +38,70 @@ inline void cs_do_and_cleanup(F1 &&dof, F2 &&clf) {
dof();
}
struct cs_shared_state;
/* string manager
*
* the purpose of this is to handle interning of strings; each string within
* a libcs state is represented (and allocated) exactly once, and reference
* counted; that both helps save resources, and potentially provides a means
* to reliably represent returned strings in a way that is compatible with
* multiple threads and eliminates the chance of dangling pointers
*
* strings are allocated in a manner where the refcount and length are stored
* as a part of the string's memory, so they can be easily accessed using just
* the pointer to the string, while staying transparent for usage
*
* this is not thread-safe yet, and later on it should be made so,
* for now we don't bother...
*/
/* bookkeeping header stored directly in front of each managed string */
struct cs_strref_state {
/* number of characters in the string, not counting the terminator */
size_t length;
/* number of references currently held on the string */
size_t refcount;
};
/* the string manager itself; it is bound to one shared state, whose
 * allocator it uses for string storage, and can be neither copied nor moved
 */
struct cs_strman {
cs_strman() = delete;
/* binds the manager to the shared state used for allocation */
cs_strman(cs_shared_state *cs): cstate{cs} {}
~cs_strman() {}
cs_strman(cs_strman const &) = delete;
cs_strman(cs_strman &&) = delete;
cs_strman &operator=(cs_strman const &) = delete;
cs_strman &operator=(cs_strman &&) = delete;
/* adds a string into the manager using any source, and returns a managed
* version; this is "slow" as it has to hash the string and potentially
* allocate fresh memory for it, but is perfectly safe at any time
*/
char const *add(ostd::string_range str);
/* this simply increments the reference count of an existing managed
* string, this is only safe when you know the pointer you are passing
* is already managed by the system
*/
char const *ref(char const *ptr);
/* decrements the reference count and removes it from the system if
* that reaches zero; likewise, only safe with pointers that are managed
*/
void unref(char const *ptr);
/* just finds a managed pointer with the same contents
* as the input, if not found then a null pointer is returned
*/
char const *find(ostd::string_range str) const;
/* a quick helper to make a proper ostd string range out of a ptr */
ostd::string_range get(char const *ptr) const;
/* the shared state whose allocator provides the string storage */
cs_shared_state *cstate;
/* maps string contents to the header of the managed string */
/* FIXME: use main allocator */
std::unordered_map<ostd::string_range, cs_strref_state *> counts{};
};
} /* namespace cscript */
#endif /* LIBCUBESCRIPT_CS_UTIL_HH */

View File

@ -6,6 +6,39 @@
namespace cscript {
cs_strref::cs_strref(cs_shared_state &cs, ostd::string_range str):
p_state{&cs}
{
p_str = cs.strman->add(str);
}
/* copying shares the managed string and takes one more reference on it */
cs_strref::cs_strref(cs_strref const &ref):
    p_str{ref.p_state->strman->ref(ref.p_str)}, p_state{ref.p_state}
{}
/* this can be used by friends to do quick cs_strref creation */
cs_strref::cs_strref(char const *p, cs_shared_state &cs):
p_state{&cs}
{
p_str = p_state->strman->ref(p);
}
/* gives back the reference this handle holds on its string */
cs_strref::~cs_strref() {
    auto *mgr = p_state->strman;
    mgr->unref(p_str);
}
/* makes this handle refer to the same managed string as ref
 *
 * the reference previously held by this handle is released, otherwise it
 * would never be dropped and the old string could not be freed; the new
 * reference is taken first, so that self-assignment (or assignment from a
 * handle sharing the same string) never lets the count reach zero midway
 */
cs_strref &cs_strref::operator=(cs_strref const &ref) {
    /* acquire the incoming string before letting go of the current one */
    char const *incoming = ref.p_state->strman->ref(ref.p_str);
    cs_shared_state *incoming_state = ref.p_state;
    p_state->strman->unref(p_str);
    p_str = incoming;
    p_state = incoming_state;
    return *this;
}
/* exposes the managed string as a range; the string manager supplies
 * the length from the string's header
 */
cs_strref::operator ostd::string_range() const {
    auto const *mgr = p_state->strman;
    return mgr->get(p_str);
}
struct cs_cmd_internal {
static void call(
cs_state &cs, cs_command *c, cs_value_r args, cs_value &ret

View File

@ -97,6 +97,7 @@ struct cs_shared_state {
cs_map<ostd::string_range, cs_ident *> idents;
cs_vector<cs_ident *> identmap;
cs_alloc_cb allocf;
cs_strman *strman;
void *aptr;
void *alloc(void *ptr, size_t os, size_t ns) {

View File

@ -315,6 +315,7 @@ cs_state::cs_state(cs_alloc_cb func, void *data):
/* set up allocator, from now we can call into alloc() */
p_state->allocf = func;
p_state->aptr = data;
p_state->strman = p_state->create<cs_strman>(p_state);
for (int i = 0; i < MaxArguments; ++i) {
char buf[32];
@ -432,6 +433,7 @@ OSTD_EXPORT void cs_state::destroy() {
}
p_state->destroy(i);
}
p_state->destroy(p_state->strman);
p_state->destroy(p_state);
}