initial implementation of unique/interned string manager
parent
6018f6b6c2
commit
dcae5b30b2
|
@ -41,6 +41,8 @@ enum {
|
|||
};
|
||||
|
||||
struct cs_bcode;
|
||||
struct cs_value;
|
||||
struct cs_shared_state;
|
||||
|
||||
struct OSTD_EXPORT cs_bcode_ref {
|
||||
cs_bcode_ref():
|
||||
|
@ -68,6 +70,28 @@ private:
|
|||
|
||||
OSTD_EXPORT bool cs_code_is_empty(cs_bcode *code);
|
||||
|
||||
/* A reference-counted handle to a string interned in a shared state's
 * string manager. Constructing or copying a handle takes a reference on
 * the managed string and destroying it drops one, all through the
 * cs_strman owned by the associated cs_shared_state.
 */
struct OSTD_EXPORT cs_strref {
|
||||
friend struct cs_value;
|
||||
|
||||
/* a handle always refers to a managed string, so there is no empty state */
cs_strref() = delete;
|
||||
/* interns `str` in the string manager of `cs` and holds a reference to it */
cs_strref(cs_shared_state &cs, ostd::string_range str);
|
||||
|
||||
/* shares the string held by `ref`, incrementing its reference count */
cs_strref(cs_strref const &ref);
|
||||
|
||||
/* drops this handle's reference on the managed string */
~cs_strref();
|
||||
|
||||
/* makes this handle refer to the string (and state) held by `ref` */
cs_strref &operator=(cs_strref const &ref);
|
||||
|
||||
/* implicit view of the managed characters as a string range */
operator ostd::string_range() const;
|
||||
|
||||
private:
|
||||
/* for internal use only: `p` must already be a pointer managed by the
 * string manager of `cs`; only its reference count is incremented
 */
cs_strref(char const *p, cs_shared_state &cs);
|
||||
|
||||
/* pointer to the managed character data */
char const *p_str;
|
||||
/* shared state whose string manager owns p_str */
cs_shared_state *p_state;
|
||||
};
|
||||
|
||||
/* Type tags for values (presumably the kinds a cs_value can hold; confirm
 * against cs_value). Null is explicitly 0 and the remaining enumerators
 * follow sequentially, so Ident is 7.
 */
enum class cs_value_type {
|
||||
Null = 0, Int, Float, String, Cstring, Code, Macro, Ident
|
||||
};
|
||||
|
@ -121,7 +145,6 @@ struct cs_ident_stack {
|
|||
cs_ident_stack *next;
|
||||
};
|
||||
|
||||
struct cs_shared_state;
|
||||
struct cs_error;
|
||||
struct cs_gen_state;
|
||||
|
||||
|
@ -342,6 +365,7 @@ static inline void *cs_default_alloc(void *, void *p, size_t, size_t ns) {
|
|||
|
||||
struct OSTD_EXPORT cs_state {
|
||||
friend struct cs_error;
|
||||
friend struct cs_strman;
|
||||
friend struct cs_gen_state;
|
||||
|
||||
cs_shared_state *p_state;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#include <cubescript/cubescript.hh>
|
||||
#include "cs_util.hh"
|
||||
#include "cs_vm.hh"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
|
@ -183,6 +184,85 @@ done:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* string manager */
|
||||
|
||||
/* Maps a managed string pointer back to its bookkeeping header.
 *
 * The string manager places a cs_strref_state directly in front of the
 * character data it hands out, so the header is found by stepping back one
 * cs_strref_state from `ptr`. Only valid for pointers that came from the
 * string manager.
 */
inline cs_strref_state *get_ref_state(char const *ptr) {
    auto *past_header = reinterpret_cast<cs_strref_state const *>(ptr);
    return const_cast<cs_strref_state *>(past_header) - 1;
}
|
||||
|
||||
/* Interns `str` and returns a pointer to the managed, null-terminated copy.
 *
 * If an equal string is already alive, its reference count is incremented
 * and the existing pointer is returned. Otherwise one block is allocated
 * holding the cs_strref_state header, the characters and a terminator, and
 * it starts out with a reference count of 1.
 *
 * The map entry is keyed by a range over the managed copy rather than over
 * `str`: the caller's buffer may go away right after this call, which would
 * leave a key that views dead memory.
 */
char const *cs_strman::add(ostd::string_range str) {
    auto it = counts.find(str);
    if (it != counts.end()) {
        /* already present and alive: just increment ref */
        if (auto *st = it->second) {
            ++st->refcount;
            return reinterpret_cast<char const *>(st + 1);
        }
        /* having a null pointer is the same as non-existence; drop the
         * stale entry so the fresh one gets a key viewing the new memory
         */
        counts.erase(it);
    }
    /* not present: allocate brand new data */
    auto ss = str.size();
    auto mem = cstate->alloc(nullptr, 0, ss + sizeof(cs_strref_state) + 1);
    /* TODO: allocation failure is not handled yet
    if (!mem) {
        cstate->panic();
    }*/
    /* write length and refcount */
    auto *sst = static_cast<cs_strref_state *>(mem);
    sst->length = ss;
    sst->refcount = 1;
    /* write string data, terminated for best compatibility */
    auto *strp = reinterpret_cast<char *>(sst + 1);
    memcpy(strp, str.data(), ss);
    strp[ss] = '\0';
    /* store it, keyed by the managed characters */
    char const *key = strp;
    counts.emplace(ostd::string_range{key, key + ss}, sst);
    return strp;
}
|
||||
|
||||
char const *cs_strman::ref(char const *ptr) {
|
||||
auto *ss = get_ref_state(ptr);
|
||||
++ss->refcount;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void cs_strman::unref(char const *ptr) {
|
||||
auto *ss = get_ref_state(ptr);
|
||||
if (!--ss->refcount) {
|
||||
/* refcount zero, so ditch it
|
||||
* this path is a little slow...
|
||||
*/
|
||||
auto sr = ostd::string_range{ptr, ptr + ss->length};
|
||||
auto it = counts.find(sr);
|
||||
if (it == counts.end()) {
|
||||
/* internal error: this should *never* happen */
|
||||
//cstate->panic();
|
||||
}
|
||||
/* dealloc */
|
||||
cstate->alloc(ss, ss->length + sizeof(cs_strref_state) + 1, 0);
|
||||
/* set to null, which is okay
|
||||
* we keep the value around, in case the string ever reappears
|
||||
*/
|
||||
it->second = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Looks up a managed string with the same contents as `str` without
 * touching any reference count.
 *
 * Returns the managed pointer, or a null pointer when there is no live
 * string with these contents. An entry whose state pointer is null counts
 * as non-existent (add() treats it the same way); computing `state + 1` on
 * it would produce a bogus non-null pointer.
 */
char const *cs_strman::find(ostd::string_range str) const {
    auto it = counts.find(str);
    if ((it == counts.end()) || !it->second) {
        return nullptr;
    }
    return reinterpret_cast<char const *>(it->second + 1);
}
|
||||
|
||||
/* Builds a string range over a managed string, using the length recorded in
 * the header in front of it; `ptr` must have come from this string manager.
 */
ostd::string_range cs_strman::get(char const *ptr) const {
    auto const len = get_ref_state(ptr)->length;
    return ostd::string_range{ptr, ptr + len};
}
|
||||
|
||||
namespace util {
|
||||
OSTD_EXPORT ostd::string_range parse_string(
|
||||
cs_state &cs, ostd::string_range str, size_t &nlines
|
||||
|
|
|
@ -38,6 +38,70 @@ inline void cs_do_and_cleanup(F1 &&dof, F2 &&clf) {
|
|||
dof();
|
||||
}
|
||||
|
||||
struct cs_shared_state;
|
||||
|
||||
/* string manager
 *
 * the purpose of this is to handle interning of strings; each string within
 * a libcs state is represented (and allocated) exactly once, and reference
 * counted; that both helps save resources and potentially provides a means
 * to reliably represent returned strings in a way that is compatible with
 * multiple threads and eliminates the chance of dangling pointers
 *
 * strings are allocated in a manner where the refcount and length are stored
 * as a part of the string's memory, so they can be easily accessed using just
 * the pointer to the string, while remaining transparent to users
 *
 * this is not thread-safe yet; it should be made so later on,
 * for now we don't bother...
 */
|
||||
|
||||
/* Bookkeeping header that the string manager stores directly in front of
 * the character data of every managed string; the layout must not change,
 * since the header is located by stepping back from the string pointer.
 */
struct cs_strref_state {
|
||||
/* number of characters in the string, not counting the terminator */
size_t length;
|
||||
/* number of live references; the string is freed when this hits zero */
size_t refcount;
|
||||
};
|
||||
|
||||
struct cs_strman {
|
||||
cs_strman() = delete;
|
||||
cs_strman(cs_shared_state *cs): cstate{cs} {}
|
||||
~cs_strman() {}
|
||||
|
||||
cs_strman(cs_strman const &) = delete;
|
||||
cs_strman(cs_strman &&) = delete;
|
||||
|
||||
cs_strman &operator=(cs_strman const &) = delete;
|
||||
cs_strman &operator=(cs_strman &&) = delete;
|
||||
|
||||
/* adds a string into the manager using any source, and returns a managed
|
||||
* version; this is "slow" as it has to hash the string and potentially
|
||||
* allocate fresh memory for it, but is perfectly safe at any time
|
||||
*/
|
||||
char const *add(ostd::string_range str);
|
||||
|
||||
/* this simply increments the reference count of an existing managed
|
||||
* string, this is only safe when you know the pointer you are passing
|
||||
* is already managed the system
|
||||
*/
|
||||
char const *ref(char const *ptr);
|
||||
|
||||
/* decrements the reference count and removes it from the system if
|
||||
* that reaches zero; likewise, only safe with pointers that are managed
|
||||
*/
|
||||
void unref(char const *ptr);
|
||||
|
||||
/* just finds a managed pointer with the same contents
|
||||
* as the input, if not found then a null pointer is returned
|
||||
*/
|
||||
char const *find(ostd::string_range str) const;
|
||||
|
||||
/* a quick helper to make a proper ostd string range out of a ptr */
|
||||
ostd::string_range get(char const *ptr) const;
|
||||
|
||||
cs_shared_state *cstate;
|
||||
/* FIXME: use main allocator */
|
||||
std::unordered_map<ostd::string_range, cs_strref_state *> counts{};
|
||||
};
|
||||
|
||||
} /* namespace cscript */
|
||||
|
||||
#endif /* LIBCUBESCRIPT_CS_UTIL_HH */
|
||||
|
|
33
src/cs_vm.cc
33
src/cs_vm.cc
|
@ -6,6 +6,39 @@
|
|||
|
||||
namespace cscript {
|
||||
|
||||
cs_strref::cs_strref(cs_shared_state &cs, ostd::string_range str):
|
||||
p_state{&cs}
|
||||
{
|
||||
p_str = cs.strman->add(str);
|
||||
}
|
||||
|
||||
cs_strref::cs_strref(cs_strref const &ref): p_str{ref.p_str}, p_state{ref.p_state}
|
||||
{
|
||||
p_state->strman->ref(p_str);
|
||||
}
|
||||
|
||||
/* this can be used by friends to do quick cs_strref creation */
|
||||
cs_strref::cs_strref(char const *p, cs_shared_state &cs):
|
||||
p_state{&cs}
|
||||
{
|
||||
p_str = p_state->strman->ref(p);
|
||||
}
|
||||
|
||||
/* Gives back this handle's reference; the string manager frees the string
 * once nothing refers to it any more.
 */
cs_strref::~cs_strref() {
    auto *manager = p_state->strman;
    manager->unref(p_str);
}
|
||||
|
||||
/* Copy assignment: afterwards this handle refers to the string (and state)
 * held by `ref`.
 *
 * The previously held string is released, otherwise its reference would
 * leak. The new reference is taken before the old one is dropped, so
 * self-assignment, or assigning a handle to the same string, can never
 * drive the count to zero in between.
 */
cs_strref &cs_strref::operator=(cs_strref const &ref) {
    /* acquire first... */
    ref.p_state->strman->ref(ref.p_str);
    /* ...then let go of what we held so far */
    p_state->strman->unref(p_str);
    p_str = ref.p_str;
    p_state = ref.p_state;
    return *this;
}
|
||||
|
||||
/* Exposes the managed characters as a string range, as built by the string
 * manager from the stored pointer.
 */
cs_strref::operator ostd::string_range() const {
    auto *manager = p_state->strman;
    return manager->get(p_str);
}
|
||||
|
||||
struct cs_cmd_internal {
|
||||
static void call(
|
||||
cs_state &cs, cs_command *c, cs_value_r args, cs_value &ret
|
||||
|
|
|
@ -97,6 +97,7 @@ struct cs_shared_state {
|
|||
cs_map<ostd::string_range, cs_ident *> idents;
|
||||
cs_vector<cs_ident *> identmap;
|
||||
cs_alloc_cb allocf;
|
||||
cs_strman *strman;
|
||||
void *aptr;
|
||||
|
||||
void *alloc(void *ptr, size_t os, size_t ns) {
|
||||
|
|
|
@ -315,6 +315,7 @@ cs_state::cs_state(cs_alloc_cb func, void *data):
|
|||
/* set up allocator, from now we can call into alloc() */
|
||||
p_state->allocf = func;
|
||||
p_state->aptr = data;
|
||||
p_state->strman = p_state->create<cs_strman>(p_state);
|
||||
|
||||
for (int i = 0; i < MaxArguments; ++i) {
|
||||
char buf[32];
|
||||
|
@ -432,6 +433,7 @@ OSTD_EXPORT void cs_state::destroy() {
|
|||
}
|
||||
p_state->destroy(i);
|
||||
}
|
||||
p_state->destroy(p_state->strman);
|
||||
p_state->destroy(p_state);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue