forked from OctaForge/libostd
optimize hashtable to use small ranges (but larger nodes)
parent
3613af3daa
commit
453b463ce6
|
@ -19,6 +19,7 @@ namespace ostd {
|
||||||
namespace detail {
|
namespace detail {
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct HashChain {
|
struct HashChain {
|
||||||
|
HashChain<T> *prev;
|
||||||
HashChain<T> *next;
|
HashChain<T> *next;
|
||||||
T value;
|
T value;
|
||||||
};
|
};
|
||||||
|
@ -46,36 +47,19 @@ private:
|
||||||
friend struct HashRange;
|
friend struct HashRange;
|
||||||
|
|
||||||
using Chain = detail::HashChain<T>;
|
using Chain = detail::HashChain<T>;
|
||||||
|
|
||||||
Chain **p_beg;
|
|
||||||
Chain **p_end;
|
|
||||||
Chain *p_node;
|
Chain *p_node;
|
||||||
|
|
||||||
void advance() {
|
|
||||||
while ((p_beg != p_end) && !p_beg[0])
|
|
||||||
++p_beg;
|
|
||||||
if (p_beg != p_end) p_node = p_beg[0];
|
|
||||||
}
|
|
||||||
public:
|
public:
|
||||||
HashRange(): p_beg(nullptr), p_end(nullptr), p_node(nullptr) {}
|
HashRange(): p_node(nullptr) {}
|
||||||
HashRange(const HashRange &v): p_beg(v.p_beg), p_end(v.p_end),
|
HashRange(const HashRange &v): p_node(v.p_node) {}
|
||||||
p_node(v.p_node) {}
|
HashRange(Chain *node): p_node(node) {}
|
||||||
HashRange(Chain **beg, Chain **end): p_beg(beg), p_end(end), p_node() {
|
|
||||||
advance();
|
|
||||||
}
|
|
||||||
HashRange(Chain **beg, Chain **end, Chain *node): p_beg(beg), p_end(end),
|
|
||||||
p_node(node) {}
|
|
||||||
|
|
||||||
template<typename U>
|
template<typename U>
|
||||||
HashRange(const HashRange<U> &v, EnableIf<
|
HashRange(const HashRange<U> &v, EnableIf<
|
||||||
IsSame<RemoveCv<T>, RemoveCv<U>>::value &&
|
IsSame<RemoveCv<T>, RemoveCv<U>>::value &&
|
||||||
IsConvertible<U *, T *>::value, bool
|
IsConvertible<U *, T *>::value, bool
|
||||||
> = true): p_beg((Chain **)v.p_beg), p_end((Chain **)v.p_end),
|
> = true): p_node((Chain *)v.p_node) {}
|
||||||
p_node((Chain *)v.p_node) {}
|
|
||||||
|
|
||||||
HashRange &operator=(const HashRange &v) {
|
HashRange &operator=(const HashRange &v) {
|
||||||
p_beg = v.p_beg;
|
|
||||||
p_end = v.p_end;
|
|
||||||
p_node = v.p_node;
|
p_node = v.p_node;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -85,9 +69,6 @@ public:
|
||||||
bool pop_front() {
|
bool pop_front() {
|
||||||
if (!p_node) return false;
|
if (!p_node) return false;
|
||||||
p_node = p_node->next;
|
p_node = p_node->next;
|
||||||
if (p_node) return true;
|
|
||||||
++p_beg;
|
|
||||||
advance();
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,27 +86,28 @@ private:
|
||||||
friend struct BucketRange;
|
friend struct BucketRange;
|
||||||
|
|
||||||
using Chain = detail::HashChain<T>;
|
using Chain = detail::HashChain<T>;
|
||||||
Chain *p_node;
|
Chain *p_node, *p_end;
|
||||||
public:
|
public:
|
||||||
BucketRange(): p_node(nullptr) {}
|
BucketRange(): p_node(nullptr) {}
|
||||||
BucketRange(Chain *node): p_node(node) {}
|
BucketRange(Chain *node, Chain *end): p_node(node), p_end(end) {}
|
||||||
BucketRange(const BucketRange &v): p_node(v.p_node) {}
|
BucketRange(const BucketRange &v): p_node(v.p_node), p_end(v.p_end) {}
|
||||||
|
|
||||||
template<typename U>
|
template<typename U>
|
||||||
BucketRange(const BucketRange<U> &v, EnableIf<
|
BucketRange(const BucketRange<U> &v, EnableIf<
|
||||||
IsSame<RemoveCv<T>, RemoveCv<U>>::value &&
|
IsSame<RemoveCv<T>, RemoveCv<U>>::value &&
|
||||||
IsConvertible<U *, T *>::value, bool
|
IsConvertible<U *, T *>::value, bool
|
||||||
> = true): p_node((Chain *)v.p_node) {}
|
> = true): p_node((Chain *)v.p_node), p_end((Chain *)v.p_end) {}
|
||||||
|
|
||||||
BucketRange &operator=(const BucketRange &v) {
|
BucketRange &operator=(const BucketRange &v) {
|
||||||
p_node = v.p_node;
|
p_node = v.p_node;
|
||||||
|
p_end = v.p_end;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool empty() const { return !p_node; }
|
bool empty() const { return p_node == p_end; }
|
||||||
|
|
||||||
bool pop_front() {
|
bool pop_front() {
|
||||||
if (!p_node) return false;
|
if (p_node == p_end) return false;
|
||||||
p_node = p_node->next;
|
p_node = p_node->next;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -182,27 +164,37 @@ private:
|
||||||
|
|
||||||
float p_maxlf;
|
float p_maxlf;
|
||||||
|
|
||||||
Range iter_from(Chain *c, Size h) {
|
Chain *find(const K &key, Size &h) const {
|
||||||
return Range(p_data.first() + h + 1,
|
if (!p_size) return nullptr;
|
||||||
p_data.first() + bucket_count(), c);
|
h = get_hash()(key) & (p_size - 1);
|
||||||
}
|
Chain **cp = p_data.first();
|
||||||
ConstRange iter_from(Chain *c, Size h) const {
|
for (Chain *c = cp[h], *e = cp[h + 1]; c != e; c = c->next)
|
||||||
using RChain = detail::HashChain<const E>;
|
if (get_eq()(key, B::get_key(c->value)))
|
||||||
return ConstRange((RChain **)(p_data.first() + h + 1),
|
return c;
|
||||||
(RChain **)(p_data.first() + bucket_count()),
|
return nullptr;
|
||||||
(RChain *)c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool find(const K &key, Size &h, Chain *&oc) const {
|
Chain *insert_node(Size h, Chain *c) {
|
||||||
if (!p_size) return false;
|
Chain **cp = p_data.first();
|
||||||
h = get_hash()(key) & (p_size - 1);
|
Chain *it = cp[h + 1];
|
||||||
for (Chain *c = p_data.first()[h]; c; c = c->next) {
|
c->next = it;
|
||||||
if (get_eq()(key, B::get_key(c->value))) {
|
if (it) {
|
||||||
oc = c;
|
c->prev = it->prev;
|
||||||
return true;
|
it->prev = c;
|
||||||
}
|
if (c->prev) c->prev->next = c;
|
||||||
|
} else {
|
||||||
|
size_t nb = h;
|
||||||
|
while (nb && !cp[nb]) --nb;
|
||||||
|
Chain *prev = cp[nb];
|
||||||
|
while (prev && prev->next) prev = prev->next;
|
||||||
|
c->prev = prev;
|
||||||
|
if (prev) prev->next = c;
|
||||||
}
|
}
|
||||||
return false;
|
for (; it == cp[h]; --h) {
|
||||||
|
cp[h] = c;
|
||||||
|
if (!h) break;
|
||||||
|
}
|
||||||
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
Chain *insert(Size h) {
|
Chain *insert(Size h) {
|
||||||
|
@ -216,12 +208,10 @@ private:
|
||||||
chunk->chains[CHUNKSIZE - 1].next = p_unused;
|
chunk->chains[CHUNKSIZE - 1].next = p_unused;
|
||||||
p_unused = chunk->chains;
|
p_unused = chunk->chains;
|
||||||
}
|
}
|
||||||
|
++p_len;
|
||||||
Chain *c = p_unused;
|
Chain *c = p_unused;
|
||||||
p_unused = p_unused->next;
|
p_unused = p_unused->next;
|
||||||
c->next = p_data.first()[h];
|
return insert_node(h, c);
|
||||||
p_data.first()[h] = c;
|
|
||||||
++p_len;
|
|
||||||
return c;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void delete_chunks(Chunk *chunks) {
|
void delete_chunks(Chunk *chunks) {
|
||||||
|
@ -234,12 +224,9 @@ private:
|
||||||
|
|
||||||
T *access_base(const K &key, Size &h) const {
|
T *access_base(const K &key, Size &h) const {
|
||||||
if (!p_size) return NULL;
|
if (!p_size) return NULL;
|
||||||
h = get_hash()(key) & (p_size - 1);
|
Chain *c = find(key, h);
|
||||||
for (Chain *c = p_data.first()[h]; c; c = c->next) {
|
if (c) return &B::get_data(c->value);
|
||||||
if (get_eq()(key, B::get_key(c->value)))
|
return nullptr;
|
||||||
return &B::get_data(c->value);
|
|
||||||
}
|
|
||||||
return NULL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void rehash_ahead(Size n) {
|
void rehash_ahead(Size n) {
|
||||||
|
@ -330,13 +317,11 @@ protected:
|
||||||
p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1);
|
p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1);
|
||||||
memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *));
|
memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *));
|
||||||
Chain **och = ht.p_data.first();
|
Chain **och = ht.p_data.first();
|
||||||
for (Size h = 0; h < p_size; ++h) {
|
for (Chain *oc = *och; oc; oc = oc->next) {
|
||||||
Chain *oc = och[h];
|
Size h = get_hash()(B::get_key(oc->value)) & (p_size - 1);
|
||||||
for (; oc; oc = oc->next) {
|
Chain *nc = insert(h);
|
||||||
Chain *nc = insert(h);
|
allocator_destroy(get_alloc(), &nc->value);
|
||||||
allocator_destroy(get_alloc(), &nc->value);
|
allocator_construct(get_alloc(), &nc->value, oc->value);
|
||||||
allocator_construct(get_alloc(), &nc->value, oc->value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -371,12 +356,10 @@ protected:
|
||||||
p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1);
|
p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1);
|
||||||
memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *));
|
memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *));
|
||||||
Chain **och = ht.p_data.first();
|
Chain **och = ht.p_data.first();
|
||||||
for (Size h = 0; h < p_size; ++h) {
|
for (Chain *oc = *och; oc; oc = oc->next) {
|
||||||
Chain *oc = och[h];
|
Size h = get_hash()(B::get_key(oc->value)) & (p_size - 1);
|
||||||
for (; oc; oc = oc->next) {
|
Chain *nc = insert(h);
|
||||||
Chain *nc = insert(h);
|
B::swap_elem(oc->value, nc->value);
|
||||||
B::swap_elem(oc->value, nc->value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -467,9 +450,8 @@ public:
|
||||||
Size bucket_size(Size n) const {
|
Size bucket_size(Size n) const {
|
||||||
Size ret = 0;
|
Size ret = 0;
|
||||||
if (ret >= p_size) return ret;
|
if (ret >= p_size) return ret;
|
||||||
Chain *c = p_data.first()[n];
|
Chain **cp = p_data.first();
|
||||||
if (!c) return ret;
|
for (Chain *c = cp[n], *e = cp[n + 1]; c != e; c = c->next)
|
||||||
for (; c; c = c->next)
|
|
||||||
++ret;
|
++ret;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -483,13 +465,12 @@ public:
|
||||||
/* multihash: always insert */
|
/* multihash: always insert */
|
||||||
Chain *ch = insert(h);
|
Chain *ch = insert(h);
|
||||||
B::swap_elem(ch->value, elem);
|
B::swap_elem(ch->value, elem);
|
||||||
Chain **hch = p_data.first();
|
return make_pair(Range(ch), true);
|
||||||
return make_pair(Range(hch + h + 1, hch + bucket_count(),
|
|
||||||
ch), true);
|
|
||||||
}
|
}
|
||||||
Chain *found = nullptr;
|
Chain *found = nullptr;
|
||||||
bool ins = true;
|
bool ins = true;
|
||||||
for (Chain *c = p_data.first()[h]; c; c = c->next) {
|
Chain **cp = p_data.first();
|
||||||
|
for (Chain *c = cp[h], *e = cp[h + 1]; c != e; c = c->next) {
|
||||||
if (get_eq()(B::get_key(elem), B::get_key(c->value))) {
|
if (get_eq()(B::get_key(elem), B::get_key(c->value))) {
|
||||||
found = c;
|
found = c;
|
||||||
ins = false;
|
ins = false;
|
||||||
|
@ -500,57 +481,54 @@ public:
|
||||||
found = insert(h);
|
found = insert(h);
|
||||||
B::swap_elem(found->value, elem);
|
B::swap_elem(found->value, elem);
|
||||||
}
|
}
|
||||||
Chain **hch = p_data.first();
|
return make_pair(Range(found), ins);
|
||||||
return make_pair(Range(hch + h + 1, hch + bucket_count(),
|
|
||||||
found), ins);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Size erase(const K &key) {
|
Size erase(const K &key) {
|
||||||
if (!p_len) return 0;
|
if (!p_len) return 0;
|
||||||
Size olen = p_len;
|
Size olen = p_len;
|
||||||
Size h = get_hash()(key) & (p_size - 1);
|
Size h = get_hash()(key) & (p_size - 1);
|
||||||
Chain **p = &p_data.first()[h], *c = *p;
|
Chain **cp = p_data.first();
|
||||||
while (c) {
|
for (Chain *c = cp[h], *e = cp[h + 1]; c != e; c = c->next)
|
||||||
if (get_eq()(key, B::get_key(c->value))) {
|
if (get_eq()(key, B::get_key(c->value))) {
|
||||||
--p_len;
|
--p_len;
|
||||||
*p = c->next;
|
Size hh = h;
|
||||||
|
Chain *next = c->next;
|
||||||
|
for (; cp[hh] == c; --hh) {
|
||||||
|
cp[hh] = next;
|
||||||
|
if (!hh) break;
|
||||||
|
}
|
||||||
|
if (c->prev) c->prev->next = next;
|
||||||
|
if (next) next->prev = c->prev;
|
||||||
c->next = p_unused;
|
c->next = p_unused;
|
||||||
|
c->prev = nullptr;
|
||||||
p_unused = c;
|
p_unused = c;
|
||||||
allocator_destroy(get_alloc(), &c->value);
|
allocator_destroy(get_alloc(), &c->value);
|
||||||
allocator_construct(get_alloc(), &c->value);
|
allocator_construct(get_alloc(), &c->value);
|
||||||
if (!Multihash) return 1;
|
if (!Multihash) return 1;
|
||||||
} else {
|
|
||||||
p = &c->next;
|
|
||||||
}
|
}
|
||||||
c = *p;
|
|
||||||
}
|
|
||||||
return olen - p_len;
|
return olen - p_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
Size count(const K &key) {
|
Size count(const K &key) {
|
||||||
if (!p_len) return 0;
|
Size h = 0;
|
||||||
Size h = get_hash()(key) & (p_size - 1);
|
Chain *c = find(key, h);
|
||||||
Size ret = 0;
|
if (!c) return 0;
|
||||||
for (Chain *c = p_data.first()[h]; c; c = c->next)
|
Size ret = 1;
|
||||||
if (get_eq()(key, B::get_key(c->value))) {
|
if (!Multihash) return ret;
|
||||||
++ret;
|
for (c = c->next; c; c = c->next)
|
||||||
if (!Multihash) break;
|
if (get_eq()(key, B::get_key(c->value))) ++ret;
|
||||||
}
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
Range find(const K &key) {
|
Range find(const K &key) {
|
||||||
Size h = 0;
|
Size h = 0;
|
||||||
Chain *c;
|
return Range(find(key, h));
|
||||||
if (find(key, h, c)) return iter_from(c, h);
|
|
||||||
return Range();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ConstRange find(const K &key) const {
|
ConstRange find(const K &key) const {
|
||||||
Size h = 0;
|
Size h = 0;
|
||||||
Chain *c;
|
return ConstRange((detail::HashChain<const E> *)find(key, h));
|
||||||
if (find(key, h, c)) return iter_from(c, h);
|
|
||||||
return ConstRange();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
float load_factor() const { return float(p_len) / p_size; }
|
float load_factor() const { return float(p_len) / p_size; }
|
||||||
|
@ -569,18 +547,17 @@ public:
|
||||||
Size osize = p_size;
|
Size osize = p_size;
|
||||||
p_size = count;
|
p_size = count;
|
||||||
|
|
||||||
for (Size i = 0; i < osize; ++i) {
|
Chain *p = och ? *och : nullptr;
|
||||||
for (Chain *oc = och[i]; oc;) {
|
while (p) {
|
||||||
Size h = get_hash()(B::get_key(oc->value)) & (p_size - 1);
|
Chain *pp = p->next;
|
||||||
Chain *nxc = oc->next;
|
Size h = get_hash()(B::get_key(p->value)) & (p_size - 1);
|
||||||
oc->next = nch[h];
|
p->prev = p->next = nullptr;
|
||||||
nch[h] = oc;
|
insert_node(h, p);
|
||||||
oc = nxc;
|
p = pp;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (och && osize) allocator_deallocate(get_cpalloc(),
|
if (och && osize) allocator_deallocate(get_cpalloc(),
|
||||||
och, osize);
|
och, osize + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void reserve(Size count) {
|
void reserve(Size count) {
|
||||||
|
@ -588,32 +565,35 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
Range iter() {
|
Range iter() {
|
||||||
return Range(p_data.first(), p_data.first() + bucket_count());
|
if (!p_len) return Range();
|
||||||
|
return Range(*p_data.first());
|
||||||
}
|
}
|
||||||
ConstRange iter() const {
|
ConstRange iter() const {
|
||||||
using Chain = detail::HashChain<const E>;
|
using Chain = detail::HashChain<const E>;
|
||||||
return ConstRange((Chain **)p_data.first(),
|
if (!p_len) return ConstRange();
|
||||||
(Chain **)(p_data.first() + bucket_count()));
|
return ConstRange((Chain *)*p_data.first());
|
||||||
}
|
}
|
||||||
ConstRange citer() const {
|
ConstRange citer() const {
|
||||||
using Chain = detail::HashChain<const E>;
|
using Chain = detail::HashChain<const E>;
|
||||||
return ConstRange((Chain **)p_data.first(),
|
if (!p_len) return ConstRange();
|
||||||
(Chain **)(p_data.first() + bucket_count()));
|
return ConstRange((Chain *)*p_data.first());
|
||||||
}
|
}
|
||||||
|
|
||||||
LocalRange iter(Size n) {
|
LocalRange iter(Size n) {
|
||||||
if (n >= p_size) return LocalRange();
|
if (n >= p_size) return LocalRange();
|
||||||
return LocalRange(p_data.first()[n]);
|
return LocalRange(p_data.first()[n], p_data.first()[n + 1]);
|
||||||
}
|
}
|
||||||
ConstLocalRange iter(Size n) const {
|
ConstLocalRange iter(Size n) const {
|
||||||
using Chain = detail::HashChain<const E>;
|
using Chain = detail::HashChain<const E>;
|
||||||
if (n >= p_size) return ConstLocalRange();
|
if (n >= p_size) return ConstLocalRange();
|
||||||
return ConstLocalRange((Chain *)p_data.first()[n]);
|
return ConstLocalRange((Chain *)p_data.first()[n],
|
||||||
|
(Chain *)p_data.first()[n + 1]);
|
||||||
}
|
}
|
||||||
ConstLocalRange citer(Size n) const {
|
ConstLocalRange citer(Size n) const {
|
||||||
using Chain = detail::HashChain<const E>;
|
using Chain = detail::HashChain<const E>;
|
||||||
if (n >= p_size) return ConstLocalRange();
|
if (n >= p_size) return ConstLocalRange();
|
||||||
return ConstLocalRange((Chain *)p_data.first()[n]);
|
return ConstLocalRange((Chain *)p_data.first()[n],
|
||||||
|
(Chain *)p_data.first()[n + 1]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} /* namespace detail */
|
} /* namespace detail */
|
||||||
|
|
Loading…
Reference in New Issue