optimize hashtable to use small ranges (but larger nodes)

master
Daniel Kolesa 2015-07-20 02:08:30 +01:00
parent 3613af3daa
commit 453b463ce6
1 changed files with 99 additions and 119 deletions

View File

@ -19,6 +19,7 @@ namespace ostd {
namespace detail { namespace detail {
template<typename T> template<typename T>
struct HashChain { struct HashChain {
HashChain<T> *prev;
HashChain<T> *next; HashChain<T> *next;
T value; T value;
}; };
@ -46,36 +47,19 @@ private:
friend struct HashRange; friend struct HashRange;
using Chain = detail::HashChain<T>; using Chain = detail::HashChain<T>;
Chain **p_beg;
Chain **p_end;
Chain *p_node; Chain *p_node;
void advance() {
while ((p_beg != p_end) && !p_beg[0])
++p_beg;
if (p_beg != p_end) p_node = p_beg[0];
}
public: public:
HashRange(): p_beg(nullptr), p_end(nullptr), p_node(nullptr) {} HashRange(): p_node(nullptr) {}
HashRange(const HashRange &v): p_beg(v.p_beg), p_end(v.p_end), HashRange(const HashRange &v): p_node(v.p_node) {}
p_node(v.p_node) {} HashRange(Chain *node): p_node(node) {}
HashRange(Chain **beg, Chain **end): p_beg(beg), p_end(end), p_node() {
advance();
}
HashRange(Chain **beg, Chain **end, Chain *node): p_beg(beg), p_end(end),
p_node(node) {}
template<typename U> template<typename U>
HashRange(const HashRange<U> &v, EnableIf< HashRange(const HashRange<U> &v, EnableIf<
IsSame<RemoveCv<T>, RemoveCv<U>>::value && IsSame<RemoveCv<T>, RemoveCv<U>>::value &&
IsConvertible<U *, T *>::value, bool IsConvertible<U *, T *>::value, bool
> = true): p_beg((Chain **)v.p_beg), p_end((Chain **)v.p_end), > = true): p_node((Chain *)v.p_node) {}
p_node((Chain *)v.p_node) {}
HashRange &operator=(const HashRange &v) { HashRange &operator=(const HashRange &v) {
p_beg = v.p_beg;
p_end = v.p_end;
p_node = v.p_node; p_node = v.p_node;
return *this; return *this;
} }
@ -85,9 +69,6 @@ public:
bool pop_front() { bool pop_front() {
if (!p_node) return false; if (!p_node) return false;
p_node = p_node->next; p_node = p_node->next;
if (p_node) return true;
++p_beg;
advance();
return true; return true;
} }
@ -105,27 +86,28 @@ private:
friend struct BucketRange; friend struct BucketRange;
using Chain = detail::HashChain<T>; using Chain = detail::HashChain<T>;
Chain *p_node; Chain *p_node, *p_end;
public: public:
BucketRange(): p_node(nullptr) {} BucketRange(): p_node(nullptr) {}
BucketRange(Chain *node): p_node(node) {} BucketRange(Chain *node, Chain *end): p_node(node), p_end(end) {}
BucketRange(const BucketRange &v): p_node(v.p_node) {} BucketRange(const BucketRange &v): p_node(v.p_node), p_end(v.p_end) {}
template<typename U> template<typename U>
BucketRange(const BucketRange<U> &v, EnableIf< BucketRange(const BucketRange<U> &v, EnableIf<
IsSame<RemoveCv<T>, RemoveCv<U>>::value && IsSame<RemoveCv<T>, RemoveCv<U>>::value &&
IsConvertible<U *, T *>::value, bool IsConvertible<U *, T *>::value, bool
> = true): p_node((Chain *)v.p_node) {} > = true): p_node((Chain *)v.p_node), p_end((Chain *)v.p_end) {}
BucketRange &operator=(const BucketRange &v) { BucketRange &operator=(const BucketRange &v) {
p_node = v.p_node; p_node = v.p_node;
p_end = v.p_end;
return *this; return *this;
} }
bool empty() const { return !p_node; } bool empty() const { return p_node == p_end; }
bool pop_front() { bool pop_front() {
if (!p_node) return false; if (p_node == p_end) return false;
p_node = p_node->next; p_node = p_node->next;
return true; return true;
} }
@ -182,27 +164,37 @@ private:
float p_maxlf; float p_maxlf;
Range iter_from(Chain *c, Size h) { Chain *find(const K &key, Size &h) const {
return Range(p_data.first() + h + 1, if (!p_size) return nullptr;
p_data.first() + bucket_count(), c); h = get_hash()(key) & (p_size - 1);
} Chain **cp = p_data.first();
ConstRange iter_from(Chain *c, Size h) const { for (Chain *c = cp[h], *e = cp[h + 1]; c != e; c = c->next)
using RChain = detail::HashChain<const E>; if (get_eq()(key, B::get_key(c->value)))
return ConstRange((RChain **)(p_data.first() + h + 1), return c;
(RChain **)(p_data.first() + bucket_count()), return nullptr;
(RChain *)c);
} }
bool find(const K &key, Size &h, Chain *&oc) const { Chain *insert_node(Size h, Chain *c) {
if (!p_size) return false; Chain **cp = p_data.first();
h = get_hash()(key) & (p_size - 1); Chain *it = cp[h + 1];
for (Chain *c = p_data.first()[h]; c; c = c->next) { c->next = it;
if (get_eq()(key, B::get_key(c->value))) { if (it) {
oc = c; c->prev = it->prev;
return true; it->prev = c;
} if (c->prev) c->prev->next = c;
} else {
size_t nb = h;
while (nb && !cp[nb]) --nb;
Chain *prev = cp[nb];
while (prev && prev->next) prev = prev->next;
c->prev = prev;
if (prev) prev->next = c;
} }
return false; for (; it == cp[h]; --h) {
cp[h] = c;
if (!h) break;
}
return c;
} }
Chain *insert(Size h) { Chain *insert(Size h) {
@ -216,12 +208,10 @@ private:
chunk->chains[CHUNKSIZE - 1].next = p_unused; chunk->chains[CHUNKSIZE - 1].next = p_unused;
p_unused = chunk->chains; p_unused = chunk->chains;
} }
++p_len;
Chain *c = p_unused; Chain *c = p_unused;
p_unused = p_unused->next; p_unused = p_unused->next;
c->next = p_data.first()[h]; return insert_node(h, c);
p_data.first()[h] = c;
++p_len;
return c;
} }
void delete_chunks(Chunk *chunks) { void delete_chunks(Chunk *chunks) {
@ -234,12 +224,9 @@ private:
T *access_base(const K &key, Size &h) const { T *access_base(const K &key, Size &h) const {
if (!p_size) return NULL; if (!p_size) return NULL;
h = get_hash()(key) & (p_size - 1); Chain *c = find(key, h);
for (Chain *c = p_data.first()[h]; c; c = c->next) { if (c) return &B::get_data(c->value);
if (get_eq()(key, B::get_key(c->value))) return nullptr;
return &B::get_data(c->value);
}
return NULL;
} }
void rehash_ahead(Size n) { void rehash_ahead(Size n) {
@ -330,13 +317,11 @@ protected:
p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1); p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1);
memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *)); memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *));
Chain **och = ht.p_data.first(); Chain **och = ht.p_data.first();
for (Size h = 0; h < p_size; ++h) { for (Chain *oc = *och; oc; oc = oc->next) {
Chain *oc = och[h]; Size h = get_hash()(B::get_key(oc->value)) & (p_size - 1);
for (; oc; oc = oc->next) { Chain *nc = insert(h);
Chain *nc = insert(h); allocator_destroy(get_alloc(), &nc->value);
allocator_destroy(get_alloc(), &nc->value); allocator_construct(get_alloc(), &nc->value, oc->value);
allocator_construct(get_alloc(), &nc->value, oc->value);
}
} }
} }
@ -371,12 +356,10 @@ protected:
p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1); p_data.first() = allocator_allocate(get_cpalloc(), p_size + 1);
memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *)); memset(p_data.first(), 0, (p_size + 1) * sizeof(Chain *));
Chain **och = ht.p_data.first(); Chain **och = ht.p_data.first();
for (Size h = 0; h < p_size; ++h) { for (Chain *oc = *och; oc; oc = oc->next) {
Chain *oc = och[h]; Size h = get_hash()(B::get_key(oc->value)) & (p_size - 1);
for (; oc; oc = oc->next) { Chain *nc = insert(h);
Chain *nc = insert(h); B::swap_elem(oc->value, nc->value);
B::swap_elem(oc->value, nc->value);
}
} }
} }
@ -467,9 +450,8 @@ public:
Size bucket_size(Size n) const { Size bucket_size(Size n) const {
Size ret = 0; Size ret = 0;
if (ret >= p_size) return ret; if (ret >= p_size) return ret;
Chain *c = p_data.first()[n]; Chain **cp = p_data.first();
if (!c) return ret; for (Chain *c = cp[n], *e = cp[n + 1]; c != e; c = c->next)
for (; c; c = c->next)
++ret; ++ret;
return ret; return ret;
} }
@ -483,13 +465,12 @@ public:
/* multihash: always insert */ /* multihash: always insert */
Chain *ch = insert(h); Chain *ch = insert(h);
B::swap_elem(ch->value, elem); B::swap_elem(ch->value, elem);
Chain **hch = p_data.first(); return make_pair(Range(ch), true);
return make_pair(Range(hch + h + 1, hch + bucket_count(),
ch), true);
} }
Chain *found = nullptr; Chain *found = nullptr;
bool ins = true; bool ins = true;
for (Chain *c = p_data.first()[h]; c; c = c->next) { Chain **cp = p_data.first();
for (Chain *c = cp[h], *e = cp[h + 1]; c != e; c = c->next) {
if (get_eq()(B::get_key(elem), B::get_key(c->value))) { if (get_eq()(B::get_key(elem), B::get_key(c->value))) {
found = c; found = c;
ins = false; ins = false;
@ -500,57 +481,54 @@ public:
found = insert(h); found = insert(h);
B::swap_elem(found->value, elem); B::swap_elem(found->value, elem);
} }
Chain **hch = p_data.first(); return make_pair(Range(found), ins);
return make_pair(Range(hch + h + 1, hch + bucket_count(),
found), ins);
} }
Size erase(const K &key) { Size erase(const K &key) {
if (!p_len) return 0; if (!p_len) return 0;
Size olen = p_len; Size olen = p_len;
Size h = get_hash()(key) & (p_size - 1); Size h = get_hash()(key) & (p_size - 1);
Chain **p = &p_data.first()[h], *c = *p; Chain **cp = p_data.first();
while (c) { for (Chain *c = cp[h], *e = cp[h + 1]; c != e; c = c->next)
if (get_eq()(key, B::get_key(c->value))) { if (get_eq()(key, B::get_key(c->value))) {
--p_len; --p_len;
*p = c->next; Size hh = h;
Chain *next = c->next;
for (; cp[hh] == c; --hh) {
cp[hh] = next;
if (!hh) break;
}
if (c->prev) c->prev->next = next;
if (next) next->prev = c->prev;
c->next = p_unused; c->next = p_unused;
c->prev = nullptr;
p_unused = c; p_unused = c;
allocator_destroy(get_alloc(), &c->value); allocator_destroy(get_alloc(), &c->value);
allocator_construct(get_alloc(), &c->value); allocator_construct(get_alloc(), &c->value);
if (!Multihash) return 1; if (!Multihash) return 1;
} else {
p = &c->next;
} }
c = *p;
}
return olen - p_len; return olen - p_len;
} }
Size count(const K &key) { Size count(const K &key) {
if (!p_len) return 0; Size h = 0;
Size h = get_hash()(key) & (p_size - 1); Chain *c = find(key, h);
Size ret = 0; if (!c) return 0;
for (Chain *c = p_data.first()[h]; c; c = c->next) Size ret = 1;
if (get_eq()(key, B::get_key(c->value))) { if (!Multihash) return ret;
++ret; for (c = c->next; c; c = c->next)
if (!Multihash) break; if (get_eq()(key, B::get_key(c->value))) ++ret;
}
return ret; return ret;
} }
Range find(const K &key) { Range find(const K &key) {
Size h = 0; Size h = 0;
Chain *c; return Range(find(key, h));
if (find(key, h, c)) return iter_from(c, h);
return Range();
} }
ConstRange find(const K &key) const { ConstRange find(const K &key) const {
Size h = 0; Size h = 0;
Chain *c; return ConstRange((detail::HashChain<const E> *)find(key, h));
if (find(key, h, c)) return iter_from(c, h);
return ConstRange();
} }
float load_factor() const { return float(p_len) / p_size; } float load_factor() const { return float(p_len) / p_size; }
@ -569,18 +547,17 @@ public:
Size osize = p_size; Size osize = p_size;
p_size = count; p_size = count;
for (Size i = 0; i < osize; ++i) { Chain *p = och ? *och : nullptr;
for (Chain *oc = och[i]; oc;) { while (p) {
Size h = get_hash()(B::get_key(oc->value)) & (p_size - 1); Chain *pp = p->next;
Chain *nxc = oc->next; Size h = get_hash()(B::get_key(p->value)) & (p_size - 1);
oc->next = nch[h]; p->prev = p->next = nullptr;
nch[h] = oc; insert_node(h, p);
oc = nxc; p = pp;
}
} }
if (och && osize) allocator_deallocate(get_cpalloc(), if (och && osize) allocator_deallocate(get_cpalloc(),
och, osize); och, osize + 1);
} }
void reserve(Size count) { void reserve(Size count) {
@ -588,32 +565,35 @@ public:
} }
Range iter() { Range iter() {
return Range(p_data.first(), p_data.first() + bucket_count()); if (!p_len) return Range();
return Range(*p_data.first());
} }
ConstRange iter() const { ConstRange iter() const {
using Chain = detail::HashChain<const E>; using Chain = detail::HashChain<const E>;
return ConstRange((Chain **)p_data.first(), if (!p_len) return ConstRange();
(Chain **)(p_data.first() + bucket_count())); return ConstRange((Chain *)*p_data.first());
} }
ConstRange citer() const { ConstRange citer() const {
using Chain = detail::HashChain<const E>; using Chain = detail::HashChain<const E>;
return ConstRange((Chain **)p_data.first(), if (!p_len) return ConstRange();
(Chain **)(p_data.first() + bucket_count())); return ConstRange((Chain *)*p_data.first());
} }
LocalRange iter(Size n) { LocalRange iter(Size n) {
if (n >= p_size) return LocalRange(); if (n >= p_size) return LocalRange();
return LocalRange(p_data.first()[n]); return LocalRange(p_data.first()[n], p_data.first()[n + 1]);
} }
ConstLocalRange iter(Size n) const { ConstLocalRange iter(Size n) const {
using Chain = detail::HashChain<const E>; using Chain = detail::HashChain<const E>;
if (n >= p_size) return ConstLocalRange(); if (n >= p_size) return ConstLocalRange();
return ConstLocalRange((Chain *)p_data.first()[n]); return ConstLocalRange((Chain *)p_data.first()[n],
(Chain *)p_data.first()[n + 1]);
} }
ConstLocalRange citer(Size n) const { ConstLocalRange citer(Size n) const {
using Chain = detail::HashChain<const E>; using Chain = detail::HashChain<const E>;
if (n >= p_size) return ConstLocalRange(); if (n >= p_size) return ConstLocalRange();
return ConstLocalRange((Chain *)p_data.first()[n]); return ConstLocalRange((Chain *)p_data.first()[n],
(Chain *)p_data.first()[n + 1]);
} }
}; };
} /* namespace detail */ } /* namespace detail */