summaryrefslogtreecommitdiffstats
path: root/deps/emhash/include
diff options
context:
space:
mode:
Diffstat (limited to 'deps/emhash/include')
-rw-r--r--deps/emhash/include/emhash/hash_set8.hpp1524
-rw-r--r--deps/emhash/include/emhash/hash_table8.hpp1805
2 files changed, 0 insertions, 3329 deletions
diff --git a/deps/emhash/include/emhash/hash_set8.hpp b/deps/emhash/include/emhash/hash_set8.hpp
deleted file mode 100644
index 3b6454e..0000000
--- a/deps/emhash/include/emhash/hash_set8.hpp
+++ /dev/null
@@ -1,1524 +0,0 @@
-// emhash8::HashSet for C++11/14/17
-// version 1.6.3
-//
-// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2019-2022 Huang Yuanbing & bailuzhou AT 163.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE
-
-#pragma once
-
-#include <cstring>
-#include <string>
-#include <cstdlib>
-#include <type_traits>
-#include <cassert>
-#include <utility>
-#include <cstdint>
-#include <functional>
-#include <iterator>
-#include <algorithm>
-
-#ifdef EMH_KEY
- #undef EMH_KEY
- #undef EMH_BUCKET
- #undef EMH_NEW
- #undef EMH_EMPTY
- #undef EMH_PREVET
-#endif
-
-// likely/unlikely
-#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
-# define EMH_LIKELY(condition) __builtin_expect(condition, 1)
-# define EMH_UNLIKELY(condition) __builtin_expect(condition, 0)
-#else
-# define EMH_LIKELY(condition) (condition)
-# define EMH_UNLIKELY(condition) (condition)
-#endif
-
-#define EMH_KEY(p,n) p[n]
-
-#define EMH_INDEX(i,n) i[n]
-#define EMH_BUCKET(i,n) i[n].bucket
-#define EMH_HSLOT(i,n) i[n].slot
-#define EMH_SLOT(i,n) (i[n].slot & _mask)
-#define EMH_PREVET(i,n) i[n].slot
-
-#define EMH_KEYMASK(key, mask) ((size_type)(key >> 0) & ~mask)
-#define EMH_EQHASH(n, key_hash) (EMH_KEYMASK(key_hash, _mask) == (_index[n].slot & ~_mask))
-#define EMH_NEW(key, bucket, key_hash) new(_pairs + _num_filled) value_type(key); _index[bucket] = {bucket, _num_filled++ | EMH_KEYMASK(key_hash, _mask)}
-
-#define EMH_EMPTY(i, n) (0 > (int)i[n].bucket)
-
-namespace emhash8 {
-
-constexpr uint32_t INACTIVE = 0xAAAAAAAA;
-constexpr uint32_t END = 0-0x1u;
-constexpr uint32_t EAD = 2;
-
-#ifndef EMH_DEFAULT_LOAD_FACTOR
- constexpr static float EMH_DEFAULT_LOAD_FACTOR = 0.80f;
-#endif
-#if EMH_CACHE_LINE_SIZE < 32
- constexpr static uint32_t EMH_CACHE_LINE_SIZE = 64;
-#endif
-
-/// A cache-friendly hash table with open addressing, linear/quadratic probing and power-of-two capacity
-template <typename KeyT, typename HashT = std::hash<KeyT>, typename EqT = std::equal_to<KeyT>>
-class HashSet
-{
-public:
- using htype = HashSet<KeyT, HashT, EqT>;
- using value_type = KeyT;
- using key_type = KeyT;
-
-#ifdef EMH_SMALL_TYPE
- using size_type = uint16_t;
-#elif EMH_SIZE_TYPE == 0
- using size_type = uint32_t;
-#else
- using size_type = size_t;
-#endif
-
- using hasher = HashT;
- using key_equal = EqT;
-
- struct Index
- {
- size_type bucket;
- size_type slot;
- };
-
- class const_iterator;
- class iterator
- {
- public:
- using iterator_category = std::bidirectional_iterator_tag;
- using difference_type = std::ptrdiff_t;
- using value_type = typename htype::value_type;
- using pointer = value_type*;
- using const_pointer = const value_type* ;
- using reference = value_type&;
- using const_reference = const value_type&;
-
- iterator() : kv_(nullptr) {}
- iterator(const_iterator& cit) {
- kv_ = cit.kv_;
- }
-
- iterator(const htype* hash_map, size_type bucket) {
- kv_ = hash_map->_pairs + (int)bucket;
- }
-
- iterator& operator++()
- {
- kv_ ++;
- return *this;
- }
-
- iterator operator++(int)
- {
- auto cur = *this; kv_ ++;
- return cur;
- }
-
- iterator& operator--()
- {
- kv_ --;
- return *this;
- }
-
- iterator operator--(int)
- {
- auto cur = *this; kv_ --;
- return cur;
- }
-
- reference operator*() const { return *kv_; }
- pointer operator->() const { return kv_; }
-
- bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; }
- bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; }
-
- public:
- value_type* kv_;
- };
-
- class const_iterator
- {
- public:
- using iterator_category = std::bidirectional_iterator_tag;
- using value_type = typename htype::value_type;
- using difference_type = std::ptrdiff_t;
- using pointer = value_type*;
- using const_pointer = const value_type*;
- using reference = value_type&;
- using const_reference = const value_type&;
-
- const_iterator(const iterator& it) {
- kv_ = it.kv_;
- }
-
- const_iterator (const htype* hash_map, size_type bucket) {
- kv_ = hash_map->_pairs + (int)bucket;
- }
-
- const_iterator& operator++()
- {
- kv_ ++;
- return *this;
- }
-
- const_iterator operator++(int)
- {
- auto cur = *this; kv_ ++;
- return cur;
- }
-
- const_iterator& operator--()
- {
- kv_ --;
- return *this;
- }
-
- const_iterator operator--(int)
- {
- auto cur = *this; kv_ --;
- return cur;
- }
-
- const_reference operator*() const { return *kv_; }
- const_pointer operator->() const { return kv_; }
-
- bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; }
- bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; }
- public:
- const value_type* kv_;
- };
-
- void init(size_type bucket, float mlf = EMH_DEFAULT_LOAD_FACTOR)
- {
- _pairs = nullptr;
- _index = nullptr;
- _mask = _num_buckets = 0;
- _num_filled = 0;
- max_load_factor(mlf);
- rehash(bucket);
- }
-
- HashSet(size_type bucket = 2, float mlf = EMH_DEFAULT_LOAD_FACTOR)
- {
- init(bucket, mlf);
- }
-
- HashSet(const HashSet& rhs)
- {
- _pairs = alloc_bucket(rhs._num_buckets * rhs.max_load_factor() + 4);
- _index = alloc_index(rhs._num_buckets);
- clone(rhs);
- }
-
- HashSet(HashSet&& rhs)
- {
- init(0);
- *this = std::move(rhs);
- }
-
- HashSet(std::initializer_list<value_type> ilist)
- {
- init((size_type)ilist.size());
- for (auto it = ilist.begin(); it != ilist.end(); ++it)
- do_insert(*it);
- }
-
- template<class InputIt>
- HashSet(InputIt first, InputIt last, size_type bucket_count=4)
- {
- init(std::distance(first, last) + bucket_count);
- for (; first != last; ++first)
- emplace(*first);
- }
-
- HashSet& operator=(const HashSet& rhs)
- {
- if (this == &rhs)
- return *this;
-
- clearkv();
-
- if (_num_buckets < rhs._num_buckets || _num_buckets > 2 * rhs._num_buckets) {
- free(_pairs); _pairs = alloc_bucket(rhs._num_buckets * rhs.max_load_factor() + 4);
- free(_index); _index = alloc_index(rhs._num_buckets);
- }
-
- clone(rhs);
- return *this;
- }
-
- HashSet& operator=(HashSet&& rhs)
- {
- if (this != &rhs) {
- swap(rhs);
- rhs.clear();
- }
- return *this;
- }
-
- template<typename Con>
- bool operator == (const Con& rhs) const
- {
- if (size() != rhs.size())
- return false;
-
- for (auto it = begin(), last = end(); it != last; ++it) {
- auto oi = rhs.find(*it);
- if (oi == rhs.end())
- return false;
- }
- return true;
- }
-
- template<typename Con>
- bool operator != (const Con& rhs) const { return !(*this == rhs); }
-
- ~HashSet()
- {
- clearkv();
- free(_pairs);
- free(_index);
- }
-
- void clone(const HashSet& rhs)
- {
- _hasher = rhs._hasher;
-// _eq = rhs._eq;
- _num_buckets = rhs._num_buckets;
- _num_filled = rhs._num_filled;
- _mlf = rhs._mlf;
- _last = rhs._last;
- _mask = rhs._mask;
-
- auto opairs = rhs._pairs;
- memcpy((char*)_index, (char*)rhs._index, (_num_buckets + EAD) * sizeof(Index));
-
- if (is_copy_trivially()) {
- if (opairs)
- memcpy((char*)_pairs, (char*)opairs, _num_filled * sizeof(value_type));
- } else {
- for (size_type slot = 0; slot < _num_filled; slot++)
- new(_pairs + slot) value_type(opairs[slot]);
- }
- }
-
- void swap(HashSet& rhs)
- {
- // std::swap(_eq, rhs._eq);
- std::swap(_hasher, rhs._hasher);
- std::swap(_pairs, rhs._pairs);
- std::swap(_index, rhs._index);
- std::swap(_num_buckets, rhs._num_buckets);
- std::swap(_num_filled, rhs._num_filled);
- std::swap(_mask, rhs._mask);
- std::swap(_mlf, rhs._mlf);
- std::swap(_last, rhs._last);
- }
-
- // -------------------------------------------------------------
- inline iterator first() const { return {this, 0}; }
- inline iterator last() const { return {this, _num_filled - 1}; }
-
- iterator begin() { return first(); }
- const_iterator cbegin() const { return first(); }
- const_iterator begin() const { return first(); }
-
- inline iterator end() { return {this, _num_filled}; }
- inline const_iterator cend() const { return {this, _num_filled}; }
- const_iterator end() const { return cend(); }
-
- size_type size() const { return _num_filled; }
- bool empty() const { return _num_filled == 0; }
- size_type bucket_count() const { return _num_buckets; }
-
- /// Returns average number of elements per bucket.
- float load_factor() const { return static_cast<float>(_num_filled) / (_mask + 1); }
-
- HashT& hash_function() const { return _hasher; }
- EqT& key_eq() const { return _eq; }
-
- void max_load_factor(float mlf)
- {
- if (mlf < 1.0-1e-4 && mlf > 0.2f)
- _mlf = (uint32_t)((1 << 27) / mlf);
- }
-
- constexpr float max_load_factor() const { return (1 << 27) / (float)_mlf; }
- constexpr size_type max_size() const { return (1ull << (sizeof(size_type) * 8 - 2)); }
- constexpr size_type max_bucket_count() const { return max_size(); }
-
-#ifdef EMH_STATIS
- //Returns the bucket number where the element with key k is located.
- size_type bucket(const KeyT& key) const
- {
- const auto bucket = hash_bucket(key);
- const auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- return 0;
- else if (bucket == next_bucket)
- return bucket + 1;
-
- return hash_main(bucket) + 1;
- }
-
- //Returns the number of elements in bucket n.
- size_type bucket_size(const size_type bucket) const
- {
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- return 0;
-
- next_bucket = hash_main(bucket);
- size_type ibucket_size = 1;
-
- //iterator each item in current main bucket
- while (true) {
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (nbucket == next_bucket) {
- break;
- }
- ibucket_size ++;
- next_bucket = nbucket;
- }
- return ibucket_size;
- }
-
- size_type get_main_bucket(const size_type bucket) const
- {
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- return INACTIVE;
-
- return hash_main(bucket);
- }
-
- size_type get_diss(size_type bucket, size_type next_bucket, const size_type slots) const
- {
- auto pbucket = reinterpret_cast<uint64_t>(&_pairs[bucket]);
- auto pnext = reinterpret_cast<uint64_t>(&_pairs[next_bucket]);
- if (pbucket / EMH_CACHE_LINE_SIZE == pnext / EMH_CACHE_LINE_SIZE)
- return 0;
- size_type diff = pbucket > pnext ? (pbucket - pnext) : (pnext - pbucket);
- if (diff / EMH_CACHE_LINE_SIZE < slots - 1)
- return diff / EMH_CACHE_LINE_SIZE + 1;
- return slots - 1;
- }
-
- int get_bucket_info(const size_type bucket, size_type steps[], const size_type slots) const
- {
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- return -1;
-
- const auto main_bucket = hash_main(bucket);
- if (next_bucket == main_bucket)
- return 1;
- else if (main_bucket != bucket)
- return 0;
-
- steps[get_diss(bucket, next_bucket, slots)] ++;
- size_type ibucket_size = 2;
- //find a new empty and linked it to tail
- while (true) {
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (nbucket == next_bucket)
- break;
-
- steps[get_diss(nbucket, next_bucket, slots)] ++;
- ibucket_size ++;
- next_bucket = nbucket;
- }
- return (int)ibucket_size;
- }
-
- void dump_statics() const
- {
- const uint32_t slots = 128;
- size_type buckets[slots + 1] = {0};
- size_type steps[slots + 1] = {0};
- for (size_type bucket = 0; bucket < _num_buckets; ++bucket) {
- auto bsize = get_bucket_info(bucket, steps, slots);
- if (bsize > 0)
- buckets[bsize] ++;
- }
-
- size_type sumb = 0, collision = 0, sumc = 0, finds = 0, sumn = 0;
- puts("============== buckets size ration =========");
- for (size_type i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++) {
- const auto bucketsi = buckets[i];
- if (bucketsi == 0)
- continue;
- sumb += bucketsi;
- sumn += bucketsi * i;
- collision += bucketsi * (i - 1);
- finds += bucketsi * i * (i + 1) / 2;
- printf(" %2u %8u %2.2lf| %.2lf\n", i, bucketsi, bucketsi * 100.0 * i / _num_filled, sumn * 100.0 / _num_filled);
- }
-
- puts("========== collision miss ration ===========");
- for (size_type i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
- sumc += steps[i];
- if (steps[i] <= 2)
- continue;
- printf(" %2u %8u %.2lf %.2lf\n", i, steps[i], steps[i] * 100.0 / collision, sumc * 100.0 / collision);
- }
-
- if (sumb == 0) return;
- printf(" _num_filled/bucket_size/packed collision/cache_miss/hit_find = %u/%.2lf/%zd/ %.2lf%%/%.2lf%%/%.2lf\n",
- _num_filled, _num_filled * 1.0 / sumb, sizeof(value_type), (collision * 100.0 / _num_filled), (collision - steps[0]) * 100.0 / _num_filled, finds * 1.0 / _num_filled);
- assert(sumn == _num_filled);
- assert(sumc == collision);
- puts("============== buckets size end =============");
- }
-#endif
-
- // ------------------------------------------------------------
- template<typename K=KeyT>
- iterator find(const KeyT& key) noexcept
- {
- return {this, find_filled_slot(key)};
- }
-
- template<typename K=KeyT>
- const_iterator find(const K& key) const noexcept
- {
- return {this, find_filled_slot(key)};
- }
-
- template<typename K=KeyT>
- bool contains(const K& key) const noexcept
- {
- return find_filled_slot(key) != _num_filled;
- }
-
- template<typename K=KeyT>
- size_type count(const K& key) const noexcept
- {
- return find_filled_slot(key) == _num_filled ? 0 : 1;
- //return find_sorted_bucket(key) == END ? 0 : 1;
- //return find_hash_bucket(key) == END ? 0 : 1;
- }
-
- template<typename K=KeyT>
- std::pair<iterator, iterator> equal_range(const K& key)
- {
- const auto found = find(key);
- if (found.second == _num_filled)
- return { found, found };
- else
- return { found, std::next(found) };
- }
-
- void merge(HashSet& rhs)
- {
- if (empty()) {
- *this = std::move(rhs);
- return;
- }
-
- for (auto rit = rhs.begin(); rit != rhs.end(); ) {
- auto fit = find(*rit);
- if (fit == end()) {
- insert_unique(*rit);
- rit = rhs.erase(rit);
- } else {
- ++rit;
- }
- }
- }
-
- // -----------------------------------------------------
- std::pair<iterator, bool> do_insert(const value_type& value)
- {
- const auto key_hash = hash_key(value);
- const auto bucket = find_or_allocate(value, key_hash);
- const auto empty = EMH_EMPTY(_index, bucket);
- if (empty) {
- EMH_NEW(value, bucket, key_hash);
- }
-
- const auto slot = EMH_SLOT(_index, bucket);
- return { {this, slot}, empty };
- }
-
- std::pair<iterator, bool> do_insert(value_type&& value)
- {
- const auto key_hash = hash_key(value);
- const auto bucket = find_or_allocate(value, key_hash);
- const auto empty = EMH_EMPTY(_index, bucket);
- if (empty) {
- EMH_NEW(std::forward<KeyT>(value), bucket, key_hash);
- }
-
- const auto slot = EMH_SLOT(_index, bucket);
- return { {this, slot}, empty };
- }
-
- template<typename K>
- std::pair<iterator, bool> do_insert(K&& key)
- {
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- const auto empty = EMH_EMPTY(_index, bucket);
- if (empty) {
- EMH_NEW(std::forward<K>(key), bucket, key_hash);
- }
-
- const auto slot = EMH_SLOT(_index, bucket);
- return { {this, slot}, empty };
- }
-
- template<typename K>
- std::pair<iterator, bool> do_assign(K&& key)
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- const auto empty = EMH_EMPTY(_index, bucket);
- if (empty) {
- EMH_NEW(std::forward<K>(key), bucket, key_hash);
- }
-
- const auto slot = EMH_SLOT(_index, bucket);
- return { {this, slot}, empty };
- }
-
- std::pair<iterator, bool> insert(const value_type& p)
- {
- check_expand_need();
- return do_insert(p);
- }
-
- std::pair<iterator, bool> insert(value_type && p)
- {
- check_expand_need();
- return do_insert(std::move(p));
- }
-
- void insert(std::initializer_list<value_type> ilist)
- {
- reserve(ilist.size() + _num_filled, false);
- for (auto it = ilist.begin(); it != ilist.end(); ++it)
- do_insert(*it);
- }
-
- template <typename Iter>
- void insert(Iter first, Iter last)
- {
- reserve(std::distance(first, last) + _num_filled, false);
- for (; first != last; ++first)
- do_insert(*first);
- }
-
- template <typename Iter>
- void insert_unique(Iter begin, Iter end)
- {
- reserve(std::distance(begin, end) + _num_filled, false);
- for (; begin != end; ++begin) {
- insert_unique(*begin);
- }
- }
-
- template<typename K>
- size_type insert_unique(K&& key)
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- auto bucket = find_unique_bucket(key_hash);
- EMH_NEW(std::forward<K>(key), bucket, key_hash);
- return bucket;
- }
-
- size_type insert_unique(value_type&& value)
- {
- return insert_unique(std::move(value));
- }
-
- inline size_type insert_unique(const value_type& value)
- {
- return insert_unique(value);
- }
-
- template <class... Args>
- inline std::pair<iterator, bool> emplace(Args&&... args)
- {
- check_expand_need();
- return do_insert(std::forward<Args>(args)...);
- }
-
- //no any optimize for position
- template <class... Args>
- iterator emplace_hint(const_iterator hint, Args&&... args)
- {
- (void)hint;
- check_expand_need();
- return do_insert(std::forward<Args>(args)...).first;
- }
-
- template<class... Args>
- std::pair<iterator, bool> try_emplace(const KeyT& k, Args&&... args)
- {
- check_expand_need();
- return do_insert(k, std::forward<Args>(args)...);
- }
-
- template<class... Args>
- std::pair<iterator, bool> try_emplace(KeyT&& k, Args&&... args)
- {
- check_expand_need();
- return do_insert(std::move(k), std::forward<Args>(args)...);
- }
-
- template <class... Args>
- inline size_type emplace_unique(Args&&... args)
- {
- return insert_unique(std::forward<Args>(args)...);
- }
-
- std::pair<iterator, bool> insert_or_assign(const KeyT& key) { return do_assign(key); }
- std::pair<iterator, bool> insert_or_assign(KeyT&& key) { return do_assign(std::move(key)); }
-
- /// Erase an element from the hash table.
- /// return 0 if element was not found
- size_type erase(const KeyT& key)
- {
- const auto key_hash = hash_key(key);
- const auto sbucket = find_filled_bucket(key, key_hash);
- if (sbucket == END)
- return 0;
-
- const auto main_bucket = key_hash & _mask;
- erase_slot(sbucket, main_bucket);
- return 1;
- }
-
- //iterator erase(const_iterator begin_it, const_iterator end_it)
- iterator erase(const const_iterator& cit)
- {
- const auto slot = (size_type)(cit.kv_ - _pairs);
- size_type main_bucket;
- const auto sbucket = find_slot_bucket(slot, main_bucket); //TODO
- erase_slot(sbucket, main_bucket);
- return {this, slot};
- }
-
- //only last >= first
- iterator erase(const_iterator first, const_iterator last)
- {
- auto esize = long(last.kv_ - first.kv_);
- auto tsize = long((_pairs + _num_filled) - last.kv_); //last to tail size
- auto next = first;
- while (tsize -- > 0) {
- if (esize-- <= 0)
- break;
- next = ++erase(next);
- }
-
- //fast erase from last
- next = this->last();
- while (esize -- > 0)
- next = --erase(next);
-
- return {this, size_type(next.kv_ - _pairs)};
- }
-
- template<typename Pred>
- size_type erase_if(Pred pred)
- {
- auto old_size = size();
- for (auto it = begin(); it != end();) {
- if (pred(*it))
- it = erase(it);
- else
- ++it;
- }
- return old_size - size();
- }
-
- static constexpr bool is_triviall_destructable()
- {
-#if __cplusplus >= 201402L || _MSC_VER > 1600
- return !(std::is_trivially_destructible<KeyT>::value);
-#else
- return !(std::is_pod<KeyT>::value);
-#endif
- }
-
- static constexpr bool is_copy_trivially()
- {
-#if __cplusplus >= 201103L || _MSC_VER > 1600
- return (std::is_trivially_copyable<KeyT>::value);
-#else
- return (std::is_pod<KeyT>::value);
-#endif
- }
-
- void clearkv()
- {
- if (is_triviall_destructable()) {
- while (_num_filled --)
- _pairs[_num_filled].~value_type();
- }
- }
-
- /// Remove all elements, keeping full capacity.
- void clear()
- {
- if (_num_filled > 0)
- memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets);
-
- clearkv();
-
- _last = _num_filled = 0;
- }
-
- void shrink_to_fit(const float min_factor = EMH_DEFAULT_LOAD_FACTOR / 4)
- {
- if (load_factor() < min_factor && bucket_count() > 10) //safe guard
- rehash(_num_filled);
- }
-
- /// Make room for this many elements
- bool reserve(uint64_t num_elems, bool force)
- {
- (void)force;
- const auto required_buckets = (uint32_t)(num_elems * _mlf >> 27);
- if (EMH_LIKELY(required_buckets < _mask)) // && !force
- return false;
-
-#if EMH_STATIS
- if (_num_filled > 1'000'000) dump_statics();
-#endif
-
- //assert(required_buckets < max_size());
- rehash(required_buckets + 2);
- return true;
- }
-
- static value_type* alloc_bucket(size_type num_buckets)
- {
-#if 0
- auto new_pairs = (char*)malloc(num_buckets * sizeof(value_type) + (EAD + num_buckets) * sizeof(Index));
-#else
- auto new_pairs = (char*)malloc(num_buckets * sizeof(value_type));
-#endif
- return (value_type *)(new_pairs);
- }
-
- static Index* alloc_index(size_type num_buckets)
- {
- auto new_index = (char*)malloc((EAD + num_buckets) * sizeof(Index));
- return (Index *)(new_index);
- }
-
- bool reserve(size_type required_buckets)
- {
- if (_num_filled != required_buckets)
- return reserve(required_buckets, true);
-
- _last = 0;
-
- std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) {
- const auto hashl = (size_type)hash_key(l) & _mask, hashr = (size_type)hash_key(r) & _mask;
- if (hashl != hashr)
- return hashl < hashr;
-#if 0
- return hashl < hashr;
-#else
- return l < r;
-#endif
- });
-
- memset(_index, INACTIVE, sizeof(_index[0]) * _num_buckets);
- for (size_type slot = 0; slot < _num_filled; slot++) {
- const auto& key = EMH_KEY(_pairs, slot);
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- auto& next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- EMH_INDEX(_index, bucket) = {1, slot | EMH_KEYMASK(key_hash, _mask)};
- else {
- EMH_HSLOT(_index, bucket) |= EMH_KEYMASK(key_hash, _mask);
- next_bucket ++;
- }
- }
- return true;
- }
-
- void rebuild(size_type num_buckets)
- {
- auto new_pairs = (value_type*)alloc_bucket(num_buckets * max_load_factor() + 4);
- if (is_copy_trivially()) {
- if (_pairs)
- memcpy((char*)new_pairs, (char*)_pairs, _num_filled * sizeof(value_type));
- } else {
- for (size_type slot = 0; slot < _num_filled; slot++) {
- new(new_pairs + slot) value_type(std::move(_pairs[slot]));
- if (is_triviall_destructable())
- _pairs[slot].~value_type();
- }
- }
- free(_pairs); _pairs = new_pairs;
- }
-
- void rehash(uint64_t required_buckets)
- {
- if (required_buckets < _num_filled)
- return;
-
- uint32_t num_buckets = _num_filled > (1u << 16) ? (1u << 16) : 4u;
- while (num_buckets < required_buckets) { num_buckets *= 2; }
- assert(num_buckets < max_size());
-
-#if EMH_REHASH_LOG
- auto last = _last;
- size_type collision = 0;
-#endif
-
- _last = 0;
- _num_buckets = num_buckets;
- _mask = num_buckets - 1;
-
- free(_index);
- rebuild(num_buckets);
-
- _index = (Index*)alloc_index (num_buckets);
-
- memset((char*)_index, INACTIVE, sizeof(_index[0]) * num_buckets);
- memset((char*)(_index + num_buckets), 0, sizeof(_index[0]) * EAD);
-
-#ifdef EMH_SORT
- std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) {
- const auto hashl = hash_key(l), hashr = hash_key(r);
- auto diff = int64_t((hashl & _mask) - (hashr & _mask));
- if (diff != 0)
- return diff < 0;
- return hashl < hashr;
-// return l < r;
- });
-#endif
-
- for (size_type slot = 0; slot < _num_filled; slot++) {
- const auto& key = EMH_KEY(_pairs, slot);
- const auto key_hash = hash_key(key);
- const auto bucket = find_unique_bucket(key_hash);
- EMH_INDEX(_index, bucket) = {bucket, slot | EMH_KEYMASK(key_hash, _mask)};
-
-#if EMH_REHASH_LOG
- if (bucket != hash_main(bucket))
- collision ++;
-#endif
- }
-
-#if EMH_REHASH_LOG
- if (_num_filled > EMH_REHASH_LOG) {
- auto mbucket = _num_filled - collision;
- char buff[255] = {0};
- sprintf(buff, " _num_filled/aver_size/K.V/pack/collision|last = %u/%.2lf/%s.%s/%zd|%.2lf%%,%.2lf%%",
- _num_filled, double (_num_filled) / mbucket, typeid(KeyT).name(), typeid(ValueT).name(), sizeof(_pairs[0]), collision * 100.0 / _num_filled, last * 100.0 / _num_buckets);
-#ifdef EMH_LOG
- static uint32_t ihashs = 0; EMH_LOG() << "hash_nums = " << ihashs ++ << "|" <<__FUNCTION__ << "|" << buff << endl;
-#else
- puts(buff);
-#endif
- }
-#endif
- }
-
-private:
- // Can we fit another element?
- inline bool check_expand_need()
- {
- return reserve(_num_filled, false);
- }
-
- size_type slot_to_bucket(const size_type slot) const
- {
- size_type main_bucket;
- return find_slot_bucket(slot, main_bucket); //TODO
- }
-
- //very slow
- void erase_slot(const size_type sbucket, const size_type main_bucket)
- {
- const auto slot = EMH_SLOT(_index, sbucket);
- const auto ebucket = erase_bucket(sbucket, main_bucket);
- const auto last_slot = --_num_filled;
- if (EMH_LIKELY(slot != last_slot)) {
- const auto last_bucket = slot_to_bucket(last_slot);
- EMH_KEY(_pairs, slot) = std::move(EMH_KEY(_pairs, last_slot));
- EMH_HSLOT(_index, last_bucket) = slot | (EMH_HSLOT(_index, last_bucket) & ~_mask);
- }
-
- if (is_triviall_destructable())
- _pairs[last_slot].~value_type();
-
- EMH_INDEX(_index, ebucket) = {INACTIVE, END};
- }
-
- size_type erase_bucket(const size_type bucket, const size_type main_bucket)
- {
- const auto next_bucket = EMH_BUCKET(_index, bucket);
- if (bucket == main_bucket) {
- if (main_bucket != next_bucket) {
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- EMH_INDEX(_index, main_bucket) = {
- (nbucket == next_bucket) ? main_bucket : nbucket,
- EMH_HSLOT(_index, next_bucket)
- };
- }
- return next_bucket;
- }
-
- const auto prev_bucket = find_prev_bucket(main_bucket, bucket);
- EMH_BUCKET(_index, prev_bucket) = (bucket == next_bucket) ? prev_bucket : next_bucket;
- return bucket;
- }
-
- // Find the slot with this key, or return bucket size
- size_type find_slot_bucket(const size_type slot, size_type& main_bucket) const
- {
- const auto key_hash = hash_key(EMH_KEY(_pairs, slot));
- const auto bucket = main_bucket = size_type(key_hash & _mask);
-// if (EMH_EQHASH(bucket, key_hash)) {
- if (slot == EMH_SLOT(_index, bucket))
- return bucket;
-// }
-
- auto next_bucket = EMH_BUCKET(_index, bucket);
- while (true) {
- if (EMH_LIKELY(slot == EMH_SLOT(_index, next_bucket)))
- return next_bucket;
- next_bucket = EMH_BUCKET(_index, next_bucket);
- }
-
- return 0;
- }
-
- // Find the slot with this key, or return bucket size
- size_type find_filled_bucket(const KeyT& key, uint64_t key_hash) const
- {
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if (EMH_UNLIKELY((int)next_bucket < 0))
- return END;
-
- if (EMH_EQHASH(bucket, key_hash)) {
- const auto slot = EMH_SLOT(_index, bucket);
- if (EMH_LIKELY(_eq(key, EMH_KEY(_pairs, slot))))
- return bucket;
- }
- if (next_bucket == bucket)
- return END;
-
- while (true) {
- if (EMH_EQHASH(next_bucket, key_hash)) {
- const auto slot = EMH_SLOT(_index, next_bucket);
- if (EMH_LIKELY(_eq(key, EMH_KEY(_pairs, slot))))
- return next_bucket;
- }
-
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (EMH_UNLIKELY(nbucket == next_bucket))
- return END;
- next_bucket = nbucket;
- }
- return 0;
- }
-
- // Find the slot with this key, or return bucket size
- size_type find_filled_slot(const KeyT& key) const
- {
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- return _num_filled;
-
- if (EMH_EQHASH(bucket, key_hash)) {
- const auto slot = EMH_SLOT(_index, bucket);
- if (EMH_LIKELY(_eq(key, EMH_KEY(_pairs, slot))))
- return slot;
- }
- if (next_bucket == bucket)
- return _num_filled;
-
- while (true) {
- if (EMH_EQHASH(next_bucket, key_hash)) {
- const auto slot = EMH_SLOT(_index, next_bucket);
- if (EMH_LIKELY(_eq(key, EMH_KEY(_pairs, slot))))
- return slot;
- }
-
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (EMH_UNLIKELY(nbucket == next_bucket))
- return _num_filled;
- next_bucket = nbucket;
- }
-
- return 0;
- }
-
- size_type find_hash_bucket(const KeyT& key) const
- {
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- const auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0)
- return END;
-
- auto slot = EMH_SLOT(_index, bucket);
- if (_eq(key, EMH_KEY(_pairs, slot++)))
- return slot;
- else if (next_bucket == bucket)
- return END;
-
- while (true) {
- const auto& okey = EMH_KEY(_pairs, slot++);
- if (_eq(key, okey))
- return slot;
-
- const auto hasho = hash_key(okey);
- if ((hasho & _mask) != bucket)
- break;
- else if (hasho > key_hash)
- break;
- else if (EMH_UNLIKELY(slot >= _num_filled))
- break;
- }
-
- return END;
- }
-
- size_type find_sorted_bucket(const KeyT& key) const
- {
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- const auto slots = (int)(EMH_BUCKET(_index, bucket)); //TODO
- if (slots < 0 /**|| key < EMH_KEY(_pairs, slot)*/)
- return END;
-
- const auto slot = EMH_SLOT(_index, bucket);
- auto ormask = _index[bucket].slot & ~_mask;
- auto hmask = EMH_KEYMASK(key_hash, _mask);
- if ((hmask | ormask) != ormask)
- return END;
-
- if (_eq(key, EMH_KEY(_pairs, slot)))
- return slot;
- else if (slots == 1 || key < EMH_KEY(_pairs, slot))
- return END;
-
-#if 0
- if (key < EMH_KEY(_pairs, slot) || key > EMH_KEY(_pairs, slots + slot - 1))
- return END;
-#endif
-
- for (size_type i = 1; i < slots; i++) {
- const auto& okey = EMH_KEY(_pairs, slot + i);
- if (_eq(key, okey))
- return slot + i;
-// else if (okey > key)
-// return END;
- }
-
- return END;
- }
-
- //kick out bucket and find empty to occpuy
- //it will break the orgin link and relnik again.
- //before: main_bucket-->prev_bucket --> bucket --> next_bucket
- //atfer : main_bucket-->prev_bucket --> (removed)--> new_bucket--> next_bucket
- size_type kickout_bucket(const size_type kmain, const size_type bucket)
- {
- const auto next_bucket = EMH_BUCKET(_index, bucket);
- const auto new_bucket = find_empty_bucket(next_bucket);
- const auto prev_bucket = find_prev_bucket(kmain, bucket);
-
- const auto oslot = EMH_HSLOT(_index, bucket);
- if (next_bucket == bucket)
- EMH_INDEX(_index, new_bucket) = {new_bucket, oslot};
- else
- EMH_INDEX(_index, new_bucket) = {next_bucket, oslot};
-
- EMH_BUCKET(_index, prev_bucket) = new_bucket;
- EMH_BUCKET(_index, bucket) = INACTIVE;
-
- return bucket;
- }
-
-/*
-** inserts a new key into a hash table; first, check whether key's main
-** bucket/position is free. If not, check whether colliding node/bucket is in its main
-** position or not: if it is not, move colliding bucket to an empty place and
-** put new key in its main position; otherwise (colliding bucket is in its main
-** position), new key goes to an empty position.
-*/
- size_type find_or_allocate(const KeyT& key, uint64_t key_hash)
- {
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0) {
- return bucket;
- }
-
- const auto slot = EMH_SLOT(_index, bucket);
- if (EMH_EQHASH(bucket, key_hash))
- if (EMH_LIKELY(_eq(key, EMH_KEY(_pairs, slot))))
- return bucket;
-
- //check current bucket_key is in main bucket or not
- const auto kmain = hash_bucket(EMH_KEY(_pairs, slot));
- if (kmain != bucket)
- return kickout_bucket(kmain, bucket);
- else if (next_bucket == bucket)
- return EMH_BUCKET(_index, next_bucket) = find_empty_bucket(next_bucket);
-
- //find next linked bucket and check key
- while (true) {
- const auto slot2 = EMH_SLOT(_index, next_bucket);
- if (EMH_UNLIKELY(EMH_EQHASH(next_bucket, key_hash))) {
- if (EMH_LIKELY(_eq(key, EMH_KEY(_pairs, slot2))))
- return next_bucket;
- }
-
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (nbucket == next_bucket)
- break;
- next_bucket = nbucket;
- }
-
- //find a new empty and link it to tail
- const auto new_bucket = find_empty_bucket(next_bucket);
- return EMH_BUCKET(_index, next_bucket) = new_bucket;
- }
-
- size_type find_unique_bucket(uint64_t key_hash)
- {
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = EMH_BUCKET(_index, bucket);
- if ((int)next_bucket < 0) {
- return bucket;
- }
-
- //check current bucket_key is in main bucket or not
- const auto kmain = hash_main(bucket);
- if (EMH_UNLIKELY(kmain != bucket))
- return kickout_bucket(kmain, bucket);
- else if (EMH_UNLIKELY(next_bucket != bucket))
- next_bucket = find_last_bucket(next_bucket);
-
- //find a new empty and link it to tail
- return EMH_BUCKET(_index, next_bucket) = find_empty_bucket(next_bucket);
- }
-
-/***
- Different probing techniques usually provide a trade-off between memory locality and avoidance of clustering.
-Since Robin Hood hashing is relatively resilient to clustering (both primary and secondary), linear probing is the most cache friendly alternativeis typically used.
-
- It's the core algorithm of this hash map with highly optimization/benchmark.
-normaly linear probing is inefficient with high load factor, it use a new 3-way linear
-probing strategy to search empty slot. from benchmark even the load factor > 0.9, it's more 2-3 timer fast than
-one-way search strategy.
-
-1. linear or quadratic probing a few cache line for less cache miss from input slot "bucket_from".
-2. the first search slot from member variant "_last", init with 0
-3. the second search slot from calculated pos "(_num_filled + _last) & _mask", it's like a rand value
-*/
- // key is not in this mavalue. Find a place to put it.
- size_type find_empty_bucket(const size_type bucket_from)
- {
- auto bucket = bucket_from;
- if (EMH_EMPTY(_index, ++bucket) || EMH_EMPTY(_index, ++bucket))
- return bucket;
-
- auto offset = 2u;
-
-#ifndef EMH_QUADRATIC
- constexpr auto linear_probe_length = 2 + EMH_CACHE_LINE_SIZE / 16;//2 4 6 8
- for (; offset < linear_probe_length; offset += 2) {
- auto bucket1 = (bucket + offset) & _mask;
- if (EMH_EMPTY(_index, bucket1) || EMH_EMPTY(_index, ++bucket1))
- return bucket1;
- }
-#else
- constexpr auto linear_probe_length = 10;//2 4 7 11
- for (auto step = offset; offset < linear_probe_length; offset += ++step) {
- auto bucket1 = (bucket + offset) & _mask;
- if (EMH_EMPTY(_index, bucket1) || EMH_EMPTY(_index, ++bucket1))
- return bucket1;
- }
-#endif
-
-#if 0
- while (true) {
- _last &= _mask;
- if (EMH_EMPTY(_index, _last++) || EMH_EMPTY(_index, _last++))
- return _last++ - 1;
-
-#if 1
- auto tail = _mask - (_last & _mask);
- if (EMH_EMPTY(_index, tail) || EMH_EMPTY(_index, ++tail))
- return tail;
-#endif
-#if 0
- auto medium = (_num_filled + _last) & _mask;
- if (EMH_EMPTY(_index, medium) || EMH_EMPTY(_index, ++medium))
- return medium;
-#endif
- }
-#else
- //for (auto slot = bucket + offset; ;slot += offset++) {
- for (auto slot = bucket + offset; ; slot++) {
-// if (EMH_EMPTY(_index, ++_last))// || EMH_EMPTY(_index, ++_last))
-// return _last ++;
-
- auto bucket1 = slot++ & _mask;
- if (EMH_UNLIKELY(EMH_EMPTY(_index, bucket1)))// || EMH_UNLIKELY(EMH_EMPTY(_index, ++bucket1))))
- return bucket1;
-
- auto medium = (_num_filled + _last++) & _mask;
- if (EMH_EMPTY(_index, medium) || EMH_EMPTY(_index, ++medium))
- return medium;
-
- ++_last &= _mask;
- }
-#endif
- return 0;
- }
-
- size_type find_last_bucket(size_type main_bucket) const
- {
- auto next_bucket = EMH_BUCKET(_index, main_bucket);
- if (next_bucket == main_bucket)
- return main_bucket;
-
- while (true) {
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (nbucket == next_bucket)
- return next_bucket;
- next_bucket = nbucket;
- }
- }
-
- size_type find_prev_bucket(const size_type main_bucket, const size_type bucket) const
- {
- auto next_bucket = EMH_BUCKET(_index, main_bucket);
- if (next_bucket == bucket)
- return main_bucket;
-
- while (true) {
- const auto nbucket = EMH_BUCKET(_index, next_bucket);
- if (nbucket == bucket)
- return next_bucket;
- next_bucket = nbucket;
- }
- }
-
- inline size_type hash_bucket(const KeyT& key) const
- {
- return (size_type)hash_key(key) & _mask;
- }
-
- inline size_type hash_main(const size_type bucket) const
- {
- const auto slot = EMH_SLOT(_index, bucket);
- return (size_type)hash_key(EMH_KEY(_pairs, slot)) & _mask;
- }
-
-#ifdef EMH_INT_HASH
- static constexpr uint64_t KC = UINT64_C(11400714819323198485);
- static uint64_t hash64(uint64_t key)
- {
-#if __SIZEOF_INT128__
- __uint128_t r = key; r *= KC;
- return (uint64_t)(r >> 64) + (uint64_t)r;
-#elif _WIN64
- uint64_t high;
- return _umul128(key, KC, &high) + high;
-#elif 1
- auto low = key;
- auto high = (key >> 32) | (key << 32);
- auto mix = (0x94d049bb133111ebull * low + 0xbf58476d1ce4e5b9ull * high);
- return mix >> 32;
-#elif 1
- uint64_t r = key * UINT64_C(0xca4bcaa75ec3f625);
- return (r >> 32) + r;
-#elif 1
- //MurmurHash3Mixer
- uint64_t h = key;
- h ^= h >> 33;
- h *= 0xff51afd7ed558ccd;
- h ^= h >> 33;
- h *= 0xc4ceb9fe1a85ec53;
- h ^= h >> 33;
- return h;
-#elif 1
- uint64_t x = key;
- x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
- x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
- x = x ^ (x >> 31);
- return x;
-#endif
- }
-#endif
-
-#if EMH_WYHASH_HASH
- //#define WYHASH_CONDOM 1
- static inline uint64_t wymix(uint64_t A, uint64_t B)
- {
-#if defined(__SIZEOF_INT128__)
- __uint128_t r = A; r *= B;
-#if WYHASH_CONDOM
- A ^= (uint64_t)r; B ^= (uint64_t)(r >> 64);
-#else
- A = (uint64_t)r; B = (uint64_t)(r >> 64);
-#endif
-
-#elif defined(_MSC_VER) && defined(_M_X64)
-#if WYHASH_CONDOM
- uint64_t a, b;
- a = _umul128(A, B, &b);
- A ^= a; B ^= b;
-#else
- A = _umul128(A, B, &B);
-#endif
-#else
- uint64_t ha = A >> 32, hb = B >> 32, la = (uint32_t)A, lb = (uint32_t)B, hi, lo;
- uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, t = rl + (rm0 << 32), c = t < rl;
- lo = t + (rm1 << 32); c += lo < t; hi = rh + (rm0 >> 32) + (rm1 >> 32) + c;
-#if WYHASH_CONDOM
- A ^= lo; B ^= hi;
-#else
- A = lo; B = hi;
-#endif
-#endif
- return A ^ B;
- }
-
- //multiply and xor mix function, aka MUM
- static inline uint64_t wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v; }
- static inline uint64_t wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v; }
- static inline uint64_t wyr3(const uint8_t *p, size_t k) {
- return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1];
- }
-
- static constexpr uint64_t secret[4] = {
- 0xa0761d6478bd642full, 0xe7037ed1a0b428dbull,
- 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull};
-
- //wyhash main function https://github.com/wangyi-fudan/wyhash
- static uint64_t wyhashstr(const void *key, const size_t len)
- {
- uint64_t a = 0, b = 0, seed = secret[0];
- const uint8_t *p = (const uint8_t*)key;
- if (EMH_LIKELY(len <= 16)) {
- if (EMH_LIKELY(len >= 4)) {
- const auto half = (len >> 3) << 2;
- a = (wyr4(p) << 32U) | wyr4(p + half); p += len - 4;
- b = (wyr4(p) << 32U) | wyr4(p - half);
- } else if (len) {
- a = wyr3(p, len);
- }
- } else {
- size_t i = len;
- if (EMH_UNLIKELY(i > 48)) {
- uint64_t see1 = seed, see2 = seed;
- do {
- seed = wymix(wyr8(p + 0) ^ secret[1], wyr8(p + 8) ^ seed);
- see1 = wymix(wyr8(p + 16) ^ secret[2], wyr8(p + 24) ^ see1);
- see2 = wymix(wyr8(p + 32) ^ secret[3], wyr8(p + 40) ^ see2);
- p += 48; i -= 48;
- } while (EMH_LIKELY(i > 48));
- seed ^= see1 ^ see2;
- }
- while (i > 16) {
- seed = wymix(wyr8(p) ^ secret[1], wyr8(p + 8) ^ seed);
- i -= 16; p += 16;
- }
- a = wyr8(p + i - 16);
- b = wyr8(p + i - 8);
- }
-
- return wymix(secret[1] ^ len, wymix(a ^ secret[1], b ^ seed));
- }
-#endif
-
- template<typename UType, typename std::enable_if<std::is_integral<UType>::value, uint32_t>::type = 0>
- inline uint64_t hash_key(const UType key) const
- {
-#ifdef EMH_INT_HASH
- return hash64(key);
-#elif EMH_IDENTITY_HASH
- return (key + (key >> (sizeof(UType) * 4)));
-#elif EMH_WYHASH64
- return wyhash64(key, KC);
-#else
- return _hasher(key);
-#endif
- }
-
-template<typename UType, typename std::enable_if<std::is_same<UType, std::string>::value, uint32_t>::type = 0>
- inline uint64_t hash_key(const UType& key) const
- {
-#if EMH_WYHASH_HASH
- return wyhashstr(key.data(), key.size());
-#elif WYHASH_LITTLE_ENDIAN
- return wyhash(key.data(), key.size(), 0);
-#else
- return _hasher(key);
-#endif
- }
-
- template<typename UType, typename std::enable_if<!std::is_integral<UType>::value && !std::is_same<UType, std::string>::value, uint32_t>::type = 0>
- inline uint64_t hash_key(const UType& key) const
- {
-#ifdef EMH_INT_HASH
- return _hasher(key) * KC;
-#else
- return _hasher(key);
-#endif
- }
-
-private:
- value_type*_pairs;
- Index* _index;
-
- HashT _hasher;
- EqT _eq;
- uint32_t _mlf;
- size_type _mask;
- size_type _num_buckets;
- size_type _num_filled;
- size_type _last;
-};
-} // namespace emhash
diff --git a/deps/emhash/include/emhash/hash_table8.hpp b/deps/emhash/include/emhash/hash_table8.hpp
deleted file mode 100644
index b14d64b..0000000
--- a/deps/emhash/include/emhash/hash_table8.hpp
+++ /dev/null
@@ -1,1805 +0,0 @@
-// emhash8::HashMap for C++14/17
-// version 1.6.5
-// https://github.com/ktprime/emhash/blob/master/hash_table8.hpp
-//
-// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
-// SPDX-License-Identifier: MIT
-// Copyright (c) 2021-2023 Huang Yuanbing & bailuzhou AT 163.com
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in all
-// copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE
-
-#pragma once
-
-#include <cstring>
-#include <string>
-#include <cstdlib>
-#include <type_traits>
-#include <cassert>
-#include <utility>
-#include <cstdint>
-#include <functional>
-#include <iterator>
-#include <algorithm>
-
-#undef EMH_NEW
-#undef EMH_EMPTY
-
-// likely/unlikely
-#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
-# define EMH_LIKELY(condition) __builtin_expect(condition, 1)
-# define EMH_UNLIKELY(condition) __builtin_expect(condition, 0)
-#else
-# define EMH_LIKELY(condition) condition
-# define EMH_UNLIKELY(condition) condition
-#endif
-
-#define EMH_EMPTY(n) (0 > (int)(_index[n].next))
-#define EMH_EQHASH(n, key_hash) (((size_type)(key_hash) & ~_mask) == (_index[n].slot & ~_mask))
-//#define EMH_EQHASH(n, key_hash) ((size_type)(key_hash - _index[n].slot) & ~_mask) == 0
-#define EMH_NEW(key, val, bucket, key_hash) \
- new(_pairs + _num_filled) value_type(key, val); \
- _etail = bucket; \
- _index[bucket] = {bucket, _num_filled++ | ((size_type)(key_hash) & ~_mask)}
-
-namespace emhash8 {
-
-#ifndef EMH_DEFAULT_LOAD_FACTOR
- constexpr static float EMH_DEFAULT_LOAD_FACTOR = 0.80f;
- constexpr static float EMH_MIN_LOAD_FACTOR = 0.25f; //< 0.5
-#endif
-#if EMH_CACHE_LINE_SIZE < 32
- constexpr static uint32_t EMH_CACHE_LINE_SIZE = 64;
-#endif
-
-template <typename KeyT, typename ValueT, typename HashT = std::hash<KeyT>, typename EqT = std::equal_to<KeyT>>
-class HashMap
-{
-public:
- using htype = HashMap<KeyT, ValueT, HashT, EqT>;
- using value_type = std::pair<KeyT, ValueT>;
- using key_type = KeyT;
- using mapped_type = ValueT;
-
-#ifdef EMH_SMALL_TYPE
- using size_type = uint16_t;
-#elif EMH_SIZE_TYPE == 0
- using size_type = uint32_t;
-#else
- using size_type = size_t;
-#endif
-
- using hasher = HashT;
- using key_equal = EqT;
-
- constexpr static size_type INACTIVE = 0-1u;
- //constexpr uint32_t END = 0-0x1u;
- constexpr static size_type EAD = 2;
-
- struct Index
- {
- size_type next;
- size_type slot;
- };
-
- class const_iterator;
- class iterator
- {
- public:
- using iterator_category = std::bidirectional_iterator_tag;
- using difference_type = std::ptrdiff_t;
- using value_type = typename htype::value_type;
- using pointer = value_type*;
- using const_pointer = const value_type* ;
- using reference = value_type&;
- using const_reference = const value_type&;
-
- iterator() : kv_(nullptr) {}
- iterator(const_iterator& cit) {
- kv_ = cit.kv_;
- }
-
- iterator(const htype* hash_map, size_type bucket) {
- kv_ = hash_map->_pairs + (int)bucket;
- }
-
- iterator& operator++()
- {
- kv_ ++;
- return *this;
- }
-
- iterator operator++(int)
- {
- auto cur = *this; kv_ ++;
- return cur;
- }
-
- iterator& operator--()
- {
- kv_ --;
- return *this;
- }
-
- iterator operator--(int)
- {
- auto cur = *this; kv_ --;
- return cur;
- }
-
- reference operator*() const { return *kv_; }
- pointer operator->() const { return kv_; }
-
- bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; }
- bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; }
-
- public:
- value_type* kv_;
- };
-
- class const_iterator
- {
- public:
- using iterator_category = std::bidirectional_iterator_tag;
- using value_type = typename htype::value_type;
- using difference_type = std::ptrdiff_t;
- using pointer = value_type*;
- using const_pointer = const value_type*;
- using reference = value_type&;
- using const_reference = const value_type&;
-
- const_iterator(const iterator& it) {
- kv_ = it.kv_;
- }
-
- const_iterator (const htype* hash_map, size_type bucket) {
- kv_ = hash_map->_pairs + (int)bucket;
- }
-
- const_iterator& operator++()
- {
- kv_ ++;
- return *this;
- }
-
- const_iterator operator++(int)
- {
- auto cur = *this; kv_ ++;
- return cur;
- }
-
- const_iterator& operator--()
- {
- kv_ --;
- return *this;
- }
-
- const_iterator operator--(int)
- {
- auto cur = *this; kv_ --;
- return cur;
- }
-
- const_reference operator*() const { return *kv_; }
- const_pointer operator->() const { return kv_; }
-
- bool operator == (const iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const iterator& rhs) const { return kv_ != rhs.kv_; }
- bool operator == (const const_iterator& rhs) const { return kv_ == rhs.kv_; }
- bool operator != (const const_iterator& rhs) const { return kv_ != rhs.kv_; }
- public:
- const value_type* kv_;
- };
-
- void init(size_type bucket, float mlf = EMH_DEFAULT_LOAD_FACTOR)
- {
- _pairs = nullptr;
- _index = nullptr;
- _mask = _num_buckets = 0;
- _num_filled = 0;
- _mlf = (uint32_t)((1 << 27) / EMH_DEFAULT_LOAD_FACTOR);
- max_load_factor(mlf);
- rehash(bucket);
- }
-
- HashMap(size_type bucket = 2, float mlf = EMH_DEFAULT_LOAD_FACTOR)
- {
- init(bucket, mlf);
- }
-
- HashMap(const HashMap& rhs)
- {
- if (rhs.load_factor() > EMH_MIN_LOAD_FACTOR) {
- _pairs = alloc_bucket((size_type)(rhs._num_buckets * rhs.max_load_factor()) + 4);
- _index = alloc_index(rhs._num_buckets);
- clone(rhs);
- } else {
- init(rhs._num_filled + 2, EMH_DEFAULT_LOAD_FACTOR);
- for (auto it = rhs.begin(); it != rhs.end(); ++it)
- insert_unique(it->first, it->second);
- }
- }
-
- HashMap(HashMap&& rhs) noexcept
- {
- init(0);
- *this = std::move(rhs);
- }
-
- HashMap(std::initializer_list<value_type> ilist)
- {
- init((size_type)ilist.size());
- for (auto it = ilist.begin(); it != ilist.end(); ++it)
- do_insert(*it);
- }
-
- template<class InputIt>
- HashMap(InputIt first, InputIt last, size_type bucket_count=4)
- {
- init(std::distance(first, last) + bucket_count);
- for (; first != last; ++first)
- emplace(*first);
- }
-
- HashMap& operator=(const HashMap& rhs)
- {
- if (this == &rhs)
- return *this;
-
- if (rhs.load_factor() < EMH_MIN_LOAD_FACTOR) {
- clear(); free(_pairs); _pairs = nullptr;
- rehash(rhs._num_filled + 2);
- for (auto it = rhs.begin(); it != rhs.end(); ++it)
- insert_unique(it->first, it->second);
- return *this;
- }
-
- clearkv();
-
- if (_num_buckets != rhs._num_buckets) {
- free(_pairs); free(_index);
- _index = alloc_index(rhs._num_buckets);
- _pairs = alloc_bucket((size_type)(rhs._num_buckets * rhs.max_load_factor()) + 4);
- }
-
- clone(rhs);
- return *this;
- }
-
- HashMap& operator=(HashMap&& rhs) noexcept
- {
- if (this != &rhs) {
- swap(rhs);
- rhs.clear();
- }
- return *this;
- }
-
- template<typename Con>
- bool operator == (const Con& rhs) const
- {
- if (size() != rhs.size())
- return false;
-
- for (auto it = begin(), last = end(); it != last; ++it) {
- auto oi = rhs.find(it->first);
- if (oi == rhs.end() || it->second != oi->second)
- return false;
- }
- return true;
- }
-
- template<typename Con>
- bool operator != (const Con& rhs) const { return !(*this == rhs); }
-
- ~HashMap() noexcept
- {
- clearkv();
- free(_pairs);
- free(_index);
- _index = nullptr;
- _pairs = nullptr;
- }
-
- void clone(const HashMap& rhs)
- {
- _hasher = rhs._hasher;
-// _eq = rhs._eq;
- _num_buckets = rhs._num_buckets;
- _num_filled = rhs._num_filled;
- _mlf = rhs._mlf;
- _last = rhs._last;
- _mask = rhs._mask;
-#if EMH_HIGH_LOAD
- _ehead = rhs._ehead;
-#endif
- _etail = rhs._etail;
-
- auto opairs = rhs._pairs;
- memcpy((char*)_index, (char*)rhs._index, (_num_buckets + EAD) * sizeof(Index));
-
- if (is_copy_trivially()) {
- memcpy((char*)_pairs, (char*)opairs, _num_filled * sizeof(value_type));
- } else {
- for (size_type slot = 0; slot < _num_filled; slot++)
- new(_pairs + slot) value_type(opairs[slot]);
- }
- }
-
- void swap(HashMap& rhs)
- {
- // std::swap(_eq, rhs._eq);
- std::swap(_hasher, rhs._hasher);
- std::swap(_pairs, rhs._pairs);
- std::swap(_index, rhs._index);
- std::swap(_num_buckets, rhs._num_buckets);
- std::swap(_num_filled, rhs._num_filled);
- std::swap(_mask, rhs._mask);
- std::swap(_mlf, rhs._mlf);
- std::swap(_last, rhs._last);
-#if EMH_HIGH_LOAD
- std::swap(_ehead, rhs._ehead);
-#endif
- std::swap(_etail, rhs._etail);
- }
-
- // -------------------------------------------------------------
- iterator first() const { return {this, 0}; }
- iterator last() const { return {this, _num_filled - 1}; }
-
- value_type& front() { return _pairs[0]; }
- const value_type& front() const { return _pairs[0]; }
- value_type& back() { return _pairs[_num_filled - 1]; }
- const value_type& back() const { return _pairs[_num_filled - 1]; }
-
- void pop_front() { erase(begin()); } //TODO. only erase first without move last
- void pop_back() { erase(last()); }
-
- iterator begin() { return first(); }
- const_iterator cbegin() const { return first(); }
- const_iterator begin() const { return first(); }
-
- iterator end() { return {this, _num_filled}; }
- const_iterator cend() const { return {this, _num_filled}; }
- const_iterator end() const { return cend(); }
-
- const value_type* values() const { return _pairs; }
- const Index* index() const { return _index; }
-
- size_type size() const { return _num_filled; }
- bool empty() const { return _num_filled == 0; }
- size_type bucket_count() const { return _num_buckets; }
-
- /// Returns average number of elements per bucket.
- float load_factor() const { return static_cast<float>(_num_filled) / (_mask + 1); }
-
- HashT& hash_function() const { return _hasher; }
- EqT& key_eq() const { return _eq; }
-
- void max_load_factor(float mlf)
- {
- if (mlf < 0.992 && mlf > EMH_MIN_LOAD_FACTOR) {
- _mlf = (uint32_t)((1 << 27) / mlf);
- if (_num_buckets > 0) rehash(_num_buckets);
- }
- }
-
- constexpr float max_load_factor() const { return (1 << 27) / (float)_mlf; }
- constexpr size_type max_size() const { return (1ull << (sizeof(size_type) * 8 - 1)); }
- constexpr size_type max_bucket_count() const { return max_size(); }
-
-#if EMH_STATIS
- //Returns the bucket number where the element with key k is located.
- size_type bucket(const KeyT& key) const
- {
- const auto bucket = hash_bucket(key);
- const auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- return 0;
- else if (bucket == next_bucket)
- return bucket + 1;
-
- return hash_main(bucket) + 1;
- }
-
- //Returns the number of elements in bucket n.
- size_type bucket_size(const size_type bucket) const
- {
- auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- return 0;
-
- next_bucket = hash_main(bucket);
- size_type ibucket_size = 1;
-
- //iterator each item in current main bucket
- while (true) {
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == next_bucket) {
- break;
- }
- ibucket_size ++;
- next_bucket = nbucket;
- }
- return ibucket_size;
- }
-
- size_type get_main_bucket(const size_type bucket) const
- {
- auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- return INACTIVE;
-
- return hash_main(bucket);
- }
-
- size_type get_diss(size_type bucket, size_type next_bucket, const size_type slots) const
- {
- auto pbucket = reinterpret_cast<uint64_t>(&_pairs[bucket]);
- auto pnext = reinterpret_cast<uint64_t>(&_pairs[next_bucket]);
- if (pbucket / EMH_CACHE_LINE_SIZE == pnext / EMH_CACHE_LINE_SIZE)
- return 0;
- size_type diff = pbucket > pnext ? (pbucket - pnext) : (pnext - pbucket);
- if (diff / EMH_CACHE_LINE_SIZE < slots - 1)
- return diff / EMH_CACHE_LINE_SIZE + 1;
- return slots - 1;
- }
-
- int get_bucket_info(const size_type bucket, size_type steps[], const size_type slots) const
- {
- auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- return -1;
-
- const auto main_bucket = hash_main(bucket);
- if (next_bucket == main_bucket)
- return 1;
- else if (main_bucket != bucket)
- return 0;
-
- steps[get_diss(bucket, next_bucket, slots)] ++;
- size_type ibucket_size = 2;
- //find a empty and linked it to tail
- while (true) {
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == next_bucket)
- break;
-
- steps[get_diss(nbucket, next_bucket, slots)] ++;
- ibucket_size ++;
- next_bucket = nbucket;
- }
- return (int)ibucket_size;
- }
-
- void dump_statics() const
- {
- const size_type slots = 128;
- size_type buckets[slots + 1] = {0};
- size_type steps[slots + 1] = {0};
- for (size_type bucket = 0; bucket < _num_buckets; ++bucket) {
- auto bsize = get_bucket_info(bucket, steps, slots);
- if (bsize > 0)
- buckets[bsize] ++;
- }
-
- size_type sumb = 0, collision = 0, sumc = 0, finds = 0, sumn = 0;
- puts("============== buckets size ration =========");
- for (size_type i = 0; i < sizeof(buckets) / sizeof(buckets[0]); i++) {
- const auto bucketsi = buckets[i];
- if (bucketsi == 0)
- continue;
- sumb += bucketsi;
- sumn += bucketsi * i;
- collision += bucketsi * (i - 1);
- finds += bucketsi * i * (i + 1) / 2;
- printf(" %2u %8u %2.2lf| %.2lf\n", i, bucketsi, bucketsi * 100.0 * i / _num_filled, sumn * 100.0 / _num_filled);
- }
-
- puts("========== collision miss ration ===========");
- for (size_type i = 0; i < sizeof(steps) / sizeof(steps[0]); i++) {
- sumc += steps[i];
- if (steps[i] <= 2)
- continue;
- printf(" %2u %8u %.2lf %.2lf\n", i, steps[i], steps[i] * 100.0 / collision, sumc * 100.0 / collision);
- }
-
- if (sumb == 0) return;
- printf(" _num_filled/bucket_size/packed collision/cache_miss/hit_find = %u/%.2lf/%zd/ %.2lf%%/%.2lf%%/%.2lf\n",
- _num_filled, _num_filled * 1.0 / sumb, sizeof(value_type), (collision * 100.0 / _num_filled), (collision - steps[0]) * 100.0 / _num_filled, finds * 1.0 / _num_filled);
- assert(sumn == _num_filled);
- assert(sumc == collision);
- puts("============== buckets size end =============");
- }
-#endif
-
- void pack_zero(ValueT zero)
- {
- _pairs[_num_filled] = {KeyT(), zero};
- }
-
- // ------------------------------------------------------------
- template<typename K=KeyT>
- iterator find(const K& key) noexcept
- {
- return {this, find_filled_slot(key)};
- }
-
- template<typename K=KeyT>
- const_iterator find(const K& key) const noexcept
- {
- return {this, find_filled_slot(key)};
- }
-
- template<typename K=KeyT>
- ValueT& at(const K& key)
- {
- const auto slot = find_filled_slot(key);
- //throw
- return _pairs[slot].second;
- }
-
- template<typename K=KeyT>
- const ValueT& at(const K& key) const
- {
- const auto slot = find_filled_slot(key);
- //throw
- return _pairs[slot].second;
- }
-
- const ValueT& index(const uint32_t index) const
- {
- return _pairs[index].second;
- }
-
- ValueT& index(const uint32_t index)
- {
- return _pairs[index].second;
- }
-
- template<typename K=KeyT>
- bool contains(const K& key) const noexcept
- {
- return find_filled_slot(key) != _num_filled;
- }
-
- template<typename K=KeyT>
- size_type count(const K& key) const noexcept
- {
- return find_filled_slot(key) == _num_filled ? 0 : 1;
- //return find_sorted_bucket(key) == END ? 0 : 1;
- //return find_hash_bucket(key) == END ? 0 : 1;
- }
-
- template<typename K=KeyT>
- std::pair<iterator, iterator> equal_range(const K& key)
- {
- const auto found = find(key);
- if (found.second == _num_filled)
- return { found, found };
- else
- return { found, std::next(found) };
- }
-
- void merge(HashMap& rhs)
- {
- if (empty()) {
- *this = std::move(rhs);
- return;
- }
-
- for (auto rit = rhs.begin(); rit != rhs.end(); ) {
- auto fit = find(rit->first);
- if (fit == end()) {
- insert_unique(rit->first, std::move(rit->second));
- rit = rhs.erase(rit);
- } else {
- ++rit;
- }
- }
- }
-
- /// Returns the matching ValueT or nullptr if k isn't found.
- bool try_get(const KeyT& key, ValueT& val) const noexcept
- {
- const auto slot = find_filled_slot(key);
- const auto found = slot != _num_filled;
- if (found) {
- val = _pairs[slot].second;
- }
- return found;
- }
-
- /// Returns the matching ValueT or nullptr if k isn't found.
- ValueT* try_get(const KeyT& key) noexcept
- {
- const auto slot = find_filled_slot(key);
- return slot != _num_filled ? &_pairs[slot].second : nullptr;
- }
-
- /// Const version of the above
- ValueT* try_get(const KeyT& key) const noexcept
- {
- const auto slot = find_filled_slot(key);
- return slot != _num_filled ? &_pairs[slot].second : nullptr;
- }
-
- /// set value if key exist
- bool try_set(const KeyT& key, const ValueT& val) noexcept
- {
- const auto slot = find_filled_slot(key);
- if (slot == _num_filled)
- return false;
-
- _pairs[slot].second = val;
- return true;
- }
-
- /// set value if key exist
- bool try_set(const KeyT& key, ValueT&& val) noexcept
- {
- const auto slot = find_filled_slot(key);
- if (slot == _num_filled)
- return false;
-
- _pairs[slot].second = std::move(val);
- return true;
- }
-
- /// Convenience function.
- ValueT get_or_return_default(const KeyT& key) const noexcept
- {
- const auto slot = find_filled_slot(key);
- return slot == _num_filled ? ValueT() : _pairs[slot].second;
- }
-
- // -----------------------------------------------------
- std::pair<iterator, bool> do_insert(const value_type& value) noexcept
- {
- const auto key_hash = hash_key(value.first);
- const auto bucket = find_or_allocate(value.first, key_hash);
- const auto bempty = EMH_EMPTY(bucket);
- if (bempty) {
- EMH_NEW(value.first, value.second, bucket, key_hash);
- }
-
- const auto slot = _index[bucket].slot & _mask;
- return { {this, slot}, bempty };
- }
-
- std::pair<iterator, bool> do_insert(value_type&& value) noexcept
- {
- const auto key_hash = hash_key(value.first);
- const auto bucket = find_or_allocate(value.first, key_hash);
- const auto bempty = EMH_EMPTY(bucket);
- if (bempty) {
- EMH_NEW(std::move(value.first), std::move(value.second), bucket, key_hash);
- }
-
- const auto slot = _index[bucket].slot & _mask;
- return { {this, slot}, bempty };
- }
-
- template<typename K, typename V>
- std::pair<iterator, bool> do_insert(K&& key, V&& val) noexcept
- {
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- const auto bempty = EMH_EMPTY(bucket);
- if (bempty) {
- EMH_NEW(std::forward<K>(key), std::forward<V>(val), bucket, key_hash);
- }
-
- const auto slot = _index[bucket].slot & _mask;
- return { {this, slot}, bempty };
- }
-
- template<typename K, typename V>
- std::pair<iterator, bool> do_assign(K&& key, V&& val) noexcept
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- const auto bempty = EMH_EMPTY(bucket);
- if (bempty) {
- EMH_NEW(std::forward<K>(key), std::forward<V>(val), bucket, key_hash);
- } else {
- _pairs[_index[bucket].slot & _mask].second = std::move(val);
- }
-
- const auto slot = _index[bucket].slot & _mask;
- return { {this, slot}, bempty };
- }
-
- std::pair<iterator, bool> insert(const value_type& p)
- {
- check_expand_need();
- return do_insert(p);
- }
-
- std::pair<iterator, bool> insert(value_type && p)
- {
- check_expand_need();
- return do_insert(std::move(p));
- }
-
- void insert(std::initializer_list<value_type> ilist)
- {
- reserve(ilist.size() + _num_filled, false);
- for (auto it = ilist.begin(); it != ilist.end(); ++it)
- do_insert(*it);
- }
-
- template <typename Iter>
- void insert(Iter first, Iter last)
- {
- reserve(std::distance(first, last) + _num_filled, false);
- for (; first != last; ++first)
- do_insert(first->first, first->second);
- }
-
-#if 0
- template <typename Iter>
- void insert_unique(Iter begin, Iter end)
- {
- reserve(std::distance(begin, end) + _num_filled, false);
- for (; begin != end; ++begin) {
- insert_unique(*begin);
- }
- }
-#endif
-
- template<typename K, typename V>
- size_type insert_unique(K&& key, V&& val)
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- auto bucket = find_unique_bucket(key_hash);
- EMH_NEW(std::forward<K>(key), std::forward<V>(val), bucket, key_hash);
- return bucket;
- }
-
- size_type insert_unique(value_type&& value)
- {
- return insert_unique(std::move(value.first), std::move(value.second));
- }
-
- size_type insert_unique(const value_type& value)
- {
- return insert_unique(value.first, value.second);
- }
-
- template <class... Args>
- std::pair<iterator, bool> emplace(Args&&... args) noexcept
- {
- check_expand_need();
- return do_insert(std::forward<Args>(args)...);
- }
-
- //no any optimize for position
- template <class... Args>
- iterator emplace_hint(const_iterator hint, Args&&... args)
- {
- (void)hint;
- check_expand_need();
- return do_insert(std::forward<Args>(args)...).first;
- }
-
- template<class... Args>
- std::pair<iterator, bool> try_emplace(const KeyT& k, Args&&... args)
- {
- check_expand_need();
- return do_insert(k, std::forward<Args>(args)...);
- }
-
- template<class... Args>
- std::pair<iterator, bool> try_emplace(KeyT&& k, Args&&... args)
- {
- check_expand_need();
- return do_insert(std::move(k), std::forward<Args>(args)...);
- }
-
- template <class... Args>
- size_type emplace_unique(Args&&... args)
- {
- return insert_unique(std::forward<Args>(args)...);
- }
-
- std::pair<iterator, bool> insert_or_assign(const KeyT& key, ValueT&& val) { return do_assign(key, std::forward<ValueT>(val)); }
- std::pair<iterator, bool> insert_or_assign(KeyT&& key, ValueT&& val) { return do_assign(std::move(key), std::forward<ValueT>(val)); }
-
- /// Return the old value or ValueT() if it didn't exist.
- ValueT set_get(const KeyT& key, const ValueT& val)
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- if (EMH_EMPTY(bucket)) {
- EMH_NEW(key, val, bucket, key_hash);
- return ValueT();
- } else {
- const auto slot = _index[bucket].slot & _mask;
- ValueT old_value(val);
- std::swap(_pairs[slot].second, old_value);
- return old_value;
- }
- }
-
- /// Like std::map<KeyT, ValueT>::operator[].
- ValueT& operator[](const KeyT& key) noexcept
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- if (EMH_EMPTY(bucket)) {
- /* Check if inserting a value rather than overwriting an old entry */
- EMH_NEW(key, std::move(ValueT()), bucket, key_hash);
- }
-
- const auto slot = _index[bucket].slot & _mask;
- return _pairs[slot].second;
- }
-
- ValueT& operator[](KeyT&& key) noexcept
- {
- check_expand_need();
- const auto key_hash = hash_key(key);
- const auto bucket = find_or_allocate(key, key_hash);
- if (EMH_EMPTY(bucket)) {
- EMH_NEW(std::move(key), std::move(ValueT()), bucket, key_hash);
- }
-
- const auto slot = _index[bucket].slot & _mask;
- return _pairs[slot].second;
- }
-
- /// Erase an element from the hash table.
- /// return 0 if element was not found
- size_type erase(const KeyT& key) noexcept
- {
- const auto key_hash = hash_key(key);
- const auto sbucket = find_filled_bucket(key, key_hash);
- if (sbucket == INACTIVE)
- return 0;
-
- const auto main_bucket = key_hash & _mask;
- erase_slot(sbucket, (size_type)main_bucket);
- return 1;
- }
-
- //iterator erase(const_iterator begin_it, const_iterator end_it)
- iterator erase(const const_iterator& cit) noexcept
- {
- const auto slot = (size_type)(cit.kv_ - _pairs);
- size_type main_bucket;
- const auto sbucket = find_slot_bucket(slot, main_bucket); //TODO
- erase_slot(sbucket, main_bucket);
- return {this, slot};
- }
-
- //only last >= first
- iterator erase(const_iterator first, const_iterator last) noexcept
- {
- auto esize = long(last.kv_ - first.kv_);
- auto tsize = long((_pairs + _num_filled) - last.kv_); //last to tail size
- auto next = first;
- while (tsize -- > 0) {
- if (esize-- <= 0)
- break;
- next = ++erase(next);
- }
-
- //fast erase from last
- next = this->last();
- while (esize -- > 0)
- next = --erase(next);
-
- return {this, size_type(next.kv_ - _pairs)};
- }
-
- template<typename Pred>
- size_type erase_if(Pred pred)
- {
- auto old_size = size();
- for (auto it = begin(); it != end();) {
- if (pred(*it))
- it = erase(it);
- else
- ++it;
- }
- return old_size - size();
- }
-
- static constexpr bool is_triviall_destructable()
- {
-#if __cplusplus >= 201402L || _MSC_VER > 1600
- return !(std::is_trivially_destructible<KeyT>::value && std::is_trivially_destructible<ValueT>::value);
-#else
- return !(std::is_pod<KeyT>::value && std::is_pod<ValueT>::value);
-#endif
- }
-
- static constexpr bool is_copy_trivially()
- {
-#if __cplusplus >= 201103L || _MSC_VER > 1600
- return (std::is_trivially_copyable<KeyT>::value && std::is_trivially_copyable<ValueT>::value);
-#else
- return (std::is_pod<KeyT>::value && std::is_pod<ValueT>::value);
-#endif
- }
-
- void clearkv()
- {
- if (is_triviall_destructable()) {
- while (_num_filled --)
- _pairs[_num_filled].~value_type();
- }
- }
-
- /// Remove all elements, keeping full capacity.
- void clear() noexcept
- {
- clearkv();
-
- if (_num_filled > 0)
- memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets);
-
- _last = _num_filled = 0;
- _etail = INACTIVE;
-
-#if EMH_HIGH_LOAD
- _ehead = 0;
-#endif
- }
-
- void shrink_to_fit(const float min_factor = EMH_DEFAULT_LOAD_FACTOR / 4)
- {
- if (load_factor() < min_factor && bucket_count() > 10) //safe guard
- rehash(_num_filled + 1);
- }
-
-#if EMH_HIGH_LOAD
- #define EMH_PREVET(i, n) i[n].slot
- void set_empty()
- {
- auto prev = 0;
- for (int32_t bucket = 1; bucket < _num_buckets; ++bucket) {
- if (EMH_EMPTY(bucket)) {
- if (prev != 0) {
- EMH_PREVET(_index, bucket) = prev;
- _index[_prev].next = -bucket;
- }
- else
- _ehead = bucket;
- prev = bucket;
- }
- }
-
- EMH_PREVET(_index, _ehead) = prev;
- _index[_prev].next = 0-_ehead;
- _ehead = 0-_index[_ehead].next;
- }
-
- void clear_empty()
- {
- auto prev = EMH_PREVET(_index, _ehead);
- while (prev != _ehead) {
- _index[_prev].next = INACTIVE;
- prev = EMH_PREVET(_index, prev);
- }
- _index[_ehead].next = INACTIVE;
- _ehead = 0;
- }
-
- //prev-ehead->next
- size_type pop_empty(const size_type bucket)
- {
- const auto prev_bucket = EMH_PREVET(_index, bucket);
- const int next_bucket = 0-_index[bucket].next;
-
- EMH_PREVET(_index, next_bucket) = prev_bucket;
- _index[prev_bucket].next = -next_bucket;
-
- _ehead = next_bucket;
- return bucket;
- }
-
- //ehead->bucket->next
- void push_empty(const int32_t bucket)
- {
- const int next_bucket = 0-_index[_ehead].next;
- assert(next_bucket > 0);
-
- EMH_PREVET(_index, bucket) = _ehead;
- _index[bucket].next = -next_bucket;
-
- EMH_PREVET(_index, next_bucket) = bucket;
- _index[_ehead].next = -bucket;
- // _ehead = bucket;
- }
-#endif
-
- /// Make room for this many elements
- bool reserve(uint64_t num_elems, bool force)
- {
- (void)force;
-#if EMH_HIGH_LOAD == 0
- const auto required_buckets = num_elems * _mlf >> 27;
- if (EMH_LIKELY(required_buckets < _mask)) // && !force
- return false;
-
-#elif EMH_HIGH_LOAD
- const auto required_buckets = num_elems + num_elems * 1 / 9;
- if (EMH_LIKELY(required_buckets < _mask))
- return false;
-
- else if (_num_buckets < 16 && _num_filled < _num_buckets)
- return false;
-
- else if (_num_buckets > EMH_HIGH_LOAD) {
- if (_ehead == 0) {
- set_empty();
- return false;
- } else if (/*_num_filled + 100 < _num_buckets && */_index[_ehead].next != 0-_ehead) {
- return false;
- }
- }
-#endif
-#if EMH_STATIS
- if (_num_filled > EMH_STATIS) dump_statics();
-#endif
-
- //assert(required_buckets < max_size());
- rehash(required_buckets + 2);
- return true;
- }
-
- static value_type* alloc_bucket(size_type num_buckets)
- {
-#ifdef EMH_ALLOC
- auto new_pairs = aligned_alloc(32, (uint64_t)num_buckets * sizeof(value_type));
-#else
- auto new_pairs = malloc((uint64_t)num_buckets * sizeof(value_type));
-#endif
- return (value_type *)(new_pairs);
- }
-
- static Index* alloc_index(size_type num_buckets)
- {
- auto new_index = (char*)malloc((uint64_t)(EAD + num_buckets) * sizeof(Index));
- return (Index *)(new_index);
- }
-
- bool reserve(size_type required_buckets) noexcept
- {
- if (_num_filled != required_buckets)
- return reserve(required_buckets, true);
-
- _last = 0;
-#if EMH_HIGH_LOAD
- _ehead = 0;
-#endif
-
-#if EMH_SORT
- std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) {
- const auto hashl = (size_type)hash_key(l.first) & _mask, hashr = (size_type)hash_key(r.first) & _mask;
- return hashl < hashr;
- //return l.first < r.first;
- });
-#endif
-
- memset((char*)_index, INACTIVE, sizeof(_index[0]) * _num_buckets);
- for (size_type slot = 0; slot < _num_filled; slot++) {
- const auto& key = _pairs[slot].first;
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- auto& next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- _index[bucket] = {1, slot | ((size_type)(key_hash) & ~_mask)};
- else {
- _index[bucket].slot |= (size_type)(key_hash) & ~_mask;
- next_bucket ++;
- }
- }
- return true;
- }
-
- void rebuild(size_type num_buckets) noexcept
- {
- free(_index);
- auto new_pairs = (value_type*)alloc_bucket((size_type)(num_buckets * max_load_factor()) + 4);
- if (is_copy_trivially()) {
- if (_pairs)
- memcpy((char*)new_pairs, (char*)_pairs, _num_filled * sizeof(value_type));
- } else {
- for (size_type slot = 0; slot < _num_filled; slot++) {
- new(new_pairs + slot) value_type(std::move(_pairs[slot]));
- if (is_triviall_destructable())
- _pairs[slot].~value_type();
- }
- }
- free(_pairs);
- _pairs = new_pairs;
- _index = (Index*)alloc_index (num_buckets);
-
- memset((char*)_index, INACTIVE, sizeof(_index[0]) * num_buckets);
- memset((char*)(_index + num_buckets), 0, sizeof(_index[0]) * EAD);
- }
-
- void rehash(uint64_t required_buckets)
- {
- if (required_buckets < _num_filled)
- return;
-
- assert(required_buckets < max_size());
- auto num_buckets = _num_filled > (1u << 16) ? (1u << 16) : 4u;
- while (num_buckets < required_buckets) { num_buckets *= 2; }
-#if EMH_SAVE_MEM
- if (sizeof(KeyT) < sizeof(size_type) && num_buckets >= (1ul << (2 * 8)))
- num_buckets = 2ul << (sizeof(KeyT) * 8);
-#endif
-
-#if EMH_REHASH_LOG
- auto last = _last;
- size_type collision = 0;
-#endif
-
-#if EMH_HIGH_LOAD
- _ehead = 0;
-#endif
- _last = _mask / 4;
-
- _mask = num_buckets - 1;
-#if EMH_PACK_TAIL > 1
- _last = _mask;
- num_buckets += num_buckets * EMH_PACK_TAIL / 100; //add more 5-10%
-#endif
- _num_buckets = num_buckets;
-
- rebuild(num_buckets);
-
-#ifdef EMH_SORT
- std::sort(_pairs, _pairs + _num_filled, [this](const value_type & l, const value_type & r) {
- const auto hashl = hash_key(l.first), hashr = hash_key(r.first);
- auto diff = int64_t((hashl & _mask) - (hashr & _mask));
- if (diff != 0)
- return diff < 0;
- return hashl < hashr;
-// return l.first < r.first;
- });
-#endif
-
- _etail = INACTIVE;
- for (size_type slot = 0; slot < _num_filled; ++slot) {
- const auto& key = _pairs[slot].first;
- const auto key_hash = hash_key(key);
- const auto bucket = find_unique_bucket(key_hash);
- _index[bucket] = { bucket, slot | ((size_type)(key_hash) & ~_mask) };
-
-#if EMH_REHASH_LOG
- if (bucket != hash_main(bucket))
- collision ++;
-#endif
- }
-
-#if EMH_REHASH_LOG
- if (_num_filled > EMH_REHASH_LOG) {
- auto mbucket = _num_filled - collision;
- char buff[255] = {0};
- sprintf(buff, " _num_filled/aver_size/K.V/pack/collision|last = %u/%.2lf/%s.%s/%zd|%.2lf%%,%.2lf%%",
- _num_filled, double (_num_filled) / mbucket, typeid(KeyT).name(), typeid(ValueT).name(), sizeof(_pairs[0]), collision * 100.0 / _num_filled, last * 100.0 / _num_buckets);
-#ifdef EMH_LOG
- static uint32_t ihashs = 0; EMH_LOG() << "hash_nums = " << ihashs ++ << "|" <<__FUNCTION__ << "|" << buff << endl;
-#else
- puts(buff);
-#endif
- }
-#endif
- }
-
-private:
- // Can we fit another element?
- bool check_expand_need()
- {
- return reserve(_num_filled, false);
- }
-
- size_type slot_to_bucket(const size_type slot) const noexcept
- {
- size_type main_bucket;
- return find_slot_bucket(slot, main_bucket); //TODO
- }
-
- //very slow
- void erase_slot(const size_type sbucket, const size_type main_bucket) noexcept
- {
- const auto slot = _index[sbucket].slot & _mask;
- const auto ebucket = erase_bucket(sbucket, main_bucket);
- const auto last_slot = --_num_filled;
- if (EMH_LIKELY(slot != last_slot)) {
- const auto last_bucket = (_etail == INACTIVE || ebucket == _etail)
- ? slot_to_bucket(last_slot) : _etail;
-
- _pairs[slot] = std::move(_pairs[last_slot]);
- _index[last_bucket].slot = slot | (_index[last_bucket].slot & ~_mask);
- }
-
- if (is_triviall_destructable())
- _pairs[last_slot].~value_type();
-
- _etail = INACTIVE;
- _index[ebucket] = {INACTIVE, 0};
-#if EMH_HIGH_LOAD
- if (_ehead) {
- if (10 * _num_filled < 8 * _num_buckets)
- clear_empty();
- else if (ebucket)
- push_empty(ebucket);
- }
-#endif
- }
-
- size_type erase_bucket(const size_type bucket, const size_type main_bucket) noexcept
- {
- const auto next_bucket = _index[bucket].next;
- if (bucket == main_bucket) {
- if (main_bucket != next_bucket) {
- const auto nbucket = _index[next_bucket].next;
- _index[main_bucket] = {
- (nbucket == next_bucket) ? main_bucket : nbucket,
- _index[next_bucket].slot
- };
- }
- return next_bucket;
- }
-
- const auto prev_bucket = find_prev_bucket(main_bucket, bucket);
- _index[prev_bucket].next = (bucket == next_bucket) ? prev_bucket : next_bucket;
- return bucket;
- }
-
- // Find the slot with this key, or return bucket size
- size_type find_slot_bucket(const size_type slot, size_type& main_bucket) const
- {
- const auto key_hash = hash_key(_pairs[slot].first);
- const auto bucket = main_bucket = size_type(key_hash & _mask);
- if (slot == (_index[bucket].slot & _mask))
- return bucket;
-
- auto next_bucket = _index[bucket].next;
- while (true) {
- if (EMH_LIKELY(slot == (_index[next_bucket].slot & _mask)))
- return next_bucket;
- next_bucket = _index[next_bucket].next;
- }
-
- return INACTIVE;
- }
-
- // Find the slot with this key, or return bucket size
- size_type find_filled_bucket(const KeyT& key, uint64_t key_hash) const noexcept
- {
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = _index[bucket].next;
- if (EMH_UNLIKELY((int)next_bucket < 0))
- return INACTIVE;
-
- if (EMH_EQHASH(bucket, key_hash)) {
- const auto slot = _index[bucket].slot & _mask;
- if (EMH_LIKELY(_eq(key, _pairs[slot].first)))
- return bucket;
- }
- if (next_bucket == bucket)
- return INACTIVE;
-
- while (true) {
- if (EMH_EQHASH(next_bucket, key_hash)) {
- const auto slot = _index[next_bucket].slot & _mask;
- if (EMH_LIKELY(_eq(key, _pairs[slot].first)))
- return next_bucket;
- }
-
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == next_bucket)
- return INACTIVE;
- next_bucket = nbucket;
- }
-
- return INACTIVE;
- }
-
- // Find the slot with this key, or return bucket size
- template<typename K=KeyT>
- size_type find_filled_slot(const K& key) const noexcept
- {
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- return _num_filled;
-
- if (EMH_EQHASH(bucket, key_hash)) {
- const auto slot = _index[bucket].slot & _mask;
- if (EMH_LIKELY(_eq(key, _pairs[slot].first)))
- return slot;
- }
- if (next_bucket == bucket)
- return _num_filled;
-
- while (true) {
- if (EMH_EQHASH(next_bucket, key_hash)) {
- const auto slot = _index[next_bucket].slot & _mask;
- if (EMH_LIKELY(_eq(key, _pairs[slot].first)))
- return slot;
- }
-
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == next_bucket)
- return _num_filled;
- next_bucket = nbucket;
- }
-
- return _num_filled;
- }
-
-#if EMH_SORT
- size_type find_hash_bucket(const KeyT& key) const noexcept
- {
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- const auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0)
- return END;
-
- auto slot = _index[bucket].slot & _mask;
- if (_eq(key, _pairs[slot++].first))
- return slot;
- else if (next_bucket == bucket)
- return END;
-
- while (true) {
- const auto& okey = _pairs[slot++].first;
- if (_eq(key, okey))
- return slot;
-
- const auto hasho = hash_key(okey);
- if ((hasho & _mask) != bucket)
- break;
- else if (hasho > key_hash)
- break;
- else if (EMH_UNLIKELY(slot >= _num_filled))
- break;
- }
-
- return END;
- }
-
- //only for find/can not insert
- size_type find_sorted_bucket(const KeyT& key) const noexcept
- {
- const auto key_hash = hash_key(key);
- const auto bucket = size_type(key_hash & _mask);
- const auto slots = (int)(_index[bucket].next); //TODO
- if (slots < 0 /**|| key < _pairs[slot].first*/)
- return END;
-
- const auto slot = _index[bucket].slot & _mask;
- auto ormask = _index[bucket].slot & ~_mask;
- auto hmask = (size_type)(key_hash) & ~_mask;
- if ((hmask | ormask) != ormask)
- return END;
-
- if (_eq(key, _pairs[slot].first))
- return slot;
- else if (slots == 1 || key < _pairs[slot].first)
- return END;
-
-#if EMH_SORT
- if (key < _pairs[slot].first || key > _pairs[slots + slot - 1].first)
- return END;
-#endif
-
- for (size_type i = 1; i < slots; ++i) {
- const auto& okey = _pairs[slot + i].first;
- if (_eq(key, okey))
- return slot + i;
- // else if (okey > key)
- // return END;
- }
-
- return END;
- }
-#endif
-
- //kick out bucket and find empty to occpuy
- //it will break the orgin link and relnik again.
- //before: main_bucket-->prev_bucket --> bucket --> next_bucket
- //atfer : main_bucket-->prev_bucket --> (removed)--> new_bucket--> next_bucket
- size_type kickout_bucket(const size_type kmain, const size_type bucket) noexcept
- {
- const auto next_bucket = _index[bucket].next;
- const auto new_bucket = find_empty_bucket(next_bucket, 2);
- const auto prev_bucket = find_prev_bucket(kmain, bucket);
-
- const auto last = next_bucket == bucket ? new_bucket : next_bucket;
- _index[new_bucket] = {last, _index[bucket].slot};
-
- _index[prev_bucket].next = new_bucket;
- _index[bucket].next = INACTIVE;
-
- return bucket;
- }
-
- /*
- ** inserts a new key into a hash table; first, check whether key's main
- ** bucket/position is free. If not, check whether colliding node/bucket is in its main
- ** position or not: if it is not, move colliding bucket to an empty place and
- ** put new key in its main position; otherwise (colliding bucket is in its main
- ** position), new key goes to an empty position.
- */
- template<typename K=KeyT>
- size_type find_or_allocate(const K& key, uint64_t key_hash) noexcept
- {
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0) {
-#if EMH_HIGH_LOAD
- if (next_bucket != INACTIVE)
- pop_empty(bucket);
-#endif
- return bucket;
- }
-
- const auto slot = _index[bucket].slot & _mask;
- if (EMH_EQHASH(bucket, key_hash))
- if (EMH_LIKELY(_eq(key, _pairs[slot].first)))
- return bucket;
-
- //check current bucket_key is in main bucket or not
- const auto kmain = hash_bucket(_pairs[slot].first);
- if (kmain != bucket)
- return kickout_bucket(kmain, bucket);
- else if (next_bucket == bucket)
- return _index[next_bucket].next = find_empty_bucket(next_bucket, 1);
-
- uint32_t csize = 1;
- //find next linked bucket and check key
- while (true) {
- const auto eslot = _index[next_bucket].slot & _mask;
- if (EMH_EQHASH(next_bucket, key_hash)) {
- if (EMH_LIKELY(_eq(key, _pairs[eslot].first)))
- return next_bucket;
- }
-
- csize += 1;
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == next_bucket)
- break;
- next_bucket = nbucket;
- }
-
- //find a empty and link it to tail
- const auto new_bucket = find_empty_bucket(next_bucket, csize);
- return _index[next_bucket].next = new_bucket;
- }
-
- size_type find_unique_bucket(uint64_t key_hash) noexcept
- {
- const auto bucket = size_type(key_hash & _mask);
- auto next_bucket = _index[bucket].next;
- if ((int)next_bucket < 0) {
-#if EMH_HIGH_LOAD
- if (next_bucket != INACTIVE)
- pop_empty(bucket);
-#endif
- return bucket;
- }
-
- //check current bucket_key is in main bucket or not
- const auto kmain = hash_main(bucket);
- if (EMH_UNLIKELY(kmain != bucket))
- return kickout_bucket(kmain, bucket);
- else if (EMH_UNLIKELY(next_bucket != bucket))
- next_bucket = find_last_bucket(next_bucket);
-
- return _index[next_bucket].next = find_empty_bucket(next_bucket, 2);
- }
-
- /***
- Different probing techniques usually provide a trade-off between memory locality and avoidance of clustering.
- Since Robin Hood hashing is relatively resilient to clustering (both primary and secondary), linear probing is the most cache friendly alternativeis typically used.
-
- It's the core algorithm of this hash map with highly optimization/benchmark.
- normaly linear probing is inefficient with high load factor, it use a new 3-way linear
- probing strategy to search empty slot. from benchmark even the load factor > 0.9, it's more 2-3 timer fast than
- one-way search strategy.
-
- 1. linear or quadratic probing a few cache line for less cache miss from input slot "bucket_from".
- 2. the first search slot from member variant "_last", init with 0
- 3. the second search slot from calculated pos "(_num_filled + _last) & _mask", it's like a rand value
- */
- // key is not in this mavalue. Find a place to put it.
- size_type find_empty_bucket(const size_type bucket_from, uint32_t csize) noexcept
- {
- (void)csize;
-#if EMH_HIGH_LOAD
- if (_ehead)
- return pop_empty(_ehead);
-#endif
-
- auto bucket = bucket_from;
- if (EMH_EMPTY(++bucket) || EMH_EMPTY(++bucket))
- return bucket;
-
-#ifdef EMH_QUADRATIC
- constexpr size_type linear_probe_length = 2 * EMH_CACHE_LINE_SIZE / sizeof(Index);//16
- for (size_type offset = csize + 2, step = 4; offset <= linear_probe_length; ) {
- bucket = (bucket_from + offset) & _mask;
- if (EMH_EMPTY(bucket) || EMH_EMPTY(++bucket))
- return bucket;
- offset += step; //7/8. 12. 16
- }
-#else
- constexpr size_type quadratic_probe_length = 6u;
- for (size_type offset = 4u, step = 3u; step < quadratic_probe_length; ) {
- bucket = (bucket_from + offset) & _mask;
- if (EMH_EMPTY(bucket) || EMH_EMPTY(++bucket))
- return bucket;
- offset += step++;
- }
-#endif
-
-#if EMH_PREFETCH
- __builtin_prefetch(static_cast<const void*>(_index + _last + 1), 0, EMH_PREFETCH);
-#endif
-
- for (;;) {
-#if EMH_PACK_TAIL
- //find empty bucket and skip next
- if (EMH_EMPTY(_last++))// || EMH_EMPTY(_last++))
- return _last++ - 1;
-
- if (EMH_UNLIKELY(_last >= _num_buckets))
- _last = 0;
-
- auto medium = (_mask / 4 + _last++) & _mask;
- if (EMH_EMPTY(medium))
- return medium;
-#else
- if (EMH_EMPTY(++_last))// || EMH_EMPTY(++_last))
- return _last;
-
- _last &= _mask;
- auto medium = (_num_buckets / 2 + _last) & _mask;
- if (EMH_EMPTY(medium))// || EMH_EMPTY(++medium))
- return medium;
-#endif
- }
-
- return 0;
- }
-
- size_type find_last_bucket(size_type main_bucket) const
- {
- auto next_bucket = _index[main_bucket].next;
- if (next_bucket == main_bucket)
- return main_bucket;
-
- while (true) {
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == next_bucket)
- return next_bucket;
- next_bucket = nbucket;
- }
- }
-
- size_type find_prev_bucket(const size_type main_bucket, const size_type bucket) const
- {
- auto next_bucket = _index[main_bucket].next;
- if (next_bucket == bucket)
- return main_bucket;
-
- while (true) {
- const auto nbucket = _index[next_bucket].next;
- if (nbucket == bucket)
- return next_bucket;
- next_bucket = nbucket;
- }
- }
-
- size_type hash_bucket(const KeyT& key) const noexcept
- {
- return (size_type)hash_key(key) & _mask;
- }
-
- size_type hash_main(const size_type bucket) const noexcept
- {
- const auto slot = _index[bucket].slot & _mask;
- return (size_type)hash_key(_pairs[slot].first) & _mask;
- }
-
-#if EMH_INT_HASH
- static constexpr uint64_t KC = UINT64_C(11400714819323198485);
- static uint64_t hash64(uint64_t key)
- {
-#if __SIZEOF_INT128__ && EMH_INT_HASH == 1
- __uint128_t r = key; r *= KC;
- return (uint64_t)(r >> 64) + (uint64_t)r;
-#elif EMH_INT_HASH == 2
- //MurmurHash3Mixer
- uint64_t h = key;
- h ^= h >> 33;
- h *= 0xff51afd7ed558ccd;
- h ^= h >> 33;
- h *= 0xc4ceb9fe1a85ec53;
- h ^= h >> 33;
- return h;
-#elif _WIN64 && EMH_INT_HASH == 1
- uint64_t high;
- return _umul128(key, KC, &high) + high;
-#elif EMH_INT_HASH == 3
- auto ror = (key >> 32) | (key << 32);
- auto low = key * 0xA24BAED4963EE407ull;
- auto high = ror * 0x9FB21C651E98DF25ull;
- auto mix = low + high;
- return mix;
-#elif EMH_INT_HASH == 1
- uint64_t r = key * UINT64_C(0xca4bcaa75ec3f625);
- return (r >> 32) + r;
-#elif EMH_WYHASH64
- return wyhash64(key, KC);
-#else
- uint64_t x = key;
- x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
- x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
- x = x ^ (x >> 31);
- return x;
-#endif
- }
-#endif
-
-#if EMH_WYHASH_HASH
- //#define WYHASH_CONDOM 1
- static uint64_t wymix(uint64_t A, uint64_t B)
- {
-#if defined(__SIZEOF_INT128__)
- __uint128_t r = A; r *= B;
-#if WYHASH_CONDOM2
- A ^= (uint64_t)r; B ^= (uint64_t)(r >> 64);
-#else
- A = (uint64_t)r; B = (uint64_t)(r >> 64);
-#endif
-
-#elif defined(_MSC_VER) && defined(_M_X64)
-#if WYHASH_CONDOM2
- uint64_t a, b;
- a = _umul128(A, B, &b);
- A ^= a; B ^= b;
-#else
- A = _umul128(A, B, &B);
-#endif
-#else
- uint64_t ha = A >> 32, hb = B >> 32, la = (uint32_t)A, lb = (uint32_t)B, hi, lo;
- uint64_t rh = ha * hb, rm0 = ha * lb, rm1 = hb * la, rl = la * lb, t = rl + (rm0 << 32), c = t < rl;
- lo = t + (rm1 << 32); c += lo < t; hi = rh + (rm0 >> 32) + (rm1 >> 32) + c;
-#if WYHASH_CONDOM2
- A ^= lo; B ^= hi;
-#else
- A = lo; B = hi;
-#endif
-#endif
- return A ^ B;
- }
-
- //multiply and xor mix function, aka MUM
- static inline uint64_t wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v; }
- static inline uint64_t wyr4(const uint8_t *p) { uint32_t v; memcpy(&v, p, 4); return v; }
- static inline uint64_t wyr3(const uint8_t *p, size_t k) {
- return (((uint64_t)p[0]) << 16) | (((uint64_t)p[k >> 1]) << 8) | p[k - 1];
- }
-
- inline static const uint64_t secret[4] = {
- 0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull,
- 0x4b33a62ed433d4a3ull, 0x4d5a2da51de1aa47ull};
-public:
- //wyhash main function https://github.com/wangyi-fudan/wyhash
- static uint64_t wyhashstr(const char *key, const size_t len)
- {
- uint64_t a = 0, b = 0, seed = secret[0];
- const uint8_t *p = (const uint8_t*)key;
- if (EMH_LIKELY(len <= 16)) {
- if (EMH_LIKELY(len >= 4)) {
- const auto half = (len >> 3) << 2;
- a = (wyr4(p) << 32U) | wyr4(p + half); p += len - 4;
- b = (wyr4(p) << 32U) | wyr4(p - half);
- } else if (len) {
- a = wyr3(p, len);
- }
- } else {
- size_t i = len;
- if (EMH_UNLIKELY(i > 48)) {
- uint64_t see1 = seed, see2 = seed;
- do {
- seed = wymix(wyr8(p + 0) ^ secret[1], wyr8(p + 8) ^ seed);
- see1 = wymix(wyr8(p + 16) ^ secret[2], wyr8(p + 24) ^ see1);
- see2 = wymix(wyr8(p + 32) ^ secret[3], wyr8(p + 40) ^ see2);
- p += 48; i -= 48;
- } while (EMH_LIKELY(i > 48));
- seed ^= see1 ^ see2;
- }
- while (i > 16) {
- seed = wymix(wyr8(p) ^ secret[1], wyr8(p + 8) ^ seed);
- i -= 16; p += 16;
- }
- a = wyr8(p + i - 16);
- b = wyr8(p + i - 8);
- }
-
- return wymix(secret[1] ^ len, wymix(a ^ secret[1], b ^ seed));
- }
-#endif
-
-private:
- template<typename UType, typename std::enable_if<std::is_integral<UType>::value, uint32_t>::type = 0>
- inline uint64_t hash_key(const UType key) const
- {
-#if EMH_INT_HASH
- return hash64(key);
-#elif EMH_IDENTITY_HASH
- return key + (key >> 24);
-#else
- return _hasher(key);
-#endif
- }
-
- template<typename UType, typename std::enable_if<std::is_same<UType, std::string>::value, uint32_t>::type = 0>
- inline uint64_t hash_key(const UType& key) const
- {
-#if EMH_WYHASH_HASH
- return wyhashstr(key.data(), key.size());
-#else
- return _hasher(key);
-#endif
- }
-
- template<typename UType, typename std::enable_if<!std::is_integral<UType>::value && !std::is_same<UType, std::string>::value, uint32_t>::type = 0>
- inline uint64_t hash_key(const UType& key) const
- {
- return _hasher(key);
- }
-
-private:
- Index* _index;
- value_type*_pairs;
-
- HashT _hasher;
- EqT _eq;
- uint32_t _mlf;
- size_type _mask;
- size_type _num_buckets;
- size_type _num_filled;
- size_type _last;
-#if EMH_HIGH_LOAD
- size_type _ehead;
-#endif
- size_type _etail;
-};
-} // namespace emhash
-