rexylib/include/rexy/cx/hashmap.hpp

206 lines
6.8 KiB
C++

/**
This file is a part of rexy's general purpose library
Copyright (C) 2020 rexy712
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef REXY_CX_HASHMAP_HPP
#define REXY_CX_HASHMAP_HPP
#include "vector.hpp"
#include "array.hpp"
#include "../algorithm.hpp"
#include "../hash.hpp"
#include <climits> //CHAR_BIT
#include <cstddef> //size_t, ptrdiff_t
#include <utility> //pair
#include <type_traits> //decay
#include <initializer_list>
namespace rexy::cx{
/**
 * One key/value pair used to seed a hashmap.
 * Plain aggregate: initialise it as `element<K,V>{key, value}` or let the
 * deduction guide below pick the types from `element{key, value}`.
 */
template<typename K, typename V>
struct element
{
	using key_type = K;
	using value_type = V;

	key_type key;     //what the pair is looked up by
	value_type value; //what the key maps to
};
//deduction guide: element{k, v} names element<decltype(k), decltype(v)> (after decay)
template<typename K, typename V>
element(K, V) -> element<K, V>;
/**
 * Fixed-capacity lookup table holding exactly N values, indexed through a
 * perfect hash that is computed once in the (constexpr) constructor.
 *
 * Key   - key type of the elements the table is built from
 * Value - mapped type stored in the table
 * N     - number of elements; also the number of slots in the table
 * Hash  - hasher invoked as Hash{}(key, salt); defaults to hash<Key>
 *
 * Only the mapped values and the full hashes of the keys are stored; the
 * keys themselves are not kept.
 */
template<class Key, class Value, size_t N, class Hash = hash<Key>>
class hashmap
{
public:
using key_type = Key;
using mapped_type = Value;
using value_type = element<Key,Value>;
using size_type = size_t;
using difference_type = ptrdiff_t;
using hasher = Hash;
//note: reference/pointer aliases refer to the mapped value, not to value_type
using reference = mapped_type&;
using const_reference = const mapped_type&;
using pointer = mapped_type*;
using const_pointer = const mapped_type*;
//highest bit of size_type. When set in an m_g entry, the remaining bits are a
//direct slot index rather than a salt for a second hash.
static constexpr size_type single_bucket_bit = size_type{1} << ((sizeof(size_type)*CHAR_BIT) - 1);
static constexpr size_type max_size = N;
//slot indices are always below N, so N must not use the tag bit itself
static_assert((max_size & single_bucket_bit) == 0);
private:
array<mapped_type,N> m_values; //perfect hash table
array<size_type,N> m_g; //'salt' values for indexing into the perfect hash table
array<size_type,N> m_key_hashes; //full hash values for keys to verify good index values
public:
//Build the table from exactly N key/value pairs.
constexpr hashmap(const value_type(&elements)[N])
noexcept(std::is_nothrow_default_constructible<value_type>::value &&
std::is_nothrow_copy_constructible<value_type>::value &&
std::is_nothrow_move_assignable<mapped_type>::value &&
std::is_nothrow_invocable<Hash,Key,size_t>::value);
//Access the value a key maps to. The key is NOT verified: a key that was not
//part of the construction set still yields a reference to some slot's value.
//Use is_valid() first when the key is not known to be present.
//UHash lets the lookup use a key type other than Key; it defaults to the
//hasher of the decayed argument type.
template<class U, class UHash = hash<std::decay_t<U>>>
constexpr reference operator[](U&& u)noexcept;
template<class U, class UHash = hash<std::decay_t<U>>>
constexpr const_reference operator[](U&& u)const noexcept;
//Compare the full hash recorded for the slot the key maps to against the
//key's own salt-0 hash; returns true when they are equal.
template<class U, class UHash = hash<std::decay_t<U>>>
constexpr bool is_valid(U&& u)const noexcept;
};
/**
 * Build the perfect hash table from exactly N key/value pairs.
 *
 * Steps:
 *  1. Every element is dropped into the bucket selected by its salt-0 hash.
 *  2. Buckets are sorted largest first. For each bucket with more than one
 *     element a salt 'd' is searched such that all of the bucket's keys land
 *     on distinct, still unoccupied slots; 'd' is stored in m_g at the
 *     bucket's index.
 *  3. Each remaining single-element bucket is given the next unoccupied slot;
 *     that slot index, tagged with single_bucket_bit, is stored in m_g.
 *
 * For every occupied slot the stored key's own salt-0 hash is recorded in
 * m_key_hashes so that is_valid() can later verify a lookup.
 *
 * NOTE(review): keys are expected to be distinct. Two equal keys would land on
 * the same slot for every salt, so the salt search in step 2 would not finish.
 *
 * elements - the N key/value pairs to store; values are copied into temporary
 *            buckets and then moved into the table.
 */
template<class Key, class Value, size_t N, class Hash>
constexpr hashmap<Key,Value,N,Hash>::hashmap(const value_type(&elements)[N])
noexcept(std::is_nothrow_default_constructible<value_type>::value &&
std::is_nothrow_copy_constructible<value_type>::value &&
std::is_nothrow_move_assignable<mapped_type>::value &&
std::is_nothrow_invocable<Hash,Key,size_t>::value)
{
	array<vector<value_type,N>,N> buckets;
	//Explicit occupancy map for the final table. The stored key hash cannot
	//double as an "in use" marker because 0 is a legitimate hash value.
	array<bool,N> slot_taken;
	slot_taken.fill(false);
	size_type current_bucket = 0;

	//place all keys into buckets
	for(auto& elem : elements){
		buckets[Hash{}(elem.key, 0) % max_size].push_back(elem);
	}

	//sort the buckets based on size, largest first
	quicksort(buckets.begin(), buckets.end(), [](auto&& left, auto&& right) -> bool{
		return left.size() > right.size();
	});

	//for each bucket, try different values of 'd' to try to find a hash that doesn't collide
	for(current_bucket = 0;current_bucket < buckets.size();++current_bucket){
		auto& bucket = buckets[current_bucket];
		//only handle buckets containing collisions; buckets are sorted by size,
		//so the first small bucket ends this phase
		if(bucket.size() <= 1)
			break;

		//all keys of this bucket share this index, so any of them can supply it
		const auto bucket_index = Hash{}(bucket[0].key, 0) % max_size;

		array<bool,N> pass_slots_used;
		pass_slots_used.fill(false);
		vector<size_type,N> pass_slots;
		size_type d = 1;
		for(size_type i = 0;i < bucket.size();){
			const size_type slot = Hash{}(bucket[i].key, d) % max_size;
			if(pass_slots_used[slot] || slot_taken[slot]){
				//slot already in use, start over with another value for 'd'
				++d;
				i = 0;
				pass_slots_used.fill(false);
				pass_slots.clear();
			}else{
				//slot is good to go
				pass_slots_used[slot] = true;
				pass_slots.push_back(slot);
				++i;
			}
		}

		//store the successful value of 'd' at the index shared by this bucket's keys
		m_g[bucket_index] = d;

		//take the values from the temporary bucket into their permanent slots
		for(size_type i = 0;i < bucket.size();++i){
			const size_type slot = pass_slots[i];
			m_values[slot] = std::move(bucket[i].value);
			//record THIS key's full hash (not the first key's) so is_valid()
			//recognises every key of the bucket
			m_key_hashes[slot] = Hash{}(bucket[i].key, 0);
			slot_taken[slot] = true;
		}
	}

	//Handle remaining single value buckets
	size_type next_free_slot = 0;
	for(;current_bucket < buckets.size();++current_bucket){
		auto& bucket = buckets[current_bucket];
		//empty buckets sort last; nothing left to place
		if(bucket.size() == 0)
			break;

		const auto hashval = Hash{}(bucket[0].key, 0);
		//N elements share N slots, so a free slot exists while an element remains
		while(slot_taken[next_free_slot])
			++next_free_slot;

		//tag the entry so lookups use the slot index directly instead of rehashing
		m_g[hashval % max_size] = (next_free_slot | single_bucket_bit);
		m_values[next_free_slot] = std::move(bucket[0].value);
		m_key_hashes[next_free_slot] = hashval;
		slot_taken[next_free_slot] = true;
	}
}
/**
 * Mutable access to the value a key maps to.
 *
 * No key checks are performed: a key that was not used to build the map still
 * produces a reference to the value in whichever slot it happens to hash to.
 *
 * key - key to look up; hashed with UHash
 * returns a reference to the value in the selected slot
 */
template<class Key, class Value, size_t N, class Hash>
template<class U, class UHash>
constexpr auto hashmap<Key,Value,N,Hash>::operator[](U&& key)noexcept -> reference{
	//hash the lvalue here: 'key' may be needed again below, so it must not be
	//forwarded (and potentially moved from) before its last use
	const auto d = m_g[UHash{}(key, 0) % max_size];
	//tagged entry: the remaining bits are the slot index itself
	if(d & single_bucket_bit)
		return m_values[d & ~single_bucket_bit];
	//otherwise 'd' is the salt for the second hash that selects the slot
	return m_values[UHash{}(std::forward<U>(key), d) % max_size];
}
/**
 * Read-only access to the value a key maps to.
 *
 * No key checks are performed: a key that was not used to build the map still
 * produces a reference to the value in whichever slot it happens to hash to.
 *
 * key - key to look up; hashed with UHash
 * returns a const reference to the value in the selected slot
 */
template<class Key, class Value, size_t N, class Hash>
template<class U, class UHash>
constexpr auto hashmap<Key,Value,N,Hash>::operator[](U&& key)const noexcept -> const_reference{
	//hash the lvalue here: 'key' may be needed again below, so it must not be
	//forwarded (and potentially moved from) before its last use
	const auto d = m_g[UHash{}(key, 0) % max_size];
	//tagged entry: the remaining bits are the slot index itself
	if(d & single_bucket_bit)
		return m_values[d & ~single_bucket_bit];
	//otherwise 'd' is the salt for the second hash that selects the slot
	return m_values[UHash{}(std::forward<U>(key), d) % max_size];
}
/**
 * Check a key against the full hash recorded for the slot it maps to.
 *
 * key - key to test; hashed with UHash
 * returns true when the hash stored for the selected slot equals the key's
 * salt-0 hash. The comparison is on hashes only; keys are not stored, so two
 * different keys with the same full hash cannot be told apart.
 */
template<class Key, class Value, size_t N, class Hash>
template<class U, class UHash>
constexpr bool hashmap<Key,Value,N,Hash>::is_valid(U&& key)const noexcept{
	//hash the lvalue here: 'key' may be needed again below, so it must not be
	//forwarded (and potentially moved from) before its last use
	const auto hashval = UHash{}(key, 0);
	const auto d = m_g[hashval % max_size];
	if(d & single_bucket_bit){
		//tagged entry: the remaining bits are the slot index itself
		return m_key_hashes[d & ~single_bucket_bit] == hashval;
	}
	//otherwise 'd' is the salt for the second hash that selects the slot
	return m_key_hashes[UHash{}(std::forward<U>(key), d) % max_size] == hashval;
}
/**
 * Convenience factory: construct a hashmap<Key,Value,N,Hash> from an array of
 * N key/value elements.
 *
 * list - the N elements handed to the hashmap constructor
 * returns the constructed map by value
 *
 * Key and Value only appear inside a nested type name in the parameter, so
 * callers are expected to name them explicitly, e.g. make_hashmap<K,V>(...).
 */
template<class Key, class Value, size_t N, class Hash = hash<Key>>
constexpr auto make_hashmap(const typename hashmap<Key,Value,N,Hash>::value_type(&list)[N]){
	using map_type = hashmap<Key,Value,N,Hash>;
	return map_type(list);
}
}
#ifdef REXY_STRING_BASE_HPP
#include "../string_hash.hpp"
#endif
#endif