impl hash map

This commit is contained in:
2024-07-18 14:21:43 +08:00
parent 640cb926ca
commit 04678ac09b
6 changed files with 370 additions and 1 deletions

View File

@@ -148,6 +148,10 @@ set(SOURCE_FILE_HEADERS
circular_queue.h
binary_tree.h
binary_search_tree.h
utils_static.h
utils_static.cpp
hash_map.h
hash_map.cpp
)
if (NOT HAVE_STRPTIME)
@@ -197,7 +201,8 @@ if (ENABLE_UTILS_TESTING)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
add_subdirectory(googletest)
enable_testing()
add_executable(unittest test/stack_test.cpp test/queue_test.cpp test/binary_tree_test.cpp)
add_executable(unittest test/stack_test.cpp test/queue_test.cpp test/binary_tree_test.cpp
test/hash_map_test.cpp)
target_link_libraries(unittest GTest::gtest_main utils)
include(GoogleTest)
gtest_discover_tests(unittest)

29
hash_map.cpp Normal file
View File

@@ -0,0 +1,29 @@
#include "hash_map.h"
// Picks the capacity a table currently sized `cap` should grow to: the first
// entry of hashmap_primes that is strictly greater than `cap`, or `cap * 2`
// once `cap` has reached the last entry of that table.
size_t hash_map_get_next_cap(size_t cap) {
    const size_t table_len = 26;
    if (cap < hashmap_primes[table_len - 1]) {
        for (size_t idx = 0; idx < table_len; ++idx) {
            const size_t candidate = hashmap_primes[idx];
            if (candidate > cap) return candidate;
        }
    }
    // Past the end of the table (or nothing larger found): simply double.
    return cap * 2;
}
// Builds a linear probing function: the offset for attempt `n` is
// `n * interval`.
std::function<size_t(size_t)> hash_map_linear_probing(size_t interval) {
    const size_t step = interval;
    std::function<size_t(size_t)> probe = [step](size_t attempt) -> size_t {
        return attempt * step;
    };
    return probe;
}
// Quadratic probing: the offset for attempt `i` is `i` squared.
size_t hash_map_quadratic_probing(size_t i) {
    const size_t squared = i * i;
    return squared;
}
// Alternating quadratic probing. With t = ((i + 1) / 2)^2 the offset is +t
// for odd attempts and -t for even attempts, giving +1, -1, +4, -4, ...
// The "negative" offsets are produced by unsigned wrap-around of size_t, so
// callers that add the result to a hash rely on modular arithmetic.
size_t hash_map_quadratic_probing_alter(size_t i) {
    const size_t half = (i + 1) / 2;
    const size_t square = half * half;
    if (i % 2 != 0) return square;
    return static_cast<size_t>(0) - square;
}
// Creates a pseudo-random probing function for `seed` by wrapping a
// HashMapRandomProbingGeneator in the std::function type hash_map stores.
std::function<size_t(size_t)> hash_map_random_probing(uint64_t seed) {
    return std::function<size_t(size_t)>(HashMapRandomProbingGeneator(seed));
}

240
hash_map.h Normal file
View File

@@ -0,0 +1,240 @@
#ifndef _UTIL_HASH_MAP_H
#define _UTIL_HASH_MAP_H
#include <functional>
#include <random>
#include <vector>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "utils_static.h"
// One key/value pair. hash_map stores a pointer to one of these in every
// occupied slot; it must stay an aggregate because entries are created with
// brace initialisation.
template <class K, class V>
struct hash_map_entry {
    K key;    // key the entry is looked up by
    V value;  // value associated with `key`
};
// Returns the capacity to grow to from `cap`: the next larger entry of
// hashmap_primes, or `cap * 2` past the end of that table.
size_t hash_map_get_next_cap(size_t cap);
// Builds a probing function whose offset for attempt i is i * interval.
std::function<size_t(size_t)> hash_map_linear_probing(size_t interval);
// Quadratic probing: the offset for attempt i is i * i.
size_t hash_map_quadratic_probing(size_t i);
// Alternating quadratic probing: +1, -1, +4, -4, ... where negative offsets
// are represented by unsigned wrap-around of size_t.
size_t hash_map_quadratic_probing_alter(size_t i);
// Probing function that maps attempt number `i` (1-based) to the i-th output
// of a std::mt19937_64 engine seeded with `seed`. Outputs are cached so that
// the same attempt number always yields the same offset.
//
// The engine and the cache live inside the closure held by the std::function
// base rather than in members of this class. The object is routinely copied
// or converted to a plain std::function<size_t(size_t)>; a closure that
// referred back to `this` would dangle as soon as the original object is
// destroyed.
class HashMapRandomProbingGeneator: public std::function<size_t(size_t)> {
public:
    // seed: seed for the underlying std::mt19937_64 engine.
    HashMapRandomProbingGeneator(uint64_t seed): std::function<size_t(size_t)>(
        [gen = std::mt19937_64(seed), cache = std::vector<size_t>()](size_t i) mutable -> size_t {
            // Attempt numbers start at 1; treat 0 as "no offset" instead of
            // indexing before the start of the cache.
            if (i == 0) return 0;
            while (cache.size() < i) {
                cache.push_back(gen());
            }
            return cache[i - 1];
        }) {}
};
// Builds a probing function from a HashMapRandomProbingGeneator seeded with `seed`.
std::function<size_t(size_t)> hash_map_random_probing(uint64_t seed);
template <typename K, typename V>
struct hash_map {
std::function<void*(size_t)> malloc;
std::function<void*(void *, size_t)> realloc;
std::function<void(void *)> free;
size_t cap;
size_t count;
size_t growat;
uint8_t loadfactor;
struct hash_map_entry<K, V>** map;
std::function<size_t(size_t)> probing;
std::hash<K> hash;
std::function<void(K)> free_key;
std::function<void(V)> free_value;
};
// Allocates and initialises an empty hash_map.
//
// cap:        initial number of slots; 0 selects the first hashmap_primes entry.
// loadfactor: fill percentage at which the table grows (clamped by
//             hash_map_set_loadfactor).
// hash:       key hash; it is stored in the map's std::hash<K> member.
// probing:    maps a 1-based attempt number to the offset added to the hash.
// malloc / realloc / free: allocator callbacks for the slot array. The
//             defaults are spelled ::malloc, ::realloc and ::free because an
//             unqualified name in a default argument would refer to the
//             parameter being declared rather than the C library function.
// free_key / free_value: optional cleanup hooks; empty by default.
//
// Returns the new map, or nullptr if the slot array could not be allocated.
// Release it with free_hash_map.
template <typename K, typename V, class H = std::hash<K>>
struct hash_map<K, V>* hash_map_new(
    size_t cap = hashmap_primes[0],
    uint8_t loadfactor = 60,
    H hash = H(),
    std::function<size_t(size_t)> probing = std::function<size_t(size_t)>(hash_map_quadratic_probing_alter),
    std::function<void*(size_t)> malloc = std::function<void*(size_t)>(::malloc),
    std::function<void*(void *, size_t)> realloc = std::function<void*(void *, size_t)>(::realloc),
    std::function<void(void *)> free = std::function<void(void *)>(::free),
    std::function<void(K)> free_key = std::function<void(K)>(),
    std::function<void(V)> free_value = std::function<void(V)>()
) {
    if (!cap) cap = hash_map_get_next_cap(cap);
    struct hash_map<K, V>* map = new (struct hash_map<K, V>)();
    // Plain `new` reports failure by throwing, so this check is only defensive.
    if (!map) return nullptr;
    map->malloc = malloc;
    map->realloc = realloc;
    map->free = free;
    map->cap = cap;
    map->count = 0;
    // Must run after `cap` is set: growat is derived from cap and loadfactor.
    hash_map_set_loadfactor(map, loadfactor);
    map->probing = probing;
    map->hash = hash;
    map->free_key = free_key;
    map->free_value = free_value;
    size_t mapsize = sizeof(void*) * map->cap;
    map->map = (struct hash_map_entry<K, V>**)map->malloc(mapsize);
    if (!map->map) {
        delete map;
        return nullptr;
    }
    // A null slot means "empty", so the array starts zeroed.
    memset(map->map, 0, mapsize);
    return map;
}
// Destroys a map created by hash_map_new: releases every entry (running the
// free_key / free_value hooks), frees the slot array through the map's own
// `free` callback, deletes the map and resets the caller's pointer to
// nullptr. Passing a null pointer is a no-op.
template <typename K, typename V>
void free_hash_map(struct hash_map<K, V>*& map) {
    if (!map) return;
    // No point shrinking a slot array that is about to be released.
    hash_map_clear(map, false);
    map->free(map->map);
    delete map;
    map = nullptr;
}
// Removes every entry from `map`. For each occupied slot the free_key and
// free_value hooks run (when set) before the entry is deleted. When `shrink`
// is true and the table is larger than hashmap_primes[0], the slot array is
// resized down to that capacity. A null `map` is ignored.
template <typename K, typename V>
void hash_map_clear(struct hash_map<K, V>* map, bool shrink = true) {
    if (map == nullptr) return;
    for (size_t slot = 0; slot < map->cap; ++slot) {
        auto* entry = map->map[slot];
        if (entry == nullptr) continue;
        if (map->free_key) map->free_key(entry->key);
        if (map->free_value) map->free_value(entry->value);
        delete entry;
        map->map[slot] = nullptr;
    }
    map->count = 0;
    const size_t floor_cap = hashmap_primes[0];
    if (shrink && map->cap > floor_cap) {
        hash_map_set_cap(map, floor_cap);
    }
}
// Looks up `key` and returns the entry stored for it, or nullptr when the
// probe sequence reaches an empty slot first (or when `map` is null).
template <typename K, typename V>
struct hash_map_entry<K, V>* hash_map_get_entry(struct hash_map<K, V>* map, K key) {
    if (map == nullptr) return nullptr;
    const size_t hashed = map->hash(key);
    size_t slot = hashed % map->cap;
    for (size_t attempt = 1; ; ++attempt) {
        auto* candidate = map->map[slot];
        // Stop on the first empty slot or on the slot holding `key`.
        if (candidate == nullptr || !(candidate->key != key)) return candidate;
        slot = (hashed + map->probing(attempt)) % map->cap;
    }
}
// Convenience overload: converts `key` to the map's key type K (for example
// a string literal to std::string) and forwards to the K-typed lookup.
template <typename K, typename V, class X>
inline struct hash_map_entry<K, V>* hash_map_get_entry(struct hash_map<K, V>* map, X key) {
    auto* found = hash_map_get_entry(map, K(key));
    return found;
}
// Copies the value stored for `key` into `value`.
//
// Returns true when the key is present. Returns false, leaving `value`
// untouched, when `map` is null or the key is not in the table.
template <typename K, typename V>
bool hash_map_get(struct hash_map<K, V>* map, K key, V& value) {
    if (!map) return false;
    size_t h = map->hash(key);
    size_t loc = h % map->cap;
    size_t i = 1;
    while (map->map[loc] && map->map[loc]->key != key) {
        loc = (h + map->probing(i++)) % map->cap;
    }
    // The probe loop also stops on an empty slot, which means "not found".
    if (!map->map[loc]) return false;
    value = map->map[loc]->value;
    return true;
}
// Convenience overload: converts `key` to the map's key type K and forwards
// to the K-typed hash_map_get.
template <typename K, typename V, class X>
inline bool hash_map_get(struct hash_map<K, V>* map, X key, V& value) {
    const bool found = hash_map_get(map, K(key), value);
    return found;
}
// Places an already allocated `entry` into the table without growing it.
// If a slot already holds an entry with the same key, that entry is replaced
// and returned to the caller; otherwise the count is incremented and nullptr
// is returned. Also returns nullptr when `map` or `entry` is null.
template <typename K, typename V>
struct hash_map_entry<K, V>* hash_map_insert_entry(struct hash_map<K, V>* map, struct hash_map_entry<K, V>* entry) {
    if (map == nullptr || entry == nullptr) return nullptr;
    const size_t hashed = map->hash(entry->key);
    size_t slot = hashed % map->cap;
    size_t attempt = 1;
    while (map->map[slot] != nullptr && map->map[slot]->key != entry->key) {
        slot = (hashed + map->probing(attempt)) % map->cap;
        ++attempt;
    }
    auto* previous = map->map[slot];
    map->map[slot] = entry;
    if (previous == nullptr) map->count += 1;
    return previous;
}
// Inserts or updates the value for `key`.
//
// The table first grows to hash_map_get_next_cap(cap) when `count` has
// reached `growat`. If the key already exists, the old value is passed to
// free_value, the passed-in `key` is passed to free_key (the stored key is
// kept), and the value is overwritten. Otherwise a new entry is allocated.
//
// Returns the entry holding the value, or nullptr when `map` is null or the
// table could not be grown.
template <typename K, typename V>
struct hash_map_entry<K, V>* hash_map_insert(struct hash_map<K, V>* map, K key, V value) {
    if (map == nullptr) return nullptr;
    const bool must_grow = map->count >= map->growat;
    if (must_grow && !hash_map_resize(map, hash_map_get_next_cap(map->cap))) {
        return nullptr;
    }
    const size_t hashed = map->hash(key);
    size_t slot = hashed % map->cap;
    for (size_t attempt = 1; map->map[slot] && map->map[slot]->key != key; ++attempt) {
        slot = (hashed + map->probing(attempt)) % map->cap;
    }
    auto* existing = map->map[slot];
    if (existing) {
        if (map->free_value) map->free_value(existing->value);
        if (map->free_key) map->free_key(key);
        existing->value = value;
        return existing;
    }
    struct hash_map_entry<K, V>* created = new (struct hash_map_entry<K, V>)({key, value});
    if (!created) return nullptr;
    map->map[slot] = created;
    map->count += 1;
    return created;
}
// Convenience overload: converts `key` to K and `value` to V, then forwards
// to the K/V-typed hash_map_insert.
template <typename K, typename V, class X, class Y>
inline struct hash_map_entry<K, V>* hash_map_insert(struct hash_map<K, V>* map, X key, Y value) {
    auto* stored = hash_map_insert(map, K(key), V(value));
    return stored;
}
// Rebuilds the table with `newcap` slots, re-inserting every existing entry
// at the slot its hash selects under the new capacity.
//
// Returns false, leaving the map untouched, when `map` is null, `newcap` is
// 0, `newcap` is smaller than the number of stored entries, or the new slot
// array cannot be allocated. Returns true otherwise, including when the
// capacity is already `newcap`.
template <typename K, typename V>
bool hash_map_resize(struct hash_map<K, V>* map, size_t newcap) {
    // Either condition alone is fatal: a null map cannot be dereferenced and
    // a zero capacity would make every later `hash % cap` divide by zero.
    if (!map || !newcap) return false;
    if (map->count > newcap) return false;
    if (map->cap == newcap) return true;
    size_t mapsize = sizeof(void *) * newcap;
    auto t = (struct hash_map_entry<K, V>**)map->malloc(mapsize);
    if (!t) return false;
    memset(t, 0, mapsize);
    auto ori = map->map;
    auto oricap = map->cap;
    map->cap = newcap;
    map->growat = map->cap * map->loadfactor / 100;
    map->map = t;
    // hash_map_insert_entry increments count for every entry it places.
    map->count = 0;
    for (size_t i = 0; i < oricap; i++) {
        if (ori[i]) {
            hash_map_insert_entry(map, ori[i]);
        }
    }
    map->free(ori);
    return true;
}
template <typename K, typename V>
bool hash_map_set_cap(struct hash_map<K, V>* map, size_t newcap) {
if (!map || !newcap) return false;
auto t = (struct hash_map_entry<K, V>**)map->realloc(map->map, sizeof(void *) * newcap);
if (t) {
map->map = t;
map->cap = newcap;
map->growat = map->cap * map->loadfactor / 100;
return true;
}
return false;
}
// Sets the load factor (a percentage, clamped to at most 100) and recomputes
// the entry count at which the table grows. A null `map` is ignored.
template <typename K, typename V>
void hash_map_set_loadfactor(struct hash_map<K, V>* map, uint8_t loadfactor) {
    if (map == nullptr) return;
    const uint8_t clamped = loadfactor > 100 ? static_cast<uint8_t>(100) : loadfactor;
    map->loadfactor = clamped;
    map->growat = map->cap * clamped / 100;
}
#endif

81
test/hash_map_test.cpp Normal file
View File

@@ -0,0 +1,81 @@
#include "gtest/gtest.h"
#include "hash_map.h"
#include <string>
// Checks hash_map_get_next_cap against the hashmap_primes table.
TEST(HashMapTest, NextCapTest) {
// 0 is below every table entry, so the first entry (53) is returned.
GTEST_ASSERT_EQ(hash_map_get_next_cap(0), 53);
// The result must be strictly greater than the input, so 53 steps to 97.
GTEST_ASSERT_EQ(hash_map_get_next_cap(53), 97);
// At the last table entry the capacity is doubled instead.
GTEST_ASSERT_EQ(hash_map_get_next_cap(1610612741), 3221225482);
}
// Checks the offsets produced by each probing strategy.
TEST(HashMapTest, ProbingTest) {
// Linear probing: offset = attempt * interval.
auto i = hash_map_linear_probing(1);
GTEST_ASSERT_EQ(i(1), 1);
GTEST_ASSERT_EQ(i(3), 3);
i = hash_map_linear_probing(33);
GTEST_ASSERT_EQ(i(3), 99);
// Quadratic probing: offset = attempt squared.
GTEST_ASSERT_EQ(hash_map_quadratic_probing(4), 16);
// Alternating quadratic probing: attempts 3 and 4 give +4 and -4; the -4 is
// compared after conversion to the unsigned return type.
GTEST_ASSERT_EQ(hash_map_quadratic_probing_alter(3), 4);
GTEST_ASSERT_EQ(hash_map_quadratic_probing_alter(4), -4);
// Random probing with seed 0.
// NOTE(review): the constants are presumably the first two outputs of
// std::mt19937_64 seeded with 0 - confirm.
i = hash_map_random_probing(0);
GTEST_ASSERT_EQ(i(1), 16539830640600551411llu);
GTEST_ASSERT_EQ(i(2), 9045840598434793555llu);
}
// End-to-end check with std::string keys and default settings: insert, an
// explicit resize, then lookups through both hash_map_get_entry and
// hash_map_get.
TEST(HashMapTest, HashMap) {
auto map = hash_map_new<std::string, int>();
GTEST_ASSERT_TRUE(map);
// String literals go through the converting overloads (X -> K).
GTEST_ASSERT_TRUE(hash_map_insert(map, "123", 123));
// Resizing must keep the entry inserted above reachable.
GTEST_ASSERT_TRUE(hash_map_resize(map, hashmap_primes[1]));
GTEST_ASSERT_TRUE(hash_map_insert(map, "234", 234));
GTEST_ASSERT_EQ(hash_map_get_entry(map, "123")->value, 123);
int v = 0;
GTEST_ASSERT_TRUE(hash_map_get(map, "234", v));
GTEST_ASSERT_EQ(v, 234);
free_hash_map(map);
}
// Identity hash for int, derived from std::hash<int>.
// NOTE(review): operator() is private (class default access) and non-virtual,
// and the test stores an IntHash in a plain std::hash<int> value, so the base
// std::hash<int> call operator appears to be what actually runs - confirm
// whether that is intended.
class IntHash: public std::hash<int> {
    size_t operator()(const int& s) const noexcept {
        return static_cast<size_t>(s);
    }
};
// Checks slot placement with an explicit capacity of 10, first with linear
// probing and then with the default alternating quadratic probing.
// NOTE(review): the expected slot indices assume the hash of an int is the
// int itself; `h` is a std::hash<int> value, so this depends on the standard
// library's std::hash<int> - confirm on every supported toolchain.
TEST(HashMapTest, HashMapConf) {
std::hash<int> h = IntHash();
auto l = hash_map_linear_probing(1);
auto map = hash_map_new<int, int>(10, 60, h, l);
GTEST_ASSERT_TRUE(map);
GTEST_ASSERT_TRUE(hash_map_insert(map, 1, 123));
// 11 % 10 collides with key 1 in slot 1; linear probing moves it to slot 2.
GTEST_ASSERT_TRUE(hash_map_insert(map, 11, 234));
GTEST_ASSERT_EQ(map->map[2]->key, 11);
GTEST_ASSERT_EQ(hash_map_get_entry(map, 11)->value, 234);
// free_hash_map resets the caller's pointer to nullptr.
free_hash_map(map);
GTEST_ASSERT_FALSE(map);
// Same capacity, default probing (offsets +1, -1, +4, -4, ...).
map = hash_map_new<int, int>(10, 60, h);
GTEST_ASSERT_TRUE(map);
GTEST_ASSERT_TRUE(hash_map_insert(map, 1, 123));
GTEST_ASSERT_TRUE(hash_map_insert(map, 11, 234));
GTEST_ASSERT_TRUE(hash_map_insert(map, 41, 255));
GTEST_ASSERT_TRUE(hash_map_insert(map, 51, 188));
GTEST_ASSERT_TRUE(hash_map_insert(map, 61, 133));
// All five keys start at slot 1; each later key takes the next free slot of
// the probe sequence: 1 -> slot 1, 11 -> +1 -> 2, 41 -> -1 -> 0,
// 51 -> +4 -> 5, 61 -> -4 -> 7.
GTEST_ASSERT_EQ(map->map[1]->key, 1);
GTEST_ASSERT_EQ(map->map[2]->key, 11);
GTEST_ASSERT_EQ(map->map[0]->key, 41);
GTEST_ASSERT_EQ(map->map[5]->key, 51);
GTEST_ASSERT_EQ(map->map[7]->key, 61);
GTEST_ASSERT_EQ(hash_map_get_entry(map, 51)->value, 188);
free_hash_map(map);
}
// Checks automatic growth: 100 inserts into a default map (capacity 53, load
// factor 60) must grow it twice, 53 -> 97 -> 193, without losing entries.
TEST(HashMapTest, HashMap2) {
auto map = hash_map_new<int, int>();
GTEST_ASSERT_TRUE(map);
for (int i = 0; i < 100; i++) {
hash_map_insert(map, i, i * i);
}
GTEST_ASSERT_EQ(map->count, 100);
// growat is 31 at capacity 53 and 58 at capacity 97, so the table ends at
// hashmap_primes[2] (193).
GTEST_ASSERT_EQ(map->cap, hashmap_primes[2]);
GTEST_ASSERT_EQ(hash_map_get_entry(map, 15)->value, 225);
free_hash_map(map);
}

3
utils_static.cpp Normal file
View File

@@ -0,0 +1,3 @@
#include "utils_static.h"
// Capacity ladder for the hash map: 26 ascending values, each roughly twice
// the previous one. hash_map_get_next_cap walks this table when a map grows.
size_t hashmap_primes[26] = { 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741 };

11
utils_static.h Normal file
View File

@@ -0,0 +1,11 @@
#ifndef _UTIL_UTILS_STATIC_H
#define _UTIL_UTILS_STATIC_H
#include <stddef.h>
/* Give the table C linkage when this header is included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* Table of 26 hash map capacities; defined in utils_static.cpp. */
extern size_t hashmap_primes[26];
#ifdef __cplusplus
}
#endif
#endif