From 04678ac09b3e0a5b0810dfbe1c20b3018c5e0e47 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Thu, 18 Jul 2024 14:21:43 +0800 Subject: [PATCH] impl hash map --- CMakeLists.txt | 7 +- hash_map.cpp | 29 +++++ hash_map.h | 240 +++++++++++++++++++++++++++++++++++++++++ test/hash_map_test.cpp | 81 ++++++++++++++ utils_static.cpp | 3 + utils_static.h | 11 ++ 6 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 hash_map.cpp create mode 100644 hash_map.h create mode 100644 test/hash_map_test.cpp create mode 100644 utils_static.cpp create mode 100644 utils_static.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ee0f79c..8ee83bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,10 @@ set(SOURCE_FILE_HEADERS circular_queue.h binary_tree.h binary_search_tree.h + utils_static.h + utils_static.cpp + hash_map.h + hash_map.cpp ) if (NOT HAVE_STRPTIME) @@ -197,7 +201,8 @@ if (ENABLE_UTILS_TESTING) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) add_subdirectory(googletest) enable_testing() - add_executable(unittest test/stack_test.cpp test/queue_test.cpp test/binary_tree_test.cpp) + add_executable(unittest test/stack_test.cpp test/queue_test.cpp test/binary_tree_test.cpp + test/hash_map_test.cpp) target_link_libraries(unittest GTest::gtest_main utils) include(GoogleTest) gtest_discover_tests(unittest) diff --git a/hash_map.cpp b/hash_map.cpp new file mode 100644 index 0000000..afeed73 --- /dev/null +++ b/hash_map.cpp @@ -0,0 +1,29 @@ +#include "hash_map.h" + +size_t hash_map_get_next_cap(size_t cap) { + if (cap >= hashmap_primes[25]) return cap * 2; + for (auto i = 0; i < 26; i++) { + if (hashmap_primes[i] > cap) return hashmap_primes[i]; + } + return cap * 2; +} + +std::function hash_map_linear_probing(size_t interval) { + return std::function([interval](size_t i) { + return i * interval; + }); +} + +size_t hash_map_quadratic_probing(size_t i) { + return i * i; +} + +size_t hash_map_quadratic_probing_alter(size_t i) { + size_t t = (i + 1) / 2; + t = t * t; + return i % 2 == 0 ? -t : t; +} + +std::function hash_map_random_probing(uint64_t seed) { + return HashMapRandomProbingGeneator(seed); +} diff --git a/hash_map.h b/hash_map.h new file mode 100644 index 0000000..6ec94f5 --- /dev/null +++ b/hash_map.h @@ -0,0 +1,240 @@ +#ifndef _UTIL_HASH_MAP_H +#define _UTIL_HASH_MAP_H +#include +#include +#include +#include +#include "utils_static.h" + +template +struct hash_map_entry { + K key; + V value; +}; + +size_t hash_map_get_next_cap(size_t cap); +std::function hash_map_linear_probing(size_t interval); +size_t hash_map_quadratic_probing(size_t i); +size_t hash_map_quadratic_probing_alter(size_t i); + +class HashMapRandomProbingGeneator: public std::function { + std::mt19937_64 gen; + std::vector cache; +public: + HashMapRandomProbingGeneator(uint64_t seed): std::function([&](size_t i) { + while (cache.size() < i) { + cache.push_back(gen()); + } + return cache[i - 1]; + }) { + gen = std::mt19937_64(seed); + } +}; + +std::function hash_map_random_probing(uint64_t seed); + +template +struct hash_map { + std::function malloc; + std::function realloc; + std::function free; + size_t cap; + size_t count; + size_t growat; + uint8_t loadfactor; + struct hash_map_entry** map; + std::function probing; + std::hash hash; + std::function free_key; + std::function free_value; +}; + +template > +struct hash_map* hash_map_new( + size_t cap = hashmap_primes[0], + uint8_t loadfactor = 60, + H hash = H(), + std::function probing = std::function(hash_map_quadratic_probing_alter), + std::function malloc = std::function(malloc), + std::function realloc = std::function(realloc), + std::function free = std::function(free), + std::function free_key = std::function(), + std::function free_value = std::function() +) { + if (!cap) cap = hash_map_get_next_cap(cap); + struct hash_map* map = new (struct hash_map)(); + if (!map) return nullptr; + map->malloc = malloc; + map->realloc = realloc; + map->free = free; + map->cap = cap; + map->count = 0; + hash_map_set_loadfactor(map, loadfactor); + map->probing = probing; + map->hash = hash; + map->free_key = free_key; + map->free_value = free_value; + size_t mapsize = sizeof(void*) * map->cap; + map->map = (struct hash_map_entry**)map->malloc(mapsize); + if (!map->map) { + delete map; + return nullptr; + } + memset(map->map, 0, mapsize); + return map; +} + +template +void free_hash_map(struct hash_map*& map) { + hash_map_clear(map, false); + map->free(map->map); + delete map; + map = nullptr; +} + +template +void hash_map_clear(struct hash_map* map, bool shrink = true) { + if (!map) return; + for (size_t i = 0; i < map->cap; i++) { + auto m = map->map[i]; + if (m) { + if (map->free_key) map->free_key(m->key); + if (map->free_value) map->free_value(m->value); + delete map->map[i]; + map->map[i] = nullptr; + } + } + map->count = 0; + if (shrink && map->cap > hashmap_primes[0]) { + hash_map_set_cap(map, hashmap_primes[0]); + } +} + +template +struct hash_map_entry* hash_map_get_entry(struct hash_map* map, K key) { + if (!map) return nullptr; + size_t h = map->hash(key); + size_t loc = h % map->cap; + size_t i = 1; + while (map->map[loc] && map->map[loc]->key != key) { + loc = (h + map->probing(i++)) % map->cap; + } + return map->map[loc]; +} + +template +inline struct hash_map_entry* hash_map_get_entry(struct hash_map* map, X key) { + return hash_map_get_entry(map, K(key)); +} + +template +bool hash_map_get(struct hash_map* map, K key, V& value) { + if (!map) return false; + size_t h = map->hash(key); + size_t loc = h % map->cap; + size_t i = 1; + while (map->map[loc] && map->map[loc]->key != key) { + loc = (h + map->probing(i++)) % map->cap; + } + value = map->map[loc]->value; + return true; +} + +template +inline bool hash_map_get(struct hash_map* map, X key, V& value) { + return hash_map_get(map, K(key), value); +} + +template +struct hash_map_entry* hash_map_insert_entry(struct hash_map* map, struct hash_map_entry* entry) { + if (!map || !entry) return nullptr; + size_t h = map->hash(entry->key); + size_t loc = h % map->cap; + size_t i = 1; + while (map->map[loc] && map->map[loc]->key != entry->key) { + loc = (h + map->probing(i++)) % map->cap; + } + auto t = map->map[loc]; + map->map[loc] = entry; + map->count += t ? 0 : 1; + return t; +} + +template +struct hash_map_entry* hash_map_insert(struct hash_map* map, K key, V value) { + if (!map) return nullptr; + if (map->count >= map->growat) { + if (!hash_map_resize(map, hash_map_get_next_cap(map->cap))) { + return nullptr; + } + } + size_t h = map->hash(key); + size_t loc = h % map->cap; + size_t i = 1; + while (map->map[loc] && map->map[loc]->key != key) { + loc = (h + map->probing(i++)) % map->cap; + } + if (map->map[loc]) { + if (map->free_value) map->free_value(map->map[loc]->value); + if (map->free_key) map->free_key(key); + map->map[loc]->value = value; + return map->map[loc]; + } + struct hash_map_entry* ent = new (struct hash_map_entry)({key, value}); + if (!ent) return nullptr; + map->map[loc] = ent; + map->count += 1; + return map->map[loc]; +} + +template +inline struct hash_map_entry* hash_map_insert(struct hash_map* map, X key, Y value) { + return hash_map_insert(map, K(key), V(value)); +} + +template +bool hash_map_resize(struct hash_map* map, size_t newcap) { + if (!map && !newcap) return false; + if (map->count > newcap) return false; + if (map->cap == newcap) return true; + size_t mapsize = sizeof(void *) * newcap; + auto t = (struct hash_map_entry**)map->malloc(mapsize); + if (!t) return false; + memset(t, 0, mapsize); + auto ori = map->map; + auto oricap = map->cap; + map->cap = newcap; + map->growat = map->cap * map->loadfactor / 100; + map->map = t; + map->count = 0; + for (size_t i = 0; i < oricap; i++) { + if (ori[i]) { + hash_map_insert_entry(map, ori[i]); + } + } + map->free(ori); + return true; +} + +template +bool hash_map_set_cap(struct hash_map* map, size_t newcap) { + if (!map || !newcap) return false; + auto t = (struct hash_map_entry**)map->realloc(map->map, sizeof(void *) * newcap); + if (t) { + map->map = t; + map->cap = newcap; + map->growat = map->cap * map->loadfactor / 100; + return true; + } + return false; +} + +template +void hash_map_set_loadfactor(struct hash_map* map, uint8_t loadfactor) { + if (!map) return; + if (loadfactor > 100) loadfactor = 100; + map->loadfactor = loadfactor; + map->growat = map->cap * loadfactor / 100; +} + +#endif diff --git a/test/hash_map_test.cpp b/test/hash_map_test.cpp new file mode 100644 index 0000000..d1cba00 --- /dev/null +++ b/test/hash_map_test.cpp @@ -0,0 +1,81 @@ +#include "gtest/gtest.h" +#include "hash_map.h" +#include + +TEST(HashMapTest, NextCapTest) { + GTEST_ASSERT_EQ(hash_map_get_next_cap(0), 53); + GTEST_ASSERT_EQ(hash_map_get_next_cap(53), 97); + GTEST_ASSERT_EQ(hash_map_get_next_cap(1610612741), 3221225482); +} + +TEST(HashMapTest, ProbingTest) { + auto i = hash_map_linear_probing(1); + GTEST_ASSERT_EQ(i(1), 1); + GTEST_ASSERT_EQ(i(3), 3); + i = hash_map_linear_probing(33); + GTEST_ASSERT_EQ(i(3), 99); + GTEST_ASSERT_EQ(hash_map_quadratic_probing(4), 16); + GTEST_ASSERT_EQ(hash_map_quadratic_probing_alter(3), 4); + GTEST_ASSERT_EQ(hash_map_quadratic_probing_alter(4), -4); + i = hash_map_random_probing(0); + GTEST_ASSERT_EQ(i(1), 16539830640600551411llu); + GTEST_ASSERT_EQ(i(2), 9045840598434793555llu); +} + +TEST(HashMapTest, HashMap) { + auto map = hash_map_new(); + GTEST_ASSERT_TRUE(map); + GTEST_ASSERT_TRUE(hash_map_insert(map, "123", 123)); + GTEST_ASSERT_TRUE(hash_map_resize(map, hashmap_primes[1])); + GTEST_ASSERT_TRUE(hash_map_insert(map, "234", 234)); + GTEST_ASSERT_EQ(hash_map_get_entry(map, "123")->value, 123); + int v = 0; + GTEST_ASSERT_TRUE(hash_map_get(map, "234", v)); + GTEST_ASSERT_EQ(v, 234); + free_hash_map(map); +} + +class IntHash: public std::hash { + size_t operator()(const int& s) const noexcept { + return (size_t)s; + } +}; + +TEST(HashMapTest, HashMapConf) { + std::hash h = IntHash(); + auto l = hash_map_linear_probing(1); + auto map = hash_map_new(10, 60, h, l); + GTEST_ASSERT_TRUE(map); + GTEST_ASSERT_TRUE(hash_map_insert(map, 1, 123)); + GTEST_ASSERT_TRUE(hash_map_insert(map, 11, 234)); + GTEST_ASSERT_EQ(map->map[2]->key, 11); + GTEST_ASSERT_EQ(hash_map_get_entry(map, 11)->value, 234); + free_hash_map(map); + GTEST_ASSERT_FALSE(map); + map = hash_map_new(10, 60, h); + GTEST_ASSERT_TRUE(map); + GTEST_ASSERT_TRUE(hash_map_insert(map, 1, 123)); + GTEST_ASSERT_TRUE(hash_map_insert(map, 11, 234)); + GTEST_ASSERT_TRUE(hash_map_insert(map, 41, 255)); + GTEST_ASSERT_TRUE(hash_map_insert(map, 51, 188)); + GTEST_ASSERT_TRUE(hash_map_insert(map, 61, 133)); + GTEST_ASSERT_EQ(map->map[1]->key, 1); + GTEST_ASSERT_EQ(map->map[2]->key, 11); + GTEST_ASSERT_EQ(map->map[0]->key, 41); + GTEST_ASSERT_EQ(map->map[5]->key, 51); + GTEST_ASSERT_EQ(map->map[7]->key, 61); + GTEST_ASSERT_EQ(hash_map_get_entry(map, 51)->value, 188); + free_hash_map(map); +} + +TEST(HashMapTest, HashMap2) { + auto map = hash_map_new(); + GTEST_ASSERT_TRUE(map); + for (int i = 0; i < 100; i++) { + hash_map_insert(map, i, i * i); + } + GTEST_ASSERT_EQ(map->count, 100); + GTEST_ASSERT_EQ(map->cap, hashmap_primes[2]); + GTEST_ASSERT_EQ(hash_map_get_entry(map, 15)->value, 225); + free_hash_map(map); +} diff --git a/utils_static.cpp b/utils_static.cpp new file mode 100644 index 0000000..50aa65f --- /dev/null +++ b/utils_static.cpp @@ -0,0 +1,3 @@ +#include "utils_static.h" + +size_t hashmap_primes[26] = { 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741 }; diff --git a/utils_static.h b/utils_static.h new file mode 100644 index 0000000..453cd30 --- /dev/null +++ b/utils_static.h @@ -0,0 +1,11 @@ +#ifndef _UTIL_UTILS_STATIC_H +#define _UTIL_UTILS_STATIC_H +#include +#ifdef __cplusplus +extern "C" { +#endif +extern size_t hashmap_primes[26]; +#ifdef __cplusplus +} +#endif +#endif