// clang-format off // /* The MIT License Copyright (c) 2008, 2009, 2011 by Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* An example: #include "khash.h" HASH_MAP_INIT_INT(32, char) int main() { int ret, is·missing; mapiter_t k; khash_t(32) *h = map·init(32); k = map·put(32, h, 5, &ret); map·value(h, k) = 10; k = map·get(32, h, 10); is·missing = (k == map·end(h)); k = map·get(32, h, 5); map·del(32, h, k); for (k = map·begin(h); k != map·end(h); ++k) if (map·exist(h, k)) map·value(h, k) = 1; map·destroy(32, h); return 0; } */ /* 2013-05-02 (0.2.8): * Use quadratic probing. When the capacity is power of 2, stepping function i*(i+1)/2 guarantees to traverse each bucket. It is better than double hashing on cache performance and is more robust than linear probing. In theory, double hashing should be more robust than quadratic probing. However, my implementation is probably not for large hash tables, because the second hash function is closely tied to the first hash function, which reduce the effectiveness of double hashing. Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php 2011-12-29 (0.2.7): * Minor code clean up; no actual effect. 2011-09-16 (0.2.6): * The capacity is a power of 2. This seems to dramatically improve the speed for simple keys. Thank Zilong Tan for the suggestion. Reference: - http://code.google.com/p/ulib/ - http://nothings.org/computer/judy/ * Allow to optionally use linear probing which usually has better performance for random input. Double hashing is still the default as it is more robust to certain non-random input. * Added Wang's integer hash function (not used by default). This hash function is more robust to certain non-random input. 2011-02-14 (0.2.5): * Allow to declare global functions. 2009-09-26 (0.2.4): * Improve portability 2008-09-19 (0.2.3): * Corrected the example * Improved interfaces 2008-09-11 (0.2.2): * Improved speed a little in map·put() 2008-09-10 (0.2.1): * Added map·clear() * Fixed a compiling error 2008-09-02 (0.2.0): * Changed to token concatenation which increases flexibility. 2008-08-31 (0.1.2): * Fixed a bug in map·get(), which has not been tested previously. 2008-08-31 (0.1.1): * Added destructor */ // Modified by Nicholas Noll 2019. // Modified API to be more inline with codebase. #pragma once /*! @header Generic hash table library. */ #define AC_VERSION_HASH_H "0.2.8" /* compiler specific configuration */ // #if UINT_MAX == 0xffffffffu // typedef unsigned int mapint32_t; // #elif ULONG_MAX == 0xffffffffu // typedef unsigned long mapint32_t; // #endif // // #if ULONG_MAX == ULLONG_MAX // typedef unsigned long mapint64_t; // #else // typedef unsigned long long mapint64_t; // #endif #ifndef map·inline #ifdef _MSC_VER #define map·inline __inline #else #define map·inline inline #endif #endif /* map·inline */ #ifndef klib_unused #if (defined __clang__ && __clang_major__ >= 3) || \ (defined __GNUC__ && __GNUC__ >= 3) #define klib_unused __attribute__((__unused__)) #else #define klib_unused #endif #endif /* klib_unused */ typedef int32 mapiter; #define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2) #define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1) #define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3) #define __ac_set_isdel_false(flag, i) \ (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1))) #define __ac_set_isempty_false(flag, i) \ (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1))) #define __ac_set_isboth_false(flag, i) \ (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1))) #define __ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1)) #define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4) #ifndef _roundup32 #define _roundup32(x) \ (--(x), \ (x) |= (x) >> 1, \ (x) |= (x) >> 2, \ (x) |= (x) >> 4, \ (x) |= (x) >> 8, \ (x) |= (x) >> 16, \ ++(x)) #endif static const double __ac_HASH_UPPER = 0.77; #define __HASH_TYPE(name, map·key_t, map·val·t) \ typedef struct map·##name##_s { \ int32 n_buckets, size, n_occupied, upper_bound; \ int32* flags; \ map·key_t* keys; \ map·val·t* vals; \ } map·##name##_t; #define __HASH_PROTOTYPES(name, map·key_t, map·val·t) \ extern map·##name##_t* map·init·##name(void); \ extern void map·destroy_##name(map·##name##_t* h); \ extern void map·clear_##name(map·##name##_t* h); \ extern int32 map·get_##name(const map·##name##_t* h, mapkey_t key); \ extern int map·resize_##name(map·##name##_t* h, int32 new_n_buckets); \ extern int32 map·put_##name(map·##name##_t* h, mapkey_t key, int* ret); \ extern void map·del_##name(map·##name##_t* h, int32 x); #define __HASH_IMPL( \ name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \ SCOPE map·##name##_t* map·init·##name(void) \ { \ return (map·##name##_t*)calloc(1, sizeof(map·##name##_t)); \ } \ SCOPE void map·destroy_##name(map·##name##_t* h) \ { \ if (h) \ { \ free((void*)h->keys); \ free(h->flags); \ free((void*)h->vals); \ free(h); \ } \ } \ SCOPE void map·clear_##name(map·##name##_t* h) \ { \ if (h && h->flags) \ { \ memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(int32)); \ h->size = h->n_occupied = 0; \ } \ } \ SCOPE int32 map·get_##name(const map·##name##_t* h, mapkey_t key) \ { \ if (h->n_buckets) \ { \ int32 k = 0; \ int32 i = 0; \ int32 last = 0; \ int32 mask = 0; \ int32 step = 0; \ mask = h->n_buckets - 1; \ k = __hash_func(key); \ i = k & mask; \ last = i; \ while ( \ !__ac_isempty(h->flags, i) && \ (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) \ { \ i = (i + (++step)) & mask; \ if (i == last) return h->n_buckets; \ } \ return __ac_iseither(h->flags, i) ? h->n_buckets : i; \ } else \ return 0; \ } \ SCOPE int map·resize_##name(map·##name##_t* h, int32 new_n_buckets) \ { /* This function uses 0.25*n_buckets bytes of working space instead of \ [sizeof(key_t+val·t)+.25]*n_buckets. */ \ int32* new_flags = 0; \ int32 j = 1; \ { \ _roundup32(new_n_buckets); \ if (new_n_buckets < 4) new_n_buckets = 4; \ if (h->size >= (int32)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \ j = 0; /* requested size is too small */ \ else \ { /* hash table size to be changed (shrink or expand); rehash */ \ new_flags = \ (int32*)malloc(__ac_fsize(new_n_buckets) * sizeof(int32)); \ if (!new_flags) return -1; \ memset( \ new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(int32)); \ if (h->n_buckets < new_n_buckets) \ { /* expand */ \ mapkey_t* new_keys = (mapkey_t*)realloc( \ (void*)h->keys, new_n_buckets * sizeof(mapkey_t)); \ if (!new_keys) \ { \ free(new_flags); \ return -1; \ } \ h->keys = new_keys; \ if (map·is·map) \ { \ mapval·t* new_vals = (mapval·t*)realloc( \ (void*)h->vals, new_n_buckets * sizeof(mapval·t)); \ if (!new_vals) \ { \ free(new_flags); \ return -1; \ } \ h->vals = new_vals; \ } \ } /* otherwise shrink */ \ } \ } \ if (j) \ { /* rehashing is needed */ \ for (j = 0; j != h->n_buckets; ++j) \ { \ if (__ac_iseither(h->flags, j) == 0) \ { \ mapkey_t key = h->keys[j]; \ mapval·t val; \ int32 new_mask; \ new_mask = new_n_buckets - 1; \ if (map·is·map) val = h->vals[j]; \ __ac_set_isdel_true(h->flags, j); \ while (1) \ { /* kick-out process; sort of like in Cuckoo hashing */ \ int32 k, i, step = 0; \ k = __hash_func(key); \ i = k & new_mask; \ while (!__ac_isempty(new_flags, i)) \ i = (i + (++step)) & new_mask; \ __ac_set_isempty_false(new_flags, i); \ if (i < h->n_buckets && \ __ac_iseither(h->flags, i) == 0) \ { /* kick out the existing element */ \ { \ mapkey_t tmp = h->keys[i]; \ h->keys[i] = key; \ key = tmp; \ } \ if (map·is·map) \ { \ mapval·t tmp = h->vals[i]; \ h->vals[i] = val; \ val = tmp; \ } \ __ac_set_isdel_true(h->flags, \ i); /* mark it as deleted in \ the old hash table */ \ } else \ { /* write the element and jump out of the loop */ \ h->keys[i] = key; \ if (map·is·map) h->vals[i] = val; \ break; \ } \ } \ } \ } \ if (h->n_buckets > new_n_buckets) \ { /* shrink the hash table */ \ h->keys = (mapkey_t*)realloc((void*)h->keys, \ new_n_buckets * sizeof(mapkey_t)); \ if (map·is·map) \ h->vals = (mapval·t*)realloc( \ (void*)h->vals, new_n_buckets * sizeof(mapval·t)); \ } \ free(h->flags); /* free the working space */ \ h->flags = new_flags; \ h->n_buckets = new_n_buckets; \ h->n_occupied = h->size; \ h->upper_bound = (int32)(h->n_buckets * __ac_HASH_UPPER + 0.5); \ } \ return 0; \ } \ SCOPE int32 map·put_##name(map·##name##_t* h, mapkey_t key, int* ret) \ { \ int32 x = 0; \ if (h->n_occupied >= h->upper_bound) \ { /* update the hash table */ \ if (h->n_buckets > (h->size << 1)) \ { \ if (map·resize_##name(h, h->n_buckets - 1) < 0) \ { /* clear "deleted" elements */ \ *ret = -1; \ return h->n_buckets; \ } \ } else if (map·resize_##name(h, h->n_buckets + 1) < 0) \ { /* expand the hash table */ \ *ret = -1; \ return h->n_buckets; \ } \ } /* TODO: to implement automatically shrinking; resize() already \ support shrinking */ \ { \ int32 k, i, site, last, mask = h->n_buckets - 1, step = 0; \ x = site = h->n_buckets; \ k = __hash_func(key); \ i = k & mask; \ if (__ac_isempty(h->flags, i)) \ x = i; /* for speed up */ \ else \ { \ last = i; \ while ( \ !__ac_isempty(h->flags, i) && \ (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) \ { \ if (__ac_isdel(h->flags, i)) site = i; \ i = (i + (++step)) & mask; \ if (i == last) \ { \ x = site; \ break; \ } \ } \ if (x == h->n_buckets) \ { \ if (__ac_isempty(h->flags, i) && site != h->n_buckets) \ x = site; \ else \ x = i; \ } \ } \ } \ if (__ac_isempty(h->flags, x)) \ { /* not present at all */ \ h->keys[x] = key; \ __ac_set_isboth_false(h->flags, x); \ ++h->size; \ ++h->n_occupied; \ *ret = 1; \ } else if (__ac_isdel(h->flags, x)) \ { /* deleted */ \ h->keys[x] = key; \ __ac_set_isboth_false(h->flags, x); \ ++h->size; \ *ret = 2; \ } else \ *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \ return x; \ } \ SCOPE void map·del_##name(map·##name##_t* h, int32 x) \ { \ if (x != h->n_buckets && !__ac_iseither(h->flags, x)) \ { \ __ac_set_isdel_true(h->flags, x); \ --h->size; \ } \ } #define HASH_DECLARE(name, mapkey_t, mapval·t) \ __HASH_TYPE(name, mapkey_t, mapval·t) \ __HASH_PROTOTYPES(name, mapkey_t, mapval·t) #define MAP·MAKE2( \ name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \ __HASH_TYPE(name, mapkey_t, mapval·t) \ __HASH_IMPL( \ name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) #define MAP·MAKE( \ name, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \ MAP·MAKE2(name, \ static map·inline klib_unused, \ mapkey_t, \ mapval·t, \ map·is·map, \ __hash_func, \ __hash_equal) /* --- BEGIN OF HASH FUNCTIONS --- */ /*! @function @abstract Integer hash function @param key The integer [mapint32_t] @return The hash value [mapint_t] */ #define map·int_hash_func(key) (int32)(key) /*! @function @abstract Integer comparison function */ #define map·int_hash_equal(a, b) ((a) == (b)) /*! @function @abstract 64-bit integer hash function @param key The integer [mapint64_t] @return The hash value [mapint_t] */ #define map·int64_hash_func(key) (int32)((key) >> 33 ^ (key) ^ (key) << 11) /*! @function @abstract 64-bit integer comparison function */ #define map·int64_hash_equal(a, b) ((a) == (b)) /*! @function @abstract const char* hash function @param s Pointer to a null terminated string @return The hash value */ static map·inline int32 __ac_X31_hash_string(const char* s) { int32 h = (int32)(*s); if (h != 0) { for (const char* it = s; *it; ++it) h = (h << 5) - h + (int32)*it; } return h; } /*! @function @abstract Another interface to const char* hash function @param key Pointer to a null terminated string [const char*] @return The hash value [mapint_t] */ #define map·str_hash_func(key) __ac_X31_hash_string(key) /*! @function @abstract Const char* comparison function */ #define map·str_hash_equal(a, b) (strcmp(a, b) == 0) static inline int32 __ac_Wang_hash(int32 key) { key += ~(key << 15); key ^= (key >> 10); key += (key << 3); key ^= (key >> 6); key += ~(key << 11); key ^= (key >> 16); return key; } #define map·int_hash_func2(key) __ac_Wang_hash((int32)key) /* --- END OF HASH FUNCTIONS --- */ /* Other convenient macros... */ /*! @abstract Type of the hash table. @param name Name of the hash table [symbol] */ #define map·t(name) map·##name##_t /*! @function @abstract Initiate a hash table. @param name Name of the hash table [symbol] @return Pointer to the hash table [khash_t(name)*] */ #define map·init(name) map·init·##name() /*! @function @abstract Destroy a hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] */ #define map·destroy(name, h) map·destroy_##name(h) /*! @function @abstract Reset a hash table without deallocating memory. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] */ #define map·clear(name, h) map·clear_##name(h) /*! @function @abstract Resize a hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param s New size [mapint_t] */ #define map·resize(name, h, s) map·resize_##name(h, s) /*! @function @abstract Insert a key to the hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] @param r Extra return code: -1 if the operation failed; 0 if the key is present in the hash table; 1 if the bucket is empty (never used); 2 if the element in the bucket has been deleted [int*] @return Iterator to the inserted element [mapint_t] */ #define map·put(name, h, k, r) map·put_##name(h, k, r) /*! @function @abstract Retrieve a key from the hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Key [type of keys] @return Iterator to the found element, or map·end(h) if the element is absent [mapint_t] */ #define map·get(name, h, k) map·get_##name(h, k) /*! @function @abstract Remove a key from the hash table. @param name Name of the hash table [symbol] @param h Pointer to the hash table [khash_t(name)*] @param k Iterator to the element to be deleted [mapint_t] */ #define map·del(name, h, k) map·del_##name(h, k) /*! @function @abstract Test whether a bucket contains data. @param h Pointer to the hash table [khash_t(name)*] @param x Iterator to the bucket [mapint_t] @return 1 if containing data; 0 otherwise [int] */ #define map·exist(h, x) (!__ac_iseither((h)->flags, (x))) /*! @function @abstract Get key given an iterator @param h Pointer to the hash table [khash_t(name)*] @param x Iterator to the bucket [mapint_t] @return Key [type of keys] */ #define map·key(h, x) ((h)->keys[x]) /*! @function @abstract Get value given an iterator @param h Pointer to the hash table [khash_t(name)*] @param x Iterator to the bucket [mapint_t] @return Value [type of values] @discussion For hash sets, calling this results in segfault. */ #define map·val(h, x) ((h)->vals[x]) /*! @function @abstract Alias of map·val() */ #define map·value(h, x) ((h)->vals[x]) /*! @function @abstract Get the start iterator @param h Pointer to the hash table [khash_t(name)*] @return The start iterator [mapint_t] */ #define map·begin(h) (int32)(0) /*! @function @abstract Get the end iterator @param h Pointer to the hash table [khash_t(name)*] @return The end iterator [mapint_t] */ #define map·end(h) ((h)->n_buckets) /*! @function @abstract Get the number of elements in the hash table @param h Pointer to the hash table [khash_t(name)*] @return Number of elements in the hash table [mapint_t] */ #define map·len(h) ((h)->size) /*! @function @abstract Get the number of buckets in the hash table @param h Pointer to the hash table [khash_t(name)*] @return Number of buckets in the hash table [mapint_t] */ #define map·n_buckets(h) ((h)->n_buckets) /*! @function @abstract Iterate over the entries in the hash table @param h Pointer to the hash table [khash_t(name)*] @param kvar Variable to which key will be assigned @param vvar Variable to which value will be assigned @param code Block of code to execute */ #define map·foreach(h, kvar, vvar, code) \ { \ int32 __i; \ for (__i = map·begin(h); __i != map·end(h); ++__i) \ { \ if (!map·exist(h, __i)) continue; \ (kvar) = map·key(h, __i); \ (vvar) = map·val(h, __i); \ code; \ } \ } /*! @function @abstract Iterate over the values in the hash table @param h Pointer to the hash table [khash_t(name)*] @param vvar Variable to which value will be assigned @param code Block of code to execute */ #define map·foreach_val(h, vvar, code) \ { \ int32 __i; \ for (__i = map·begin(h); __i != map·end(h); ++__i) \ { \ if (!map·exist(h, __i)) continue; \ (vvar) = map·val(h, __i); \ code; \ } \ } /*! @function @abstract Iterate over the keys in the hash table @param h Pointer to the hash table [khash_t(name)*] @param kvar Variable to which key will be assigned @param code Block of code to execute */ #define map·foreach_key(h, kvar, code) \ { \ int32 __i; \ for (__i = map·begin(h); __i != map·end(h); ++__i) \ { \ if (!map·exist(h, __i)) continue; \ (kvar) = map·key(h, __i); \ code; \ } \ } /*! @function @abstract Iterate over the values in the hash table @param h Pointer to the hash table [khash_t(name)*] @param code Block of code to execute for free operation. */ #define map·free_all(h, code) \ { \ int32 __i; \ for (__i = map·begin(h); __i != map·end(h); ++__i) \ { \ if (!map·exist(h, __i)) continue; \ code(map·val(h, __i)); \ } \ } /* More convenient interfaces */ /*! @function @abstract Instantiate a hash set containing integer keys @param name Name of the hash table [symbol] */ #define define_int32_set(name) \ MAP·MAKE(name, int32, char, 0, map·int_hash_func, map·int_hash_equal) /*! @function @abstract Instantiate a hash map containing integer keys @param name Name of the hash table [symbol] @param mapval·t Type of values [type] */ #define define_int32_map(name, mapval·t) \ MAP·MAKE(name, int32, mapval·t, 1, map·int_hash_func, map·int_hash_equal) /*! @function @abstract Instantiate a hash map containing integer keys @param name Name of the hash table [symbol] @param mapval·t Type of values [type] */ #define define_uint32_set(name) \ MAP·MAKE(name, uint32, char, 0, map·int_hash_func, map·int_hash_equal) /*! @function @abstract Instantiate a hash map containing integer keys @param name Name of the hash table [symbol] @param mapval·t Type of values [type] */ #define define_uint32_map(name, mapval·t) \ MAP·MAKE(name, uint32, mapval·t, 1, map·int_hash_func, map·int_hash_equal) /*! @function @abstract Instantiate a hash set containing 64-bit integer keys @param name Name of the hash table [symbol] */ #define define_int64_set(name) \ MAP·MAKE(name, int64, char, 0, map·int64_hash_func, map·int64_hash_equal) /*! @function @abstract Instantiate a hash map containing 64-bit integer keys @param name Name of the hash table [symbol] @param mapval·t Type of values [type] */ #define define_int64_map(name, mapval·t) \ MAP·MAKE(name, int64, mapval·t, 1, map·int64_hash_func, map·int64_hash_equal) /*! @function @abstract Instantiate a hash set containing 64-bit unsigned integer keys @param name Name of the hash table [symbol] */ #define define_uint64_set(name) \ MAP·MAKE(name, uint64, char, 0, map·int64_hash_func, map·int64_hash_equal) /*! @function @abstract Instantiate a hash map containing 64-bit unsigned integer keys @param name Name of the hash table [symbol] @param mapval·t Type of values [type] */ #define define_uint64_map(name, mapval·t) \ MAP·MAKE(name, uint64, mapval·t, 1, map·int64_hash_func, map·int64_hash_equal) /*! @function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] */ #define define_str_hashset(name) \ MAP·MAKE(name, const byte*, byte, 0, map·str_hash_func, map·str_hash_equal) /*! @function @abstract Instantiate a hash map containing const char* keys @param name Name of the hash table [symbol] @param mapval·t Type of values [type] */ #define define_str·map(name, mapval·t) \ MAP·MAKE(name, const byte*, mapval·t, 1, map·str_hash_func, map·str_hash_equal)