aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
commit4b2ea2ca00eea7feea036b7642c0c1443b8f77a1 (patch)
tree5cdd635bd8240a6857258a056e3932e00966bfff /include
parent6b739739968a0cc9b4d9909d8f4ffec30f4461dd (diff)
removed buggy qsort header and implemented myself
Diffstat (limited to 'include')
-rw-r--r--include/libbio.h16
-rw-r--r--include/libn.h8
-rw-r--r--include/libn/macro/map.h929
-rw-r--r--include/libn/macro/qsort.h261
4 files changed, 311 insertions, 903 deletions
diff --git a/include/libbio.h b/include/libbio.h
index 9eebb0b..f802e22 100644
--- a/include/libbio.h
+++ b/include/libbio.h
@@ -25,14 +25,24 @@ typedef struct bio·Tree
void *h;
} bio·Tree;
-// clade functions
+/* clade manipulation */
error phylo·addchild(bio·Node* parent, bio·Node* child);
error phylo·rmchild(bio·Node* parent, bio·Node* child);
+/* clade statistics */
error phylo·countnodes(bio·Node *node, int *n);
error phylo·countleafs(bio·Node *node, int *n);
-error phylo·ladderize(bio·Node *root);
+/* topological sorting */
+error phylo·ladderize(bio·Node *root);
+double phylo·diameter(bio·Tree tree, int *len, bio·Node **path);
+
+/* generic computation on tree */
+void *phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx);
+void *phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx);
+
+/* simple helpers */
+void phylo·getleafs(bio·Tree tree, bio·Node **leafs);
/* newick i/o */
error bio·readnewick(io·Peeker stream, void*, bio·Tree* tree);
@@ -66,7 +76,7 @@ enum
{
aln·K = 20, // kmer size (k <= 32)
aln·L = 3, // number of kmers / hash
- aln·N = 1000, // number of hashes
+ aln·N = 10, // number of hashes
};
error aln·sketch(byte *seq, int l, uint64 *phi[aln·N], int *locs[aln·N]);
diff --git a/include/libn.h b/include/libn.h
index 063a408..8e30eae 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -311,3 +311,11 @@ void sort·floats(uintptr n, float arr[]);
void sort·doubles(uintptr n, double arr[]);
void sort·strings(uintptr n, byte* arr[]);
+
+// -----------------------------------------------------------------------------
+// fast random number generation
+
+error rng·init(uint64 seed);
+double rng·random();
+bool rng·bernoulli(double f);
+uint64 rng·randi(int max);
diff --git a/include/libn/macro/map.h b/include/libn/macro/map.h
index f81dc0f..2e65570 100644
--- a/include/libn/macro/map.h
+++ b/include/libn/macro/map.h
@@ -1,754 +1,237 @@
-// clang-format off
-//
-/* The MIT License
- Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/*
- An example:
-#include "khash.h"
-HASH_MAP_INIT_INT(32, char)
-int main() {
- int ret, is·missing;
- mapiter_t k;
- khash_t(32) *h = map·init(32);
- k = map·put(32, h, 5, &ret);
- map·value(h, k) = 10;
- k = map·get(32, h, 10);
- is·missing = (k == map·end(h));
- k = map·get(32, h, 5);
- map·del(32, h, k);
- for (k = map·begin(h); k != map·end(h); ++k)
- if (map·exist(h, k)) map·value(h, k) = 1;
- map·destroy(32, h);
- return 0;
-}
-*/
-
-/*
- 2013-05-02 (0.2.8):
- * Use quadratic probing. When the capacity is power of 2, stepping
- function i*(i+1)/2 guarantees to traverse each bucket. It is better than
- double hashing on cache performance and is more robust than linear probing. In
- theory, double hashing should be more robust than quadratic probing. However,
- my implementation is probably not for large hash tables, because the second
- hash function is closely tied to the first hash function, which reduce the
- effectiveness of double hashing. Reference:
- http://research.cs.vt.edu/AVresearch/hashing/quadratic.php 2011-12-29 (0.2.7):
- * Minor code clean up; no actual effect.
- 2011-09-16 (0.2.6):
- * The capacity is a power of 2. This seems to dramatically improve the
- speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
- - http://code.google.com/p/ulib/
- - http://nothings.org/computer/judy/
- * Allow to optionally use linear probing which usually has better
- performance for random input. Double hashing is still the default as
- it is more robust to certain non-random input.
- * Added Wang's integer hash function (not used by default). This hash
- function is more robust to certain non-random input.
- 2011-02-14 (0.2.5):
- * Allow to declare global functions.
- 2009-09-26 (0.2.4):
- * Improve portability
- 2008-09-19 (0.2.3):
- * Corrected the example
- * Improved interfaces
- 2008-09-11 (0.2.2):
- * Improved speed a little in map·put()
- 2008-09-10 (0.2.1):
- * Added map·clear()
- * Fixed a compiling error
- 2008-09-02 (0.2.0):
- * Changed to token concatenation which increases flexibility.
- 2008-08-31 (0.1.2):
- * Fixed a bug in map·get(), which has not been tested previously.
- 2008-08-31 (0.1.1):
- * Added destructor
-*/
-
-// Modified by Nicholas Noll 2019.
-// Modified API to be more inline with codebase.
-
#pragma once
-/*!
- @header
- Generic hash table library.
+/*
+ * Based on khash.h
+ * Modified to make control more granular and similar to QSORT
*/
-#define AC_VERSION_HASH_H "0.2.8"
-
-/* compiler specific configuration */
-
-// #if UINT_MAX == 0xffffffffu
-// typedef unsigned int mapint32_t;
-// #elif ULONG_MAX == 0xffffffffu
-// typedef unsigned long mapint32_t;
-// #endif
-//
-// #if ULONG_MAX == ULLONG_MAX
-// typedef unsigned long mapint64_t;
-// #else
-// typedef unsigned long long mapint64_t;
-// #endif
-
-#ifndef map·inline
-#ifdef _MSC_VER
-#define map·inline __inline
-#else
-#define map·inline inline
-#endif
-#endif /* map·inline */
-
-#ifndef klib_unused
-#if (defined __clang__ && __clang_major__ >= 3) || \
- (defined __GNUC__ && __GNUC__ >= 3)
-#define klib_unused __attribute__((__unused__))
-#else
-#define klib_unused
-#endif
-#endif /* klib_unused */
+static const double __ac_HASH_UPPER = 0.77;
-typedef int32 mapiter;
+#define _roundup32(x) \
+ (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, (x) |= (x) >> 8, \
+ (x) |= (x) >> 16, ++(x))
#define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2)
#define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1)
#define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3)
#define __ac_set_isdel_false(flag, i) \
- (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1)))
+ (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1)))
#define __ac_set_isempty_false(flag, i) \
- (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1)))
+ (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1)))
#define __ac_set_isboth_false(flag, i) \
- (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1)))
+ (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1)))
#define __ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1))
#define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4)
-#ifndef _roundup32
-#define _roundup32(x) \
- (--(x), \
- (x) |= (x) >> 1, \
- (x) |= (x) >> 2, \
- (x) |= (x) >> 4, \
- (x) |= (x) >> 8, \
- (x) |= (x) >> 16, \
- ++(x))
-#endif
-
-static const double __ac_HASH_UPPER = 0.77;
-
-#define __HASH_TYPE(name, map·key_t, map·val·t) \
- typedef struct map·##name##_s { \
- int32 n_buckets, size, n_occupied, upper_bound; \
- int32* flags; \
- map·key_t* keys; \
- map·val·t* vals; \
- } map·##name##_t;
-
-#define __HASH_PROTOTYPES(name, map·key_t, map·val·t) \
- extern map·##name##_t* map·init·##name(void); \
- extern void map·destroy_##name(map·##name##_t* h); \
- extern void map·clear_##name(map·##name##_t* h); \
- extern int32 map·get_##name(const map·##name##_t* h, mapkey_t key); \
- extern int map·resize_##name(map·##name##_t* h, int32 new_n_buckets); \
- extern int32 map·put_##name(map·##name##_t* h, mapkey_t key, int* ret); \
- extern void map·del_##name(map·##name##_t* h, int32 x);
-
-#define __HASH_IMPL( \
- name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \
- SCOPE map·##name##_t* map·init·##name(void) \
- { \
- return (map·##name##_t*)calloc(1, sizeof(map·##name##_t)); \
+#define MAP_STRUCT_BODY(key_t, val_t) \
+ mem·Allocator heap; \
+ void *h; \
+ int32 n_buckets, size, n_occupied, upper_bound; \
+ int32 *flags; \
+ key_t *keys; \
+ val_t *vals;
+
+#define MAP_MAKE(type) \
+ type *map; \
+ map = heap.alloc((h), 1, sizeof(*map)); \
+ map->heap = heap; \
+ map->h = h; \
+ return map
+
+#define MAP_FREE(map) \
+ map->heap.free(map->h, map->keys); \
+ map->heap.free(map->h, map->flags); \
+ map->heap.free(map->h, map->vals); \
+ map->heap.free(map->h, map);
+
+#define MAP_RESET(map) \
+ if (map && map->flags) { \
+ memset(map->flags, 0xaa, __ac_fsize(map->n_buckets) * sizeof(int32)); \
+ map->size = map->n_occupied = 0; \
+ }
+
+#define MAP_GET(map, key, hashfunc, equalfunc) \
+ if (map->n_buckets) { \
+ int32 k = 0; \
+ int32 i = 0; \
+ int32 last = 0; \
+ int32 mask = 0; \
+ int32 step = 0; \
+ mask = map->n_buckets - 1; \
+ k = hashfunc(key); \
+ i = k & mask; \
+ last = i; \
+ while (!__ac_isempty(map->flags, i) && \
+ (__ac_isdel(map->flags, i) || !equalfunc(map->keys[i], key))) { \
+ i = (i + (++step)) & mask; \
+ if (i == last) \
+ return map->n_buckets; \
} \
- SCOPE void map·destroy_##name(map·##name##_t* h) \
- { \
- if (h) \
- { \
- free((void*)h->keys); \
- free(h->flags); \
- free((void*)h->vals); \
- free(h); \
+ return __ac_iseither(map->flags, i) ? map->n_buckets : i; \
+ } else \
+ return 0;
+
+#define MAP_GROW(map, key_t, val_t, new_n_buckets, hashfunc) \
+ int32 *new_flags = nil; \
+ int32 j = 1; \
+ { \
+ _roundup32(new_n_buckets); \
+ if (new_n_buckets < 4) \
+ new_n_buckets = 4; \
+ if (map->size >= (int32)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \
+ j = 0; \
+ else { \
+ new_flags = \
+ map->heap.alloc(map->h, __ac_fsize(new_n_buckets), sizeof(int32)); \
+ if (!new_flags) return -1; \
+ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(int32)); \
+ if (map->n_buckets < new_n_buckets) { /* expand */ \
+ key_t *new_keys = \
+ map->heap.alloc(map->h, new_n_buckets, sizeof(key_t)); \
+ if (!new_keys) { \
+ map->heap.free(map->h, new_flags); \
+ return -1; \
} \
- } \
- SCOPE void map·clear_##name(map·##name##_t* h) \
- { \
- if (h && h->flags) \
- { \
- memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(int32)); \
- h->size = h->n_occupied = 0; \
+ map->heap.free(map->h, map->keys); \
+ map->keys = new_keys; \
+ val_t *new_vals = \
+ map->heap.alloc(map->h, new_n_buckets, sizeof(val_t)); \
+ if (!new_vals) { \
+ map->heap.free(map->h, new_flags); \
+ return -1; \
} \
+ map->heap.free(map->h, map->vals); \
+ map->vals = new_vals; \
+ } \
} \
- SCOPE int32 map·get_##name(const map·##name##_t* h, mapkey_t key) \
- { \
- if (h->n_buckets) \
- { \
- int32 k = 0; \
- int32 i = 0; \
- int32 last = 0; \
- int32 mask = 0; \
- int32 step = 0; \
- mask = h->n_buckets - 1; \
- k = __hash_func(key); \
- i = k & mask; \
- last = i; \
- while ( \
- !__ac_isempty(h->flags, i) && \
- (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) \
+ } \
+ if (j) { /* rehashing is needed */ \
+ for (j = 0; j != map->n_buckets; ++j) { \
+ if (__ac_iseither(map->flags, j) == 0) { \
+ key_t key = map->keys[j]; \
+ val_t val; \
+ int32 new_mask; \
+ new_mask = new_n_buckets - 1; \
+ val = map->vals[j]; \
+ __ac_set_isdel_true(map->flags, j); \
+ while (1) { \
+ int32 k, i, step = 0; \
+ k = hashfunc(key); \
+ i = k & new_mask; \
+ while (!__ac_isempty(new_flags, i)) \
+ i = (i + (++step)) & new_mask; \
+ __ac_set_isempty_false(new_flags, i); \
+ if (i < map->n_buckets && __ac_iseither(map->flags, i) == 0) { \
{ \
- i = (i + (++step)) & mask; \
- if (i == last) return h->n_buckets; \
- } \
- return __ac_iseither(h->flags, i) ? h->n_buckets : i; \
- } else \
- return 0; \
- } \
- SCOPE int map·resize_##name(map·##name##_t* h, int32 new_n_buckets) \
- { /* This function uses 0.25*n_buckets bytes of working space instead of \
- [sizeof(key_t+val·t)+.25]*n_buckets. */ \
- int32* new_flags = 0; \
- int32 j = 1; \
- { \
- _roundup32(new_n_buckets); \
- if (new_n_buckets < 4) new_n_buckets = 4; \
- if (h->size >= (int32)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \
- j = 0; /* requested size is too small */ \
- else \
- { /* hash table size to be changed (shrink or expand); rehash */ \
- new_flags = \
- (int32*)malloc(__ac_fsize(new_n_buckets) * sizeof(int32)); \
- if (!new_flags) return -1; \
- memset( \
- new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(int32)); \
- if (h->n_buckets < new_n_buckets) \
- { /* expand */ \
- mapkey_t* new_keys = (mapkey_t*)realloc( \
- (void*)h->keys, new_n_buckets * sizeof(mapkey_t)); \
- if (!new_keys) \
- { \
- free(new_flags); \
- return -1; \
- } \
- h->keys = new_keys; \
- if (map·is·map) \
- { \
- mapval·t* new_vals = (mapval·t*)realloc( \
- (void*)h->vals, new_n_buckets * sizeof(mapval·t)); \
- if (!new_vals) \
- { \
- free(new_flags); \
- return -1; \
- } \
- h->vals = new_vals; \
- } \
- } /* otherwise shrink */ \
+ key_t tmp = map->keys[i]; \
+ map->keys[i] = key; \
+ key = tmp; \
} \
- } \
- if (j) \
- { /* rehashing is needed */ \
- for (j = 0; j != h->n_buckets; ++j) \
{ \
- if (__ac_iseither(h->flags, j) == 0) \
- { \
- mapkey_t key = h->keys[j]; \
- mapval·t val; \
- int32 new_mask; \
- new_mask = new_n_buckets - 1; \
- if (map·is·map) val = h->vals[j]; \
- __ac_set_isdel_true(h->flags, j); \
- while (1) \
- { /* kick-out process; sort of like in Cuckoo hashing */ \
- int32 k, i, step = 0; \
- k = __hash_func(key); \
- i = k & new_mask; \
- while (!__ac_isempty(new_flags, i)) \
- i = (i + (++step)) & new_mask; \
- __ac_set_isempty_false(new_flags, i); \
- if (i < h->n_buckets && \
- __ac_iseither(h->flags, i) == 0) \
- { /* kick out the existing element */ \
- { \
- mapkey_t tmp = h->keys[i]; \
- h->keys[i] = key; \
- key = tmp; \
- } \
- if (map·is·map) \
- { \
- mapval·t tmp = h->vals[i]; \
- h->vals[i] = val; \
- val = tmp; \
- } \
- __ac_set_isdel_true(h->flags, \
- i); /* mark it as deleted in \
- the old hash table */ \
- } else \
- { /* write the element and jump out of the loop */ \
- h->keys[i] = key; \
- if (map·is·map) h->vals[i] = val; \
- break; \
- } \
- } \
- } \
+ val_t tmp = map->vals[i]; \
+ map->vals[i] = val; \
+ val = tmp; \
} \
- if (h->n_buckets > new_n_buckets) \
- { /* shrink the hash table */ \
- h->keys = (mapkey_t*)realloc((void*)h->keys, \
- new_n_buckets * sizeof(mapkey_t)); \
- if (map·is·map) \
- h->vals = (mapval·t*)realloc( \
- (void*)h->vals, new_n_buckets * sizeof(mapval·t)); \
- } \
- free(h->flags); /* free the working space */ \
- h->flags = new_flags; \
- h->n_buckets = new_n_buckets; \
- h->n_occupied = h->size; \
- h->upper_bound = (int32)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+ __ac_set_isdel_true(map->flags, i); \
+ } else { \
+ map->keys[i] = key; \
+ map->vals[i] = val; \
+ break; \
+ } \
} \
- return 0; \
+ } \
} \
- SCOPE int32 map·put_##name(map·##name##_t* h, mapkey_t key, int* ret) \
- { \
- int32 x = 0; \
- if (h->n_occupied >= h->upper_bound) \
- { /* update the hash table */ \
- if (h->n_buckets > (h->size << 1)) \
- { \
- if (map·resize_##name(h, h->n_buckets - 1) < 0) \
- { /* clear "deleted" elements */ \
- *ret = -1; \
- return h->n_buckets; \
- } \
- } else if (map·resize_##name(h, h->n_buckets + 1) < 0) \
- { /* expand the hash table */ \
- *ret = -1; \
- return h->n_buckets; \
- } \
- } /* TODO: to implement automatically shrinking; resize() already \
- support shrinking */ \
- { \
- int32 k, i, site, last, mask = h->n_buckets - 1, step = 0; \
- x = site = h->n_buckets; \
- k = __hash_func(key); \
- i = k & mask; \
- if (__ac_isempty(h->flags, i)) \
- x = i; /* for speed up */ \
- else \
- { \
- last = i; \
- while ( \
- !__ac_isempty(h->flags, i) && \
- (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) \
- { \
- if (__ac_isdel(h->flags, i)) site = i; \
- i = (i + (++step)) & mask; \
- if (i == last) \
- { \
- x = site; \
- break; \
- } \
- } \
- if (x == h->n_buckets) \
- { \
- if (__ac_isempty(h->flags, i) && site != h->n_buckets) \
- x = site; \
- else \
- x = i; \
- } \
- } \
- } \
- if (__ac_isempty(h->flags, x)) \
- { /* not present at all */ \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; \
- ++h->n_occupied; \
- *ret = 1; \
- } else if (__ac_isdel(h->flags, x)) \
- { /* deleted */ \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; \
- *ret = 2; \
- } else \
- *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
- return x; \
+ if (map->n_buckets > new_n_buckets) { /* shrink the hash table */ \
+ key_t *new_keys = map->heap.alloc(map->h, new_n_buckets, sizeof(key_t)); \
+ map->heap.free(map->h, map->keys); \
+ map->keys = new_keys; \
+ \
+ val_t *new_vals = map->heap.alloc(map->h, new_n_buckets, sizeof(val_t)); \
+ map->heap.free(map->h, map->vals); \
+ map->vals = new_vals; \
} \
- SCOPE void map·del_##name(map·##name##_t* h, int32 x) \
- { \
- if (x != h->n_buckets && !__ac_iseither(h->flags, x)) \
- { \
- __ac_set_isdel_true(h->flags, x); \
- --h->size; \
+ map->heap.free(map->h, map->flags); /* free the working space */ \
+ map->flags = new_flags; \
+ map->n_buckets = new_n_buckets; \
+ map->n_occupied = map->size; \
+ map->upper_bound = (int32)(map->n_buckets * __ac_HASH_UPPER + 0.5); \
+ } \
+ return 0;
+
+#define MAP_PUT(map, key, val, hashfunc, equalfunc, resizefunc, err) \
+ { \
+ int32 x = 0; \
+ if (map->n_occupied >= map->upper_bound) { \
+ if (map->n_buckets > (map->size << 1)) { \
+ if (resizefunc(map, map->n_buckets - 1) < 0) { \
+ *err = -1; \
+ return map->n_buckets; \
} \
- }
-
-#define HASH_DECLARE(name, mapkey_t, mapval·t) \
- __HASH_TYPE(name, mapkey_t, mapval·t) \
- __HASH_PROTOTYPES(name, mapkey_t, mapval·t)
-
-#define MAP·MAKE2( \
- name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \
- __HASH_TYPE(name, mapkey_t, mapval·t) \
- __HASH_IMPL( \
- name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal)
-
-#define MAP·MAKE( \
- name, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \
- MAP·MAKE2(name, \
- static map·inline klib_unused, \
- mapkey_t, \
- mapval·t, \
- map·is·map, \
- __hash_func, \
- __hash_equal)
-
-/* --- BEGIN OF HASH FUNCTIONS --- */
-
-/*! @function
- @abstract Integer hash function
- @param key The integer [mapint32_t]
- @return The hash value [mapint_t]
- */
-#define map·int_hash_func(key) (int32)(key)
-/*! @function
- @abstract Integer comparison function
- */
-#define map·int_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract 64-bit integer hash function
- @param key The integer [mapint64_t]
- @return The hash value [mapint_t]
- */
-#define map·int64_hash_func(key) (int32)((key) >> 33 ^ (key) ^ (key) << 11)
-/*! @function
- @abstract 64-bit integer comparison function
- */
-#define map·int64_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract const char* hash function
- @param s Pointer to a null terminated string
- @return The hash value
- */
-static map·inline int32
- __ac_X31_hash_string(const char* s)
-{
- int32 h = (int32)(*s);
- if (h != 0) {
- for (const char* it = s; *it; ++it)
- h = (h << 5) - h + (int32)*it;
- }
- return h;
-}
-/*! @function
- @abstract Another interface to const char* hash function
- @param key Pointer to a null terminated string [const char*]
- @return The hash value [mapint_t]
- */
-#define map·str_hash_func(key) __ac_X31_hash_string(key)
-/*! @function
- @abstract Const char* comparison function
- */
-#define map·str_hash_equal(a, b) (strcmp(a, b) == 0)
-
-static inline int32
-__ac_Wang_hash(int32 key)
-{
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
-}
-#define map·int_hash_func2(key) __ac_Wang_hash((int32)key)
-
-/* --- END OF HASH FUNCTIONS --- */
-
-/* Other convenient macros... */
-
-/*!
- @abstract Type of the hash table.
- @param name Name of the hash table [symbol]
- */
-#define map·t(name) map·##name##_t
-
-/*! @function
- @abstract Initiate a hash table.
- @param name Name of the hash table [symbol]
- @return Pointer to the hash table [khash_t(name)*]
- */
-#define map·init(name) map·init·##name()
-
-/*! @function
- @abstract Destroy a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define map·destroy(name, h) map·destroy_##name(h)
-
-/*! @function
- @abstract Reset a hash table without deallocating memory.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define map·clear(name, h) map·clear_##name(h)
-
-/*! @function
- @abstract Resize a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param s New size [mapint_t]
- */
-#define map·resize(name, h, s) map·resize_##name(h, s)
-
-/*! @function
- @abstract Insert a key to the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @param r Extra return code: -1 if the operation failed;
- 0 if the key is present in the hash table;
- 1 if the bucket is empty (never used); 2 if the element in
- the bucket has been deleted [int*]
- @return Iterator to the inserted element [mapint_t]
- */
-#define map·put(name, h, k, r) map·put_##name(h, k, r)
-
-/*! @function
- @abstract Retrieve a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @return Iterator to the found element, or map·end(h) if the element is
- absent [mapint_t]
- */
-#define map·get(name, h, k) map·get_##name(h, k)
-
-/*! @function
- @abstract Remove a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Iterator to the element to be deleted [mapint_t]
- */
-#define map·del(name, h, k) map·del_##name(h, k)
-
-/*! @function
- @abstract Test whether a bucket contains data.
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [mapint_t]
- @return 1 if containing data; 0 otherwise [int]
- */
-#define map·exist(h, x) (!__ac_iseither((h)->flags, (x)))
-
-/*! @function
- @abstract Get key given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [mapint_t]
- @return Key [type of keys]
- */
-#define map·key(h, x) ((h)->keys[x])
-
-/*! @function
- @abstract Get value given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [mapint_t]
- @return Value [type of values]
- @discussion For hash sets, calling this results in segfault.
- */
-#define map·val(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Alias of map·val()
- */
-#define map·value(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Get the start iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The start iterator [mapint_t]
- */
-#define map·begin(h) (int32)(0)
-
-/*! @function
- @abstract Get the end iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The end iterator [mapint_t]
- */
-#define map·end(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Get the number of elements in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of elements in the hash table [mapint_t]
- */
-#define map·len(h) ((h)->size)
-
-/*! @function
- @abstract Get the number of buckets in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of buckets in the hash table [mapint_t]
- */
-#define map·n_buckets(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Iterate over the entries in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param kvar Variable to which key will be assigned
- @param vvar Variable to which value will be assigned
- @param code Block of code to execute
- */
-#define map·foreach(h, kvar, vvar, code) \
- { \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- (kvar) = map·key(h, __i); \
- (vvar) = map·val(h, __i); \
- code; \
- } \
- }
-
-/*! @function
- @abstract Iterate over the values in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param vvar Variable to which value will be assigned
- @param code Block of code to execute
- */
-#define map·foreach_val(h, vvar, code) \
- { \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- (vvar) = map·val(h, __i); \
- code; \
- } \
- }
-
-/*! @function
- @abstract Iterate over the keys in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param kvar Variable to which key will be assigned
- @param code Block of code to execute
- */
-#define map·foreach_key(h, kvar, code) \
+ } else if (resizefunc(map, map->n_buckets + 1) < 0) { \
+ *err = -1; \
+ return map->n_buckets; \
+ } \
+ } \
+ \
{ \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- (kvar) = map·key(h, __i); \
- code; \
+ int32 k, i, site, last, mask = map->n_buckets - 1, step = 0; \
+ x = site = map->n_buckets; \
+ k = hashfunc(key); \
+ i = k & mask; \
+ if (__ac_isempty(map->flags, i)) \
+ x = i; /* for speed up */ \
+ else { \
+ last = i; \
+ while (!__ac_isempty(map->flags, i) && \
+ (__ac_isdel(map->flags, i) || !equalfunc(map->keys[i], key))) { \
+ if (__ac_isdel(map->flags, i)) \
+ site = i; \
+ i = (i + (++step)) & mask; \
+ if (i == last) { \
+ x = site; \
+ break; \
+ } \
} \
- }
-
-/*! @function
- @abstract Iterate over the values in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param code Block of code to execute for free operation.
- */
-#define map·free_all(h, code) \
- { \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- code(map·val(h, __i)); \
+ if (x == map->n_buckets) { \
+ if (__ac_isempty(map->flags, i) && site != map->n_buckets) \
+ x = site; \
+ else \
+ x = i; \
} \
- }
-
-/* More convenient interfaces */
-
-/*! @function
- @abstract Instantiate a hash set containing integer keys
- @param name Name of the hash table [symbol]
- */
-#define define_int32_set(name) \
- MAP·MAKE(name, int32, char, 0, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_int32_map(name, mapval·t) \
- MAP·MAKE(name, int32, mapval·t, 1, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_uint32_set(name) \
- MAP·MAKE(name, uint32, char, 0, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_uint32_map(name, mapval·t) \
- MAP·MAKE(name, uint32, mapval·t, 1, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash set containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- */
-#define define_int64_set(name) \
- MAP·MAKE(name, int64, char, 0, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_int64_map(name, mapval·t) \
- MAP·MAKE(name, int64, mapval·t, 1, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash set containing 64-bit unsigned integer keys
- @param name Name of the hash table [symbol]
- */
-#define define_uint64_set(name) \
- MAP·MAKE(name, uint64, char, 0, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit unsigned integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_uint64_map(name, mapval·t) \
- MAP·MAKE(name, uint64, mapval·t, 1, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- */
-#define define_str_hashset(name) \
- MAP·MAKE(name, const byte*, byte, 0, map·str_hash_func, map·str_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_str·map(name, mapval·t) \
- MAP·MAKE(name, const byte*, mapval·t, 1, map·str_hash_func, map·str_hash_equal)
+ } \
+ } \
+ if (__ac_isempty(map->flags, x)) { /* not present at all */ \
+ map->keys[x] = key; \
+ __ac_set_isboth_false(map->flags, x); \
+ ++map->size; \
+ ++map->n_occupied; \
+ *err = 1; \
+ } else if (__ac_isdel(map->flags, x)) { /* deleted */ \
+ map->keys[x] = key; \
+ __ac_set_isboth_false(map->flags, x); \
+ ++map->size; \
+ } else \
+ *err = 0; \
+ return x; \
+ }
+
+#define MAP_DEL(map, x) \
+ { \
+ if (x != map->n_buckets && !__ac_iseither(map->flags, x)) { \
+ __ac_set_isdel_true(map->flags, x); \
+ --map->size; \
+ } \
+ }
+
+static int32 hash_string(byte *s) {
+ int32 h;
+ byte *it;
+
+ h = (int32)(*s);
+ if (h != 0) {
+ for (it = s; *it; ++it)
+ h = (h << 5) - h + (int32)*it;
+ }
+ return h;
+}
diff --git a/include/libn/macro/qsort.h b/include/libn/macro/qsort.h
index 8daf0fa..cc38bca 100644
--- a/include/libn/macro/qsort.h
+++ b/include/libn/macro/qsort.h
@@ -1,181 +1,88 @@
-/*
- * Copyright (c) 2013, 2017 Alexey Tourbin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+#pragma once
-/*
- * This is a traditional Quicksort implementation which mostly follows
- * [Sedgewick 1978]. Sorting is performed entirely on array indices,
- * while actual access to the array elements is abstracted out with the
- * user-defined `LESS` and `SWAP` primitives.
+/*
+ * Straight implementation of Sedgewick's median qsort
+ *
+ * @LEN: name of parameter length
+ * @QLESS: name of function that computes array[i] < array[j]
+ * should return a boolean
+ * @QSWAP: name of function that swaps array[i] <-> array[j]
*
- * Synopsis:
- * QSORT(N, LESS, SWAP);
- * where
- * N - the number of elements in A[];
- * LESS(i, j) - compares A[i] to A[j];
- * SWAP(i, j) - exchanges A[i] with A[j].
+ * NOTE: This can perform on strided arrays.
+ * Make sure to use parens liberally to ensure hygeine!
*/
-#pragma once
-
-/* Sort 3 elements. */
-#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \
-do { \
- if (Q_LESS(q_a2, q_a1)) { \
- if (Q_LESS(q_a3, q_a2)) \
- Q_SWAP(q_a1, q_a3); \
- else { \
- Q_SWAP(q_a1, q_a2); \
- if (Q_LESS(q_a3, q_a2)) \
- Q_SWAP(q_a2, q_a3); \
- } \
- } \
- else if (Q_LESS(q_a3, q_a2)) { \
- Q_SWAP(q_a2, q_a3); \
- if (Q_LESS(q_a2, q_a1)) \
- Q_SWAP(q_a1, q_a2); \
- } \
-} while (0)
-
-/* Partition [q_l,q_r] around a pivot. After partitioning,
- * [q_l,q_j] are the elements that are less than or equal to the pivot,
- * while [q_i,q_r] are the elements greater than or equal to the pivot. */
-#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \
-do { \
- /* The middle element, not to be confused with the median. */ \
- Q_UINT q_m = q_l + ((q_r - q_l) >> 1); \
- /* Reorder the second, the middle, and the last items. \
- * As [Edelkamp Weiss 2016] explain, using the second element \
- * instead of the first one helps avoid bad behaviour for \
- * decreasingly sorted arrays. This method is used in recent \
- * versions of gcc's std::sort, see gcc bug 58437#c13, although \
- * the details are somewhat different (cf. #c14). */ \
- Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP); \
- /* Place the median at the beginning. */ \
- Q_SWAP(q_l, q_m); \
- /* Partition [q_l+2, q_r-1] around the median which is in q_l. \
- * q_i and q_j are initially off by one, they get decremented \
- * in the do-while loops. */ \
- q_i = q_l + 1; q_j = q_r; \
- while (1) { \
- do q_i++; while (Q_LESS(q_i, q_l)); \
- do q_j--; while (Q_LESS(q_l, q_j)); \
- if (q_i >= q_j) break; /* Sedgewick says "until j < i" */ \
- Q_SWAP(q_i, q_j); \
- } \
- /* Compensate for the i==j case. */ \
- q_i = q_j + 1; \
- /* Put the median to its final place. */ \
- Q_SWAP(q_l, q_j); \
- /* The median is not part of the left subfile. */ \
- q_j--; \
-} while (0)
-
-/* Insertion sort is applied to small subfiles - this is contrary to
- * Sedgewick's suggestion to run a separate insertion sort pass after
- * the partitioning is done. The reason I don't like a separate pass
- * is that it triggers extra comparisons, because it can't see that the
- * medians are already in their final positions and need not be rechecked.
- * Since I do not assume that comparisons are cheap, I also do not try
- * to eliminate the (q_j > q_l) boundary check. */
-#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \
-do { \
- Q_UINT q_i, q_j; \
- /* For each item starting with the second... */ \
- for (q_i = q_l + 1; q_i <= q_r; q_i++) \
- /* move it down the array so that the first part is sorted. */ \
- for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--) \
- Q_SWAP(q_j, q_j - 1); \
-} while (0)
-
-/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
- * Q_THRESH, the algorithm performs recursive partitioning. When the size
- * drops below Q_THRESH, the algorithm switches to insertion sort.
- * The minimum valid value is probably 5 (with 5 items, the second and
- * the middle items, the middle itself being rounded down, are distinct). */
-#define Q_THRESH 16
-
-/* The main loop. */
-#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \
-do { \
- Q_UINT q_l = 0; \
- Q_UINT q_r = (Q_N) - 1; \
- Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \
- struct { Q_UINT q_l, q_r; } \
- /* On 32-bit platforms, to sort a "char[3GB+]" array, \
- * it may take full 32 stack frames. On 64-bit CPUs, \
- * though, the address space is limited to 48 bits. \
- * The usage is further reduced if Q_N has a 32-bit type. */ \
- q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \
- while (1) { \
- if (q_r - q_l + 1 >= Q_THRESH) { \
- Q_UINT q_i, q_j; \
- Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \
- /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \
- * Dealing with them depends on which one is bigger. */ \
- if (q_j - q_l >= q_r - q_i) \
- Q_SUBFILES(q_l, q_j, q_i, q_r); \
- else \
- Q_SUBFILES(q_i, q_r, q_l, q_j); \
- } \
- else { \
- Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \
- /* Pop subfiles from the stack, until it gets empty. */ \
- if (q_sp == 0) break; \
- q_sp--; \
- q_l = q_st[q_sp].q_l; \
- q_r = q_st[q_sp].q_r; \
- } \
- } \
-} while (0)
-
-/* The missing part: dealing with subfiles.
- * Assumes that the first subfile is not smaller than the second. */
-#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \
-do { \
- /* If the second subfile is only a single element, it needs \
- * no further processing. The first subfile will be processed \
- * on the next iteration (both subfiles cannot be only a single \
- * element, due to Q_THRESH). */ \
- if (q_l2 == q_r2) { \
- q_l = q_l1; \
- q_r = q_r1; \
- } \
- else { \
- /* Otherwise, both subfiles need processing. \
- * Push the larger subfile onto the stack. */ \
- q_st[q_sp].q_l = q_l1; \
- q_st[q_sp].q_r = q_r1; \
- q_sp++; \
- /* Process the smaller subfile on the next iteration. */ \
- q_l = q_l2; \
- q_r = q_r2; \
- } \
-} while (0)
-
-/* And now, ladies and gentlemen, may I proudly present to you... */
-#define QSORT(Q_N, Q_LESS, Q_SWAP) \
-do { \
- if ((Q_N) > 1) \
- /* We could check sizeof(Q_N) and use "unsigned", but at least \
- * on x86_64, this has the performance penalty of up to 5%. */ \
- Q_LOOP(ulong, Q_N, Q_LESS, Q_SWAP); \
-} while (0)
+#define QSORT(LEN, QLESS, QSWAP) \
+ int i, j, m, r, l; \
+ struct { \
+ int i, j; \
+ } *sp, base[48]; \
+ sp = base; \
+ \
+ l = 1; \
+ r = LEN-1; \
+ \
+ if (LEN <= 14) goto ENDOUTER; \
+OUTERLOOP: \
+ SWAP((l+r)/2, l+1); \
+ \
+ if (QLESS(r, l+1)) QSWAP(r, l+1); \
+ if (QLESS(r, l)) QSWAP(r, l); \
+ if (QLESS(l, l+1)) QSWAP(l, l+1); \
+ \
+ i = l+1; \
+ j = r; \
+ \
+INNERLOOP: \
+ do ++i; while(QLESS(i, l)); \
+ do --j; while(QLESS(l, j)); \
+ if (j < i) goto ENDINNER; \
+ QSWAP(i, j); \
+ goto INNERLOOP; \
+ \
+ENDINNER: \
+ QSWAP(l, j); \
+ \
+ if (MAX(j-l, r-i+1) <= 14) { \
+ if (sp == base) { \
+ goto ENDOUTER; \
+ } else { \
+ l = sp[-1].i; \
+ r = sp[-1].j; \
+ sp--; \
+ } \
+ } else { \
+ if (MIN(j-l, r-i+1) <= 14) { \
+ if (j-l > r-i+1) { \
+ l = l; \
+ r = j-1; \
+ } else { \
+ l = i; \
+ r = r; \
+ } \
+ } else { \
+ if (j-l > r-i+1) { \
+ sp->i = l; \
+ sp->j = j-1; \
+ sp++; \
+ \
+ l = i; \
+ r = r; \
+ } else { \
+ sp->i = i; \
+ sp->j = r; \
+ sp++; \
+ \
+ l = l; \
+ r = j-1; \
+ } \
+ } \
+ } \
+ goto OUTERLOOP; \
+ \
+ENDOUTER: \
+ for (i = 1; i < LEN; i++) { \
+ for (j = i; j > 0 && QLESS(j, j-1); j--) { \
+ QSWAP(j, j-1); \
+ } \
+ } \