aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
commit4b2ea2ca00eea7feea036b7642c0c1443b8f77a1 (patch)
tree5cdd635bd8240a6857258a056e3932e00966bfff
parent6b739739968a0cc9b4d9909d8f4ffec30f4461dd (diff)
removed buggy qsort header and implemented myself
-rw-r--r--.gitignore5
-rw-r--r--Makefile6
-rwxr-xr-xbin/gentags3
-rw-r--r--compile_commands.json478
-rw-r--r--include/libbio.h16
-rw-r--r--include/libn.h8
-rw-r--r--include/libn/macro/map.h929
-rw-r--r--include/libn/macro/qsort.h261
-rw-r--r--rules.mk1
-rw-r--r--sys/libbio/align.c70
-rw-r--r--sys/libbio/phylo.c255
-rw-r--r--sys/libbio/rules.mk2
-rw-r--r--sys/libbio/simulate.c117
-rw-r--r--sys/libbio/test.c18
-rw-r--r--sys/libmath/blas.c2
-rw-r--r--sys/libn/rules.mk4
-rw-r--r--sys/libn/test.c53
17 files changed, 1253 insertions, 975 deletions
diff --git a/.gitignore b/.gitignore
index fc20b4d..941439d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,5 +10,10 @@ sys/nixos
bin/fasttree
bin/mafft
+GRTAGS
+GTAGS
+GPATH
+
.cscope
+cscope.out
tags
diff --git a/Makefile b/Makefile
index 2d6485c..e09510d 100644
--- a/Makefile
+++ b/Makefile
@@ -11,13 +11,13 @@ LIB_DIR := lib
OBJ_DIR := build
# C runtime library
-CINIT := $(LIB_DIR)/crt/crt1.o $(LIB_DIR)/crt/x86_64/crti.o `gcc --print-file-name=crtbeginT.o`
+CINIT := $(LIB_DIR)/crt/crt1.o $(LIB_DIR)/crt/x86_64/crti.o `gcc --print-file-name=crtbeginS.o`
CFINI := `gcc --print-file-name=crtendS.o` $(LIB_DIR)/crt/x86_64/crtn.o
# Flags, Libraries and Includes
CFLAGS := -g -O3 -march=native \
- -ffast-math -fno-strict-aliasing -fwrapv -fms-extensions \
- -Wno-microsoft-anon-tag -Wno-incompatible-function-pointer-types
+ -ffast-math -fno-strict-aliasing -fwrapv -fms-extensions \
+ -Wno-microsoft-anon-tag -Wno-incompatible-function-pointer-types
STATIC := -static -nodefaultlibs -nostartfiles
AFLAGS := -f elf64
INCS := -isystem $(INC_DIR)/vendor/libc -I $(INC_DIR)
diff --git a/bin/gentags b/bin/gentags
index f0494f0..f398349 100755
--- a/bin/gentags
+++ b/bin/gentags
@@ -10,10 +10,11 @@ echo "finding files ..."
ROOT=/home/nolln/root
find $ROOT \
-path "$ROOT/sys/*.[chs]" -prune -o \
+ -path "$ROOT/vendor/musl/src/*.[chs]" -prune -o \
-path "$ROOT/include/*.h" > "$CSCOPE_DIR/files"
echo "adding files to cscope db: $ROOT/cscope.db ..."
-cscope -b -i "$CSCOPE_DIR/files"
+cscope -b -k -I include/vendor/libc -I include/ -i "$CSCOPE_DIR/files"
CSCOPE_DB="$ROOT/cscope.out"
echo "exported CSCOPE_DB to: '$CSCOPE_DB'"
diff --git a/compile_commands.json b/compile_commands.json
index 68f271e..fac553e 100644
--- a/compile_commands.json
+++ b/compile_commands.json
@@ -4,12 +4,67 @@
"clang",
"-c",
"-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
"-fno-strict-aliasing",
"-fwrapv",
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-Iinclude",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libbio/io/newick.o",
+ "sys/libbio/io/newick.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libbio/io/newick.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-D_GNU_SOURCE",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libmath/test.o",
+ "sys/libmath/test.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libmath/test.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
"-o",
"build/libn/flate.o",
"sys/libn/flate.c"
@@ -22,30 +77,169 @@
"clang",
"-c",
"-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
"-fno-strict-aliasing",
"-fwrapv",
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-Iinclude",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
"-o",
- "build/libn/bufio.o",
- "sys/libn/bufio.c"
+ "build/libn/gz.o",
+ "sys/libn/gz.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/bufio.c"
+ "file": "sys/libn/gz.c"
},
{
"arguments": [
"clang",
"-c",
"-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
"-fno-strict-aliasing",
"-fwrapv",
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-Iinclude",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/mmap.o",
+ "sys/libn/mmap.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/mmap.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-D_GNU_SOURCE",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libmath/blas.o",
+ "sys/libmath/blas.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libmath/blas.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libbio/test.o",
+ "sys/libbio/test.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libbio/test.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-ffreestanding",
+ "-fno-builtin",
+ "-nostdlib",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libc/string.o",
+ "sys/libc/string.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libc/string.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-ffreestanding",
+ "-fno-builtin",
+ "-nostdlib",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libc/stdio.o",
+ "sys/libc/stdio.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libc/stdio.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
"-o",
"build/libn/error.o",
"sys/libn/error.c"
@@ -58,17 +252,287 @@
"clang",
"-c",
"-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libbio/align.o",
+ "sys/libbio/align.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libbio/align.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/random.o",
+ "sys/libn/random.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/random.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
"-fno-strict-aliasing",
"-fwrapv",
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-Iinclude",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libbio/phylo.o",
+ "sys/libbio/phylo.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libbio/phylo.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/memory.o",
+ "sys/libn/memory.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/memory.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/io.o",
+ "sys/libn/io.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/io.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/string.o",
+ "sys/libn/string.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/string.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
"-o",
"build/libn/coro.o",
"sys/libn/coro.c"
],
"directory": "/home/nolln/root",
"file": "sys/libn/coro.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/sort.o",
+ "sys/libn/sort.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/sort.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/bufio.o",
+ "sys/libn/bufio.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/bufio.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libbio/io/fasta.o",
+ "sys/libbio/io/fasta.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libbio/io/fasta.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libbio/simulate.o",
+ "sys/libbio/simulate.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libbio/simulate.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-O0",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/test.o",
+ "sys/libn/test.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/test.c"
}
] \ No newline at end of file
diff --git a/include/libbio.h b/include/libbio.h
index 9eebb0b..f802e22 100644
--- a/include/libbio.h
+++ b/include/libbio.h
@@ -25,14 +25,24 @@ typedef struct bio·Tree
void *h;
} bio·Tree;
-// clade functions
+/* clade manipulation */
error phylo·addchild(bio·Node* parent, bio·Node* child);
error phylo·rmchild(bio·Node* parent, bio·Node* child);
+/* clade statistics */
error phylo·countnodes(bio·Node *node, int *n);
error phylo·countleafs(bio·Node *node, int *n);
-error phylo·ladderize(bio·Node *root);
+/* topological sorting */
+error phylo·ladderize(bio·Node *root);
+double phylo·diameter(bio·Tree tree, int *len, bio·Node **path);
+
+/* generic computation on tree */
+void *phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx);
+void *phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx);
+
+/* simple helpers */
+void phylo·getleafs(bio·Tree tree, bio·Node **leafs);
/* newick i/o */
error bio·readnewick(io·Peeker stream, void*, bio·Tree* tree);
@@ -66,7 +76,7 @@ enum
{
aln·K = 20, // kmer size (k <= 32)
aln·L = 3, // number of kmers / hash
- aln·N = 1000, // number of hashes
+ aln·N = 10, // number of hashes
};
error aln·sketch(byte *seq, int l, uint64 *phi[aln·N], int *locs[aln·N]);
diff --git a/include/libn.h b/include/libn.h
index 063a408..8e30eae 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -311,3 +311,11 @@ void sort·floats(uintptr n, float arr[]);
void sort·doubles(uintptr n, double arr[]);
void sort·strings(uintptr n, byte* arr[]);
+
+// -----------------------------------------------------------------------------
+// fast random number generation
+
+error rng·init(uint64 seed);
+double rng·random();
+bool rng·bernoulli(double f);
+uint64 rng·randi(int max);
diff --git a/include/libn/macro/map.h b/include/libn/macro/map.h
index f81dc0f..2e65570 100644
--- a/include/libn/macro/map.h
+++ b/include/libn/macro/map.h
@@ -1,754 +1,237 @@
-// clang-format off
-//
-/* The MIT License
- Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/*
- An example:
-#include "khash.h"
-HASH_MAP_INIT_INT(32, char)
-int main() {
- int ret, is·missing;
- mapiter_t k;
- khash_t(32) *h = map·init(32);
- k = map·put(32, h, 5, &ret);
- map·value(h, k) = 10;
- k = map·get(32, h, 10);
- is·missing = (k == map·end(h));
- k = map·get(32, h, 5);
- map·del(32, h, k);
- for (k = map·begin(h); k != map·end(h); ++k)
- if (map·exist(h, k)) map·value(h, k) = 1;
- map·destroy(32, h);
- return 0;
-}
-*/
-
-/*
- 2013-05-02 (0.2.8):
- * Use quadratic probing. When the capacity is power of 2, stepping
- function i*(i+1)/2 guarantees to traverse each bucket. It is better than
- double hashing on cache performance and is more robust than linear probing. In
- theory, double hashing should be more robust than quadratic probing. However,
- my implementation is probably not for large hash tables, because the second
- hash function is closely tied to the first hash function, which reduce the
- effectiveness of double hashing. Reference:
- http://research.cs.vt.edu/AVresearch/hashing/quadratic.php 2011-12-29 (0.2.7):
- * Minor code clean up; no actual effect.
- 2011-09-16 (0.2.6):
- * The capacity is a power of 2. This seems to dramatically improve the
- speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
- - http://code.google.com/p/ulib/
- - http://nothings.org/computer/judy/
- * Allow to optionally use linear probing which usually has better
- performance for random input. Double hashing is still the default as
- it is more robust to certain non-random input.
- * Added Wang's integer hash function (not used by default). This hash
- function is more robust to certain non-random input.
- 2011-02-14 (0.2.5):
- * Allow to declare global functions.
- 2009-09-26 (0.2.4):
- * Improve portability
- 2008-09-19 (0.2.3):
- * Corrected the example
- * Improved interfaces
- 2008-09-11 (0.2.2):
- * Improved speed a little in map·put()
- 2008-09-10 (0.2.1):
- * Added map·clear()
- * Fixed a compiling error
- 2008-09-02 (0.2.0):
- * Changed to token concatenation which increases flexibility.
- 2008-08-31 (0.1.2):
- * Fixed a bug in map·get(), which has not been tested previously.
- 2008-08-31 (0.1.1):
- * Added destructor
-*/
-
-// Modified by Nicholas Noll 2019.
-// Modified API to be more inline with codebase.
-
#pragma once
-/*!
- @header
- Generic hash table library.
+/*
+ * Based on khash.h
+ * Modified to make control more granular and similar to QSORT
*/
-#define AC_VERSION_HASH_H "0.2.8"
-
-/* compiler specific configuration */
-
-// #if UINT_MAX == 0xffffffffu
-// typedef unsigned int mapint32_t;
-// #elif ULONG_MAX == 0xffffffffu
-// typedef unsigned long mapint32_t;
-// #endif
-//
-// #if ULONG_MAX == ULLONG_MAX
-// typedef unsigned long mapint64_t;
-// #else
-// typedef unsigned long long mapint64_t;
-// #endif
-
-#ifndef map·inline
-#ifdef _MSC_VER
-#define map·inline __inline
-#else
-#define map·inline inline
-#endif
-#endif /* map·inline */
-
-#ifndef klib_unused
-#if (defined __clang__ && __clang_major__ >= 3) || \
- (defined __GNUC__ && __GNUC__ >= 3)
-#define klib_unused __attribute__((__unused__))
-#else
-#define klib_unused
-#endif
-#endif /* klib_unused */
+static const double __ac_HASH_UPPER = 0.77;
-typedef int32 mapiter;
+#define _roundup32(x) \
+ (--(x), (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, (x) |= (x) >> 8, \
+ (x) |= (x) >> 16, ++(x))
#define __ac_isempty(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 2)
#define __ac_isdel(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 1)
#define __ac_iseither(flag, i) ((flag[i >> 4] >> ((i & 0xfU) << 1)) & 3)
#define __ac_set_isdel_false(flag, i) \
- (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1)))
+ (flag[i >> 4] &= ~(1ul << ((i & 0xfU) << 1)))
#define __ac_set_isempty_false(flag, i) \
- (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1)))
+ (flag[i >> 4] &= ~(2ul << ((i & 0xfU) << 1)))
#define __ac_set_isboth_false(flag, i) \
- (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1)))
+ (flag[i >> 4] &= ~(3ul << ((i & 0xfU) << 1)))
#define __ac_set_isdel_true(flag, i) (flag[i >> 4] |= 1ul << ((i & 0xfU) << 1))
#define __ac_fsize(m) ((m) < 16 ? 1 : (m) >> 4)
-#ifndef _roundup32
-#define _roundup32(x) \
- (--(x), \
- (x) |= (x) >> 1, \
- (x) |= (x) >> 2, \
- (x) |= (x) >> 4, \
- (x) |= (x) >> 8, \
- (x) |= (x) >> 16, \
- ++(x))
-#endif
-
-static const double __ac_HASH_UPPER = 0.77;
-
-#define __HASH_TYPE(name, map·key_t, map·val·t) \
- typedef struct map·##name##_s { \
- int32 n_buckets, size, n_occupied, upper_bound; \
- int32* flags; \
- map·key_t* keys; \
- map·val·t* vals; \
- } map·##name##_t;
-
-#define __HASH_PROTOTYPES(name, map·key_t, map·val·t) \
- extern map·##name##_t* map·init·##name(void); \
- extern void map·destroy_##name(map·##name##_t* h); \
- extern void map·clear_##name(map·##name##_t* h); \
- extern int32 map·get_##name(const map·##name##_t* h, mapkey_t key); \
- extern int map·resize_##name(map·##name##_t* h, int32 new_n_buckets); \
- extern int32 map·put_##name(map·##name##_t* h, mapkey_t key, int* ret); \
- extern void map·del_##name(map·##name##_t* h, int32 x);
-
-#define __HASH_IMPL( \
- name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \
- SCOPE map·##name##_t* map·init·##name(void) \
- { \
- return (map·##name##_t*)calloc(1, sizeof(map·##name##_t)); \
+#define MAP_STRUCT_BODY(key_t, val_t) \
+ mem·Allocator heap; \
+ void *h; \
+ int32 n_buckets, size, n_occupied, upper_bound; \
+ int32 *flags; \
+ key_t *keys; \
+ val_t *vals;
+
+#define MAP_MAKE(type) \
+ type *map; \
+ map = heap.alloc((h), 1, sizeof(*map)); \
+ map->heap = heap; \
+ map->h = h; \
+ return map
+
+#define MAP_FREE(map) \
+ map->heap.free(map->h, map->keys); \
+ map->heap.free(map->h, map->flags); \
+ map->heap.free(map->h, map->vals); \
+ map->heap.free(map->h, map);
+
+#define MAP_RESET(map) \
+ if (map && map->flags) { \
+ memset(map->flags, 0xaa, __ac_fsize(map->n_buckets) * sizeof(int32)); \
+ map->size = map->n_occupied = 0; \
+ }
+
+#define MAP_GET(map, key, hashfunc, equalfunc) \
+ if (map->n_buckets) { \
+ int32 k = 0; \
+ int32 i = 0; \
+ int32 last = 0; \
+ int32 mask = 0; \
+ int32 step = 0; \
+ mask = map->n_buckets - 1; \
+ k = hashfunc(key); \
+ i = k & mask; \
+ last = i; \
+ while (!__ac_isempty(map->flags, i) && \
+ (__ac_isdel(map->flags, i) || !equalfunc(map->keys[i], key))) { \
+ i = (i + (++step)) & mask; \
+ if (i == last) \
+ return map->n_buckets; \
} \
- SCOPE void map·destroy_##name(map·##name##_t* h) \
- { \
- if (h) \
- { \
- free((void*)h->keys); \
- free(h->flags); \
- free((void*)h->vals); \
- free(h); \
+ return __ac_iseither(map->flags, i) ? map->n_buckets : i; \
+ } else \
+ return 0;
+
+#define MAP_GROW(map, key_t, val_t, new_n_buckets, hashfunc) \
+ int32 *new_flags = nil; \
+ int32 j = 1; \
+ { \
+ _roundup32(new_n_buckets); \
+ if (new_n_buckets < 4) \
+ new_n_buckets = 4; \
+ if (map->size >= (int32)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \
+ j = 0; \
+ else { \
+ new_flags = \
+ map->heap.alloc(map->h, __ac_fsize(new_n_buckets), sizeof(int32)); \
+ if (!new_flags) return -1; \
+ memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(int32)); \
+ if (map->n_buckets < new_n_buckets) { /* expand */ \
+ key_t *new_keys = \
+ map->heap.alloc(map->h, new_n_buckets, sizeof(key_t)); \
+ if (!new_keys) { \
+ map->heap.free(map->h, new_flags); \
+ return -1; \
} \
- } \
- SCOPE void map·clear_##name(map·##name##_t* h) \
- { \
- if (h && h->flags) \
- { \
- memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(int32)); \
- h->size = h->n_occupied = 0; \
+ map->heap.free(map->h, map->keys); \
+ map->keys = new_keys; \
+ val_t *new_vals = \
+ map->heap.alloc(map->h, new_n_buckets, sizeof(val_t)); \
+ if (!new_vals) { \
+ map->heap.free(map->h, new_flags); \
+ return -1; \
} \
+ map->heap.free(map->h, map->vals); \
+ map->vals = new_vals; \
+ } \
} \
- SCOPE int32 map·get_##name(const map·##name##_t* h, mapkey_t key) \
- { \
- if (h->n_buckets) \
- { \
- int32 k = 0; \
- int32 i = 0; \
- int32 last = 0; \
- int32 mask = 0; \
- int32 step = 0; \
- mask = h->n_buckets - 1; \
- k = __hash_func(key); \
- i = k & mask; \
- last = i; \
- while ( \
- !__ac_isempty(h->flags, i) && \
- (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) \
+ } \
+ if (j) { /* rehashing is needed */ \
+ for (j = 0; j != map->n_buckets; ++j) { \
+ if (__ac_iseither(map->flags, j) == 0) { \
+ key_t key = map->keys[j]; \
+ val_t val; \
+ int32 new_mask; \
+ new_mask = new_n_buckets - 1; \
+ val = map->vals[j]; \
+ __ac_set_isdel_true(map->flags, j); \
+ while (1) { \
+ int32 k, i, step = 0; \
+ k = hashfunc(key); \
+ i = k & new_mask; \
+ while (!__ac_isempty(new_flags, i)) \
+ i = (i + (++step)) & new_mask; \
+ __ac_set_isempty_false(new_flags, i); \
+ if (i < map->n_buckets && __ac_iseither(map->flags, i) == 0) { \
{ \
- i = (i + (++step)) & mask; \
- if (i == last) return h->n_buckets; \
- } \
- return __ac_iseither(h->flags, i) ? h->n_buckets : i; \
- } else \
- return 0; \
- } \
- SCOPE int map·resize_##name(map·##name##_t* h, int32 new_n_buckets) \
- { /* This function uses 0.25*n_buckets bytes of working space instead of \
- [sizeof(key_t+val·t)+.25]*n_buckets. */ \
- int32* new_flags = 0; \
- int32 j = 1; \
- { \
- _roundup32(new_n_buckets); \
- if (new_n_buckets < 4) new_n_buckets = 4; \
- if (h->size >= (int32)(new_n_buckets * __ac_HASH_UPPER + 0.5)) \
- j = 0; /* requested size is too small */ \
- else \
- { /* hash table size to be changed (shrink or expand); rehash */ \
- new_flags = \
- (int32*)malloc(__ac_fsize(new_n_buckets) * sizeof(int32)); \
- if (!new_flags) return -1; \
- memset( \
- new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(int32)); \
- if (h->n_buckets < new_n_buckets) \
- { /* expand */ \
- mapkey_t* new_keys = (mapkey_t*)realloc( \
- (void*)h->keys, new_n_buckets * sizeof(mapkey_t)); \
- if (!new_keys) \
- { \
- free(new_flags); \
- return -1; \
- } \
- h->keys = new_keys; \
- if (map·is·map) \
- { \
- mapval·t* new_vals = (mapval·t*)realloc( \
- (void*)h->vals, new_n_buckets * sizeof(mapval·t)); \
- if (!new_vals) \
- { \
- free(new_flags); \
- return -1; \
- } \
- h->vals = new_vals; \
- } \
- } /* otherwise shrink */ \
+ key_t tmp = map->keys[i]; \
+ map->keys[i] = key; \
+ key = tmp; \
} \
- } \
- if (j) \
- { /* rehashing is needed */ \
- for (j = 0; j != h->n_buckets; ++j) \
{ \
- if (__ac_iseither(h->flags, j) == 0) \
- { \
- mapkey_t key = h->keys[j]; \
- mapval·t val; \
- int32 new_mask; \
- new_mask = new_n_buckets - 1; \
- if (map·is·map) val = h->vals[j]; \
- __ac_set_isdel_true(h->flags, j); \
- while (1) \
- { /* kick-out process; sort of like in Cuckoo hashing */ \
- int32 k, i, step = 0; \
- k = __hash_func(key); \
- i = k & new_mask; \
- while (!__ac_isempty(new_flags, i)) \
- i = (i + (++step)) & new_mask; \
- __ac_set_isempty_false(new_flags, i); \
- if (i < h->n_buckets && \
- __ac_iseither(h->flags, i) == 0) \
- { /* kick out the existing element */ \
- { \
- mapkey_t tmp = h->keys[i]; \
- h->keys[i] = key; \
- key = tmp; \
- } \
- if (map·is·map) \
- { \
- mapval·t tmp = h->vals[i]; \
- h->vals[i] = val; \
- val = tmp; \
- } \
- __ac_set_isdel_true(h->flags, \
- i); /* mark it as deleted in \
- the old hash table */ \
- } else \
- { /* write the element and jump out of the loop */ \
- h->keys[i] = key; \
- if (map·is·map) h->vals[i] = val; \
- break; \
- } \
- } \
- } \
+ val_t tmp = map->vals[i]; \
+ map->vals[i] = val; \
+ val = tmp; \
} \
- if (h->n_buckets > new_n_buckets) \
- { /* shrink the hash table */ \
- h->keys = (mapkey_t*)realloc((void*)h->keys, \
- new_n_buckets * sizeof(mapkey_t)); \
- if (map·is·map) \
- h->vals = (mapval·t*)realloc( \
- (void*)h->vals, new_n_buckets * sizeof(mapval·t)); \
- } \
- free(h->flags); /* free the working space */ \
- h->flags = new_flags; \
- h->n_buckets = new_n_buckets; \
- h->n_occupied = h->size; \
- h->upper_bound = (int32)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+ __ac_set_isdel_true(map->flags, i); \
+ } else { \
+ map->keys[i] = key; \
+ map->vals[i] = val; \
+ break; \
+ } \
} \
- return 0; \
+ } \
} \
- SCOPE int32 map·put_##name(map·##name##_t* h, mapkey_t key, int* ret) \
- { \
- int32 x = 0; \
- if (h->n_occupied >= h->upper_bound) \
- { /* update the hash table */ \
- if (h->n_buckets > (h->size << 1)) \
- { \
- if (map·resize_##name(h, h->n_buckets - 1) < 0) \
- { /* clear "deleted" elements */ \
- *ret = -1; \
- return h->n_buckets; \
- } \
- } else if (map·resize_##name(h, h->n_buckets + 1) < 0) \
- { /* expand the hash table */ \
- *ret = -1; \
- return h->n_buckets; \
- } \
- } /* TODO: to implement automatically shrinking; resize() already \
- support shrinking */ \
- { \
- int32 k, i, site, last, mask = h->n_buckets - 1, step = 0; \
- x = site = h->n_buckets; \
- k = __hash_func(key); \
- i = k & mask; \
- if (__ac_isempty(h->flags, i)) \
- x = i; /* for speed up */ \
- else \
- { \
- last = i; \
- while ( \
- !__ac_isempty(h->flags, i) && \
- (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) \
- { \
- if (__ac_isdel(h->flags, i)) site = i; \
- i = (i + (++step)) & mask; \
- if (i == last) \
- { \
- x = site; \
- break; \
- } \
- } \
- if (x == h->n_buckets) \
- { \
- if (__ac_isempty(h->flags, i) && site != h->n_buckets) \
- x = site; \
- else \
- x = i; \
- } \
- } \
- } \
- if (__ac_isempty(h->flags, x)) \
- { /* not present at all */ \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; \
- ++h->n_occupied; \
- *ret = 1; \
- } else if (__ac_isdel(h->flags, x)) \
- { /* deleted */ \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; \
- *ret = 2; \
- } else \
- *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
- return x; \
+ if (map->n_buckets > new_n_buckets) { /* shrink the hash table */ \
+ key_t *new_keys = map->heap.alloc(map->h, new_n_buckets, sizeof(key_t)); \
+ map->heap.free(map->h, map->keys); \
+ map->keys = new_keys; \
+ \
+ val_t *new_vals = map->heap.alloc(map->h, new_n_buckets, sizeof(val_t)); \
+ map->heap.free(map->h, map->vals); \
+ map->vals = new_vals; \
} \
- SCOPE void map·del_##name(map·##name##_t* h, int32 x) \
- { \
- if (x != h->n_buckets && !__ac_iseither(h->flags, x)) \
- { \
- __ac_set_isdel_true(h->flags, x); \
- --h->size; \
+ map->heap.free(map->h, map->flags); /* free the working space */ \
+ map->flags = new_flags; \
+ map->n_buckets = new_n_buckets; \
+ map->n_occupied = map->size; \
+ map->upper_bound = (int32)(map->n_buckets * __ac_HASH_UPPER + 0.5); \
+ } \
+ return 0;
+
+#define MAP_PUT(map, key, val, hashfunc, equalfunc, resizefunc, err) \
+ { \
+ int32 x = 0; \
+ if (map->n_occupied >= map->upper_bound) { \
+ if (map->n_buckets > (map->size << 1)) { \
+ if (resizefunc(map, map->n_buckets - 1) < 0) { \
+ *err = -1; \
+ return map->n_buckets; \
} \
- }
-
-#define HASH_DECLARE(name, mapkey_t, mapval·t) \
- __HASH_TYPE(name, mapkey_t, mapval·t) \
- __HASH_PROTOTYPES(name, mapkey_t, mapval·t)
-
-#define MAP·MAKE2( \
- name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \
- __HASH_TYPE(name, mapkey_t, mapval·t) \
- __HASH_IMPL( \
- name, SCOPE, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal)
-
-#define MAP·MAKE( \
- name, mapkey_t, mapval·t, map·is·map, __hash_func, __hash_equal) \
- MAP·MAKE2(name, \
- static map·inline klib_unused, \
- mapkey_t, \
- mapval·t, \
- map·is·map, \
- __hash_func, \
- __hash_equal)
-
-/* --- BEGIN OF HASH FUNCTIONS --- */
-
-/*! @function
- @abstract Integer hash function
- @param key The integer [mapint32_t]
- @return The hash value [mapint_t]
- */
-#define map·int_hash_func(key) (int32)(key)
-/*! @function
- @abstract Integer comparison function
- */
-#define map·int_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract 64-bit integer hash function
- @param key The integer [mapint64_t]
- @return The hash value [mapint_t]
- */
-#define map·int64_hash_func(key) (int32)((key) >> 33 ^ (key) ^ (key) << 11)
-/*! @function
- @abstract 64-bit integer comparison function
- */
-#define map·int64_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract const char* hash function
- @param s Pointer to a null terminated string
- @return The hash value
- */
-static map·inline int32
- __ac_X31_hash_string(const char* s)
-{
- int32 h = (int32)(*s);
- if (h != 0) {
- for (const char* it = s; *it; ++it)
- h = (h << 5) - h + (int32)*it;
- }
- return h;
-}
-/*! @function
- @abstract Another interface to const char* hash function
- @param key Pointer to a null terminated string [const char*]
- @return The hash value [mapint_t]
- */
-#define map·str_hash_func(key) __ac_X31_hash_string(key)
-/*! @function
- @abstract Const char* comparison function
- */
-#define map·str_hash_equal(a, b) (strcmp(a, b) == 0)
-
-static inline int32
-__ac_Wang_hash(int32 key)
-{
- key += ~(key << 15);
- key ^= (key >> 10);
- key += (key << 3);
- key ^= (key >> 6);
- key += ~(key << 11);
- key ^= (key >> 16);
- return key;
-}
-#define map·int_hash_func2(key) __ac_Wang_hash((int32)key)
-
-/* --- END OF HASH FUNCTIONS --- */
-
-/* Other convenient macros... */
-
-/*!
- @abstract Type of the hash table.
- @param name Name of the hash table [symbol]
- */
-#define map·t(name) map·##name##_t
-
-/*! @function
- @abstract Initiate a hash table.
- @param name Name of the hash table [symbol]
- @return Pointer to the hash table [khash_t(name)*]
- */
-#define map·init(name) map·init·##name()
-
-/*! @function
- @abstract Destroy a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define map·destroy(name, h) map·destroy_##name(h)
-
-/*! @function
- @abstract Reset a hash table without deallocating memory.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define map·clear(name, h) map·clear_##name(h)
-
-/*! @function
- @abstract Resize a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param s New size [mapint_t]
- */
-#define map·resize(name, h, s) map·resize_##name(h, s)
-
-/*! @function
- @abstract Insert a key to the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @param r Extra return code: -1 if the operation failed;
- 0 if the key is present in the hash table;
- 1 if the bucket is empty (never used); 2 if the element in
- the bucket has been deleted [int*]
- @return Iterator to the inserted element [mapint_t]
- */
-#define map·put(name, h, k, r) map·put_##name(h, k, r)
-
-/*! @function
- @abstract Retrieve a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @return Iterator to the found element, or map·end(h) if the element is
- absent [mapint_t]
- */
-#define map·get(name, h, k) map·get_##name(h, k)
-
-/*! @function
- @abstract Remove a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Iterator to the element to be deleted [mapint_t]
- */
-#define map·del(name, h, k) map·del_##name(h, k)
-
-/*! @function
- @abstract Test whether a bucket contains data.
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [mapint_t]
- @return 1 if containing data; 0 otherwise [int]
- */
-#define map·exist(h, x) (!__ac_iseither((h)->flags, (x)))
-
-/*! @function
- @abstract Get key given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [mapint_t]
- @return Key [type of keys]
- */
-#define map·key(h, x) ((h)->keys[x])
-
-/*! @function
- @abstract Get value given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [mapint_t]
- @return Value [type of values]
- @discussion For hash sets, calling this results in segfault.
- */
-#define map·val(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Alias of map·val()
- */
-#define map·value(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Get the start iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The start iterator [mapint_t]
- */
-#define map·begin(h) (int32)(0)
-
-/*! @function
- @abstract Get the end iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The end iterator [mapint_t]
- */
-#define map·end(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Get the number of elements in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of elements in the hash table [mapint_t]
- */
-#define map·len(h) ((h)->size)
-
-/*! @function
- @abstract Get the number of buckets in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of buckets in the hash table [mapint_t]
- */
-#define map·n_buckets(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Iterate over the entries in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param kvar Variable to which key will be assigned
- @param vvar Variable to which value will be assigned
- @param code Block of code to execute
- */
-#define map·foreach(h, kvar, vvar, code) \
- { \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- (kvar) = map·key(h, __i); \
- (vvar) = map·val(h, __i); \
- code; \
- } \
- }
-
-/*! @function
- @abstract Iterate over the values in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param vvar Variable to which value will be assigned
- @param code Block of code to execute
- */
-#define map·foreach_val(h, vvar, code) \
- { \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- (vvar) = map·val(h, __i); \
- code; \
- } \
- }
-
-/*! @function
- @abstract Iterate over the keys in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param kvar Variable to which key will be assigned
- @param code Block of code to execute
- */
-#define map·foreach_key(h, kvar, code) \
+ } else if (resizefunc(map, map->n_buckets + 1) < 0) { \
+ *err = -1; \
+ return map->n_buckets; \
+ } \
+ } \
+ \
{ \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- (kvar) = map·key(h, __i); \
- code; \
+ int32 k, i, site, last, mask = map->n_buckets - 1, step = 0; \
+ x = site = map->n_buckets; \
+ k = hashfunc(key); \
+ i = k & mask; \
+ if (__ac_isempty(map->flags, i)) \
+ x = i; /* for speed up */ \
+ else { \
+ last = i; \
+ while (!__ac_isempty(map->flags, i) && \
+ (__ac_isdel(map->flags, i) || !equalfunc(map->keys[i], key))) { \
+ if (__ac_isdel(map->flags, i)) \
+ site = i; \
+ i = (i + (++step)) & mask; \
+ if (i == last) { \
+ x = site; \
+ break; \
+ } \
} \
- }
-
-/*! @function
- @abstract Iterate over the values in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @param code Block of code to execute for free operation.
- */
-#define map·free_all(h, code) \
- { \
- int32 __i; \
- for (__i = map·begin(h); __i != map·end(h); ++__i) \
- { \
- if (!map·exist(h, __i)) continue; \
- code(map·val(h, __i)); \
+ if (x == map->n_buckets) { \
+ if (__ac_isempty(map->flags, i) && site != map->n_buckets) \
+ x = site; \
+ else \
+ x = i; \
} \
- }
-
-/* More convenient interfaces */
-
-/*! @function
- @abstract Instantiate a hash set containing integer keys
- @param name Name of the hash table [symbol]
- */
-#define define_int32_set(name) \
- MAP·MAKE(name, int32, char, 0, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_int32_map(name, mapval·t) \
- MAP·MAKE(name, int32, mapval·t, 1, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_uint32_set(name) \
- MAP·MAKE(name, uint32, char, 0, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_uint32_map(name, mapval·t) \
- MAP·MAKE(name, uint32, mapval·t, 1, map·int_hash_func, map·int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash set containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- */
-#define define_int64_set(name) \
- MAP·MAKE(name, int64, char, 0, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_int64_map(name, mapval·t) \
- MAP·MAKE(name, int64, mapval·t, 1, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash set containing 64-bit unsigned integer keys
- @param name Name of the hash table [symbol]
- */
-#define define_uint64_set(name) \
- MAP·MAKE(name, uint64, char, 0, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit unsigned integer keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_uint64_map(name, mapval·t) \
- MAP·MAKE(name, uint64, mapval·t, 1, map·int64_hash_func, map·int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- */
-#define define_str_hashset(name) \
- MAP·MAKE(name, const byte*, byte, 0, map·str_hash_func, map·str_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- @param mapval·t Type of values [type]
- */
-#define define_str·map(name, mapval·t) \
- MAP·MAKE(name, const byte*, mapval·t, 1, map·str_hash_func, map·str_hash_equal)
+ } \
+ } \
+ if (__ac_isempty(map->flags, x)) { /* not present at all */ \
+ map->keys[x] = key; \
+ __ac_set_isboth_false(map->flags, x); \
+ ++map->size; \
+ ++map->n_occupied; \
+ *err = 1; \
+ } else if (__ac_isdel(map->flags, x)) { /* deleted */ \
+ map->keys[x] = key; \
+ __ac_set_isboth_false(map->flags, x); \
+ ++map->size; \
+ } else \
+ *err = 0; \
+ return x; \
+ }
+
+#define MAP_DEL(map, x) \
+ { \
+ if (x != map->n_buckets && !__ac_iseither(map->flags, x)) { \
+ __ac_set_isdel_true(map->flags, x); \
+ --map->size; \
+ } \
+ }
+
+static int32 hash_string(byte *s) {
+ int32 h;
+ byte *it;
+
+ h = (int32)(*s);
+ if (h != 0) {
+ for (it = s; *it; ++it)
+ h = (h << 5) - h + (int32)*it;
+ }
+ return h;
+}
diff --git a/include/libn/macro/qsort.h b/include/libn/macro/qsort.h
index 8daf0fa..cc38bca 100644
--- a/include/libn/macro/qsort.h
+++ b/include/libn/macro/qsort.h
@@ -1,181 +1,88 @@
-/*
- * Copyright (c) 2013, 2017 Alexey Tourbin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+#pragma once
-/*
- * This is a traditional Quicksort implementation which mostly follows
- * [Sedgewick 1978]. Sorting is performed entirely on array indices,
- * while actual access to the array elements is abstracted out with the
- * user-defined `LESS` and `SWAP` primitives.
+/*
+ * Straight implementation of Sedgewick's median qsort
+ *
+ * @LEN: name of parameter length
+ * @QLESS: name of function that computes array[i] < array[j]
+ * should return a boolean
+ * @QSWAP: name of function that swaps array[i] <-> array[j]
*
- * Synopsis:
- * QSORT(N, LESS, SWAP);
- * where
- * N - the number of elements in A[];
- * LESS(i, j) - compares A[i] to A[j];
- * SWAP(i, j) - exchanges A[i] with A[j].
+ * NOTE: This can perform on strided arrays.
+ * Make sure to use parens liberally to ensure hygeine!
*/
-#pragma once
-
-/* Sort 3 elements. */
-#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \
-do { \
- if (Q_LESS(q_a2, q_a1)) { \
- if (Q_LESS(q_a3, q_a2)) \
- Q_SWAP(q_a1, q_a3); \
- else { \
- Q_SWAP(q_a1, q_a2); \
- if (Q_LESS(q_a3, q_a2)) \
- Q_SWAP(q_a2, q_a3); \
- } \
- } \
- else if (Q_LESS(q_a3, q_a2)) { \
- Q_SWAP(q_a2, q_a3); \
- if (Q_LESS(q_a2, q_a1)) \
- Q_SWAP(q_a1, q_a2); \
- } \
-} while (0)
-
-/* Partition [q_l,q_r] around a pivot. After partitioning,
- * [q_l,q_j] are the elements that are less than or equal to the pivot,
- * while [q_i,q_r] are the elements greater than or equal to the pivot. */
-#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \
-do { \
- /* The middle element, not to be confused with the median. */ \
- Q_UINT q_m = q_l + ((q_r - q_l) >> 1); \
- /* Reorder the second, the middle, and the last items. \
- * As [Edelkamp Weiss 2016] explain, using the second element \
- * instead of the first one helps avoid bad behaviour for \
- * decreasingly sorted arrays. This method is used in recent \
- * versions of gcc's std::sort, see gcc bug 58437#c13, although \
- * the details are somewhat different (cf. #c14). */ \
- Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP); \
- /* Place the median at the beginning. */ \
- Q_SWAP(q_l, q_m); \
- /* Partition [q_l+2, q_r-1] around the median which is in q_l. \
- * q_i and q_j are initially off by one, they get decremented \
- * in the do-while loops. */ \
- q_i = q_l + 1; q_j = q_r; \
- while (1) { \
- do q_i++; while (Q_LESS(q_i, q_l)); \
- do q_j--; while (Q_LESS(q_l, q_j)); \
- if (q_i >= q_j) break; /* Sedgewick says "until j < i" */ \
- Q_SWAP(q_i, q_j); \
- } \
- /* Compensate for the i==j case. */ \
- q_i = q_j + 1; \
- /* Put the median to its final place. */ \
- Q_SWAP(q_l, q_j); \
- /* The median is not part of the left subfile. */ \
- q_j--; \
-} while (0)
-
-/* Insertion sort is applied to small subfiles - this is contrary to
- * Sedgewick's suggestion to run a separate insertion sort pass after
- * the partitioning is done. The reason I don't like a separate pass
- * is that it triggers extra comparisons, because it can't see that the
- * medians are already in their final positions and need not be rechecked.
- * Since I do not assume that comparisons are cheap, I also do not try
- * to eliminate the (q_j > q_l) boundary check. */
-#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \
-do { \
- Q_UINT q_i, q_j; \
- /* For each item starting with the second... */ \
- for (q_i = q_l + 1; q_i <= q_r; q_i++) \
- /* move it down the array so that the first part is sorted. */ \
- for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--) \
- Q_SWAP(q_j, q_j - 1); \
-} while (0)
-
-/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
- * Q_THRESH, the algorithm performs recursive partitioning. When the size
- * drops below Q_THRESH, the algorithm switches to insertion sort.
- * The minimum valid value is probably 5 (with 5 items, the second and
- * the middle items, the middle itself being rounded down, are distinct). */
-#define Q_THRESH 16
-
-/* The main loop. */
-#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \
-do { \
- Q_UINT q_l = 0; \
- Q_UINT q_r = (Q_N) - 1; \
- Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \
- struct { Q_UINT q_l, q_r; } \
- /* On 32-bit platforms, to sort a "char[3GB+]" array, \
- * it may take full 32 stack frames. On 64-bit CPUs, \
- * though, the address space is limited to 48 bits. \
- * The usage is further reduced if Q_N has a 32-bit type. */ \
- q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \
- while (1) { \
- if (q_r - q_l + 1 >= Q_THRESH) { \
- Q_UINT q_i, q_j; \
- Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \
- /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \
- * Dealing with them depends on which one is bigger. */ \
- if (q_j - q_l >= q_r - q_i) \
- Q_SUBFILES(q_l, q_j, q_i, q_r); \
- else \
- Q_SUBFILES(q_i, q_r, q_l, q_j); \
- } \
- else { \
- Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \
- /* Pop subfiles from the stack, until it gets empty. */ \
- if (q_sp == 0) break; \
- q_sp--; \
- q_l = q_st[q_sp].q_l; \
- q_r = q_st[q_sp].q_r; \
- } \
- } \
-} while (0)
-
-/* The missing part: dealing with subfiles.
- * Assumes that the first subfile is not smaller than the second. */
-#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \
-do { \
- /* If the second subfile is only a single element, it needs \
- * no further processing. The first subfile will be processed \
- * on the next iteration (both subfiles cannot be only a single \
- * element, due to Q_THRESH). */ \
- if (q_l2 == q_r2) { \
- q_l = q_l1; \
- q_r = q_r1; \
- } \
- else { \
- /* Otherwise, both subfiles need processing. \
- * Push the larger subfile onto the stack. */ \
- q_st[q_sp].q_l = q_l1; \
- q_st[q_sp].q_r = q_r1; \
- q_sp++; \
- /* Process the smaller subfile on the next iteration. */ \
- q_l = q_l2; \
- q_r = q_r2; \
- } \
-} while (0)
-
-/* And now, ladies and gentlemen, may I proudly present to you... */
-#define QSORT(Q_N, Q_LESS, Q_SWAP) \
-do { \
- if ((Q_N) > 1) \
- /* We could check sizeof(Q_N) and use "unsigned", but at least \
- * on x86_64, this has the performance penalty of up to 5%. */ \
- Q_LOOP(ulong, Q_N, Q_LESS, Q_SWAP); \
-} while (0)
+#define QSORT(LEN, QLESS, QSWAP) \
+ int i, j, m, r, l; \
+ struct { \
+ int i, j; \
+ } *sp, base[48]; \
+ sp = base; \
+ \
+ l = 1; \
+ r = LEN-1; \
+ \
+ if (LEN <= 14) goto ENDOUTER; \
+OUTERLOOP: \
+ SWAP((l+r)/2, l+1); \
+ \
+ if (QLESS(r, l+1)) QSWAP(r, l+1); \
+ if (QLESS(r, l)) QSWAP(r, l); \
+ if (QLESS(l, l+1)) QSWAP(l, l+1); \
+ \
+ i = l+1; \
+ j = r; \
+ \
+INNERLOOP: \
+ do ++i; while(QLESS(i, l)); \
+ do --j; while(QLESS(l, j)); \
+ if (j < i) goto ENDINNER; \
+ QSWAP(i, j); \
+ goto INNERLOOP; \
+ \
+ENDINNER: \
+ QSWAP(l, j); \
+ \
+ if (MAX(j-l, r-i+1) <= 14) { \
+ if (sp == base) { \
+ goto ENDOUTER; \
+ } else { \
+ l = sp[-1].i; \
+ r = sp[-1].j; \
+ sp--; \
+ } \
+ } else { \
+ if (MIN(j-l, r-i+1) <= 14) { \
+ if (j-l > r-i+1) { \
+ l = l; \
+ r = j-1; \
+ } else { \
+ l = i; \
+ r = r; \
+ } \
+ } else { \
+ if (j-l > r-i+1) { \
+ sp->i = l; \
+ sp->j = j-1; \
+ sp++; \
+ \
+ l = i; \
+ r = r; \
+ } else { \
+ sp->i = i; \
+ sp->j = r; \
+ sp++; \
+ \
+ l = l; \
+ r = j-1; \
+ } \
+ } \
+ } \
+ goto OUTERLOOP; \
+ \
+ENDOUTER: \
+ for (i = 1; i < LEN; i++) { \
+ for (j = i; j > 0 && QLESS(j, j-1); j--) { \
+ QSWAP(j, j-1); \
+ } \
+ } \
diff --git a/rules.mk b/rules.mk
index 5260157..3f2de37 100644
--- a/rules.mk
+++ b/rules.mk
@@ -39,6 +39,7 @@ targets: $(LIBS) $(BINS)
.PHONY: database
database: $(LIBS) $(BINS)
gentags
+ ctags -R sys
.PHONY: clean
clean:
diff --git a/sys/libbio/align.c b/sys/libbio/align.c
index 8d1c119..0f00fce 100644
--- a/sys/libbio/align.c
+++ b/sys/libbio/align.c
@@ -87,16 +87,24 @@ sortpos(uintptr len, uint64 vals[], int locs[])
#undef SWAP
}
+/*
+ * sketch
+ * @param seq: '0' terminated string
+ * @param len: number of sequential sketches to keep
+ * @param vals: buffer to store hashes of sketch.
+ * @param locs: buffer to store location of sketch hashes
+ */
error
-aln·sketch(byte *seq, int L, uint64 *vals[aln·N], int *locs[aln·N])
+aln·sketch(byte *seq, int len, uint64 *vals[aln·N], int *locs[aln·N])
{
int i, n, l, *loc;
- uint64 kmer, tmp[4], h[3], *val;
+ uint64 kmer, h[3], *val;
+ int tmpi[2];
+ uint64 tmpu[2];
for (n = 0; n < aln·N; n++) {
- val = vals[n];
- for (l = 0; l < L; l++) {
- val[l] = UINT64_MAX;
+ for (l = 0; l < len; l++) {
+ vals[n][l] = UINT64_MAX;
}
}
@@ -112,26 +120,21 @@ aln·sketch(byte *seq, int L, uint64 *vals[aln·N], int *locs[aln·N])
loc = locs[n];
h[2] = (h[0] + n * h[1]) & aln·mask;
- for (i = 0; i < L && h[2] < val[i]; i++) {
+ for (i = 0; i < len && h[2] < val[i]; i++) {
;
}
- tmp[1] = h[2];
- tmp[3] = l;
+ tmpu[1] = h[2];
+ tmpi[1] = l;
for (i -= 1; i >= 0; i--) {
- tmp[0] = tmp[1];
- tmp[1] = val[i];
- val[i] = tmp[0];
-
- tmp[2] = tmp[3];
- tmp[3] = loc[i];
- loc[i] = tmp[2];
+ tmpu[0] = tmpu[1], tmpu[1] = val[i], val[i] = tmpu[0];
+ tmpi[0] = tmpi[1], tmpi[1] = loc[i], loc[i] = tmpi[0];
}
}
}
for (n = 0; n < aln·N; n++) {
- sortpos(L, vals[n], locs[n]);
+ sortpos(len, vals[n], locs[n]);
}
return 0;
@@ -139,30 +142,37 @@ aln·sketch(byte *seq, int L, uint64 *vals[aln·N], int *locs[aln·N])
static
int
-cmparrs(int L, uint64 a[], uint64 b[])
+lessarrs(int len, uint64 a[], uint64 b[])
{
int l;
- for (l = 0; l < L; l++) {
- if (a[l] < b[l]) return -1;
+ for (l = 0; l < len; l++) {
+ if (a[l] < b[l]) return 1;
+ if (a[l] > b[l]) return 0;
}
- return +1;
+ return 0;
+}
+
+static
+void
+swaparrs(int len, uint64 a[], uint64 b[])
+{
+ int l;
+ uint64 tmp;
+
+ for (l = 0; l < len; l++) {
+ tmp = a[l], a[l] = b[l], b[l] = tmp;
+ }
}
-// TODO: replace with inlined version
error
-aln·sort(uintptr len, int L, uint64 *vals)
+aln·sort(uintptr n, int len, uint64 *vals)
{
- uint64 tmp[128];
-#define LESS(i, j) (cmparrs((L), (vals+(i)*L), (vals+(j)*L)))
-#define SWAP(i, j) (memcpy(tmp, (vals+(i)*L), (sizeof(uint64)*L)), \
- memcpy((vals+(i)*L), (vals+(j)*L), (sizeof(uint64)*L)), \
- memcpy((vals+(j)*L), tmp, (sizeof(uint64)*L)))
-
- QSORT(len, LESS, SWAP);
+#define LESS(i, j) (lessarrs(len, vals+((i)*len), vals+((j)*len)))
+#define SWAP(i, j) (swaparrs(len, vals+((i)*len), vals+((j)*len)))
+ QSORT(n, LESS, SWAP);
#undef LESS
#undef SWAP
- // qsort(vals, len, l*sizeof(uint64), &compare);
return 0;
}
diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c
index 1cc5b5d..1bb5bc7 100644
--- a/sys/libbio/phylo.c
+++ b/sys/libbio/phylo.c
@@ -1,5 +1,6 @@
#include <u.h>
#include <libn.h>
+#include <libn/macro/qsort.h>
#include <libbio.h>
// -----------------------------------------------------------------------
@@ -50,6 +51,8 @@ FOUND:
} else {
prev->sibling = child->sibling;
}
+
+ parent->nchild--;
return error·nil;
}
@@ -97,6 +100,55 @@ phylo·countleafs(bio·Node *node, int *n)
}
// -----------------------------------------------------------------------
+// generic operations on tree
+
+void*
+phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
+{
+ bio·Node *it;
+
+ for (it = clade->child; it != nil; it = it->sibling) {
+ ctx = phylo·postorder(it, op, ctx);
+ }
+
+ return op(clade, ctx);
+}
+
+void*
+phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
+{
+ bio·Node *it;
+
+ ctx = op(clade, ctx);
+ for (it = clade->child; it != nil; it = it->sibling) {
+ ctx = phylo·preorder(it, op, ctx);
+ }
+
+ return ctx;
+}
+
+static
+inline
+void*
+appendleaf(bio·Node *node, void* list)
+{
+ bio·Node **leafs;
+
+ leafs = list;
+ if (!node->nchild) {
+ *leafs++ = node;
+ }
+
+ return leafs;
+}
+
+void
+phylo·getleafs(bio·Tree tree, bio·Node **leafs)
+{
+ phylo·postorder(tree.root, &appendleaf, leafs);
+}
+
+// -----------------------------------------------------------------------
// tree editing
static
@@ -147,64 +199,219 @@ phylo·ladderize(bio·Node *root)
return 0;
}
+/*
+ * compute all distances from a given node
+ * must provide a working buffer
+ */
+
+struct Tuple
+{
+ double *d;
+ bio·Node **n;
+};
+
+static
+struct Tuple
+getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list)
+{
+ bio·Node *it;
+ struct Tuple ret;
+
+ *dist++ = curr;
+ *list++ = node;
+
+ ret.d = dist;
+ ret.n = list;
+
+ if (node->parent && node->parent != prev) {
+ ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list);
+
+ dist = ret.d;
+ list = ret.n;
+ }
+
+ for (it = node->child; it != nil; it = it->sibling) {
+ if (it != prev) {
+ ret = getdistsfrom(it, node, curr + it->dist, dist, list);
+
+ dist = ret.d;
+ list = ret.n;
+ }
+ }
+
+ return ret;
+}
+
+int
+phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list)
+{
+ struct Tuple ret;
+ // TODO: Better bounds checking.
+
+ ret = getdistsfrom(node, nil, 0.0, dist, list);
+
+ assert(ret.n - list == len);
+ assert(ret.d - dist == len);
+
+ return len;
+}
+
+/*
+static
+void
+disttoroot(bio·Node *clade, double anc, double *dists)
+{
+ double d;
+ bio·Node *it;
+
+ *dists++ = anc + clade->dist;
+ d = dists[-1];
+ for (it = clade->child; it != nil; it = it->sibling) {
+ disttoroot(it, d, ++dists);
+ }
+}
+
+void
+phylo·disttoroot(bio·Tree tree, double *dists)
+{
+ disttoroot(tree.root, 0.0, dists);
+}
+*/
+
+/*
+ * compute the path constituting the tree diameter
+ * returns the number of edges in the path
+ */
+
+static
+void
+sort·nodedists(uintptr len, double fs[], bio·Node* ns[])
+{
+ double f;
+ bio·Node *n;
+#define LESS(i, j) (fs[i] < fs[j])
+#define SWAP(i, j) (n = ns[i], f = fs[i], \
+ fs[i] = fs[j], ns[i] = ns[j], \
+ fs[j] = f, ns[j] = n)
+ QSORT(len, LESS, SWAP);
+#undef LESS
+#undef SWAP
+}
+
+#define BUFLEN 4096
+double
+phylo·diameter(bio·Tree tree, int *len, bio·Node **path)
+{
+ // TODO: deal with large tree > BUFLEN gracefully
+ int n;
+ double fbuf[BUFLEN];
+ bio·Node *nbuf[BUFLEN];
+
+ n = tree.root->nnode;
+
+ assert(n < BUFLEN);
+
+ n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf);
+ sort·nodedists(n, fbuf, nbuf);
+
+ path[0] = nbuf[n-1];
+ printf("first end '%s'\n", path[0]->name);
+
+ n = phylo·getdistsfrom(path[0], n, fbuf, nbuf);
+ sort·nodedists(n, fbuf, nbuf);
+ printf("second end '%s'\n", nbuf[n-1]->name);
+
+ *len = 0;
+
+ // TODO: Traverse up the tree from each node
+ // Find MRCA by intersection of nodes hit
+
+ return 0.0;
+}
+#undef BUFLEN
+
+/*
+ * reroot a tree on a new node
+ */
static
error
rotateparent(bio·Node *node, bio·Node *to)
{
error err;
- bio·Node *it, *prev;
+ // NOTE: will this ever be taken?
if (node->parent == to) {
return 0;
}
- prev = nil;
- for (it = node->child; it != nil; it = it->sibling) {
- if (it == to) goto FOUND;
- prev = it;
+ if (!node->parent) {
+ goto RMCHILD;
}
- errorf("inconsistent topology: attempting to rotate to non-child");
- return 1;
-FOUND:
- if (err = rotateparent(node->parent, node), err) {
+ err = rotateparent(node->parent, node);
+ if (err) {
errorf("failure: broken tree");
return err;
}
- if (!prev) {
- node->child = node->parent;
- } else {
- prev->sibling = node->parent;
+ err = phylo·addchild(node, node->parent);
+ if (err) {
+ errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name);
+ return err;
}
- node->parent->sibling = it->sibling;
- node->parent = to;
+RMCHILD:
+ err = phylo·rmchild(node, to);
+ if (err) {
+ errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name);
+ return err;
+ }
+
+ node->parent = to;
return 0;
}
-#define FLOAT_PREC .00000001
+#define PREC .00000001
error
phylo·reroot(bio·Tree *tree, bio·Node *node, double d)
{
- bio·Node *old;
bio·Node *new;
// TODO: should check that node is part of this tree?
+ // TODO: should we check if node->parent != nil?
- old = tree->root;
- if (fabs(d) < FLOAT_PREC) new = node;
- if (fabs(d-node->dist) < FLOAT_PREC) new = node->parent;
- else {
+ if (fabs(d) < PREC) {
+ new = node;
+ rotateparent(node->parent, node);
+ } else if (fabs(d-node->dist) < PREC) {
+ new = node->parent;
+ if (new->parent->parent) {
+ rotateparent(new->parent->parent, new->parent);
+ }
+ } else {
new = tree->heap.alloc(tree->h, 1, sizeof(*new));
memset(new, 0, sizeof(*new));
+
+ phylo·addchild(new, node);
+ node->parent = new;
+
+ phylo·addchild(new, node->parent);
+ if (node->parent->parent) {
+ rotateparent(node->parent->parent, node->parent);
+ }
+ node->parent->parent = new;
}
- // TODO: Use rotateparent() to perform necessary flips
+ printf("number of children on old root: %d\n", tree->root->nchild);
+ tree->root = new;
+ tree->nleaf = 0;
+
+ phylo·countleafs(new, &tree->nleaf);
+ phylo·countnodes(new, &new->nnode);
- tree->root = new;
return 0;
}
+#undef PREC
// -----------------------------------------------------------------------
// ancestral inference
@@ -216,7 +423,7 @@ struct phylo·InferOpts
};
error
-phylo·ancestralinfer(bio·Tree *tree, struct phylo·InferOpts opts)
+phylo·inferancestral(bio·Tree *tree, struct phylo·InferOpts opts)
{
return 0;
}
diff --git a/sys/libbio/rules.mk b/sys/libbio/rules.mk
index fd3e066..0e3d482 100644
--- a/sys/libbio/rules.mk
+++ b/sys/libbio/rules.mk
@@ -21,7 +21,7 @@ LIBS_$(d) := $(d)/libbio.a
LIBS_$(d) := $(patsubst $(SRC_DIR)/%, $(OBJ_DIR)/%, $(LIBS_$(d)))
LIBS := $(LIBS) $(LIBS_$(d))
-BINS_$(d) := $(d)/test
+BINS_$(d) := $(d)/test $(d)/simulate
BINS_$(d) := $(patsubst $(SRC_DIR)/%, $(OBJ_DIR)/%, $(BINS_$(d)))
BINS := $(BINS) $(BINS_$(d))
diff --git a/sys/libbio/simulate.c b/sys/libbio/simulate.c
new file mode 100644
index 0000000..a4567d5
--- /dev/null
+++ b/sys/libbio/simulate.c
@@ -0,0 +1,117 @@
+#include <u.h>
+#include <libn.h>
+#include <libbio.h>
+
+#define SEQLEN 2560
+static byte *SEQ =
+"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC"
+"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
+"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
+"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
+"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA"
+"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
+"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
+"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA"
+"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
+"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC"
+"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
+"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
+"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT"
+"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
+"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
+"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
+"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
+"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
+"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
+"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
+"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
+"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT"
+"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
+"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC"
+"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
+"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
+"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
+"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
+"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
+"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
+"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
+"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT";
+
+byte*
+modify(byte *seq, int *len, double p)
+{
+ byte *head, *new;
+
+ head = calloc(SEQLEN+1, sizeof(byte));
+ new = head;
+ for (; *seq != '\0'; seq++) {
+ if (rng·bernoulli(p)) {
+ switch (rng·randi(5)) {
+ case 0: *new++ = 'A'; break;
+ case 1: *new++ = 'C'; break;
+ case 2: *new++ = 'G'; break;
+ case 3: *new++ = 'T'; break;
+ case 4: continue;
+ }
+ } else {
+ *new++ = *seq;
+ }
+ }
+ *new = '\0';
+ *len = new - head;
+ return head;
+}
+
+#define NSEQS 20
+int
+main()
+{
+ int n, i, l, lens[NSEQS];
+ byte *seqs[NSEQS];
+
+ int locs[aln·N][NSEQS][aln·L];
+ int *loc[aln·N];
+ uint64 vals[aln·N][NSEQS][aln·L];
+ uint64 *val[aln·N];
+ rng·init(0);
+
+ seqs[0] = SEQ;
+ lens[0] = SEQLEN;
+
+ for (n = 0; n < aln·N; n++) {
+ for (i = 0; i < NSEQS; i++) {
+ for (l = 0; l < aln·L; l++) {
+ vals[n][i][l] = 0;
+ }
+ }
+ }
+
+ for (i = 1; i < NSEQS; i++) {
+ seqs[i] = modify(SEQ, lens + i, .01*i);
+ }
+
+ for (i = 0; i < NSEQS; i++) {
+ for (n = 0; n < aln·N; n++) {
+ val[n] = vals[n][i];
+ loc[n] = locs[n][i];
+ }
+ aln·sketch(seqs[i], aln·L, val, loc);
+ }
+
+ // for (n = 0; n < aln·N; n++) {
+ // printf("iteration %d\n", n);
+ // printf("[\n");
+ // for (i = 0; i < NSEQS; i++) {
+ // printf(" [");
+ // for (l = 0; l < aln·L; l++) {
+ // printf("%lu,", vals[n][i][l]);
+ // }
+ // printf("],\n");
+ // }
+ // printf("]\n");
+ // }
+
+ for (n = 0; n < aln·N; n++) {
+ aln·sort(NSEQS, aln·L, (uint64*)vals[n]);
+ }
+}
diff --git a/sys/libbio/test.c b/sys/libbio/test.c
index 8054864..da29c84 100644
--- a/sys/libbio/test.c
+++ b/sys/libbio/test.c
@@ -99,6 +99,8 @@ test·newick()
io·Peeker rdr;
io·Putter wtr;
+ bio·Node **end, **it, **list;
+
heap = mem·makearena(mem·sys, nil);
rdr = (io·Peeker){.get = &io·getbyte, .unget = &io·ungetbyte};
wtr = (io·Putter){.put = &io·putbyte, .putstr = &io·putstring};
@@ -109,8 +111,22 @@ test·newick()
t.h = heap;
t.heap = (mem·Allocator){ .alloc = &mem·arenaalloc, .free = nil, };
- err = bio·readnewick(rdr, fd[0], &t);
+ if (err = bio·readnewick(rdr, fd[0], &t), err) {
+ errorf("failed to read newick");
+ return 1;
+ }
+ printf("number of children: %d\n", t.root->nchild);
+
phylo·ladderize(t.root);
+
+ list = mem·arenaalloc(heap, t.nleaf, sizeof(**list));
+ phylo·getleafs(t, list);
+ for (it = list, end = list + t.nleaf; it != end; ++it) {
+ printf("Leaf '%s'\n", (*it)->name);
+ }
+
+ bio·Node *path[100];
+ // phylo·diameter(t, path);
printf("Loaded tree with %d leafs and %d nodes\n", t.nleaf, t.root->nnode);
err = bio·writenewick(t, wtr, fd[1]);
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index c2f7e6c..bbbe1c8 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -3,7 +3,6 @@
#include <vendor/blas/cblas.h>
#include <x86intrin.h>
-
#include <time.h>
// -----------------------------------------------------------------------
@@ -382,7 +381,6 @@ void
blas·gemm(int n1, int n2, int n3, double a, double *m1, double *m2, double b, double *m3)
{
int i, j, k, len;
- double prod;
// TODO: Is there anyway this computation can be integrated into the one below?
for (i = 0; i < n1; i++) {
diff --git a/sys/libn/rules.mk b/sys/libn/rules.mk
index 4731558..ffb41e4 100644
--- a/sys/libn/rules.mk
+++ b/sys/libn/rules.mk
@@ -24,7 +24,7 @@ LIBS_$(d) := $(d)/libn.a
LIBS_$(d) := $(patsubst $(SRC_DIR)/%, $(OBJ_DIR)/%, $(LIBS_$(d)))
LIBS := $(LIBS) $(LIBS_$(d))
-BINS_$(d) := $(d)/test
+BINS_$(d) := $(d)/test $(d)/scratch
BINS_$(d) := $(patsubst $(SRC_DIR)/%, $(OBJ_DIR)/%, $(BINS_$(d)))
BINS := $(BINS) $(BINS_$(d))
@@ -37,7 +37,7 @@ $(LIBS_$(d)): $(OBJS_$(d))
$(ARCHIVE)
$(BINS_$(d)): TCLIBS := $(LIBS_$(d)) $(LIB_DIR)/vendor/libz.a
-$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/libn/test.o
+$(BINS_$(d)): $(OBJ_DIR)/libn/scratch.o
$(LINK)
# ---- Pop off stack ----
diff --git a/sys/libn/test.c b/sys/libn/test.c
index 3a21a71..973245f 100644
--- a/sys/libn/test.c
+++ b/sys/libn/test.c
@@ -1,5 +1,6 @@
#include <u.h>
#include <libn.h>
+#include <libn/macro/map.h>
#include <time.h>
@@ -122,7 +123,7 @@ error
test·sort()
{
clock_t t;
- int i, test[1000000];
+ int i, test[10000];
for (i = 0; i < arrlen(test); i++) {
test[i] = rand();
}
@@ -154,6 +155,56 @@ test·sort()
return 0;
}
+#define HASH(str) hash_string(str)
+#define EQUAL(str, str2) (str == str2)
+
+typedef struct Map
+{
+ MAP_STRUCT_BODY(byte*, float)
+} Map;
+
+Map*
+makemap(mem·Allocator heap, void* h)
+{
+ MAP_MAKE(Map);
+}
+
+void
+mapfree(Map *map)
+{
+ MAP_FREE(map);
+}
+
+void
+mapreset(Map *map)
+{
+ MAP_RESET(map);
+}
+
+double
+mapget(Map *map, byte *key)
+{
+ MAP_GET(map, key, HASH, EQUAL);
+}
+
+static
+error
+mapresize(Map *map, int n)
+{
+ MAP_GROW(map, byte*, float, n, HASH);
+}
+
+static
+int
+mapput(Map *map, byte *key, float val, error *err)
+{
+ MAP_PUT(map, key, val, HASH, EQUAL, mapresize, err);
+}
+
+
+#undef HASH
+#undef EQUAL
+
error
main()
{