aboutsummaryrefslogtreecommitdiff
path: root/sys/libbio/phylo.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
commit4b2ea2ca00eea7feea036b7642c0c1443b8f77a1 (patch)
tree5cdd635bd8240a6857258a056e3932e00966bfff /sys/libbio/phylo.c
parent6b739739968a0cc9b4d9909d8f4ffec30f4461dd (diff)
removed buggy qsort header and implemented myself
Diffstat (limited to 'sys/libbio/phylo.c')
-rw-r--r--sys/libbio/phylo.c255
1 files changed, 231 insertions, 24 deletions
diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c
index 1cc5b5d..1bb5bc7 100644
--- a/sys/libbio/phylo.c
+++ b/sys/libbio/phylo.c
@@ -1,5 +1,6 @@
#include <u.h>
#include <libn.h>
+#include <libn/macro/qsort.h>
#include <libbio.h>
// -----------------------------------------------------------------------
@@ -50,6 +51,8 @@ FOUND:
} else {
prev->sibling = child->sibling;
}
+
+ parent->nchild--;
return error·nil;
}
@@ -97,6 +100,55 @@ phylo·countleafs(bio·Node *node, int *n)
}
// -----------------------------------------------------------------------
+// generic operations on tree
+
+void*
+phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
+{
+ bio·Node *it;
+
+ for (it = clade->child; it != nil; it = it->sibling) {
+ ctx = phylo·postorder(it, op, ctx);
+ }
+
+ return op(clade, ctx);
+}
+
+void*
+phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
+{
+ bio·Node *it;
+
+ ctx = op(clade, ctx);
+ for (it = clade->child; it != nil; it = it->sibling) {
+ ctx = phylo·preorder(it, op, ctx);
+ }
+
+ return ctx;
+}
+
+static
+inline
+void*
+appendleaf(bio·Node *node, void* list)
+{
+ bio·Node **leafs;
+
+ leafs = list;
+ if (!node->nchild) {
+ *leafs++ = node;
+ }
+
+ return leafs;
+}
+
+void
+phylo·getleafs(bio·Tree tree, bio·Node **leafs)
+{
+ phylo·postorder(tree.root, &appendleaf, leafs);
+}
+
+// -----------------------------------------------------------------------
// tree editing
static
@@ -147,64 +199,219 @@ phylo·ladderize(bio·Node *root)
return 0;
}
+/*
+ * compute all distances from a given node
+ * must provide a working buffer
+ */
+
+struct Tuple
+{
+ double *d;
+ bio·Node **n;
+};
+
+static
+struct Tuple
+getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list)
+{
+ bio·Node *it;
+ struct Tuple ret;
+
+ *dist++ = curr;
+ *list++ = node;
+
+ ret.d = dist;
+ ret.n = list;
+
+ if (node->parent && node->parent != prev) {
+ ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list);
+
+ dist = ret.d;
+ list = ret.n;
+ }
+
+ for (it = node->child; it != nil; it = it->sibling) {
+ if (it != prev) {
+ ret = getdistsfrom(it, node, curr + it->dist, dist, list);
+
+ dist = ret.d;
+ list = ret.n;
+ }
+ }
+
+ return ret;
+}
+
+int
+phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list)
+{
+ struct Tuple ret;
+ // TODO: Better bounds checking.
+
+ ret = getdistsfrom(node, nil, 0.0, dist, list);
+
+ assert(ret.n - list == len);
+ assert(ret.d - dist == len);
+
+ return len;
+}
+
+/*
+static
+void
+disttoroot(bio·Node *clade, double anc, double *dists)
+{
+ double d;
+ bio·Node *it;
+
+ *dists++ = anc + clade->dist;
+ d = dists[-1];
+ for (it = clade->child; it != nil; it = it->sibling) {
+ disttoroot(it, d, ++dists);
+ }
+}
+
+void
+phylo·disttoroot(bio·Tree tree, double *dists)
+{
+ disttoroot(tree.root, 0.0, dists);
+}
+*/
+
+/*
+ * compute the path constituting the tree diameter
+ * returns the number of edges in the path
+ */
+
+static
+void
+sort·nodedists(uintptr len, double fs[], bio·Node* ns[])
+{
+ double f;
+ bio·Node *n;
+#define LESS(i, j) (fs[i] < fs[j])
+#define SWAP(i, j) (n = ns[i], f = fs[i], \
+ fs[i] = fs[j], ns[i] = ns[j], \
+ fs[j] = f, ns[j] = n)
+ QSORT(len, LESS, SWAP);
+#undef LESS
+#undef SWAP
+}
+
+#define BUFLEN 4096
+double
+phylo·diameter(bio·Tree tree, int *len, bio·Node **path)
+{
+ // TODO: deal with large tree > BUFLEN gracefully
+ int n;
+ double fbuf[BUFLEN];
+ bio·Node *nbuf[BUFLEN];
+
+ n = tree.root->nnode;
+
+ assert(n < BUFLEN);
+
+ n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf);
+ sort·nodedists(n, fbuf, nbuf);
+
+ path[0] = nbuf[n-1];
+ printf("first end '%s'\n", path[0]->name);
+
+ n = phylo·getdistsfrom(path[0], n, fbuf, nbuf);
+ sort·nodedists(n, fbuf, nbuf);
+ printf("second end '%s'\n", nbuf[n-1]->name);
+
+ *len = 0;
+
+ // TODO: Traverse up the tree from each node
+ // Find MRCA by intersection of nodes hit
+
+ return 0.0;
+}
+#undef BUFLEN
+
+/*
+ * reroot a tree on a new node
+ */
static
error
rotateparent(bio·Node *node, bio·Node *to)
{
error err;
- bio·Node *it, *prev;
+ // NOTE: will this ever be taken?
if (node->parent == to) {
return 0;
}
- prev = nil;
- for (it = node->child; it != nil; it = it->sibling) {
- if (it == to) goto FOUND;
- prev = it;
+ if (!node->parent) {
+ goto RMCHILD;
}
- errorf("inconsistent topology: attempting to rotate to non-child");
- return 1;
-FOUND:
- if (err = rotateparent(node->parent, node), err) {
+ err = rotateparent(node->parent, node);
+ if (err) {
errorf("failure: broken tree");
return err;
}
- if (!prev) {
- node->child = node->parent;
- } else {
- prev->sibling = node->parent;
+ err = phylo·addchild(node, node->parent);
+ if (err) {
+ errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name);
+ return err;
}
- node->parent->sibling = it->sibling;
- node->parent = to;
+RMCHILD:
+ err = phylo·rmchild(node, to);
+ if (err) {
+ errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name);
+ return err;
+ }
+
+ node->parent = to;
return 0;
}
-#define FLOAT_PREC .00000001
+#define PREC .00000001
error
phylo·reroot(bio·Tree *tree, bio·Node *node, double d)
{
- bio·Node *old;
bio·Node *new;
// TODO: should check that node is part of this tree?
+ // TODO: should we check if node->parent != nil?
- old = tree->root;
- if (fabs(d) < FLOAT_PREC) new = node;
- if (fabs(d-node->dist) < FLOAT_PREC) new = node->parent;
- else {
+ if (fabs(d) < PREC) {
+ new = node;
+ rotateparent(node->parent, node);
+ } else if (fabs(d-node->dist) < PREC) {
+ new = node->parent;
+ if (new->parent->parent) {
+ rotateparent(new->parent->parent, new->parent);
+ }
+ } else {
new = tree->heap.alloc(tree->h, 1, sizeof(*new));
memset(new, 0, sizeof(*new));
+
+ phylo·addchild(new, node);
+ node->parent = new;
+
+ phylo·addchild(new, node->parent);
+ if (node->parent->parent) {
+ rotateparent(node->parent->parent, node->parent);
+ }
+ node->parent->parent = new;
}
- // TODO: Use rotateparent() to perform necessary flips
+ printf("number of children on old root: %d\n", tree->root->nchild);
+ tree->root = new;
+ tree->nleaf = 0;
+
+ phylo·countleafs(new, &tree->nleaf);
+ phylo·countnodes(new, &new->nnode);
- tree->root = new;
return 0;
}
+#undef PREC
// -----------------------------------------------------------------------
// ancestral inference
@@ -216,7 +423,7 @@ struct phylo·InferOpts
};
error
-phylo·ancestralinfer(bio·Tree *tree, struct phylo·InferOpts opts)
+phylo·inferancestral(bio·Tree *tree, struct phylo·InferOpts opts)
{
return 0;
}