aboutsummaryrefslogtreecommitdiff
path: root/src/libbio/phylo.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libbio/phylo.c')
-rw-r--r--src/libbio/phylo.c427
1 files changed, 427 insertions, 0 deletions
diff --git a/src/libbio/phylo.c b/src/libbio/phylo.c
new file mode 100644
index 0000000..d50934f
--- /dev/null
+++ b/src/libbio/phylo.c
@@ -0,0 +1,427 @@
+#include <u.h>
+#include <base.h>
+#include <base/macro/qsort.h>
+#include <libbio.h>
+
+// -----------------------------------------------------------------------
+// subtree manipulation methods
+// NOTE: As of now these don't update nnode & nleaf stats.
+// It is the caller's responsibility to refresh counts.
+
+error
+phylo·addchild(bio·Node* parent, bio·Node* child)
+{
+ bio·Node *it, *sibling;
+ if (!parent->nchild) {
+ parent->child = child;
+ goto SUCCESS;
+ }
+
+ for (it = parent->child, sibling = it; it != nil; it = it->sibling) {
+ sibling = it;
+ }
+ sibling->sibling = child;
+
+SUCCESS:
+ child->parent = parent;
+ parent->nchild++;
+ return 0;
+}
+
+error
+phylo·rmchild(bio·Node *parent, bio·Node *child)
+{
+ bio·Node *it, *prev;
+ enum {
+ error·nil,
+ error·notfound,
+ error·nochildren,
+ };
+
+ prev = nil;
+ for (it = parent->child; it != nil; it = it->sibling) {
+ if (it == child) goto FOUND;
+ prev = it;
+ }
+ return error·notfound;
+
+FOUND:
+ if (prev == nil) {
+ parent->child = child->sibling;
+ } else {
+ prev->sibling = child->sibling;
+ }
+
+ parent->nchild--;
+ return error·nil;
+}
+
+// -----------------------------------------------------------------------
+// subtree statistics
+
+error
+phylo·countnodes(bio·Node *node, int *n)
+{
+ int m;
+ error err;
+ bio·Node *child;
+
+ m = *n;
+ for (child = node->child; child != nil; child = child->sibling) {
+ if (err = phylo·countnodes(child, n), err) {
+ errorf("node count: failure at '%s'", child->name);
+ return 1;
+ }
+ }
+ node->nnode = *n - m;
+ *n += 1;
+
+ return 0;
+}
+
+error
+phylo·countleafs(bio·Node *node, int *n)
+{
+ error err;
+ bio·Node *child;
+
+ if (!node->nchild) {
+ *n += 1;
+ }
+
+ for (child = node->child; child != nil; child = child->sibling) {
+ if (err = phylo·countleafs(child, n), err) {
+ errorf("leaf count: failure at '%s'", child->name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+// -----------------------------------------------------------------------
+// generic operations on tree
+
+void*
+phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
+{
+ bio·Node *it;
+
+ for(it = clade->child; it != nil; it = it->sibling) {
+ ctx = phylo·postorder(it, op, ctx);
+ }
+
+ return op(clade, ctx);
+}
+
+void*
+phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
+{
+ bio·Node *it;
+
+ ctx = op(clade, ctx);
+ for(it = clade->child; it != nil; it = it->sibling) {
+ ctx = phylo·preorder(it, op, ctx);
+ }
+
+ return ctx;
+}
+
+int
+phylo·collectpostorder(bio·Node *clade, bio·Node **list)
+{
+ bio·Node *it;
+ int n;
+
+ for(n = 0, it = clade->child; it != nil; it = it->sibling) {
+ n += phylo·collectpostorder(it, list+n);
+ }
+
+ return n;
+}
+
+static
+inline
+void*
+appendleaf(bio·Node *node, void* list)
+{
+ bio·Node **leafs;
+
+ leafs = list;
+ if (!node->nchild) {
+ *leafs++ = node;
+ }
+
+ return leafs;
+}
+
+void
+phylo·getleafs(bio·Tree tree, bio·Node **leafs)
+{
+ phylo·postorder(tree.root, &appendleaf, leafs);
+}
+
+// -----------------------------------------------------------------------
+// tree editing
+
+static
+void
+sortnodelist(bio·Node **head, bio·Node *next)
+{
+ bio·Node tmp, *it;
+
+ it = &tmp;
+ tmp.sibling = *head;
+
+ while (it->sibling != nil && it->sibling->nnode < next->nnode) {
+ it = it->sibling;
+ }
+
+ next->sibling = it->sibling;
+ it->sibling = next;
+ *head = tmp.sibling;
+}
+
+error
+phylo·ladderize(bio·Node *root)
+{
+ int i;
+ error err;
+ bio·Node *child, *sorted, *sibling;
+
+ if (!root->nchild) return 0;
+
+ // ladderize below
+ for (child = root->child; child != nil; child = child->sibling) {
+ if (err = phylo·ladderize(child), err) {
+ errorf("ladderize: failure at '%s'", child->name);
+ return 1;
+ }
+ }
+
+ // ladderize yourself
+ sorted = nil;
+ child = root->child;
+ while (child != nil) {
+ sibling = child->sibling;
+ sortnodelist(&sorted, child);
+ child = sibling;
+ }
+ root->child = sorted;
+
+ return 0;
+}
+
+/*
+ * compute all distances from a given node
+ * must provide a working buffer
+ */
+
+struct Tuple
+{
+ double *d;
+ bio·Node **n;
+};
+
+static
+struct Tuple
+getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list)
+{
+ bio·Node *it;
+ struct Tuple ret;
+
+ *dist++ = curr;
+ *list++ = node;
+
+ ret.d = dist;
+ ret.n = list;
+
+ if (node->parent && node->parent != prev) {
+ ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list);
+
+ dist = ret.d;
+ list = ret.n;
+ }
+
+ for (it = node->child; it != nil; it = it->sibling) {
+ if (it != prev) {
+ ret = getdistsfrom(it, node, curr + it->dist, dist, list);
+
+ dist = ret.d;
+ list = ret.n;
+ }
+ }
+
+ return ret;
+}
+
+int
+phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list)
+{
+ struct Tuple ret;
+ // TODO: Better bounds checking.
+
+ ret = getdistsfrom(node, nil, 0.0, dist, list);
+
+ assert(ret.n - list == len);
+ assert(ret.d - dist == len);
+
+ return len;
+}
+
+/*
+static
+void
+disttoroot(bio·Node *clade, double anc, double *dists)
+{
+ double d;
+ bio·Node *it;
+
+ *dists++ = anc + clade->dist;
+ d = dists[-1];
+ for (it = clade->child; it != nil; it = it->sibling) {
+ disttoroot(it, d, ++dists);
+ }
+}
+
+void
+phylo·disttoroot(bio·Tree tree, double *dists)
+{
+ disttoroot(tree.root, 0.0, dists);
+}
+*/
+
+/*
+ * compute the path constituting the tree diameter
+ * returns the number of edges in the path
+ */
+
+static
+void
+sort·nodedists(uintptr len, double fs[], bio·Node* ns[])
+{
+ double f;
+ bio·Node *n;
+#define LESS(i, j) (fs[i] < fs[j])
+#define SWAP(i, j) (n = ns[i], f = fs[i], \
+ fs[i] = fs[j], ns[i] = ns[j], \
+ fs[j] = f, ns[j] = n)
+ QSORT(len, LESS, SWAP);
+#undef LESS
+#undef SWAP
+}
+
+#define BUFLEN 4096
+double
+phylo·diameter(bio·Tree tree, int *len, bio·Node **path)
+{
+ // TODO: deal with large tree > BUFLEN gracefully
+ int n;
+ double fbuf[BUFLEN];
+ bio·Node *nbuf[BUFLEN];
+
+ n = tree.root->nnode;
+
+ assert(n < BUFLEN);
+
+ n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf);
+ sort·nodedists(n, fbuf, nbuf);
+
+ path[0] = nbuf[n-1];
+ printf("first end '%s'\n", path[0]->name);
+
+ n = phylo·getdistsfrom(path[0], n, fbuf, nbuf);
+ sort·nodedists(n, fbuf, nbuf);
+ printf("second end '%s'\n", nbuf[n-1]->name);
+
+ *len = 0;
+
+ // TODO: Traverse up the tree from each node
+ // Find MRCA by intersection of nodes hit
+
+ return 0.0;
+}
+#undef BUFLEN
+
+/*
+ * reroot a tree on a new node
+ */
+static
+error
+rotateparent(bio·Node *node, bio·Node *to)
+{
+ error err;
+
+ // NOTE: will this ever be taken?
+ if (node->parent == to) {
+ return 0;
+ }
+
+ if (!node->parent) {
+ goto RMCHILD;
+ }
+
+ err = rotateparent(node->parent, node);
+ if (err) {
+ errorf("failure: broken tree");
+ return err;
+ }
+
+ err = phylo·addchild(node, node->parent);
+ if (err) {
+ errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name);
+ return err;
+ }
+
+RMCHILD:
+ err = phylo·rmchild(node, to);
+ if (err) {
+ errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name);
+ return err;
+ }
+
+ node->parent = to;
+ return 0;
+}
+
+#define PREC .00000001
+error
+phylo·reroot(bio·Tree *tree, bio·Node *node, double d)
+{
+ bio·Node *new;
+
+ // TODO: should check that node is part of this tree?
+ // TODO: should we check if node->parent != nil?
+
+ if (fabs(d) < PREC) {
+ new = node;
+ rotateparent(node->parent, node);
+ } else if (fabs(d-node->dist) < PREC) {
+ new = node->parent;
+ if (new->parent->parent) {
+ rotateparent(new->parent->parent, new->parent);
+ }
+ } else {
+ new = tree->mem.alloc(tree->heap, 1, sizeof(*new));
+ memset(new, 0, sizeof(*new));
+
+ phylo·addchild(new, node);
+ node->parent = new;
+
+ phylo·addchild(new, node->parent);
+ if (node->parent->parent) {
+ rotateparent(node->parent->parent, node->parent);
+ }
+ node->parent->parent = new;
+ }
+
+ printf("number of children on old root: %d\n", tree->root->nchild);
+ tree->root = new;
+ tree->nleaf = 0;
+
+ phylo·countleafs(new, &tree->nleaf);
+ phylo·countnodes(new, &new->nnode);
+
+ return 0;
+}
+#undef PREC