From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libbio/phylo.c | 427 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 427 insertions(+) create mode 100644 src/libbio/phylo.c (limited to 'src/libbio/phylo.c') diff --git a/src/libbio/phylo.c b/src/libbio/phylo.c new file mode 100644 index 0000000..d50934f --- /dev/null +++ b/src/libbio/phylo.c @@ -0,0 +1,427 @@ +#include +#include +#include +#include + +// ----------------------------------------------------------------------- +// subtree manipulation methods +// NOTE: As of now these don't update nnode & nleaf stats. +// It is the caller's responsibility to refresh counts. + +error +phylo·addchild(bio·Node* parent, bio·Node* child) +{ + bio·Node *it, *sibling; + if (!parent->nchild) { + parent->child = child; + goto SUCCESS; + } + + for (it = parent->child, sibling = it; it != nil; it = it->sibling) { + sibling = it; + } + sibling->sibling = child; + +SUCCESS: + child->parent = parent; + parent->nchild++; + return 0; +} + +error +phylo·rmchild(bio·Node *parent, bio·Node *child) +{ + bio·Node *it, *prev; + enum { + error·nil, + error·notfound, + error·nochildren, + }; + + prev = nil; + for (it = parent->child; it != nil; it = it->sibling) { + if (it == child) goto FOUND; + prev = it; + } + return error·notfound; + +FOUND: + if (prev == nil) { + parent->child = child->sibling; + } else { + prev->sibling = child->sibling; + } + + parent->nchild--; + return error·nil; +} + +// ----------------------------------------------------------------------- +// subtree statistics + +error +phylo·countnodes(bio·Node *node, int *n) +{ + int m; + error err; + bio·Node *child; + + m = *n; + for (child = node->child; child != nil; child = child->sibling) { + if (err = phylo·countnodes(child, n), err) { + errorf("node count: failure at '%s'", child->name); + return 1; + } + } + node->nnode = *n - m; + *n += 1; + + return 0; +} + +error +phylo·countleafs(bio·Node *node, int *n) +{ + error err; + bio·Node *child; + + if (!node->nchild) { + *n += 1; + } + + for (child = node->child; child != nil; child = child->sibling) { + if (err = phylo·countleafs(child, n), err) { + errorf("leaf count: failure at '%s'", child->name); + return 1; + } + } + + return 0; +} + +// ----------------------------------------------------------------------- +// generic operations on tree + +void* +phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) +{ + bio·Node *it; + + for(it = clade->child; it != nil; it = it->sibling) { + ctx = phylo·postorder(it, op, ctx); + } + + return op(clade, ctx); +} + +void* +phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) +{ + bio·Node *it; + + ctx = op(clade, ctx); + for(it = clade->child; it != nil; it = it->sibling) { + ctx = phylo·preorder(it, op, ctx); + } + + return ctx; +} + +int +phylo·collectpostorder(bio·Node *clade, bio·Node **list) +{ + bio·Node *it; + int n; + + for(n = 0, it = clade->child; it != nil; it = it->sibling) { + n += phylo·collectpostorder(it, list+n); + } + + return n; +} + +static +inline +void* +appendleaf(bio·Node *node, void* list) +{ + bio·Node **leafs; + + leafs = list; + if (!node->nchild) { + *leafs++ = node; + } + + return leafs; +} + +void +phylo·getleafs(bio·Tree tree, bio·Node **leafs) +{ + phylo·postorder(tree.root, &appendleaf, leafs); +} + +// ----------------------------------------------------------------------- +// tree editing + +static +void +sortnodelist(bio·Node **head, bio·Node *next) +{ + bio·Node tmp, *it; + + it = &tmp; + tmp.sibling = *head; + + while (it->sibling != nil && it->sibling->nnode < next->nnode) { + it = it->sibling; + } + + next->sibling = it->sibling; + it->sibling = next; + *head = tmp.sibling; +} + +error +phylo·ladderize(bio·Node *root) +{ + int i; + error err; + bio·Node *child, *sorted, *sibling; + + if (!root->nchild) return 0; + + // ladderize below + for (child = root->child; child != nil; child = child->sibling) { + if (err = phylo·ladderize(child), err) { + errorf("ladderize: failure at '%s'", child->name); + return 1; + } + } + + // ladderize yourself + sorted = nil; + child = root->child; + while (child != nil) { + sibling = child->sibling; + sortnodelist(&sorted, child); + child = sibling; + } + root->child = sorted; + + return 0; +} + +/* + * compute all distances from a given node + * must provide a working buffer + */ + +struct Tuple +{ + double *d; + bio·Node **n; +}; + +static +struct Tuple +getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list) +{ + bio·Node *it; + struct Tuple ret; + + *dist++ = curr; + *list++ = node; + + ret.d = dist; + ret.n = list; + + if (node->parent && node->parent != prev) { + ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list); + + dist = ret.d; + list = ret.n; + } + + for (it = node->child; it != nil; it = it->sibling) { + if (it != prev) { + ret = getdistsfrom(it, node, curr + it->dist, dist, list); + + dist = ret.d; + list = ret.n; + } + } + + return ret; +} + +int +phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list) +{ + struct Tuple ret; + // TODO: Better bounds checking. + + ret = getdistsfrom(node, nil, 0.0, dist, list); + + assert(ret.n - list == len); + assert(ret.d - dist == len); + + return len; +} + +/* +static +void +disttoroot(bio·Node *clade, double anc, double *dists) +{ + double d; + bio·Node *it; + + *dists++ = anc + clade->dist; + d = dists[-1]; + for (it = clade->child; it != nil; it = it->sibling) { + disttoroot(it, d, ++dists); + } +} + +void +phylo·disttoroot(bio·Tree tree, double *dists) +{ + disttoroot(tree.root, 0.0, dists); +} +*/ + +/* + * compute the path constituting the tree diameter + * returns the number of edges in the path + */ + +static +void +sort·nodedists(uintptr len, double fs[], bio·Node* ns[]) +{ + double f; + bio·Node *n; +#define LESS(i, j) (fs[i] < fs[j]) +#define SWAP(i, j) (n = ns[i], f = fs[i], \ + fs[i] = fs[j], ns[i] = ns[j], \ + fs[j] = f, ns[j] = n) + QSORT(len, LESS, SWAP); +#undef LESS +#undef SWAP +} + +#define BUFLEN 4096 +double +phylo·diameter(bio·Tree tree, int *len, bio·Node **path) +{ + // TODO: deal with large tree > BUFLEN gracefully + int n; + double fbuf[BUFLEN]; + bio·Node *nbuf[BUFLEN]; + + n = tree.root->nnode; + + assert(n < BUFLEN); + + n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf); + sort·nodedists(n, fbuf, nbuf); + + path[0] = nbuf[n-1]; + printf("first end '%s'\n", path[0]->name); + + n = phylo·getdistsfrom(path[0], n, fbuf, nbuf); + sort·nodedists(n, fbuf, nbuf); + printf("second end '%s'\n", nbuf[n-1]->name); + + *len = 0; + + // TODO: Traverse up the tree from each node + // Find MRCA by intersection of nodes hit + + return 0.0; +} +#undef BUFLEN + +/* + * reroot a tree on a new node + */ +static +error +rotateparent(bio·Node *node, bio·Node *to) +{ + error err; + + // NOTE: will this ever be taken? + if (node->parent == to) { + return 0; + } + + if (!node->parent) { + goto RMCHILD; + } + + err = rotateparent(node->parent, node); + if (err) { + errorf("failure: broken tree"); + return err; + } + + err = phylo·addchild(node, node->parent); + if (err) { + errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name); + return err; + } + +RMCHILD: + err = phylo·rmchild(node, to); + if (err) { + errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name); + return err; + } + + node->parent = to; + return 0; +} + +#define PREC .00000001 +error +phylo·reroot(bio·Tree *tree, bio·Node *node, double d) +{ + bio·Node *new; + + // TODO: should check that node is part of this tree? + // TODO: should we check if node->parent != nil? + + if (fabs(d) < PREC) { + new = node; + rotateparent(node->parent, node); + } else if (fabs(d-node->dist) < PREC) { + new = node->parent; + if (new->parent->parent) { + rotateparent(new->parent->parent, new->parent); + } + } else { + new = tree->mem.alloc(tree->heap, 1, sizeof(*new)); + memset(new, 0, sizeof(*new)); + + phylo·addchild(new, node); + node->parent = new; + + phylo·addchild(new, node->parent); + if (node->parent->parent) { + rotateparent(node->parent->parent, node->parent); + } + node->parent->parent = new; + } + + printf("number of children on old root: %d\n", tree->root->nchild); + tree->root = new; + tree->nleaf = 0; + + phylo·countleafs(new, &tree->nleaf); + phylo·countnodes(new, &new->nnode); + + return 0; +} +#undef PREC -- cgit v1.2.1