From 4b2ea2ca00eea7feea036b7642c0c1443b8f77a1 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sun, 3 May 2020 20:20:22 -0700 Subject: removed buggy qsort header and implemented myself --- sys/libbio/phylo.c | 255 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 231 insertions(+), 24 deletions(-) (limited to 'sys/libbio/phylo.c') diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c index 1cc5b5d..1bb5bc7 100644 --- a/sys/libbio/phylo.c +++ b/sys/libbio/phylo.c @@ -1,5 +1,6 @@ #include #include +#include #include // ----------------------------------------------------------------------- @@ -50,6 +51,8 @@ FOUND: } else { prev->sibling = child->sibling; } + + parent->nchild--; return error·nil; } @@ -96,6 +99,55 @@ phylo·countleafs(bio·Node *node, int *n) return 0; } +// ----------------------------------------------------------------------- +// generic operations on tree + +void* +phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) +{ + bio·Node *it; + + for (it = clade->child; it != nil; it = it->sibling) { + ctx = phylo·postorder(it, op, ctx); + } + + return op(clade, ctx); +} + +void* +phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) +{ + bio·Node *it; + + ctx = op(clade, ctx); + for (it = clade->child; it != nil; it = it->sibling) { + ctx = phylo·preorder(it, op, ctx); + } + + return ctx; +} + +static +inline +void* +appendleaf(bio·Node *node, void* list) +{ + bio·Node **leafs; + + leafs = list; + if (!node->nchild) { + *leafs++ = node; + } + + return leafs; +} + +void +phylo·getleafs(bio·Tree tree, bio·Node **leafs) +{ + phylo·postorder(tree.root, &appendleaf, leafs); +} + // ----------------------------------------------------------------------- // tree editing @@ -147,64 +199,219 @@ phylo·ladderize(bio·Node *root) return 0; } +/* + * compute all distances from a given node + * must provide a working buffer + */ + +struct Tuple +{ + double *d; + bio·Node **n; +}; + +static +struct Tuple +getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list) +{ + bio·Node *it; + struct Tuple ret; + + *dist++ = curr; + *list++ = node; + + ret.d = dist; + ret.n = list; + + if (node->parent && node->parent != prev) { + ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list); + + dist = ret.d; + list = ret.n; + } + + for (it = node->child; it != nil; it = it->sibling) { + if (it != prev) { + ret = getdistsfrom(it, node, curr + it->dist, dist, list); + + dist = ret.d; + list = ret.n; + } + } + + return ret; +} + +int +phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list) +{ + struct Tuple ret; + // TODO: Better bounds checking. + + ret = getdistsfrom(node, nil, 0.0, dist, list); + + assert(ret.n - list == len); + assert(ret.d - dist == len); + + return len; +} + +/* +static +void +disttoroot(bio·Node *clade, double anc, double *dists) +{ + double d; + bio·Node *it; + + *dists++ = anc + clade->dist; + d = dists[-1]; + for (it = clade->child; it != nil; it = it->sibling) { + disttoroot(it, d, ++dists); + } +} + +void +phylo·disttoroot(bio·Tree tree, double *dists) +{ + disttoroot(tree.root, 0.0, dists); +} +*/ + +/* + * compute the path constituting the tree diameter + * returns the number of edges in the path + */ + +static +void +sort·nodedists(uintptr len, double fs[], bio·Node* ns[]) +{ + double f; + bio·Node *n; +#define LESS(i, j) (fs[i] < fs[j]) +#define SWAP(i, j) (n = ns[i], f = fs[i], \ + fs[i] = fs[j], ns[i] = ns[j], \ + fs[j] = f, ns[j] = n) + QSORT(len, LESS, SWAP); +#undef LESS +#undef SWAP +} + +#define BUFLEN 4096 +double +phylo·diameter(bio·Tree tree, int *len, bio·Node **path) +{ + // TODO: deal with large tree > BUFLEN gracefully + int n; + double fbuf[BUFLEN]; + bio·Node *nbuf[BUFLEN]; + + n = tree.root->nnode; + + assert(n < BUFLEN); + + n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf); + sort·nodedists(n, fbuf, nbuf); + + path[0] = nbuf[n-1]; + printf("first end '%s'\n", path[0]->name); + + n = phylo·getdistsfrom(path[0], n, fbuf, nbuf); + sort·nodedists(n, fbuf, nbuf); + printf("second end '%s'\n", nbuf[n-1]->name); + + *len = 0; + + // TODO: Traverse up the tree from each node + // Find MRCA by intersection of nodes hit + + return 0.0; +} +#undef BUFLEN + +/* + * reroot a tree on a new node + */ static error rotateparent(bio·Node *node, bio·Node *to) { error err; - bio·Node *it, *prev; + // NOTE: will this ever be taken? if (node->parent == to) { return 0; } - prev = nil; - for (it = node->child; it != nil; it = it->sibling) { - if (it == to) goto FOUND; - prev = it; + if (!node->parent) { + goto RMCHILD; } - errorf("inconsistent topology: attempting to rotate to non-child"); - return 1; -FOUND: - if (err = rotateparent(node->parent, node), err) { + err = rotateparent(node->parent, node); + if (err) { errorf("failure: broken tree"); return err; } - if (!prev) { - node->child = node->parent; - } else { - prev->sibling = node->parent; + err = phylo·addchild(node, node->parent); + if (err) { + errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name); + return err; } - node->parent->sibling = it->sibling; - node->parent = to; +RMCHILD: + err = phylo·rmchild(node, to); + if (err) { + errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name); + return err; + } + + node->parent = to; return 0; } -#define FLOAT_PREC .00000001 +#define PREC .00000001 error phylo·reroot(bio·Tree *tree, bio·Node *node, double d) { - bio·Node *old; bio·Node *new; // TODO: should check that node is part of this tree? + // TODO: should we check if node->parent != nil? - old = tree->root; - if (fabs(d) < FLOAT_PREC) new = node; - if (fabs(d-node->dist) < FLOAT_PREC) new = node->parent; - else { + if (fabs(d) < PREC) { + new = node; + rotateparent(node->parent, node); + } else if (fabs(d-node->dist) < PREC) { + new = node->parent; + if (new->parent->parent) { + rotateparent(new->parent->parent, new->parent); + } + } else { new = tree->heap.alloc(tree->h, 1, sizeof(*new)); memset(new, 0, sizeof(*new)); + + phylo·addchild(new, node); + node->parent = new; + + phylo·addchild(new, node->parent); + if (node->parent->parent) { + rotateparent(node->parent->parent, node->parent); + } + node->parent->parent = new; } - // TODO: Use rotateparent() to perform necessary flips + printf("number of children on old root: %d\n", tree->root->nchild); + tree->root = new; + tree->nleaf = 0; + + phylo·countleafs(new, &tree->nleaf); + phylo·countnodes(new, &new->nnode); - tree->root = new; return 0; } +#undef PREC // ----------------------------------------------------------------------- // ancestral inference @@ -216,7 +423,7 @@ struct phylo·InferOpts }; error -phylo·ancestralinfer(bio·Tree *tree, struct phylo·InferOpts opts) +phylo·inferancestral(bio·Tree *tree, struct phylo·InferOpts opts) { return 0; } -- cgit v1.2.1