#include #include #include #include // ----------------------------------------------------------------------- // subtree manipulation methods // NOTE: As of now these don't update nnode & nleaf stats. // It is the caller's responsibility to refresh counts. error phylo·addchild(bio·Node* parent, bio·Node* child) { bio·Node *it, *sibling; if (!parent->nchild) { parent->child = child; goto SUCCESS; } for (it = parent->child, sibling = it; it != nil; it = it->sibling) { sibling = it; } sibling->sibling = child; SUCCESS: child->parent = parent; parent->nchild++; return 0; } error phylo·rmchild(bio·Node *parent, bio·Node *child) { bio·Node *it, *prev; enum { error·nil, error·notfound, error·nochildren, }; prev = nil; for (it = parent->child; it != nil; it = it->sibling) { if (it == child) goto FOUND; prev = it; } return error·notfound; FOUND: if (prev == nil) { parent->child = child->sibling; } else { prev->sibling = child->sibling; } parent->nchild--; return error·nil; } // ----------------------------------------------------------------------- // subtree statistics error phylo·countnodes(bio·Node *node, int *n) { int m; error err; bio·Node *child; m = *n; for (child = node->child; child != nil; child = child->sibling) { if (err = phylo·countnodes(child, n), err) { errorf("node count: failure at '%s'", child->name); return 1; } } node->nnode = *n - m; *n += 1; return 0; } error phylo·countleafs(bio·Node *node, int *n) { error err; bio·Node *child; if (!node->nchild) { *n += 1; } for (child = node->child; child != nil; child = child->sibling) { if (err = phylo·countleafs(child, n), err) { errorf("leaf count: failure at '%s'", child->name); return 1; } } return 0; } // ----------------------------------------------------------------------- // generic operations on tree void* phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) { bio·Node *it; for(it = clade->child; it != nil; it = it->sibling) { ctx = phylo·postorder(it, op, ctx); } return op(clade, ctx); } void* phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) { bio·Node *it; ctx = op(clade, ctx); for(it = clade->child; it != nil; it = it->sibling) { ctx = phylo·preorder(it, op, ctx); } return ctx; } int phylo·collectpostorder(bio·Node *clade, bio·Node **list) { bio·Node *it; int n; for(n = 0, it = clade->child; it != nil; it = it->sibling) { n += phylo·collectpostorder(it, list+n); } return n; } static inline void* appendleaf(bio·Node *node, void* list) { bio·Node **leafs; leafs = list; if (!node->nchild) { *leafs++ = node; } return leafs; } void phylo·getleafs(bio·Tree tree, bio·Node **leafs) { phylo·postorder(tree.root, &appendleaf, leafs); } // ----------------------------------------------------------------------- // tree editing static void sortnodelist(bio·Node **head, bio·Node *next) { bio·Node tmp, *it; it = &tmp; tmp.sibling = *head; while (it->sibling != nil && it->sibling->nnode < next->nnode) { it = it->sibling; } next->sibling = it->sibling; it->sibling = next; *head = tmp.sibling; } error phylo·ladderize(bio·Node *root) { int i; error err; bio·Node *child, *sorted, *sibling; if (!root->nchild) return 0; // ladderize below for (child = root->child; child != nil; child = child->sibling) { if (err = phylo·ladderize(child), err) { errorf("ladderize: failure at '%s'", child->name); return 1; } } // ladderize yourself sorted = nil; child = root->child; while (child != nil) { sibling = child->sibling; sortnodelist(&sorted, child); child = sibling; } root->child = sorted; return 0; } /* * compute all distances from a given node * must provide a working buffer */ struct Tuple { double *d; bio·Node **n; }; static struct Tuple getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list) { bio·Node *it; struct Tuple ret; *dist++ = curr; *list++ = node; ret.d = dist; ret.n = list; if (node->parent && node->parent != prev) { ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list); dist = ret.d; list = ret.n; } for (it = node->child; it != nil; it = it->sibling) { if (it != prev) { ret = getdistsfrom(it, node, curr + it->dist, dist, list); dist = ret.d; list = ret.n; } } return ret; } int phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list) { struct Tuple ret; // TODO: Better bounds checking. ret = getdistsfrom(node, nil, 0.0, dist, list); assert(ret.n - list == len); assert(ret.d - dist == len); return len; } /* static void disttoroot(bio·Node *clade, double anc, double *dists) { double d; bio·Node *it; *dists++ = anc + clade->dist; d = dists[-1]; for (it = clade->child; it != nil; it = it->sibling) { disttoroot(it, d, ++dists); } } void phylo·disttoroot(bio·Tree tree, double *dists) { disttoroot(tree.root, 0.0, dists); } */ /* * compute the path constituting the tree diameter * returns the number of edges in the path */ static void sort·nodedists(uintptr len, double fs[], bio·Node* ns[]) { double f; bio·Node *n; #define LESS(i, j) (fs[i] < fs[j]) #define SWAP(i, j) (n = ns[i], f = fs[i], \ fs[i] = fs[j], ns[i] = ns[j], \ fs[j] = f, ns[j] = n) QSORT(len, LESS, SWAP); #undef LESS #undef SWAP } #define BUFLEN 4096 double phylo·diameter(bio·Tree tree, int *len, bio·Node **path) { // TODO: deal with large tree > BUFLEN gracefully int n; double fbuf[BUFLEN]; bio·Node *nbuf[BUFLEN]; n = tree.root->nnode; assert(n < BUFLEN); n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf); sort·nodedists(n, fbuf, nbuf); path[0] = nbuf[n-1]; printf("first end '%s'\n", path[0]->name); n = phylo·getdistsfrom(path[0], n, fbuf, nbuf); sort·nodedists(n, fbuf, nbuf); printf("second end '%s'\n", nbuf[n-1]->name); *len = 0; // TODO: Traverse up the tree from each node // Find MRCA by intersection of nodes hit return 0.0; } #undef BUFLEN /* * reroot a tree on a new node */ static error rotateparent(bio·Node *node, bio·Node *to) { error err; // NOTE: will this ever be taken? if (node->parent == to) { return 0; } if (!node->parent) { goto RMCHILD; } err = rotateparent(node->parent, node); if (err) { errorf("failure: broken tree"); return err; } err = phylo·addchild(node, node->parent); if (err) { errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name); return err; } RMCHILD: err = phylo·rmchild(node, to); if (err) { errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name); return err; } node->parent = to; return 0; } #define PREC .00000001 error phylo·reroot(bio·Tree *tree, bio·Node *node, double d) { bio·Node *new; // TODO: should check that node is part of this tree? // TODO: should we check if node->parent != nil? if (fabs(d) < PREC) { new = node; rotateparent(node->parent, node); } else if (fabs(d-node->dist) < PREC) { new = node->parent; if (new->parent->parent) { rotateparent(new->parent->parent, new->parent); } } else { new = tree->mem.alloc(tree->heap, 1, sizeof(*new)); memset(new, 0, sizeof(*new)); phylo·addchild(new, node); node->parent = new; phylo·addchild(new, node->parent); if (node->parent->parent) { rotateparent(node->parent->parent, node->parent); } node->parent->parent = new; } printf("number of children on old root: %d\n", tree->root->nchild); tree->root = new; tree->nleaf = 0; phylo·countleafs(new, &tree->nleaf); phylo·countnodes(new, &new->nnode); return 0; } #undef PREC