From 9fd30c9e0ec9c5716b8cb7b8896318178d4b08bd Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Tue, 28 Apr 2020 14:41:33 -0700 Subject: fix: ladderize now sorts by number of nodes contained below --- .gitignore | 3 +++ include/libbio.h | 5 ++++- sys/libbio/phylo.c | 41 ++++++++++++++++++++++++++++++++++------- sys/libbio/test.c | 1 + 4 files changed, 42 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 470c533..8e33690 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ lib/ dep/ build/ data/ +share/ vendor/ sys/cc sys/nixos +bin/fasttree +bin/mafft diff --git a/include/libbio.h b/include/libbio.h index 261c732..9ce96b5 100644 --- a/include/libbio.h +++ b/include/libbio.h @@ -9,8 +9,8 @@ typedef struct bio·Node string comment; double dist; double support; + int nnode; int nchild; - int ndescendent; struct bio·Node *parent; struct bio·Node *sibling; struct bio·Node *child; @@ -25,10 +25,13 @@ typedef struct bio·Tree // clade functions error phylo·addchild(bio·Node* parent, bio·Node* child); +error phylo·rmchild(bio·Node* parent, bio·Node* child); error phylo·countnodes(bio·Node *node, int *n); error phylo·countleafs(bio·Node *node, int *n); +error phylo·ladderize(bio·Node *root); + /* newick i/o */ error bio·readnewick(io·Peeker stream, void*, mem·Allocator heap, void*, bio·Tree* tree); error bio·writenewick(bio·Tree tree, io·Putter out, void*); diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c index 2bff92d..71901ba 100644 --- a/sys/libbio/phylo.c +++ b/sys/libbio/phylo.c @@ -54,14 +54,15 @@ phylo·countnodes(bio·Node *node, int *n) error err; bio·Node *child; - *n += 1; + node->nnode = 0; for (child = node->child; child != nil; child = child->sibling) { - if (err = phylo·countnodes(child, n), err) { + if (err = phylo·countnodes(child, &node->nnode), err) { errorf("node count: failure at '%s'", child->name); return 1; } } + *n += node->nnode + 1; return 0; } @@ -88,24 +89,50 @@ phylo·countleafs(bio·Node *node, int *n) // ----------------------------------------------------------------------- // tree editing +static +void +sortnodelist(bio·Node **head, bio·Node *next) +{ + bio·Node tmp, *it; + + it = &tmp; + tmp.sibling = *head; + + while (it->sibling != nil && it->sibling->nnode < next->nnode) { + it = it->sibling; + } + + next->sibling = it->sibling; + it->sibling = next; + *head = tmp.sibling; +} + error phylo·ladderize(bio·Node *root) { int i; error err; - bio·Node *child; - double dists[50]; + bio·Node *child, *sorted, *sibling; if (!root->nchild) return 0; - Assert(root->nchild < arrlen(dists)); - for (i = 0, child = root->child; child != nil; child = child->sibling, i++) { + // ladderize below + for (child = root->child; child != nil; child = child->sibling) { if (err = phylo·ladderize(child), err) { errorf("ladderize: failure at '%s'", child->name); return 1; } - dists[i] = child->dist; } + // ladderize yourself + sorted = nil; + child = root->child; + while (child != nil) { + sibling = child->sibling; + sortnodelist(&sorted, child); + child = sibling; + } + root->child = sorted; + return 0; } diff --git a/sys/libbio/test.c b/sys/libbio/test.c index dddecd4..2efcdb6 100644 --- a/sys/libbio/test.c +++ b/sys/libbio/test.c @@ -109,6 +109,7 @@ test·newick() fd[1] = io·open("/home/nolln/root/data/test/zika.proc.nwk", "w"); err = bio·readnewick(rdr, fd[0], al, mem, &t); + phylo·ladderize(t.root); printf("Loaded tree with %d leafs and %d nodes\n", t.nleaf, t.nnode); err = bio·writenewick(t, wtr, fd[1]); -- cgit v1.2.1