about | summary | refs | log | tree | commit | diff
path: root/sys/libbio
diff options
context:
space:
mode:
author: Nicholas Noll <nbnoll@eml.cc> 2020-04-22 18:15:17 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2020-04-22 18:15:17 -0700
commit: fb4c4ceba416376751196cdbbdb5f7240e08a405 (patch)
tree: bb874085d6d7715c24da544e9f71ef05e6a2a80a /sys/libbio
parent: 583656a3537bc43a28c58111520143df04bf27f2 (diff)
fix: bug squashing with newick parser
Diffstat (limited to 'sys/libbio')
-rw-r--r-- sys/libbio/io/newick.c | 106
-rw-r--r-- sys/libbio/phylo.c | 11
-rw-r--r-- sys/libbio/test.c | 13
3 files changed, 102 insertions, 28 deletions
diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c
index b81e1bd..5bd2d9a 100644
--- a/sys/libbio/io/newick.c
+++ b/sys/libbio/io/newick.c
@@ -33,6 +33,7 @@ struct Token {
} lit;
};
+static
byte*
tokstr(struct Token tok)
{
@@ -62,6 +63,7 @@ tokstr(struct Token tok)
// Read
// TODO: Bounds checking on buffer
+static
struct Token
lex(Stream *s)
{
@@ -125,13 +127,14 @@ lex(Stream *s)
}
}
+static
struct Token
lex_nospace(Stream *s)
{
struct Token tok;
tok = lex(s);
if (tok.kind == tok·space) {
- lex_nospace(s);
+ tok = lex_nospace(s);
}
return tok;
@@ -147,6 +150,7 @@ struct Parser
mem·Allocator heap;
};
+static
error
parse(struct Parser *p)
{
@@ -155,17 +159,20 @@ parse(struct Parser *p)
bio·Node *root;
struct Token tok;
+ node = p->root;
for (;;) {
tok = lex_nospace(p->file);
switch (tok.kind) {
case tok·lparen:
- if (p->lev > 0) {
- errorf("incorrect format: opening another node before termination of last tree\n");
+ if (!p->root && p->lev > 0) {
+ errorf("parse format: attempted to make root at non-zero level");
goto ERROR;
}
+
node = p->heap.alloc(sizeof(*node));
memset(node, 0, sizeof(*node));
+
if (p->root) {
phylo·addchild(p->root, node);
root = p->root;
@@ -174,12 +181,14 @@ parse(struct Parser *p)
}
p->lev++;
- err = parse(p);
+ p->root = node;
+ p->tok = tok;
+ err = parse(p);
if (err) {
goto ERROR;
}
if (p->tok.kind != tok·rparen) {
- errorf("incorrect format: closing parentheses expected to proceed opening\n");
+ errorf("incorrect format: closing parentheses expected to proceed opening");
goto ERROR;
}
p->root = root;
@@ -193,14 +202,14 @@ parse(struct Parser *p)
/* Comments */
case tok·lbrak:
if (!node) {
- errorf("incorrect format: comment found in disallowed region\n");
+ errorf("incorrect format: comment found in disallowed region");
goto ERROR;
}
node->comment = str·new("");
while (tok.kind != tok·rbrak) {
tok = lex_nospace(p->file);
if (tok.kind == tok·eof || tok.kind == tok·nil) {
- errorf("incorrect format: unmatched comment bracket '['\n");
+ errorf("incorrect format: unmatched comment bracket '['");
goto ERROR;
}
str·append(node->comment, tokstr(tok));
@@ -208,18 +217,18 @@ parse(struct Parser *p)
break;
case tok·rbrak:
- errorf("incorrect format: end comment token found in disallowed region\n");
+ errorf("incorrect format: end comment token found in disallowed region");
goto ERROR;
break;
case tok·colon:
tok = lex_nospace(p->file);
if (tok.kind != tok·number) {
- errorf("incorrect format: expected number after colon\n");
+ errorf("incorrect format: expected number after colon");
goto ERROR;
}
if (node == nil) {
- errorf("parse error: attempting to set distance of nil node\n");
+ errorf("parse error: attempting to set distance of nil node");
goto ERROR;
}
node->dist = tok.lit.x;
@@ -230,23 +239,28 @@ parse(struct Parser *p)
break;
case tok·ident:
- if (p->tok.kind != tok·rparen) {
+ if (p->tok.kind == tok·rparen) {
if (!node) {
- errorf("parse error: attempting to set name of nil node\n");
+ errorf("parse error: attempting to set name of nil node");
goto ERROR;
}
node->name = str·new(tok.lit.s);
+ printf("settting name %s\n", node->name);
} else {
- if (p->tok.kind != tok·comma) {
- errorf("format error: misplaced identifier found\n");
+ if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) {
+ errorf("format error: misplaced identifier for leaf found");
goto ERROR;
}
- if (!node) {
- errorf("parse error: attempting to create child for no parent\n");
+
+ if (!p->root) {
+ errorf("parse error: attempting to create child for no parent");
goto ERROR;
}
+
node = p->heap.alloc(sizeof(*node));
memset(node, 0, sizeof(*node));
+ node->name = str·new(tok.lit.s);
+
phylo·addchild(p->root, node);
}
break;
@@ -254,12 +268,12 @@ parse(struct Parser *p)
case tok·number:
if (p->tok.kind == tok·rparen) {
if (p->lev == 0) {
- errorf("format error: support value on root not supported\n");
+ errorf("format error: support value on root not supported");
goto ERROR;
}
node->support = tok.lit.x;
} else {
- errorf("format error: found number in unexpected location\n");
+ errorf("format error: found number in unexpected location");
goto ERROR;
}
break;
@@ -267,23 +281,22 @@ parse(struct Parser *p)
case tok·semi:
io·ungetbyte(p->file, ';');
if (p->lev) {
- errorf("format error: uneven number of parentheses found at ';'\n");
+ errorf("format error: uneven number of parentheses found at ';'");
+ goto ERROR;
}
goto DONE;
- break;
case tok·eof:
goto DONE;
- break;
default:
+ errorf("parse error: unrecognized token");
goto ERROR;
}
p->tok = tok;
}
-
DONE:
p->tok = tok;
return 0;
@@ -299,6 +312,13 @@ bio·readnewick(Stream *file, mem·Allocator heap)
struct Parser p;
bio·Tree tree;
+ p = (struct Parser){
+ .lev = 0,
+ .root = nil,
+ .tok = (struct Token){ 0 },
+ .file = file,
+ .heap = heap,
+ };
err = parse(&p);
if (err) {
errorf("parsing failed\n");
@@ -311,3 +331,45 @@ bio·readnewick(Stream *file, mem·Allocator heap)
// -----------------------------------------------------------------------
// Write
+
+// dump recursively writes the subtree rooted at `node` to `out` in Newick
+// notation: "(child,child,...)" when the node has children, followed by the
+// node name (if set), followed by ":<length>" for any node that has a parent.
+//
+// Returns 0 on success, or non-zero if `node` (or a node reached below it)
+// is nil. File-private, like the other helpers in this file.
+static
+error
+dump(bio·Node *node, Stream *out)
+{
+    byte      len[32];
+    error     err;
+    bio·Node *child;
+
+    if (!node) {
+        return 1;
+    }
+
+    if (node->nchild) {
+        io·putbyte(out, '(');
+
+        // The first child is written on its own; the remaining children are
+        // reached by walking the sibling chain that starts at child[1].
+        err = dump(node->child[0], out);
+        if (err) {
+            return err;
+        }
+        for (child = node->child[1]; child != nil; child = child->sibling) {
+            io·putbyte(out, ',');
+            err = dump(child, out);
+            if (err) {
+                return err;
+            }
+        }
+
+        io·putbyte(out, ')');
+    }
+
+    if (node->name) {
+        io·putstring(out, node->name);
+    }
+
+    if (node->parent) {
+        // Emit the stored branch length rather than a constant placeholder so
+        // that distances set while parsing are not lost on write.
+        // NOTE(review): %g may produce exponent notation for very small or
+        // very large lengths -- confirm lex() accepts that form on re-read.
+        io·putbyte(out, ':');
+        snprintf((char*)len, sizeof(len), "%g", (double)node->dist);
+        io·putstring(out, len);
+    }
+
+    return 0;
+}
+
+// bio·writenewick serializes `tree` to `out` as a single Newick record: the
+// tree body produced by dump(), a terminating ';', and a newline, after
+// which the stream is flushed.
+//
+// Returns 0 on success. If dump() reports an error (it returns non-zero for
+// a nil node), that error is returned and no terminator is written, so a
+// failed write is not reported as success.
+error
+bio·writenewick(bio·Tree tree, Stream *out)
+{
+    error err;
+
+    err = dump(tree.root, out);
+    if (err) {
+        return err;
+    }
+
+    io·putbyte(out, ';');
+    io·putbyte(out, '\n');
+    io·flush(out);
+
+    return 0;
+}
diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c
index 8033e35..374cd08 100644
--- a/sys/libbio/phylo.c
+++ b/sys/libbio/phylo.c
@@ -6,9 +6,15 @@ error
phylo·addchild(bio·Node* parent, bio·Node* child)
{
bio·Node *it, *sibling;
- if (parent->nchild < 2) {
+ switch (parent->nchild) {
+ case 1:
+ parent->child[0]->sibling = child;
+ case 0:
parent->child[parent->nchild++] = child;
- } else {
+ break;
+
+ default:
+ sibling = parent->child[1];
for (it = parent->child[1]->sibling; it != nil; it = it->sibling) {
sibling = it;
}
@@ -16,5 +22,6 @@ phylo·addchild(bio·Node* parent, bio·Node* child)
parent->nchild++;
}
+ child->parent = parent;
return 0;
}
diff --git a/sys/libbio/test.c b/sys/libbio/test.c
index 00345c4..18bb993 100644
--- a/sys/libbio/test.c
+++ b/sys/libbio/test.c
@@ -47,14 +47,19 @@ main()
{
init();
+ error err;
bio·Tree t;
- Stream *fd;
+ Stream *fd[2];
+
+ fd[0] = io·open("/home/nolln/root/data/test/example.nwk", "r");
+ fd[1] = io·open("/home/nolln/root/data/test/example.proc.nwk", "w");
- fd = io·open("/home/nolln/root/data/test/example.nwk", "r");
printf("starting\n");
- t = bio·readnewick(fd, arena);
- io·close(fd);
+ t = bio·readnewick(fd[0], arena);
+ err = bio·writenewick(t, fd[1]);
printf("ending\n");
+
+ io·close(fd[0]); io·close(fd[1]);
return 0;
}