From fb4c4ceba416376751196cdbbdb5f7240e08a405 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 22 Apr 2020 18:15:17 -0700 Subject: fix: bug squashing with newick parser --- sys/libbio/io/newick.c | 106 +++++++++++++++++++++++++++++++++++++++---------- sys/libbio/phylo.c | 11 ++++- sys/libbio/test.c | 13 ++++-- sys/libn/io.c | 6 +++ 4 files changed, 108 insertions(+), 28 deletions(-) (limited to 'sys') diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c index b81e1bd..5bd2d9a 100644 --- a/sys/libbio/io/newick.c +++ b/sys/libbio/io/newick.c @@ -33,6 +33,7 @@ struct Token { } lit; }; +static byte* tokstr(struct Token tok) { @@ -62,6 +63,7 @@ tokstr(struct Token tok) // Read // TODO: Bounds checking on buffer +static struct Token lex(Stream *s) { @@ -125,13 +127,14 @@ lex(Stream *s) } } +static struct Token lex_nospace(Stream *s) { struct Token tok; tok = lex(s); if (tok.kind == tok·space) { - lex_nospace(s); + tok = lex_nospace(s); } return tok; @@ -147,6 +150,7 @@ struct Parser mem·Allocator heap; }; +static error parse(struct Parser *p) { @@ -155,17 +159,20 @@ parse(struct Parser *p) bio·Node *root; struct Token tok; + node = p->root; for (;;) { tok = lex_nospace(p->file); switch (tok.kind) { case tok·lparen: - if (p->lev > 0) { - errorf("incorrect format: opening another node before termination of last tree\n"); + if (!p->root && p->lev > 0) { + errorf("parse format: attempted to make root at non-zero level"); goto ERROR; } + node = p->heap.alloc(sizeof(*node)); memset(node, 0, sizeof(*node)); + if (p->root) { phylo·addchild(p->root, node); root = p->root; @@ -174,12 +181,14 @@ parse(struct Parser *p) } p->lev++; - err = parse(p); + p->root = node; + p->tok = tok; + err = parse(p); if (err) { goto ERROR; } if (p->tok.kind != tok·rparen) { - errorf("incorrect format: closing parentheses expected to proceed opening\n"); + errorf("incorrect format: closing parentheses expected to proceed opening"); goto ERROR; } p->root = root; @@ -193,14 +202,14 @@ parse(struct Parser *p) /* Comments */ case tok·lbrak: if (!node) { - errorf("incorrect format: comment found in disallowed region\n"); + errorf("incorrect format: comment found in disallowed region"); goto ERROR; } node->comment = str·new(""); while (tok.kind != tok·rbrak) { tok = lex_nospace(p->file); if (tok.kind == tok·eof || tok.kind == tok·nil) { - errorf("incorrect format: unmatched comment bracket '['\n"); + errorf("incorrect format: unmatched comment bracket '['"); goto ERROR; } str·append(node->comment, tokstr(tok)); @@ -208,18 +217,18 @@ parse(struct Parser *p) break; case tok·rbrak: - errorf("incorrect format: end comment token found in disallowed region\n"); + errorf("incorrect format: end comment token found in disallowed region"); goto ERROR; break; case tok·colon: tok = lex_nospace(p->file); if (tok.kind != tok·number) { - errorf("incorrect format: expected number after colon\n"); + errorf("incorrect format: expected number after colon"); goto ERROR; } if (node == nil) { - errorf("parse error: attempting to set distance of nil node\n"); + errorf("parse error: attempting to set distance of nil node"); goto ERROR; } node->dist = tok.lit.x; @@ -230,23 +239,28 @@ parse(struct Parser *p) break; case tok·ident: - if (p->tok.kind != tok·rparen) { + if (p->tok.kind == tok·rparen) { if (!node) { - errorf("parse error: attempting to set name of nil node\n"); + errorf("parse error: attempting to set name of nil node"); goto ERROR; } node->name = str·new(tok.lit.s); + printf("settting name %s\n", node->name); } else { - if (p->tok.kind != tok·comma) { - errorf("format error: misplaced identifier found\n"); + if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) { + errorf("format error: misplaced identifier for leaf found"); goto ERROR; } - if (!node) { - errorf("parse error: attempting to create child for no parent\n"); + + if (!p->root) { + errorf("parse error: attempting to create child for no parent"); goto ERROR; } + node = p->heap.alloc(sizeof(*node)); memset(node, 0, sizeof(*node)); + node->name = str·new(tok.lit.s); + phylo·addchild(p->root, node); } break; @@ -254,12 +268,12 @@ parse(struct Parser *p) case tok·number: if (p->tok.kind == tok·rparen) { if (p->lev == 0) { - errorf("format error: support value on root not supported\n"); + errorf("format error: support value on root not supported"); goto ERROR; } node->support = tok.lit.x; } else { - errorf("format error: found number in unexpected location\n"); + errorf("format error: found number in unexpected location"); goto ERROR; } break; @@ -267,23 +281,22 @@ parse(struct Parser *p) case tok·semi: io·ungetbyte(p->file, ';'); if (p->lev) { - errorf("format error: uneven number of parentheses found at ';'\n"); + errorf("format error: uneven number of parentheses found at ';'"); + goto ERROR; } goto DONE; - break; case tok·eof: goto DONE; - break; default: + errorf("parse error: unrecognized token"); goto ERROR; } p->tok = tok; } - DONE: p->tok = tok; return 0; @@ -299,6 +312,13 @@ bio·readnewick(Stream *file, mem·Allocator heap) struct Parser p; bio·Tree tree; + p = (struct Parser){ + .lev = 0, + .root = nil, + .tok = (struct Token){ 0 }, + .file = file, + .heap = heap, + }; err = parse(&p); if (err) { errorf("parsing failed\n"); @@ -311,3 +331,45 @@ bio·readnewick(Stream *file, mem·Allocator heap) // ----------------------------------------------------------------------- // Write + +error +dump(bio·Node *node, Stream *out) +{ + if (!node) { + return 1; + } + bio·Node *child; + if (node->nchild) { + io·putbyte(out, '('); + + dump(node->child[0], out); + for (child = node->child[1]; child != nil; child = child->sibling) { + io·putbyte(out, ','); + dump(child, out); + } + + io·putbyte(out, ')'); + } + if (node->name) { + io·putstring(out, node->name); + } + + if (node->parent) { + io·putbyte(out, ':'); + // TODO(nnoll): Format float + io·putbyte(out, '0'); + } + + return 0; +} + +error +bio·writenewick(bio·Tree tree, Stream *out) +{ + dump(tree.root, out); + io·putbyte(out, ';'); + io·putbyte(out, '\n'); + io·flush(out); + + return 0; +} diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c index 8033e35..374cd08 100644 --- a/sys/libbio/phylo.c +++ b/sys/libbio/phylo.c @@ -6,9 +6,15 @@ error phylo·addchild(bio·Node* parent, bio·Node* child) { bio·Node *it, *sibling; - if (parent->nchild < 2) { + switch (parent->nchild) { + case 1: + parent->child[0]->sibling = child; + case 0: parent->child[parent->nchild++] = child; - } else { + break; + + default: + sibling = parent->child[1]; for (it = parent->child[1]->sibling; it != nil; it = it->sibling) { sibling = it; } @@ -16,5 +22,6 @@ phylo·addchild(bio·Node* parent, bio·Node* child) parent->nchild++; } + child->parent = parent; return 0; } diff --git a/sys/libbio/test.c b/sys/libbio/test.c index 00345c4..18bb993 100644 --- a/sys/libbio/test.c +++ b/sys/libbio/test.c @@ -47,14 +47,19 @@ main() { init(); + error err; bio·Tree t; - Stream *fd; + Stream *fd[2]; + + fd[0] = io·open("/home/nolln/root/data/test/example.nwk", "r"); + fd[1] = io·open("/home/nolln/root/data/test/example.proc.nwk", "w"); - fd = io·open("/home/nolln/root/data/test/example.nwk", "r"); printf("starting\n"); - t = bio·readnewick(fd, arena); - io·close(fd); + t = bio·readnewick(fd[0], arena); + err = bio·writenewick(t, fd[1]); printf("ending\n"); + + io·close(fd[0]); io·close(fd[1]); return 0; } diff --git a/sys/libn/io.c b/sys/libn/io.c index 922dec3..7eec74e 100644 --- a/sys/libn/io.c +++ b/sys/libn/io.c @@ -67,6 +67,12 @@ io·write(Stream *s, int sz, int n, void *buf) return fwrite(buf, sz, n, s); } +int +io·flush(Stream *s) +{ + return fflush(s); +} + // ----------------------------------------------------------------------- // Seek -- cgit v1.2.1