From fb4c4ceba416376751196cdbbdb5f7240e08a405 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 22 Apr 2020 18:15:17 -0700 Subject: fix: bug squashing with newick parser --- compile_commands.json | 140 ++----------------------------------------------- include/libbio.h | 4 +- include/libn.h | 1 + sys/libbio/io/newick.c | 106 +++++++++++++++++++++++++++++-------- sys/libbio/phylo.c | 11 +++- sys/libbio/test.c | 13 +++-- sys/libn/io.c | 6 +++ 7 files changed, 115 insertions(+), 166 deletions(-) diff --git a/compile_commands.json b/compile_commands.json index 99168be..aa79c9d 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -7,146 +7,12 @@ "-fno-strict-aliasing", "-fwrapv", "-fms-extensions", - "-ffreestanding", - "-fno-builtin", - "-nostdlib", "-Iinclude", "-o", - "build/libc/string.o", - "sys/libc/string.c" + "build/libbio/io/newick.o", + "sys/libbio/io/newick.c" ], "directory": "/home/nolln/root", - "file": "sys/libc/string.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/test.o", - "sys/libn/test.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/test.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-ffreestanding", - "-fno-builtin", - "-nostdlib", - "-Iinclude", - "-o", - "build/libc/stdio.o", - "sys/libc/stdio.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libc/stdio.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/error.o", - "sys/libn/error.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/error.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/bufio.o", - "sys/libn/bufio.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/bufio.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/memory.o", - "sys/libn/memory.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/memory.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/io.o", - "sys/libn/io.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/io.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/coro.o", - "sys/libn/coro.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/coro.c" - }, - { - "arguments": [ - "clang", - "-c", - "-g", - "-fno-strict-aliasing", - "-fwrapv", - "-fms-extensions", - "-Iinclude", - "-o", - "build/libn/string.o", - "sys/libn/string.c" - ], - "directory": "/home/nolln/root", - "file": "sys/libn/string.c" + "file": "sys/libbio/io/newick.c" } ] \ No newline at end of file diff --git a/include/libbio.h b/include/libbio.h index d8430b3..58540d4 100644 --- a/include/libbio.h +++ b/include/libbio.h @@ -16,7 +16,8 @@ typedef struct bio·Node struct bio·Node *sibling; } bio·Node; -error phylo·addchild(bio·Node* parent, bio·Node* child); +error phylo·addchild(bio·Node *parent, bio·Node* child); +bool phylo·isleaf(bio·Node *node); typedef struct bio·Tree { @@ -24,6 +25,7 @@ typedef struct bio·Tree } bio·Tree; bio·Tree bio·readnewick(Stream *file, mem·Allocator heap); +error bio·writenewick(bio·Tree tree, Stream *out); // ----------------------------------------------------------------------- // Sequences diff --git a/include/libn.h b/include/libn.h index 203a477..c8d510f 100644 --- a/include/libn.h +++ b/include/libn.h @@ -104,6 +104,7 @@ int io·readln(Stream *s, int n, byte* buf); error io·putbyte(Stream *s, byte c); int io·putstring(Stream *s, string str); vlong io·write(Stream *s, int sz, int n, void *buf); +int io·flush(Stream *s); int io·seek(Stream *s, long off, enum SeekPos origin); // ----------------------------------------------------------------------------- diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c index b81e1bd..5bd2d9a 100644 --- a/sys/libbio/io/newick.c +++ b/sys/libbio/io/newick.c @@ -33,6 +33,7 @@ struct Token { } lit; }; +static byte* tokstr(struct Token tok) { @@ -62,6 +63,7 @@ tokstr(struct Token tok) // Read // TODO: Bounds checking on buffer +static struct Token lex(Stream *s) { @@ -125,13 +127,14 @@ lex(Stream *s) } } +static struct Token lex_nospace(Stream *s) { struct Token tok; tok = lex(s); if (tok.kind == tok·space) { - lex_nospace(s); + tok = lex_nospace(s); } return tok; @@ -147,6 +150,7 @@ struct Parser mem·Allocator heap; }; +static error parse(struct Parser *p) { @@ -155,17 +159,20 @@ parse(struct Parser *p) bio·Node *root; struct Token tok; + node = p->root; for (;;) { tok = lex_nospace(p->file); switch (tok.kind) { case tok·lparen: - if (p->lev > 0) { - errorf("incorrect format: opening another node before termination of last tree\n"); + if (!p->root && p->lev > 0) { + errorf("parse format: attempted to make root at non-zero level"); goto ERROR; } + node = p->heap.alloc(sizeof(*node)); memset(node, 0, sizeof(*node)); + if (p->root) { phylo·addchild(p->root, node); root = p->root; @@ -174,12 +181,14 @@ parse(struct Parser *p) } p->lev++; - err = parse(p); + p->root = node; + p->tok = tok; + err = parse(p); if (err) { goto ERROR; } if (p->tok.kind != tok·rparen) { - errorf("incorrect format: closing parentheses expected to proceed opening\n"); + errorf("incorrect format: closing parentheses expected to proceed opening"); goto ERROR; } p->root = root; @@ -193,14 +202,14 @@ parse(struct Parser *p) /* Comments */ case tok·lbrak: if (!node) { - errorf("incorrect format: comment found in disallowed region\n"); + errorf("incorrect format: comment found in disallowed region"); goto ERROR; } node->comment = str·new(""); while (tok.kind != tok·rbrak) { tok = lex_nospace(p->file); if (tok.kind == tok·eof || tok.kind == tok·nil) { - errorf("incorrect format: unmatched comment bracket '['\n"); + errorf("incorrect format: unmatched comment bracket '['"); goto ERROR; } str·append(node->comment, tokstr(tok)); @@ -208,18 +217,18 @@ parse(struct Parser *p) break; case tok·rbrak: - errorf("incorrect format: end comment token found in disallowed region\n"); + errorf("incorrect format: end comment token found in disallowed region"); goto ERROR; break; case tok·colon: tok = lex_nospace(p->file); if (tok.kind != tok·number) { - errorf("incorrect format: expected number after colon\n"); + errorf("incorrect format: expected number after colon"); goto ERROR; } if (node == nil) { - errorf("parse error: attempting to set distance of nil node\n"); + errorf("parse error: attempting to set distance of nil node"); goto ERROR; } node->dist = tok.lit.x; @@ -230,23 +239,28 @@ parse(struct Parser *p) break; case tok·ident: - if (p->tok.kind != tok·rparen) { + if (p->tok.kind == tok·rparen) { if (!node) { - errorf("parse error: attempting to set name of nil node\n"); + errorf("parse error: attempting to set name of nil node"); goto ERROR; } node->name = str·new(tok.lit.s); + printf("settting name %s\n", node->name); } else { - if (p->tok.kind != tok·comma) { - errorf("format error: misplaced identifier found\n"); + if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) { + errorf("format error: misplaced identifier for leaf found"); goto ERROR; } - if (!node) { - errorf("parse error: attempting to create child for no parent\n"); + + if (!p->root) { + errorf("parse error: attempting to create child for no parent"); goto ERROR; } + node = p->heap.alloc(sizeof(*node)); memset(node, 0, sizeof(*node)); + node->name = str·new(tok.lit.s); + phylo·addchild(p->root, node); } break; @@ -254,12 +268,12 @@ parse(struct Parser *p) case tok·number: if (p->tok.kind == tok·rparen) { if (p->lev == 0) { - errorf("format error: support value on root not supported\n"); + errorf("format error: support value on root not supported"); goto ERROR; } node->support = tok.lit.x; } else { - errorf("format error: found number in unexpected location\n"); + errorf("format error: found number in unexpected location"); goto ERROR; } break; @@ -267,23 +281,22 @@ parse(struct Parser *p) case tok·semi: io·ungetbyte(p->file, ';'); if (p->lev) { - errorf("format error: uneven number of parentheses found at ';'\n"); + errorf("format error: uneven number of parentheses found at ';'"); + goto ERROR; } goto DONE; - break; case tok·eof: goto DONE; - break; default: + errorf("parse error: unrecognized token"); goto ERROR; } p->tok = tok; } - DONE: p->tok = tok; return 0; @@ -299,6 +312,13 @@ bio·readnewick(Stream *file, mem·Allocator heap) struct Parser p; bio·Tree tree; + p = (struct Parser){ + .lev = 0, + .root = nil, + .tok = (struct Token){ 0 }, + .file = file, + .heap = heap, + }; err = parse(&p); if (err) { errorf("parsing failed\n"); @@ -311,3 +331,45 @@ bio·readnewick(Stream *file, mem·Allocator heap) // ----------------------------------------------------------------------- // Write + +error +dump(bio·Node *node, Stream *out) +{ + if (!node) { + return 1; + } + bio·Node *child; + if (node->nchild) { + io·putbyte(out, '('); + + dump(node->child[0], out); + for (child = node->child[1]; child != nil; child = child->sibling) { + io·putbyte(out, ','); + dump(child, out); + } + + io·putbyte(out, ')'); + } + if (node->name) { + io·putstring(out, node->name); + } + + if (node->parent) { + io·putbyte(out, ':'); + // TODO(nnoll): Format float + io·putbyte(out, '0'); + } + + return 0; +} + +error +bio·writenewick(bio·Tree tree, Stream *out) +{ + dump(tree.root, out); + io·putbyte(out, ';'); + io·putbyte(out, '\n'); + io·flush(out); + + return 0; +} diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c index 8033e35..374cd08 100644 --- a/sys/libbio/phylo.c +++ b/sys/libbio/phylo.c @@ -6,9 +6,15 @@ error phylo·addchild(bio·Node* parent, bio·Node* child) { bio·Node *it, *sibling; - if (parent->nchild < 2) { + switch (parent->nchild) { + case 1: + parent->child[0]->sibling = child; + case 0: parent->child[parent->nchild++] = child; - } else { + break; + + default: + sibling = parent->child[1]; for (it = parent->child[1]->sibling; it != nil; it = it->sibling) { sibling = it; } @@ -16,5 +22,6 @@ phylo·addchild(bio·Node* parent, bio·Node* child) parent->nchild++; } + child->parent = parent; return 0; } diff --git a/sys/libbio/test.c b/sys/libbio/test.c index 00345c4..18bb993 100644 --- a/sys/libbio/test.c +++ b/sys/libbio/test.c @@ -47,14 +47,19 @@ main() { init(); + error err; bio·Tree t; - Stream *fd; + Stream *fd[2]; + + fd[0] = io·open("/home/nolln/root/data/test/example.nwk", "r"); + fd[1] = io·open("/home/nolln/root/data/test/example.proc.nwk", "w"); - fd = io·open("/home/nolln/root/data/test/example.nwk", "r"); printf("starting\n"); - t = bio·readnewick(fd, arena); - io·close(fd); + t = bio·readnewick(fd[0], arena); + err = bio·writenewick(t, fd[1]); printf("ending\n"); + + io·close(fd[0]); io·close(fd[1]); return 0; } diff --git a/sys/libn/io.c b/sys/libn/io.c index 922dec3..7eec74e 100644 --- a/sys/libn/io.c +++ b/sys/libn/io.c @@ -67,6 +67,12 @@ io·write(Stream *s, int sz, int n, void *buf) return fwrite(buf, sz, n, s); } +int +io·flush(Stream *s) +{ + return fflush(s); +} + // ----------------------------------------------------------------------- // Seek -- cgit v1.2.1