aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-04-22 18:15:17 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-04-22 18:15:17 -0700
commitfb4c4ceba416376751196cdbbdb5f7240e08a405 (patch)
treebb874085d6d7715c24da544e9f71ef05e6a2a80a
parent583656a3537bc43a28c58111520143df04bf27f2 (diff)
fix: bug squashing with newick parser
-rw-r--r--compile_commands.json140
-rw-r--r--include/libbio.h4
-rw-r--r--include/libn.h1
-rw-r--r--sys/libbio/io/newick.c106
-rw-r--r--sys/libbio/phylo.c11
-rw-r--r--sys/libbio/test.c13
-rw-r--r--sys/libn/io.c6
7 files changed, 115 insertions, 166 deletions
diff --git a/compile_commands.json b/compile_commands.json
index 99168be..aa79c9d 100644
--- a/compile_commands.json
+++ b/compile_commands.json
@@ -7,146 +7,12 @@
"-fno-strict-aliasing",
"-fwrapv",
"-fms-extensions",
- "-ffreestanding",
- "-fno-builtin",
- "-nostdlib",
"-Iinclude",
"-o",
- "build/libc/string.o",
- "sys/libc/string.c"
+ "build/libbio/io/newick.o",
+ "sys/libbio/io/newick.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libc/string.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/test.o",
- "sys/libn/test.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/test.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-ffreestanding",
- "-fno-builtin",
- "-nostdlib",
- "-Iinclude",
- "-o",
- "build/libc/stdio.o",
- "sys/libc/stdio.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libc/stdio.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/error.o",
- "sys/libn/error.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/error.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/bufio.o",
- "sys/libn/bufio.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/bufio.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/memory.o",
- "sys/libn/memory.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/memory.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/io.o",
- "sys/libn/io.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/io.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/coro.o",
- "sys/libn/coro.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/coro.c"
- },
- {
- "arguments": [
- "clang",
- "-c",
- "-g",
- "-fno-strict-aliasing",
- "-fwrapv",
- "-fms-extensions",
- "-Iinclude",
- "-o",
- "build/libn/string.o",
- "sys/libn/string.c"
- ],
- "directory": "/home/nolln/root",
- "file": "sys/libn/string.c"
+ "file": "sys/libbio/io/newick.c"
}
] \ No newline at end of file
diff --git a/include/libbio.h b/include/libbio.h
index d8430b3..58540d4 100644
--- a/include/libbio.h
+++ b/include/libbio.h
@@ -16,7 +16,8 @@ typedef struct bio·Node
struct bio·Node *sibling;
} bio·Node;
-error phylo·addchild(bio·Node* parent, bio·Node* child);
+error phylo·addchild(bio·Node *parent, bio·Node* child);
+bool phylo·isleaf(bio·Node *node);
typedef struct bio·Tree
{
@@ -24,6 +25,7 @@ typedef struct bio·Tree
} bio·Tree;
bio·Tree bio·readnewick(Stream *file, mem·Allocator heap);
+error bio·writenewick(bio·Tree tree, Stream *out);
// -----------------------------------------------------------------------
// Sequences
diff --git a/include/libn.h b/include/libn.h
index 203a477..c8d510f 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -104,6 +104,7 @@ int io·readln(Stream *s, int n, byte* buf);
error io·putbyte(Stream *s, byte c);
int io·putstring(Stream *s, string str);
vlong io·write(Stream *s, int sz, int n, void *buf);
+int io·flush(Stream *s);
int io·seek(Stream *s, long off, enum SeekPos origin);
// -----------------------------------------------------------------------------
diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c
index b81e1bd..5bd2d9a 100644
--- a/sys/libbio/io/newick.c
+++ b/sys/libbio/io/newick.c
@@ -33,6 +33,7 @@ struct Token {
} lit;
};
+static
byte*
tokstr(struct Token tok)
{
@@ -62,6 +63,7 @@ tokstr(struct Token tok)
// Read
// TODO: Bounds checking on buffer
+static
struct Token
lex(Stream *s)
{
@@ -125,13 +127,14 @@ lex(Stream *s)
}
}
+static
struct Token
lex_nospace(Stream *s)
{
struct Token tok;
tok = lex(s);
if (tok.kind == tok·space) {
- lex_nospace(s);
+ tok = lex_nospace(s);
}
return tok;
@@ -147,6 +150,7 @@ struct Parser
mem·Allocator heap;
};
+static
error
parse(struct Parser *p)
{
@@ -155,17 +159,20 @@ parse(struct Parser *p)
bio·Node *root;
struct Token tok;
+ node = p->root;
for (;;) {
tok = lex_nospace(p->file);
switch (tok.kind) {
case tok·lparen:
- if (p->lev > 0) {
- errorf("incorrect format: opening another node before termination of last tree\n");
+ if (!p->root && p->lev > 0) {
+ errorf("parse format: attempted to make root at non-zero level");
goto ERROR;
}
+
node = p->heap.alloc(sizeof(*node));
memset(node, 0, sizeof(*node));
+
if (p->root) {
phylo·addchild(p->root, node);
root = p->root;
@@ -174,12 +181,14 @@ parse(struct Parser *p)
}
p->lev++;
- err = parse(p);
+ p->root = node;
+ p->tok = tok;
+ err = parse(p);
if (err) {
goto ERROR;
}
if (p->tok.kind != tok·rparen) {
- errorf("incorrect format: closing parentheses expected to proceed opening\n");
+ errorf("incorrect format: closing parentheses expected to proceed opening");
goto ERROR;
}
p->root = root;
@@ -193,14 +202,14 @@ parse(struct Parser *p)
/* Comments */
case tok·lbrak:
if (!node) {
- errorf("incorrect format: comment found in disallowed region\n");
+ errorf("incorrect format: comment found in disallowed region");
goto ERROR;
}
node->comment = str·new("");
while (tok.kind != tok·rbrak) {
tok = lex_nospace(p->file);
if (tok.kind == tok·eof || tok.kind == tok·nil) {
- errorf("incorrect format: unmatched comment bracket '['\n");
+ errorf("incorrect format: unmatched comment bracket '['");
goto ERROR;
}
str·append(node->comment, tokstr(tok));
@@ -208,18 +217,18 @@ parse(struct Parser *p)
break;
case tok·rbrak:
- errorf("incorrect format: end comment token found in disallowed region\n");
+ errorf("incorrect format: end comment token found in disallowed region");
goto ERROR;
break;
case tok·colon:
tok = lex_nospace(p->file);
if (tok.kind != tok·number) {
- errorf("incorrect format: expected number after colon\n");
+ errorf("incorrect format: expected number after colon");
goto ERROR;
}
if (node == nil) {
- errorf("parse error: attempting to set distance of nil node\n");
+ errorf("parse error: attempting to set distance of nil node");
goto ERROR;
}
node->dist = tok.lit.x;
@@ -230,23 +239,28 @@ parse(struct Parser *p)
break;
case tok·ident:
- if (p->tok.kind != tok·rparen) {
+ if (p->tok.kind == tok·rparen) {
if (!node) {
- errorf("parse error: attempting to set name of nil node\n");
+ errorf("parse error: attempting to set name of nil node");
goto ERROR;
}
node->name = str·new(tok.lit.s);
+ printf("settting name %s\n", node->name);
} else {
- if (p->tok.kind != tok·comma) {
- errorf("format error: misplaced identifier found\n");
+ if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) {
+ errorf("format error: misplaced identifier for leaf found");
goto ERROR;
}
- if (!node) {
- errorf("parse error: attempting to create child for no parent\n");
+
+ if (!p->root) {
+ errorf("parse error: attempting to create child for no parent");
goto ERROR;
}
+
node = p->heap.alloc(sizeof(*node));
memset(node, 0, sizeof(*node));
+ node->name = str·new(tok.lit.s);
+
phylo·addchild(p->root, node);
}
break;
@@ -254,12 +268,12 @@ parse(struct Parser *p)
case tok·number:
if (p->tok.kind == tok·rparen) {
if (p->lev == 0) {
- errorf("format error: support value on root not supported\n");
+ errorf("format error: support value on root not supported");
goto ERROR;
}
node->support = tok.lit.x;
} else {
- errorf("format error: found number in unexpected location\n");
+ errorf("format error: found number in unexpected location");
goto ERROR;
}
break;
@@ -267,23 +281,22 @@ parse(struct Parser *p)
case tok·semi:
io·ungetbyte(p->file, ';');
if (p->lev) {
- errorf("format error: uneven number of parentheses found at ';'\n");
+ errorf("format error: uneven number of parentheses found at ';'");
+ goto ERROR;
}
goto DONE;
- break;
case tok·eof:
goto DONE;
- break;
default:
+ errorf("parse error: unrecognized token");
goto ERROR;
}
p->tok = tok;
}
-
DONE:
p->tok = tok;
return 0;
@@ -299,6 +312,13 @@ bio·readnewick(Stream *file, mem·Allocator heap)
struct Parser p;
bio·Tree tree;
+ p = (struct Parser){
+ .lev = 0,
+ .root = nil,
+ .tok = (struct Token){ 0 },
+ .file = file,
+ .heap = heap,
+ };
err = parse(&p);
if (err) {
errorf("parsing failed\n");
@@ -311,3 +331,45 @@ bio·readnewick(Stream *file, mem·Allocator heap)
// -----------------------------------------------------------------------
// Write
+
+error
+dump(bio·Node *node, Stream *out) // Recursively writes the subtree rooted at node to out in parenthesized form; returns 1 for a nil node, 0 otherwise.
+{
+ if (!node) { // nil node: nothing is written and 1 is returned
+ return 1;
+ }
+ bio·Node *child;
+ if (node->nchild) { // node has children: emit "(child,child,...)" before the node's own name
+ io·putbyte(out, '(');
+
+ dump(node->child[0], out); // return value of the recursive calls is not checked
+ for (child = node->child[1]; child != nil; child = child->sibling) { // remaining children are walked from child[1] through the sibling links
+ io·putbyte(out, ',');
+ dump(child, out);
+ }
+
+ io·putbyte(out, ')');
+ }
+ if (node->name) { // the name is optional and skipped when unset
+ io·putstring(out, node->name);
+ }
+
+ if (node->parent) { // every node that has a parent gets a ":" suffix
+ io·putbyte(out, ':');
+ // TODO(nnoll): Format float
+ io·putbyte(out, '0'); // placeholder: a literal '0' is written; node->dist is not emitted here
+ }
+
+ return 0;
+}
+
+error
+bio·writenewick(bio·Tree tree, Stream *out) // Writes tree.root via dump(), appends ";" and a newline, flushes out; always returns 0.
+{
+ dump(tree.root, out); // result is ignored, so a nil root still yields just ";\n"
+ io·putbyte(out, ';');
+ io·putbyte(out, '\n');
+ io·flush(out); // flush result is not checked
+
+ return 0;
+}
diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c
index 8033e35..374cd08 100644
--- a/sys/libbio/phylo.c
+++ b/sys/libbio/phylo.c
@@ -6,9 +6,15 @@ error
phylo·addchild(bio·Node* parent, bio·Node* child)
{
bio·Node *it, *sibling;
- if (parent->nchild < 2) {
+ switch (parent->nchild) {
+ case 1:
+ parent->child[0]->sibling = child;
+ case 0:
parent->child[parent->nchild++] = child;
- } else {
+ break;
+
+ default:
+ sibling = parent->child[1];
for (it = parent->child[1]->sibling; it != nil; it = it->sibling) {
sibling = it;
}
@@ -16,5 +22,6 @@ phylo·addchild(bio·Node* parent, bio·Node* child)
parent->nchild++;
}
+ child->parent = parent;
return 0;
}
diff --git a/sys/libbio/test.c b/sys/libbio/test.c
index 00345c4..18bb993 100644
--- a/sys/libbio/test.c
+++ b/sys/libbio/test.c
@@ -47,14 +47,19 @@ main()
{
init();
+ error err;
bio·Tree t;
- Stream *fd;
+ Stream *fd[2];
+
+ fd[0] = io·open("/home/nolln/root/data/test/example.nwk", "r");
+ fd[1] = io·open("/home/nolln/root/data/test/example.proc.nwk", "w");
- fd = io·open("/home/nolln/root/data/test/example.nwk", "r");
printf("starting\n");
- t = bio·readnewick(fd, arena);
- io·close(fd);
+ t = bio·readnewick(fd[0], arena);
+ err = bio·writenewick(t, fd[1]);
printf("ending\n");
+
+ io·close(fd[0]); io·close(fd[1]);
return 0;
}
diff --git a/sys/libn/io.c b/sys/libn/io.c
index 922dec3..7eec74e 100644
--- a/sys/libn/io.c
+++ b/sys/libn/io.c
@@ -67,6 +67,12 @@ io·write(Stream *s, int sz, int n, void *buf)
return fwrite(buf, sz, n, s);
}
+int
+io·flush(Stream *s) // Thin wrapper: passes s to fflush() and returns its result unchanged.
+{
+ return fflush(s);
+}
+
// -----------------------------------------------------------------------
// Seek