aboutsummaryrefslogtreecommitdiff
path: root/sys/libbio/newick.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libbio/newick.c')
-rw-r--r--sys/libbio/newick.c414
1 files changed, 0 insertions, 414 deletions
diff --git a/sys/libbio/newick.c b/sys/libbio/newick.c
deleted file mode 100644
index 5e6d30a..0000000
--- a/sys/libbio/newick.c
+++ /dev/null
@@ -1,414 +0,0 @@
-#include <u.h>
-#include <base.h>
-#include <libbio.h>
-
-// -----------------------------------------------------------------------
-// Tokens
-
-enum TokenKind
-{
- tok·nil,
- tok·eof,
- tok·space,
- tok·ident,
- tok·number,
- tok·lparen,
- tok·rparen,
- tok·lbrak,
- tok·rbrak,
- tok·comma,
- tok·semi,
- tok·colon,
-
- NUM_TOKENS,
-};
-
-
-struct Token {
- enum TokenKind kind;
- union
- {
- byte *s;
- double x;
- } lit;
-};
-
-static
-byte*
-tokstr(struct Token tok)
-{
- static byte b[50];
- switch (tok.kind) {
- case tok·nil: return "";
- case tok·eof: return nil;
- case tok·space: return " ";
- case tok·ident: return tok.lit.s;
- case tok·lparen: return "(";
- case tok·rparen: return ")";
- case tok·lbrak: return "[";
- case tok·rbrak: return "]";
- case tok·comma: return ",";
- case tok·semi: return ";";
- case tok·colon: return ":";
- case tok·number:
- snprintf(b, arrlen(b), "%f", tok.lit.x);
- return b;
- default:
- return nil;
- }
-}
-
-
-// -----------------------------------------------------------------------
-// Read
-
-// TODO: Bounds checking on buffer
-static
-struct Token
-lex(io·Peeker s, void* fp)
-{
-#define isvalidchar(C) ((C) == '!') || \
- ('\"' < (C) && (C) < '\'') || \
- (')' < (C) && (C) < '+') || \
- (',' < (C) && (C) < ':') || \
- (':' < (C) && (C) < '[') || \
- ((C) == '\\') || \
- (']' < (C) && (C) <= '~')
- byte *c;
- struct Token tok;
- static byte b[1024];
- c = b;
- *c = s.get(fp);
-
- if (isspace(*c)) {
- while (isspace(*c)) {
- *(++c) = s.get(fp);
- }
-
- s.unget(fp, *c);
- assert(c - b < 1024);
-
- *c = 0;
- tok.kind = tok·space;
- tok.lit.s = b;
- return tok;
- }
-
- switch (*c) {
- case EOF: tok.kind = tok·eof; return tok;
- case '(': tok.kind = tok·lparen; return tok;
- case ')': tok.kind = tok·rparen; return tok;
- case '[': tok.kind = tok·lbrak; return tok;
- case ']': tok.kind = tok·rbrak; return tok;
- case ',': tok.kind = tok·comma; return tok;
- case ';': tok.kind = tok·semi; return tok;
- case ':': tok.kind = tok·colon; return tok;
-
- case '.':
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- while (isdigit(*c)) {
- NUM: *(++c) = s.get(fp);
- }
- if (*c == '.') goto NUM;
- if (isvalidchar(*c)) goto IDENT;
-
- s.unget(fp, *c);
- assert(c - b < 1024);
-
- *c = 0;
- tok.kind = tok·number;
- tok.lit.x = atof(b);
- return tok;
-
- case '\"':
- while ((*c) != '\"') {
- *(++c) = s.get(fp);
- }
- assert(c - b < 1024);
-
- *c = '\0';
- tok.kind = tok·ident;
- tok.lit.s = b + 1;
- return tok;
-
- default:
- IDENT:
- while (isvalidchar(*c)) {
- *(++c) = s.get(fp);
- }
- s.unget(fp, *c);
- assert(c - b < 1024);
-
- *c = '\0';
- tok.kind = tok·ident;
- tok.lit.s = b;
- return tok;
- }
-#undef isvalidchar
-}
-
-static
-struct Token
-lex_nospace(io·Peeker s, void *impl)
-{
- struct Token tok;
- tok = lex(s, impl);
- if (tok.kind == tok·space) {
- tok = lex_nospace(s, impl);
- }
-
- return tok;
-}
-
-struct Parser
-{
- int lev;
- bio·Node *root;
- struct Token tok;
-
- void *io;
- io·Peeker file;
- void *heap;
- mem·Allocator mem;
-};
-
-static
-error
-parse(struct Parser *p)
-{
- error err;
- bio·Node *node;
- bio·Node *root;
- struct Token tok;
-
- node = p->root;
- for (;;) {
- tok = lex_nospace(p->file, p->io);
-
- switch (tok.kind) {
- case tok·lparen:
- if (!p->root && p->lev > 0) {
- errorf("parse format: attempted to make root at non-zero level");
- goto ERROR;
- }
-
- node = p->mem.alloc(p->heap, 1, sizeof(*node));
- memset(node, 0, sizeof(*node));
-
- if (p->root) {
- phylo·addchild(p->root, node);
- root = p->root;
- } else {
- root = node;
- }
-
- p->lev++;
- p->root = node;
- p->tok = tok;
- err = parse(p);
- if (err) {
- goto ERROR;
- }
- if (p->tok.kind != tok·rparen) {
- errorf("incorrect format: closing parentheses expected to proceed opening");
- goto ERROR;
- }
- p->root = root;
- // NOTE(nnoll): We don't want to override the state of p->tok here!
- // Jump straight to grabbing next token.
- continue;
-
- case tok·rparen:
- p->lev--;
- goto DONE;
-
- /* Comments */
- case tok·lbrak:
- if (!node) {
- errorf("incorrect format: comment found in disallowed region");
- goto ERROR;
- }
- node->comment = str·make("");
- while (tok.kind != tok·rbrak) {
- tok = lex_nospace(p->file, p->io);
- if (tok.kind == tok·eof || tok.kind == tok·nil) {
- errorf("incorrect format: unmatched comment bracket '['");
- goto ERROR;
- }
- str·append(&node->comment, tokstr(tok));
- }
- break;
-
- case tok·rbrak:
- errorf("incorrect format: end comment token found in disallowed region");
- goto ERROR;
- break;
-
- case tok·colon:
- tok = lex_nospace(p->file, p->io);
- if (tok.kind != tok·number) {
- errorf("incorrect format: expected number after colon");
- goto ERROR;
- }
- if (node == nil) {
- errorf("parse error: attempting to set distance of nil node");
- goto ERROR;
- }
- node->dist = tok.lit.x;
- break;
-
- case tok·comma:
- node = nil;
- break;
-
- case tok·ident:
- if (p->tok.kind == tok·rparen) {
- if (!node) {
- errorf("parse error: attempting to set name of nil node");
- goto ERROR;
- }
- node->name = str·make(tok.lit.s);
- } else {
- if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) {
- errorf("format error: misplaced identifier for leaf found");
- goto ERROR;
- }
-
- if (!p->root) {
- errorf("parse error: attempting to create child for no parent");
- goto ERROR;
- }
-
- node = p->mem.alloc(p->heap, 1, sizeof(*node));
- memset(node, 0, sizeof(*node));
-
- node->name = str·make(tok.lit.s);
-
- phylo·addchild(p->root, node);
- }
- break;
-
- case tok·number:
- if (p->tok.kind == tok·rparen) {
- if (p->lev == 0) {
- errorf("format error: support value on root not supported");
- goto ERROR;
- }
- node->support = tok.lit.x;
- } else {
- errorf("format error: found number in unexpected location");
- goto ERROR;
- }
- break;
-
- case tok·semi:
- p->file.unget(p->io, ';');
- if (p->lev) {
- errorf("format error: uneven number of parentheses found at ';'");
- goto ERROR;
- }
- goto DONE;
-
- case tok·eof:
- goto DONE;
-
- default:
- errorf("parse error: unrecognized token");
- goto ERROR;
- }
-
- p->tok = tok;
- }
-
-DONE:
- p->tok = tok;
- return 0;
-ERROR:
- // TODO(nnoll): cleanup
- return 1;
-}
-
-int
-bio·readnewick(io·Peeker stream, void *s, bio·Tree *tree)
-{
- error err;
- struct Parser p;
-
- if (!tree) {
- errorf("tree pointer nil");
- return 0;
- }
-
- p = (struct Parser){
- .lev = 0,
- .root = nil,
- .tok = (struct Token){ 0 },
- .io = s,
- .file = stream,
- .mem = tree->mem,
- .heap = tree->heap,
- };
- err = parse(&p);
- if (err) {
- errorf("parsing failed\n");
- return 0;
- }
-
- tree->root = p.root;
- tree->nleaf = 0;
- tree->root->nnode = 0;
-
- phylo·countleafs(tree->root, &tree->nleaf);
- phylo·countnodes(tree->root, &tree->root->nnode);
-
- return 1;
-}
-
-// -----------------------------------------------------------------------
-// Write
-
-static
-error
-dump(bio·Node *node, void *impl, io·Putter out)
-{
- byte b[24];
-
- if (!node) {
- return 1;
- }
-
- bio·Node *child;
- if (node->nchild) {
- out.put(impl, '(');
-
- dump(node->child, impl, out);
- for (child = node->child->sibling; child != nil; child = child->sibling) {
- out.put(impl, ',');
- dump(child, impl, out);
- }
-
- out.put(impl, ')');
- }
- if (node->name) {
- out.puts(impl, node->name);
- }
-
- if (node->parent) {
- out.put(impl, ':');
- snprintf(b, arrlen(b), "%f", node->dist);
- out.puts(impl, b);
- }
-
- return 0;
-}
-
-error
-bio·writenewick(bio·Tree tree, io·Putter out, void* impl)
-{
- dump(tree.root, impl, out);
- out.put(impl, ';');
- out.put(impl, '\n');
-
- return 0;
-}