From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/libbio/newick.c | 414 ---------------------------------------------------- 1 file changed, 414 deletions(-) delete mode 100644 sys/libbio/newick.c (limited to 'sys/libbio/newick.c') diff --git a/sys/libbio/newick.c b/sys/libbio/newick.c deleted file mode 100644 index 5e6d30a..0000000 --- a/sys/libbio/newick.c +++ /dev/null @@ -1,414 +0,0 @@ -#include -#include -#include - -// ----------------------------------------------------------------------- -// Tokens - -enum TokenKind -{ - tok·nil, - tok·eof, - tok·space, - tok·ident, - tok·number, - tok·lparen, - tok·rparen, - tok·lbrak, - tok·rbrak, - tok·comma, - tok·semi, - tok·colon, - - NUM_TOKENS, -}; - - -struct Token { - enum TokenKind kind; - union - { - byte *s; - double x; - } lit; -}; - -static -byte* -tokstr(struct Token tok) -{ - static byte b[50]; - switch (tok.kind) { - case tok·nil: return ""; - case tok·eof: return nil; - case tok·space: return " "; - case tok·ident: return tok.lit.s; - case tok·lparen: return "("; - case tok·rparen: return ")"; - case tok·lbrak: return "["; - case tok·rbrak: return "]"; - case tok·comma: return ","; - case tok·semi: return ";"; - case tok·colon: return ":"; - case tok·number: - snprintf(b, arrlen(b), "%f", tok.lit.x); - return b; - default: - return nil; - } -} - - -// ----------------------------------------------------------------------- -// Read - -// TODO: Bounds checking on buffer -static -struct Token -lex(io·Peeker s, void* fp) -{ -#define isvalidchar(C) ((C) == '!') || \ - ('\"' < (C) && (C) < '\'') || \ - (')' < (C) && (C) < '+') || \ - (',' < (C) && (C) < ':') || \ - (':' < (C) && (C) < '[') || \ - ((C) == '\\') || \ - (']' < (C) && (C) <= '~') - byte *c; - struct Token tok; - static byte b[1024]; - c = b; - *c = s.get(fp); - - if (isspace(*c)) { - while (isspace(*c)) { - *(++c) = s.get(fp); - } - - s.unget(fp, *c); - assert(c - b < 1024); - - *c = 0; - tok.kind = tok·space; - tok.lit.s = b; - return tok; - } - - switch (*c) { - case EOF: tok.kind = tok·eof; return tok; - case '(': tok.kind = tok·lparen; return tok; - case ')': tok.kind = tok·rparen; return tok; - case '[': tok.kind = tok·lbrak; return tok; - case ']': tok.kind = tok·rbrak; return tok; - case ',': tok.kind = tok·comma; return tok; - case ';': tok.kind = tok·semi; return tok; - case ':': tok.kind = tok·colon; return tok; - - case '.': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - while (isdigit(*c)) { - NUM: *(++c) = s.get(fp); - } - if (*c == '.') goto NUM; - if (isvalidchar(*c)) goto IDENT; - - s.unget(fp, *c); - assert(c - b < 1024); - - *c = 0; - tok.kind = tok·number; - tok.lit.x = atof(b); - return tok; - - case '\"': - while ((*c) != '\"') { - *(++c) = s.get(fp); - } - assert(c - b < 1024); - - *c = '\0'; - tok.kind = tok·ident; - tok.lit.s = b + 1; - return tok; - - default: - IDENT: - while (isvalidchar(*c)) { - *(++c) = s.get(fp); - } - s.unget(fp, *c); - assert(c - b < 1024); - - *c = '\0'; - tok.kind = tok·ident; - tok.lit.s = b; - return tok; - } -#undef isvalidchar -} - -static -struct Token -lex_nospace(io·Peeker s, void *impl) -{ - struct Token tok; - tok = lex(s, impl); - if (tok.kind == tok·space) { - tok = lex_nospace(s, impl); - } - - return tok; -} - -struct Parser -{ - int lev; - bio·Node *root; - struct Token tok; - - void *io; - io·Peeker file; - void *heap; - mem·Allocator mem; -}; - -static -error -parse(struct Parser *p) -{ - error err; - bio·Node *node; - bio·Node *root; - struct Token tok; - - node = p->root; - for (;;) { - tok = lex_nospace(p->file, p->io); - - switch (tok.kind) { - case tok·lparen: - if (!p->root && p->lev > 0) { - errorf("parse format: attempted to make root at non-zero level"); - goto ERROR; - } - - node = p->mem.alloc(p->heap, 1, sizeof(*node)); - memset(node, 0, sizeof(*node)); - - if (p->root) { - phylo·addchild(p->root, node); - root = p->root; - } else { - root = node; - } - - p->lev++; - p->root = node; - p->tok = tok; - err = parse(p); - if (err) { - goto ERROR; - } - if (p->tok.kind != tok·rparen) { - errorf("incorrect format: closing parentheses expected to proceed opening"); - goto ERROR; - } - p->root = root; - // NOTE(nnoll): We don't want to override the state of p->tok here! - // Jump straight to grabbing next token. - continue; - - case tok·rparen: - p->lev--; - goto DONE; - - /* Comments */ - case tok·lbrak: - if (!node) { - errorf("incorrect format: comment found in disallowed region"); - goto ERROR; - } - node->comment = str·make(""); - while (tok.kind != tok·rbrak) { - tok = lex_nospace(p->file, p->io); - if (tok.kind == tok·eof || tok.kind == tok·nil) { - errorf("incorrect format: unmatched comment bracket '['"); - goto ERROR; - } - str·append(&node->comment, tokstr(tok)); - } - break; - - case tok·rbrak: - errorf("incorrect format: end comment token found in disallowed region"); - goto ERROR; - break; - - case tok·colon: - tok = lex_nospace(p->file, p->io); - if (tok.kind != tok·number) { - errorf("incorrect format: expected number after colon"); - goto ERROR; - } - if (node == nil) { - errorf("parse error: attempting to set distance of nil node"); - goto ERROR; - } - node->dist = tok.lit.x; - break; - - case tok·comma: - node = nil; - break; - - case tok·ident: - if (p->tok.kind == tok·rparen) { - if (!node) { - errorf("parse error: attempting to set name of nil node"); - goto ERROR; - } - node->name = str·make(tok.lit.s); - } else { - if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) { - errorf("format error: misplaced identifier for leaf found"); - goto ERROR; - } - - if (!p->root) { - errorf("parse error: attempting to create child for no parent"); - goto ERROR; - } - - node = p->mem.alloc(p->heap, 1, sizeof(*node)); - memset(node, 0, sizeof(*node)); - - node->name = str·make(tok.lit.s); - - phylo·addchild(p->root, node); - } - break; - - case tok·number: - if (p->tok.kind == tok·rparen) { - if (p->lev == 0) { - errorf("format error: support value on root not supported"); - goto ERROR; - } - node->support = tok.lit.x; - } else { - errorf("format error: found number in unexpected location"); - goto ERROR; - } - break; - - case tok·semi: - p->file.unget(p->io, ';'); - if (p->lev) { - errorf("format error: uneven number of parentheses found at ';'"); - goto ERROR; - } - goto DONE; - - case tok·eof: - goto DONE; - - default: - errorf("parse error: unrecognized token"); - goto ERROR; - } - - p->tok = tok; - } - -DONE: - p->tok = tok; - return 0; -ERROR: - // TODO(nnoll): cleanup - return 1; -} - -int -bio·readnewick(io·Peeker stream, void *s, bio·Tree *tree) -{ - error err; - struct Parser p; - - if (!tree) { - errorf("tree pointer nil"); - return 0; - } - - p = (struct Parser){ - .lev = 0, - .root = nil, - .tok = (struct Token){ 0 }, - .io = s, - .file = stream, - .mem = tree->mem, - .heap = tree->heap, - }; - err = parse(&p); - if (err) { - errorf("parsing failed\n"); - return 0; - } - - tree->root = p.root; - tree->nleaf = 0; - tree->root->nnode = 0; - - phylo·countleafs(tree->root, &tree->nleaf); - phylo·countnodes(tree->root, &tree->root->nnode); - - return 1; -} - -// ----------------------------------------------------------------------- -// Write - -static -error -dump(bio·Node *node, void *impl, io·Putter out) -{ - byte b[24]; - - if (!node) { - return 1; - } - - bio·Node *child; - if (node->nchild) { - out.put(impl, '('); - - dump(node->child, impl, out); - for (child = node->child->sibling; child != nil; child = child->sibling) { - out.put(impl, ','); - dump(child, impl, out); - } - - out.put(impl, ')'); - } - if (node->name) { - out.puts(impl, node->name); - } - - if (node->parent) { - out.put(impl, ':'); - snprintf(b, arrlen(b), "%f", node->dist); - out.puts(impl, b); - } - - return 0; -} - -error -bio·writenewick(bio·Tree tree, io·Putter out, void* impl) -{ - dump(tree.root, impl, out); - out.put(impl, ';'); - out.put(impl, '\n'); - - return 0; -} -- cgit v1.2.1