#include #include #include // ----------------------------------------------------------------------- // Tokens enum TokenKind { tok·nil, tok·eof, tok·space, tok·ident, tok·number, tok·lparen, tok·rparen, tok·lbrak, tok·rbrak, tok·comma, tok·semi, tok·colon, NUM_TOKENS, }; struct Token { enum TokenKind kind; union { byte *s; double x; } lit; }; byte* tokstr(struct Token tok) { static byte b[50]; switch (tok.kind) { case tok·nil: return ""; case tok·eof: return nil; case tok·space: return " "; case tok·ident: return tok.lit.s; case tok·lparen: return "("; case tok·rparen: return ")"; case tok·lbrak: return "["; case tok·rbrak: return "]"; case tok·comma: return ","; case tok·semi: return ";"; case tok·colon: return ":"; case tok·number: snprintf(b, arrlen(b), "%f", tok.lit.x); return b; default: return nil; } } // ----------------------------------------------------------------------- // Read // TODO: Bounds checking on buffer struct Token lex(Stream *s) { struct Token tok; byte *c, b[1024]; c = b; *c = io·getbyte(s); if (isspace(*c)) { while (isspace(*c)) { *(++c) = io·getbyte(s); } io·ungetbyte(s, *c); Assert(c - b < 1024); *c = 0; tok.kind = tok·space; tok.lit.s = b; return tok; } switch (*c) { case EOF: tok.kind = tok·eof; return tok; case '(': tok.kind = tok·lparen; return tok; case ')': tok.kind = tok·rparen; return tok; case '[': tok.kind = tok·lbrak; return tok; case ']': tok.kind = tok·rbrak; return tok; case ',': tok.kind = tok·comma; return tok; case ';': tok.kind = tok·semi; return tok; case ':': tok.kind = tok·colon; return tok; case '.': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': while (isdigit(*c)) { NUM: *(++c) = io·getbyte(s); } if (*c == '.') goto NUM; io·ungetbyte(s, *c); Assert(c - b < 1024); *c = 0; tok.kind = tok·number; tok.lit.x = atof(b); return tok; default: while (isalnum(*c)) { *(++c) = io·getbyte(s); } io·ungetbyte(s, *c); Assert(c - b < 1024); *c = 0; tok.kind = tok·ident; tok.lit.s = b; return tok; } } struct Token lex_nospace(Stream *s) { struct Token tok; tok = lex(s); if (tok.kind == tok·space) { lex_nospace(s); } return tok; } struct Parser { int lev; bio·Node *root; struct Token tok; Stream *file; mem·Allocator heap; }; error parse(struct Parser *p) { error err; bio·Node *node; bio·Node *root; struct Token tok; for (;;) { tok = lex_nospace(p->file); switch (tok.kind) { case tok·lparen: if (p->lev > 0) { errorf("incorrect format: opening another node before termination of last tree\n"); goto ERROR; } node = p->heap.alloc(sizeof(*node)); memset(node, 0, sizeof(*node)); if (p->root) { phylo·addchild(p->root, node); root = p->root; } else { root = node; } p->lev++; err = parse(p); if (err) { goto ERROR; } if (p->tok.kind != tok·rparen) { errorf("incorrect format: closing parentheses expected to proceed opening\n"); goto ERROR; } p->root = root; break; case tok·rparen: p->lev--; goto DONE; /* Comments */ case tok·lbrak: if (!node) { errorf("incorrect format: comment found in disallowed region\n"); goto ERROR; } node->comment = str·new(""); while (tok.kind != tok·rbrak) { tok = lex_nospace(p->file); if (tok.kind == tok·eof || tok.kind == tok·nil) { errorf("incorrect format: unmatched comment bracket '['\n"); goto ERROR; } str·append(node->comment, tokstr(tok)); } break; case tok·rbrak: errorf("incorrect format: end comment token found in disallowed region\n"); goto ERROR; break; case tok·colon: tok = lex_nospace(p->file); if (tok.kind != tok·number) { errorf("incorrect format: expected number after colon\n"); goto ERROR; } if (node == nil) { errorf("parse error: attempting to set distance of nil node\n"); goto ERROR; } node->dist = tok.lit.x; break; case tok·comma: node = nil; break; case tok·ident: if (p->tok.kind != tok·rparen) { if (!node) { errorf("parse error: attempting to set name of nil node\n"); goto ERROR; } node->name = str·new(tok.lit.s); } else { if (p->tok.kind != tok·comma) { errorf("format error: misplaced identifier found\n"); goto ERROR; } if (!node) { errorf("parse error: attempting to create child for no parent\n"); goto ERROR; } node = p->heap.alloc(sizeof(*node)); memset(node, 0, sizeof(*node)); phylo·addchild(p->root, node); } break; case tok·number: if (p->tok.kind == tok·rparen) { if (p->lev == 0) { errorf("format error: support value on root not supported\n"); goto ERROR; } node->support = tok.lit.x; } else { errorf("format error: found number in unexpected location\n"); goto ERROR; } break; case tok·semi: io·ungetbyte(p->file, ';'); if (p->lev) { errorf("format error: uneven number of parentheses found at ';'\n"); } goto DONE; break; case tok·eof: goto DONE; break; default: goto ERROR; } p->tok = tok; } DONE: p->tok = tok; return 0; ERROR: // TODO(nnoll): cleanup return 1; } bio·Tree bio·readnewick(Stream *file, mem·Allocator heap) { error err; struct Parser p; bio·Tree tree; err = parse(&p); if (err) { errorf("parsing failed\n"); return tree; } tree.root = p.root; return tree; } // ----------------------------------------------------------------------- // Write