aboutsummaryrefslogtreecommitdiff
path: root/sys/libbio
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libbio')
-rw-r--r--sys/libbio/align.c178
-rw-r--r--sys/libbio/fasta.c393
-rw-r--r--sys/libbio/newick.c414
-rw-r--r--sys/libbio/phylo.c427
-rw-r--r--sys/libbio/rules.mk28
-rw-r--r--sys/libbio/simulate.c120
-rw-r--r--sys/libbio/test.c283
7 files changed, 0 insertions, 1843 deletions
diff --git a/sys/libbio/align.c b/sys/libbio/align.c
deleted file mode 100644
index 20a57df..0000000
--- a/sys/libbio/align.c
+++ /dev/null
@@ -1,178 +0,0 @@
-#include <u.h>
-#include <libn.h>
-#include <libn/macro/qsort.h>
-#include <libbio.h>
-
-// -----------------------------------------------------------------------
-// globals
-
-uint64 aln·shft = (2ULL * (aln·K - 1ULL));
-uint64 aln·mask = (1ULL << (2*aln·K)) - 1ULL;
-
-// -----------------------------------------------------------------------
-// static data
-
-static uint64 nuctab[256] = {
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-};
-
-// -----------------------------------------------------------------------
-// hash functions
-
-enum
-{
- MURM641 = 0xff51afd7ed558ccd,
- MURM642 = 0xc4ceb9fe1a85ec53
-};
-
-static
-uint64
-minihash(uint64 x, uint64 mask)
-{
- x = (~x + (x << 21)) & mask;
- x = x ^ (x >> 24);
- x = (x + (x << 3) + (x << 8)) & mask;
- x = x ^ (x >> 14);
- x = (x + (x << 2) + (x << 4)) & mask;
- x = x ^ (x >> 28);
- x = (x + (x << 31));
-
- return x;
-}
-
-static
-uint64
-murmurhash(uint64 x, uint64 mask)
-{
- x = x ^ (x >> 33);
- x = (x * MURM641);
- x = x ^ (x >> 33);
- x = (x * MURM642);
- x = x ^ (x >> 33);
-
- return x;
-}
-
-// -----------------------------------------------------------------------
-// locality sensitive hashing (with spatial extent)
-
-static
-void
-sortpos(uintptr len, uint64 vals[], int locs[])
-{
- int tmpi;
- uint64 tmpu64;
-
-#define LESS(i, j) (locs[i] < locs[j])
-#define SWAP(i, j) (tmpu64 = vals[i], tmpi = locs[i], \
- vals[i] = vals[j], locs[i] = locs[j], \
- vals[j] = tmpu64 , locs[j] = tmpi)
- QSORT(len, LESS, SWAP);
-#undef LESS
-#undef SWAP
-}
-
-/*
- * sketch
- * @param seq: '0' terminated string
- * @param len: number of sequential sketches to keep
- * @param vals: buffer to store hashes of sketch.
- * @param locs: buffer to store location of sketch hashes
- */
-error
-aln·sketch(byte *seq, int len, uint64 *vals[aln·N], int *locs[aln·N])
-{
- int i, n, l, *loc;
- uint64 kmer, h[3], *val;
- int tmpi[2];
- uint64 tmpu[2];
-
- for(n = 0; n < aln·N; n++) {
- for(l = 0; l < len; l++) {
- vals[n][l] = UINT64_MAX;
- }
- }
-
- kmer = UINT64_MAX;
- for(l = 0; *seq != '\0'; seq++, l++) {
- kmer = ((kmer << 2) | nuctab[*seq]) & aln·mask;
-
- h[0] = minihash(kmer, aln·mask);
- h[1] = murmurhash(kmer, aln·mask);
-
- for(n = 0; n < aln·N; n++) {
- val = vals[n];
- loc = locs[n];
-
- h[2] = (h[0] + n * h[1]) & aln·mask;
- for (i = 0; i < len && h[2] < val[i]; i++) {
- ;
- }
-
- tmpu[1] = h[2];
- tmpi[1] = l;
- for(i -= 1; i >= 0; i--) {
- tmpu[0] = tmpu[1], tmpu[1] = val[i], val[i] = tmpu[0];
- tmpi[0] = tmpi[1], tmpi[1] = loc[i], loc[i] = tmpi[0];
- }
- }
- }
-
- for(n = 0; n < aln·N; n++) {
- sortpos(len, vals[n], locs[n]);
- }
-
- return 0;
-}
-
-static
-int
-lessarrs(int len, uint64 a[], uint64 b[])
-{
- int l;
-
- for(l = 0; l < len; l++) {
- if (a[l] < b[l]) return 1;
- if (a[l] > b[l]) return 0;
- }
-
- return 0;
-}
-
-static
-void
-swaparrs(int len, uint64 a[], uint64 b[])
-{
- int l;
- uint64 tmp;
-
- for(l = 0; l < len; l++) {
- tmp = a[l], a[l] = b[l], b[l] = tmp;
- }
-}
-
-error
-aln·sort(uintptr n, int len, uint64 *vals)
-{
-#define LESS(i, j) (lessarrs(len, vals+((i)*len), vals+((j)*len)))
-#define SWAP(i, j) (swaparrs(len, vals+((i)*len), vals+((j)*len)))
- QSORT(n, LESS, SWAP);
-#undef LESS
-#undef SWAP
- return 0;
-}
diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c
deleted file mode 100644
index 3788544..0000000
--- a/sys/libbio/fasta.c
+++ /dev/null
@@ -1,393 +0,0 @@
-#include <u.h>
-#include <base.h>
-#include <libbio.h>
-
-#define INIT_NM_SIZE 128
-#define INIT_SQ_SIZE 4096
-
-struct SeqBuf
-{
- mem·Allocator mem;
- void *heap;
-
- int cap, off;
- byte *it, b[];
-};
-
-static
-void
-reset(struct SeqBuf *sb)
-{
- sb->off = 0;
- sb->it = sb->b;
-}
-
-static
-error
-grow(struct SeqBuf **sb, int min)
-{
- void* heap;
- mem·Allocator mem;
-
- vlong newcap;
- struct SeqBuf *old, *new;
-
- old = *sb;
- mem = old->mem;
- heap = old->heap;
-
- assert((*sb)->cap <= (SIZE_MAX - 1) / 2);
- newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min));
- assert(newcap >= (*sb)->cap+min);
-
- if(new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) {
- errorf("memory: could not allocate new buffer\n");
- return 1;
- }
-
- memcpy(new, old, sizeof(*new) + (*sb)->cap);
-
- new->cap = newcap;
- new->it = new->b + (old->it - old->b);
- mem.free(heap, old);
-
- *sb = new;
- return 0;
-}
-
-static
-error
-put(struct SeqBuf **sb, byte c)
-{
- int err;
- struct SeqBuf *sq;
-
- sq = *sb;
- if(sq->it < (sq->b + sq->cap)) {
- *sq->it++ = c;
- return 0;
- }
-
- if(err = grow(sb, 1), err) {
- errorf("memory fail: could not allocate more buffer\n");
- sq->mem.free(sq->heap, sq);
- return 1;
- }
-
- *((*sb)->it++) = c;
- return 0;
-}
-
-static
-error
-push(struct SeqBuf **sb, int n, void *buf)
-{
- int d, err;
- struct SeqBuf *seq;
-
- seq = *sb;
- if(d = (seq->cap - (seq->it - seq->b)), d < n) {
- assert(d >= 0);
- if (err = grow(sb, n-d), err) {
- errorf("memory fail: could not allocate more buffer\n");
- seq->mem.free(seq->heap, seq);
- return 1;
- }
- }
- seq = *sb;
-
- memcpy(seq->it, buf, n);
- seq->it += n;
-
- return 0;
-}
-
-// -----------------------------------------------------------------------
-// sequence data
-
-struct bio·SeqReader {
- byte eof;
- io·Reader rdr;
- void *io;
-
- struct SeqBuf *seq;
-
- /* read buffer */
- byte *b, *bend, buf[4*4098];
-};
-
-static
-error
-fill(bio·SeqReader *rdr)
-{
- int n;
- // NOTE: This could lead to an infinite loop.
- if(rdr->eof)
- return 0;
-
- n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf);
- if(n < 0) {
- errorf("read: no data obtained from reader\n");
- return 1;
- }
- rdr->b = rdr->buf;
- rdr->bend = rdr->b + n;
- if(rdr->eof = (n < arrlen(rdr->buf)), rdr->eof)
- *rdr->bend++ = '\0';
-
- return 0;
-}
-
-bio·SeqReader*
-bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap)
-{
- error err;
- bio·SeqReader *r;
-
- r = mem.alloc(heap, 1, sizeof(bio·SeqReader));
- r->rdr = rdr;
- r->io = io;
- r->eof = 0;
-
- r->seq = mem.alloc(heap, 1, sizeof(*r->seq) + INIT_NM_SIZE + INIT_SQ_SIZE);
- r->seq->mem = mem;
- r->seq->heap = heap;
- r->seq->it = r->seq->b;
- r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE;
-
- if (err=fill(r), err) {
- errorf("fill: could not populate buffer\n");
- goto ERROR;
- }
-
- return r;
-
-ERROR:
- mem.free(heap, r->seq);
- mem.free(heap, r);
- return nil;
-}
-
-error
-bio·closeseq(bio·SeqReader *rdr)
-{
- mem·Allocator mem;
- void *heap;
-
- mem = rdr->seq->mem;
- heap = rdr->seq->heap;
-
- mem.free(heap, rdr->seq);
- mem.free(heap, rdr);
-
- return 0;
-}
-
-
-static
-error
-readfasta(bio·SeqReader *rdr, bio·Seq *seq, byte hdr, byte stop)
-{
- error err;
- byte *beg;
-
- if(rdr->eof && rdr->b == rdr->bend-1)
- return EOF;
-
- reset(rdr->seq);
-
- // NOTE: Can this case happen?
- assert(rdr->b != rdr->bend);
- if(*rdr->b++ != hdr) {
- errorf("fasta/q format: expected '%c', found '%c'\n", hdr, *rdr->b--);
- return 1;
- }
-
-NAME:
- beg = rdr->b;
- while(rdr->b != rdr->bend) {
- if(*rdr->b++ == '\n') {
- push(&rdr->seq, (rdr->b - 1) - beg, beg);
- goto SEQ;
- }
- }
- push(&rdr->seq, rdr->b - beg, beg);
-
- if(err=fill(rdr), err) {
- errorf("read: could not populate buffer\n");
- return 1;
- }
- goto NAME;
-
-SEQ:
- put(&rdr->seq, '\0');
- rdr->seq->off = rdr->seq->it - rdr->seq->b;
-
-SEQLOOP:
- beg = rdr->b;
- while(rdr->b != rdr->bend) {
- if(*rdr->b == '\n') {
- push(&rdr->seq, rdr->b - beg, beg);
- beg = rdr->b + 1;
- }
-
- if(*rdr->b == stop || *rdr->b == '\0')
- goto SUCCESS;
-
- rdr->b++;
- }
-
- push(&rdr->seq, rdr->b - beg, beg);
-
- if(err=fill(rdr), err) {
- errorf("read: could not populate buffer\n");
- return 1;
- }
- goto SEQLOOP;
-
-SUCCESS:
- push(&rdr->seq, rdr->b - beg, beg);
- put(&rdr->seq, '\0');
-
- return 0;
-}
-
-/*
- * fasta files
- */
-
-error
-bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq)
-{
- error err;
-
- err = readfasta(rdr, seq, '>', '>');
- if(err && err != EOF) {
- errorf("parse fail: could not read sequence of record\n");
- return err;
- }
-
- seq->name = rdr->seq->b;
- seq->s = rdr->seq->b + rdr->seq->off;
- seq->len = rdr->seq->it - seq->s - 1; // shift by 1 as we pushed a '0' to end
- seq->q = nil;
-
- return err;
-}
-
-/*
- * fastq files
- */
-
-error
-bio·readfastq(bio·SeqReader *rdr, bio·Seq *seq)
-{
- int n;
- byte *beg;
- error err;
-
- err = readfasta(rdr, seq, '@', '+');
- if(err) {
- errorf("parse fail: could not read sequence of record\n");
- return err;
- }
-
- seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off);
-
- if(*rdr->b++ != '+') {
- errorf("format error: no '+' character seperator found\n");
- return -1;
- }
-
-EATLN:
- while(rdr->b != rdr->bend) {
- if (*rdr->b++ == '\n') {
- n = 0;
- goto QUAL;
- }
- }
-
- if(err = fill((bio·SeqReader*)rdr), err) {
- errorf("read: could not populate buffer\n");
- return 1;
- }
- goto EATLN;
-
-QUAL:
- beg = rdr->b;
- while(rdr->b != rdr->bend) {
- if(*rdr->b == '\n') {
- push(&rdr->seq, rdr->b - beg, beg);
- beg = rdr->b + 1;
- }
-
- if(n++ == seq->len || *rdr->b == '\0') {
- err = *rdr->b == '\0' ? EOF : 0;
- goto SUCCESS;
- }
-
- rdr->b++;
- }
-
- push(&rdr->seq, rdr->b - beg, beg);
-
- if(err = fill((bio·SeqReader*)rdr), err) {
- errorf("read: could not populate buffer\n");
- return 1;
- }
- goto QUAL;
-
-
-SUCCESS:
- push(&rdr->seq, rdr->b - beg, beg);
- put(&rdr->seq, '\0');
-
- seq->name = rdr->seq->b;
- seq->s = rdr->seq->b + rdr->seq->off - 1;
- seq->q = seq->s + seq->len + 1;
-
- return err;
-}
-
-// -----------------------------------------------------------------------
-// sequence writing
-
-error
-bio·writefasta(io·Writer io, void *wtr, bio·Seq seq)
-{
- int i, j, d;
- char buf[2048], *b = buf, *e = arrend(buf);
-
- *b++ = '>';
- while(*seq.name) {
- *b++ = *seq.name++;
- if(b == e) {
- io.write(wtr, 1, arrlen(buf), buf);
- b = buf;
- }
- }
-
- for(i=0; i<seq.len; i = j) {
- j = MIN(i+70, seq.len);
- d = j - i;
- if((e-b) <= d+1) {
- io.write(wtr, 1, b-buf, buf);
- b = buf;
- }
- *b++ = '\n';
- memcpy(b, seq.s+i, d);
- b += d;
- }
-
- *b++ = '\n';
- io.write(wtr, 1, b-buf, buf);
-
- return 0;
-}
-
-error
-bio·writefastq(io·Writer io, void *wtr, bio·Seq seq)
-{
- panicf("need to implement");
- return 1;
-}
diff --git a/sys/libbio/newick.c b/sys/libbio/newick.c
deleted file mode 100644
index 5e6d30a..0000000
--- a/sys/libbio/newick.c
+++ /dev/null
@@ -1,414 +0,0 @@
-#include <u.h>
-#include <base.h>
-#include <libbio.h>
-
-// -----------------------------------------------------------------------
-// Tokens
-
-enum TokenKind
-{
- tok·nil,
- tok·eof,
- tok·space,
- tok·ident,
- tok·number,
- tok·lparen,
- tok·rparen,
- tok·lbrak,
- tok·rbrak,
- tok·comma,
- tok·semi,
- tok·colon,
-
- NUM_TOKENS,
-};
-
-
-struct Token {
- enum TokenKind kind;
- union
- {
- byte *s;
- double x;
- } lit;
-};
-
-static
-byte*
-tokstr(struct Token tok)
-{
- static byte b[50];
- switch (tok.kind) {
- case tok·nil: return "";
- case tok·eof: return nil;
- case tok·space: return " ";
- case tok·ident: return tok.lit.s;
- case tok·lparen: return "(";
- case tok·rparen: return ")";
- case tok·lbrak: return "[";
- case tok·rbrak: return "]";
- case tok·comma: return ",";
- case tok·semi: return ";";
- case tok·colon: return ":";
- case tok·number:
- snprintf(b, arrlen(b), "%f", tok.lit.x);
- return b;
- default:
- return nil;
- }
-}
-
-
-// -----------------------------------------------------------------------
-// Read
-
-// TODO: Bounds checking on buffer
-static
-struct Token
-lex(io·Peeker s, void* fp)
-{
-#define isvalidchar(C) ((C) == '!') || \
- ('\"' < (C) && (C) < '\'') || \
- (')' < (C) && (C) < '+') || \
- (',' < (C) && (C) < ':') || \
- (':' < (C) && (C) < '[') || \
- ((C) == '\\') || \
- (']' < (C) && (C) <= '~')
- byte *c;
- struct Token tok;
- static byte b[1024];
- c = b;
- *c = s.get(fp);
-
- if (isspace(*c)) {
- while (isspace(*c)) {
- *(++c) = s.get(fp);
- }
-
- s.unget(fp, *c);
- assert(c - b < 1024);
-
- *c = 0;
- tok.kind = tok·space;
- tok.lit.s = b;
- return tok;
- }
-
- switch (*c) {
- case EOF: tok.kind = tok·eof; return tok;
- case '(': tok.kind = tok·lparen; return tok;
- case ')': tok.kind = tok·rparen; return tok;
- case '[': tok.kind = tok·lbrak; return tok;
- case ']': tok.kind = tok·rbrak; return tok;
- case ',': tok.kind = tok·comma; return tok;
- case ';': tok.kind = tok·semi; return tok;
- case ':': tok.kind = tok·colon; return tok;
-
- case '.':
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- while (isdigit(*c)) {
- NUM: *(++c) = s.get(fp);
- }
- if (*c == '.') goto NUM;
- if (isvalidchar(*c)) goto IDENT;
-
- s.unget(fp, *c);
- assert(c - b < 1024);
-
- *c = 0;
- tok.kind = tok·number;
- tok.lit.x = atof(b);
- return tok;
-
- case '\"':
- while ((*c) != '\"') {
- *(++c) = s.get(fp);
- }
- assert(c - b < 1024);
-
- *c = '\0';
- tok.kind = tok·ident;
- tok.lit.s = b + 1;
- return tok;
-
- default:
- IDENT:
- while (isvalidchar(*c)) {
- *(++c) = s.get(fp);
- }
- s.unget(fp, *c);
- assert(c - b < 1024);
-
- *c = '\0';
- tok.kind = tok·ident;
- tok.lit.s = b;
- return tok;
- }
-#undef isvalidchar
-}
-
-static
-struct Token
-lex_nospace(io·Peeker s, void *impl)
-{
- struct Token tok;
- tok = lex(s, impl);
- if (tok.kind == tok·space) {
- tok = lex_nospace(s, impl);
- }
-
- return tok;
-}
-
-struct Parser
-{
- int lev;
- bio·Node *root;
- struct Token tok;
-
- void *io;
- io·Peeker file;
- void *heap;
- mem·Allocator mem;
-};
-
-static
-error
-parse(struct Parser *p)
-{
- error err;
- bio·Node *node;
- bio·Node *root;
- struct Token tok;
-
- node = p->root;
- for (;;) {
- tok = lex_nospace(p->file, p->io);
-
- switch (tok.kind) {
- case tok·lparen:
- if (!p->root && p->lev > 0) {
- errorf("parse format: attempted to make root at non-zero level");
- goto ERROR;
- }
-
- node = p->mem.alloc(p->heap, 1, sizeof(*node));
- memset(node, 0, sizeof(*node));
-
- if (p->root) {
- phylo·addchild(p->root, node);
- root = p->root;
- } else {
- root = node;
- }
-
- p->lev++;
- p->root = node;
- p->tok = tok;
- err = parse(p);
- if (err) {
- goto ERROR;
- }
- if (p->tok.kind != tok·rparen) {
- errorf("incorrect format: closing parentheses expected to proceed opening");
- goto ERROR;
- }
- p->root = root;
- // NOTE(nnoll): We don't want to override the state of p->tok here!
- // Jump straight to grabbing next token.
- continue;
-
- case tok·rparen:
- p->lev--;
- goto DONE;
-
- /* Comments */
- case tok·lbrak:
- if (!node) {
- errorf("incorrect format: comment found in disallowed region");
- goto ERROR;
- }
- node->comment = str·make("");
- while (tok.kind != tok·rbrak) {
- tok = lex_nospace(p->file, p->io);
- if (tok.kind == tok·eof || tok.kind == tok·nil) {
- errorf("incorrect format: unmatched comment bracket '['");
- goto ERROR;
- }
- str·append(&node->comment, tokstr(tok));
- }
- break;
-
- case tok·rbrak:
- errorf("incorrect format: end comment token found in disallowed region");
- goto ERROR;
- break;
-
- case tok·colon:
- tok = lex_nospace(p->file, p->io);
- if (tok.kind != tok·number) {
- errorf("incorrect format: expected number after colon");
- goto ERROR;
- }
- if (node == nil) {
- errorf("parse error: attempting to set distance of nil node");
- goto ERROR;
- }
- node->dist = tok.lit.x;
- break;
-
- case tok·comma:
- node = nil;
- break;
-
- case tok·ident:
- if (p->tok.kind == tok·rparen) {
- if (!node) {
- errorf("parse error: attempting to set name of nil node");
- goto ERROR;
- }
- node->name = str·make(tok.lit.s);
- } else {
- if (p->tok.kind != tok·lparen && p->tok.kind != tok·comma) {
- errorf("format error: misplaced identifier for leaf found");
- goto ERROR;
- }
-
- if (!p->root) {
- errorf("parse error: attempting to create child for no parent");
- goto ERROR;
- }
-
- node = p->mem.alloc(p->heap, 1, sizeof(*node));
- memset(node, 0, sizeof(*node));
-
- node->name = str·make(tok.lit.s);
-
- phylo·addchild(p->root, node);
- }
- break;
-
- case tok·number:
- if (p->tok.kind == tok·rparen) {
- if (p->lev == 0) {
- errorf("format error: support value on root not supported");
- goto ERROR;
- }
- node->support = tok.lit.x;
- } else {
- errorf("format error: found number in unexpected location");
- goto ERROR;
- }
- break;
-
- case tok·semi:
- p->file.unget(p->io, ';');
- if (p->lev) {
- errorf("format error: uneven number of parentheses found at ';'");
- goto ERROR;
- }
- goto DONE;
-
- case tok·eof:
- goto DONE;
-
- default:
- errorf("parse error: unrecognized token");
- goto ERROR;
- }
-
- p->tok = tok;
- }
-
-DONE:
- p->tok = tok;
- return 0;
-ERROR:
- // TODO(nnoll): cleanup
- return 1;
-}
-
-int
-bio·readnewick(io·Peeker stream, void *s, bio·Tree *tree)
-{
- error err;
- struct Parser p;
-
- if (!tree) {
- errorf("tree pointer nil");
- return 0;
- }
-
- p = (struct Parser){
- .lev = 0,
- .root = nil,
- .tok = (struct Token){ 0 },
- .io = s,
- .file = stream,
- .mem = tree->mem,
- .heap = tree->heap,
- };
- err = parse(&p);
- if (err) {
- errorf("parsing failed\n");
- return 0;
- }
-
- tree->root = p.root;
- tree->nleaf = 0;
- tree->root->nnode = 0;
-
- phylo·countleafs(tree->root, &tree->nleaf);
- phylo·countnodes(tree->root, &tree->root->nnode);
-
- return 1;
-}
-
-// -----------------------------------------------------------------------
-// Write
-
-static
-error
-dump(bio·Node *node, void *impl, io·Putter out)
-{
- byte b[24];
-
- if (!node) {
- return 1;
- }
-
- bio·Node *child;
- if (node->nchild) {
- out.put(impl, '(');
-
- dump(node->child, impl, out);
- for (child = node->child->sibling; child != nil; child = child->sibling) {
- out.put(impl, ',');
- dump(child, impl, out);
- }
-
- out.put(impl, ')');
- }
- if (node->name) {
- out.puts(impl, node->name);
- }
-
- if (node->parent) {
- out.put(impl, ':');
- snprintf(b, arrlen(b), "%f", node->dist);
- out.puts(impl, b);
- }
-
- return 0;
-}
-
-error
-bio·writenewick(bio·Tree tree, io·Putter out, void* impl)
-{
- dump(tree.root, impl, out);
- out.put(impl, ';');
- out.put(impl, '\n');
-
- return 0;
-}
diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c
deleted file mode 100644
index d50934f..0000000
--- a/sys/libbio/phylo.c
+++ /dev/null
@@ -1,427 +0,0 @@
-#include <u.h>
-#include <base.h>
-#include <base/macro/qsort.h>
-#include <libbio.h>
-
-// -----------------------------------------------------------------------
-// subtree manipulation methods
-// NOTE: As of now these don't update nnode & nleaf stats.
-// It is the caller's responsibility to refresh counts.
-
-error
-phylo·addchild(bio·Node* parent, bio·Node* child)
-{
- bio·Node *it, *sibling;
- if (!parent->nchild) {
- parent->child = child;
- goto SUCCESS;
- }
-
- for (it = parent->child, sibling = it; it != nil; it = it->sibling) {
- sibling = it;
- }
- sibling->sibling = child;
-
-SUCCESS:
- child->parent = parent;
- parent->nchild++;
- return 0;
-}
-
-error
-phylo·rmchild(bio·Node *parent, bio·Node *child)
-{
- bio·Node *it, *prev;
- enum {
- error·nil,
- error·notfound,
- error·nochildren,
- };
-
- prev = nil;
- for (it = parent->child; it != nil; it = it->sibling) {
- if (it == child) goto FOUND;
- prev = it;
- }
- return error·notfound;
-
-FOUND:
- if (prev == nil) {
- parent->child = child->sibling;
- } else {
- prev->sibling = child->sibling;
- }
-
- parent->nchild--;
- return error·nil;
-}
-
-// -----------------------------------------------------------------------
-// subtree statistics
-
-error
-phylo·countnodes(bio·Node *node, int *n)
-{
- int m;
- error err;
- bio·Node *child;
-
- m = *n;
- for (child = node->child; child != nil; child = child->sibling) {
- if (err = phylo·countnodes(child, n), err) {
- errorf("node count: failure at '%s'", child->name);
- return 1;
- }
- }
- node->nnode = *n - m;
- *n += 1;
-
- return 0;
-}
-
-error
-phylo·countleafs(bio·Node *node, int *n)
-{
- error err;
- bio·Node *child;
-
- if (!node->nchild) {
- *n += 1;
- }
-
- for (child = node->child; child != nil; child = child->sibling) {
- if (err = phylo·countleafs(child, n), err) {
- errorf("leaf count: failure at '%s'", child->name);
- return 1;
- }
- }
-
- return 0;
-}
-
-// -----------------------------------------------------------------------
-// generic operations on tree
-
-void*
-phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
-{
- bio·Node *it;
-
- for(it = clade->child; it != nil; it = it->sibling) {
- ctx = phylo·postorder(it, op, ctx);
- }
-
- return op(clade, ctx);
-}
-
-void*
-phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx)
-{
- bio·Node *it;
-
- ctx = op(clade, ctx);
- for(it = clade->child; it != nil; it = it->sibling) {
- ctx = phylo·preorder(it, op, ctx);
- }
-
- return ctx;
-}
-
-int
-phylo·collectpostorder(bio·Node *clade, bio·Node **list)
-{
- bio·Node *it;
- int n;
-
- for(n = 0, it = clade->child; it != nil; it = it->sibling) {
- n += phylo·collectpostorder(it, list+n);
- }
-
- return n;
-}
-
-static
-inline
-void*
-appendleaf(bio·Node *node, void* list)
-{
- bio·Node **leafs;
-
- leafs = list;
- if (!node->nchild) {
- *leafs++ = node;
- }
-
- return leafs;
-}
-
-void
-phylo·getleafs(bio·Tree tree, bio·Node **leafs)
-{
- phylo·postorder(tree.root, &appendleaf, leafs);
-}
-
-// -----------------------------------------------------------------------
-// tree editing
-
-static
-void
-sortnodelist(bio·Node **head, bio·Node *next)
-{
- bio·Node tmp, *it;
-
- it = &tmp;
- tmp.sibling = *head;
-
- while (it->sibling != nil && it->sibling->nnode < next->nnode) {
- it = it->sibling;
- }
-
- next->sibling = it->sibling;
- it->sibling = next;
- *head = tmp.sibling;
-}
-
-error
-phylo·ladderize(bio·Node *root)
-{
- int i;
- error err;
- bio·Node *child, *sorted, *sibling;
-
- if (!root->nchild) return 0;
-
- // ladderize below
- for (child = root->child; child != nil; child = child->sibling) {
- if (err = phylo·ladderize(child), err) {
- errorf("ladderize: failure at '%s'", child->name);
- return 1;
- }
- }
-
- // ladderize yourself
- sorted = nil;
- child = root->child;
- while (child != nil) {
- sibling = child->sibling;
- sortnodelist(&sorted, child);
- child = sibling;
- }
- root->child = sorted;
-
- return 0;
-}
-
-/*
- * compute all distances from a given node
- * must provide a working buffer
- */
-
-struct Tuple
-{
- double *d;
- bio·Node **n;
-};
-
-static
-struct Tuple
-getdistsfrom(bio·Node *node, bio·Node *prev, double curr, double *dist, bio·Node **list)
-{
- bio·Node *it;
- struct Tuple ret;
-
- *dist++ = curr;
- *list++ = node;
-
- ret.d = dist;
- ret.n = list;
-
- if (node->parent && node->parent != prev) {
- ret = getdistsfrom(node->parent, node, curr + node->dist, dist, list);
-
- dist = ret.d;
- list = ret.n;
- }
-
- for (it = node->child; it != nil; it = it->sibling) {
- if (it != prev) {
- ret = getdistsfrom(it, node, curr + it->dist, dist, list);
-
- dist = ret.d;
- list = ret.n;
- }
- }
-
- return ret;
-}
-
-int
-phylo·getdistsfrom(bio·Node *node, int len, double *dist, bio·Node **list)
-{
- struct Tuple ret;
- // TODO: Better bounds checking.
-
- ret = getdistsfrom(node, nil, 0.0, dist, list);
-
- assert(ret.n - list == len);
- assert(ret.d - dist == len);
-
- return len;
-}
-
-/*
-static
-void
-disttoroot(bio·Node *clade, double anc, double *dists)
-{
- double d;
- bio·Node *it;
-
- *dists++ = anc + clade->dist;
- d = dists[-1];
- for (it = clade->child; it != nil; it = it->sibling) {
- disttoroot(it, d, ++dists);
- }
-}
-
-void
-phylo·disttoroot(bio·Tree tree, double *dists)
-{
- disttoroot(tree.root, 0.0, dists);
-}
-*/
-
-/*
- * compute the path constituting the tree diameter
- * returns the number of edges in the path
- */
-
-static
-void
-sort·nodedists(uintptr len, double fs[], bio·Node* ns[])
-{
- double f;
- bio·Node *n;
-#define LESS(i, j) (fs[i] < fs[j])
-#define SWAP(i, j) (n = ns[i], f = fs[i], \
- fs[i] = fs[j], ns[i] = ns[j], \
- fs[j] = f, ns[j] = n)
- QSORT(len, LESS, SWAP);
-#undef LESS
-#undef SWAP
-}
-
-#define BUFLEN 4096
-double
-phylo·diameter(bio·Tree tree, int *len, bio·Node **path)
-{
- // TODO: deal with large tree > BUFLEN gracefully
- int n;
- double fbuf[BUFLEN];
- bio·Node *nbuf[BUFLEN];
-
- n = tree.root->nnode;
-
- assert(n < BUFLEN);
-
- n = phylo·getdistsfrom(tree.root, tree.root->nnode, fbuf, nbuf);
- sort·nodedists(n, fbuf, nbuf);
-
- path[0] = nbuf[n-1];
- printf("first end '%s'\n", path[0]->name);
-
- n = phylo·getdistsfrom(path[0], n, fbuf, nbuf);
- sort·nodedists(n, fbuf, nbuf);
- printf("second end '%s'\n", nbuf[n-1]->name);
-
- *len = 0;
-
- // TODO: Traverse up the tree from each node
- // Find MRCA by intersection of nodes hit
-
- return 0.0;
-}
-#undef BUFLEN
-
-/*
- * reroot a tree on a new node
- */
-static
-error
-rotateparent(bio·Node *node, bio·Node *to)
-{
- error err;
-
- // NOTE: will this ever be taken?
- if (node->parent == to) {
- return 0;
- }
-
- if (!node->parent) {
- goto RMCHILD;
- }
-
- err = rotateparent(node->parent, node);
- if (err) {
- errorf("failure: broken tree");
- return err;
- }
-
- err = phylo·addchild(node, node->parent);
- if (err) {
- errorf("inconsistent topology: could not add parent '%s' as child of '%s'", node->parent->name, node->name);
- return err;
- }
-
-RMCHILD:
- err = phylo·rmchild(node, to);
- if (err) {
- errorf("inconsistent topology: could not remove child '%s' from '%s'", to->name, node->name);
- return err;
- }
-
- node->parent = to;
- return 0;
-}
-
-#define PREC .00000001
-error
-phylo·reroot(bio·Tree *tree, bio·Node *node, double d)
-{
- bio·Node *new;
-
- // TODO: should check that node is part of this tree?
- // TODO: should we check if node->parent != nil?
-
- if (fabs(d) < PREC) {
- new = node;
- rotateparent(node->parent, node);
- } else if (fabs(d-node->dist) < PREC) {
- new = node->parent;
- if (new->parent->parent) {
- rotateparent(new->parent->parent, new->parent);
- }
- } else {
- new = tree->mem.alloc(tree->heap, 1, sizeof(*new));
- memset(new, 0, sizeof(*new));
-
- phylo·addchild(new, node);
- node->parent = new;
-
- phylo·addchild(new, node->parent);
- if (node->parent->parent) {
- rotateparent(node->parent->parent, node->parent);
- }
- node->parent->parent = new;
- }
-
- printf("number of children on old root: %d\n", tree->root->nchild);
- tree->root = new;
- tree->nleaf = 0;
-
- phylo·countleafs(new, &tree->nleaf);
- phylo·countnodes(new, &new->nnode);
-
- return 0;
-}
-#undef PREC
diff --git a/sys/libbio/rules.mk b/sys/libbio/rules.mk
deleted file mode 100644
index cbc6887..0000000
--- a/sys/libbio/rules.mk
+++ /dev/null
@@ -1,28 +0,0 @@
-include share/push.mk
-
-# Local sources
-SRCS_$(d) := \
- $(d)/fasta.c \
- $(d)/newick.c \
- $(d)/phylo.c
-LIBS_$(d) := $(d)/libbio.a
-BINS_$(d) :=
-# TSTS_$(d) := \
-# $(d)/test.c \
-# $(d)/simulate.c
-
-include share/paths.mk
-
-# Local rules
-# $(LIBS_$(d)) = TCFLAGS :=
-# $(LIBS_$(d)) = TCINCS :=
-# $(LIBS_$(d)) = TCLIBS :=
-
-$(LIBS_$(d)): $(OBJS_$(d)) $(OBJS_$(d)/io)
- $(ARCHIVE)
-
-$(UNTS_$(d)): TCLIBS := $(LIBS_$(d)) $(OBJ_DIR)/libn/libn.a
-$(UNTS_$(d)): $(TOBJS_$(d)) $(LIBS_$(d)) $(OBJ_DIR)/libn/libn.a
- $(LINK)
-
-include share/pop.mk
diff --git a/sys/libbio/simulate.c b/sys/libbio/simulate.c
deleted file mode 100644
index 0f5a97e..0000000
--- a/sys/libbio/simulate.c
+++ /dev/null
@@ -1,120 +0,0 @@
-#include <u.h>
-#include <libn.h>
-#include <libbio.h>
-
-#define SEQLEN 2560
-static byte *SEQ =
-"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC"
-"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
-"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
-"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
-"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA"
-"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
-"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
-"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA"
-"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
-"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC"
-"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
-"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
-"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT"
-"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
-"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
-"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
-"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
-"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
-"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
-"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
-"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
-"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT"
-"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
-"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC"
-"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
-"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
-"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
-"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
-"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
-"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
-"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
-"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT";
-
-byte*
-modify(byte *seq, int *len, double p)
-{
- byte *head, *new;
-
- head = calloc(SEQLEN+1, sizeof(byte));
- new = head;
- for (; *seq != '\0'; seq++) {
- if (rng·bernoulli(p)) {
- switch (rng·randi(5)) {
- case 0: *new++ = 'A'; break;
- case 1: *new++ = 'C'; break;
- case 2: *new++ = 'G'; break;
- case 3: *new++ = 'T'; break;
- case 4: continue;
- }
- } else {
- *new++ = *seq;
- }
- }
- *new = '\0';
- *len = new - head;
- return head;
-}
-
-#define NSEQS 20
-int
-main()
-{
- int n, i, l, lens[NSEQS];
- byte *seqs[NSEQS];
-
- int locs[aln·N][NSEQS][aln·L];
- int *loc[aln·N];
- uint64 vals[aln·N][NSEQS][aln·L];
- uint64 *val[aln·N];
-
- rng·init(0);
-
- seqs[0] = SEQ;
- lens[0] = SEQLEN;
-
- for (n = 0; n < aln·N; n++) {
- for (i = 0; i < NSEQS; i++) {
- for (l = 0; l < aln·L; l++) {
- vals[n][i][l] = 0;
- }
- }
- }
-
- for (i = 1; i < NSEQS; i++) {
- seqs[i] = modify(SEQ, lens + i, .01*i);
- }
-
- for (i = 0; i < NSEQS; i++) {
- for (n = 0; n < aln·N; n++) {
- val[n] = vals[n][i];
- loc[n] = locs[n][i];
- }
- aln·sketch(seqs[i], aln·L, val, loc);
- }
-
- // for (n = 0; n < aln·N; n++) {
- // printf("iteration %d\n", n);
- // printf("[\n");
- // for (i = 0; i < NSEQS; i++) {
- // printf(" [");
- // for (l = 0; l < aln·L; l++) {
- // printf("%lu,", vals[n][i][l]);
- // }
- // printf("],\n");
- // }
- // printf("]\n");
- // }
-
- for (n = 0; n < aln·N; n++) {
- aln·sort(NSEQS, aln·L, (uint64*)vals[n]);
- }
-
- return 0;
-}
diff --git a/sys/libbio/test.c b/sys/libbio/test.c
deleted file mode 100644
index 9926764..0000000
--- a/sys/libbio/test.c
+++ /dev/null
@@ -1,283 +0,0 @@
-#include <u.h>
-#include <libn.h>
-#include <libbio.h>
-
-#include <time.h>
-
-// -----------------------------------------------------------------------
-// Global data
-
-static byte *SEQ[] = {
-"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC"
-"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
-"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
-"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
-"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA"
-"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
-"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
-"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA"
-"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
-"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC"
-"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
-"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
-"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT"
-"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
-"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
-"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
-"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
-"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
-"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
-"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
-"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
-"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT"
-"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
-"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC"
-"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
-"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
-"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
-"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
-"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
-"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
-"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
-"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT",
-
-"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAAATAC"
-"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
-"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
-"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
-"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCAAAGTCA"
-"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
-"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
-"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
-"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCCCCTGCC"
-"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
-"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
-"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGCGGCCTT"
-"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
-"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
-"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
-"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
-"ACAGTCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
-"ATCCTCTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
-"CCGCTGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
-"CTGTTAATCGCAAGCTCCAGCAGCCCCCCCCCCCCTTTTCTGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
-"TCCATGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATAAAAAGAGCTCAGT"
-"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
-"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATTATTAGCGAAGAAC"
-"AAGGTTCCAACTCTCATTCCCGCCATTTACTGGGAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
-"CTCTTCAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
-"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
-"TGCATTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
-"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
-"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
-"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
-"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT",
-};
-
-
-static
-int
-my_read(Stream *s, void *buf, int n)
-{
- return io·read(s, 1, n, buf);
-}
-
-// -----------------------------------------------------------------------
-// Point of entry for testing
-
-error
-test·newick()
-{
- error err;
- bio·Tree t;
- mem·Arena *heap;
- Stream *fd[2];
-
- io·Peeker rdr;
- io·Putter wtr;
-
- bio·Node **end, **it, **list;
-
- heap = mem·makearena(mem·sys, nil);
- rdr = (io·Peeker){.get = (byte (*)(void *))io·getbyte, .unget = (error (*)(void *, byte))io·ungetbyte};
- wtr = (io·Putter){.put = (error (*)(void *, byte))io·putbyte, .putstr = (int (*)(void *, string))io·putstring};
-
- fd[0] = io·open("/home/nolln/root/data/test/zika.nwk", "r");
- fd[1] = io·open("/home/nolln/root/data/test/zika.proc.nwk", "w");
-
- t.h = heap;
- t.heap = (mem·Allocator){ .alloc = (void *(*)(void *, uint, ulong))mem·arenaalloc, .free = nil, };
-
- if (err = bio·readnewick(rdr, fd[0], &t), err) {
- errorf("failed to read newick");
- return 1;
- }
- printf("number of children: %d\n", t.root->nchild);
-
- phylo·ladderize(t.root);
-
- list = mem·arenaalloc(heap, t.nleaf, sizeof(**list));
- phylo·getleafs(t, list);
- for (it = list, end = list + t.nleaf; it != end; ++it) {
- printf("Leaf '%s'\n", (*it)->name);
- }
-
- bio·Node *path[100];
- // phylo·diameter(t, path);
-
- printf("Loaded tree with %d leafs and %d nodes\n", t.nleaf, t.root->nnode);
- err = bio·writenewick(t, wtr, fd[1]);
-
- io·flush(fd[1]);
-
- io·close(fd[0]);
- io·close(fd[1]);
-
- mem·freearena(heap);
- return 0;
-}
-
-error
-test·fasta()
-{
- error err;
- Stream *fd;
-
- bio·Seq seq;
- bio·FastaReader *rdr;
-
- clock_t t;
-
- fd = io·open("/home/nolln/root/data/test/zika.fa", "r");
-
- /* Benchmark against Heng */
-#if 0
- int n, slen;
- kseq_t *kseq;
-
- t = clock();
- kseq = kseq_init(fd);
- while (kseq_read(kseq) >= 0) {
- ++n, slen += kseq->seq.l;
- }
- t = clock() - t;
- printf("heng's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC);
-
- kseq_destroy(kseq);
-
- io·seek(fd, 0, seek·set);
-#endif
-
- rdr = bio·openfasta((io·Reader){.read = (int (*)(void *, int, int, void *))io·read}, fd, mem·sys, nil);
-
- t = clock();
- err = 0;
- while (!err) {
- err = bio·readfasta(rdr, &seq);
- }
- t = clock() - t;
- printf("nick's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC);
- bio·closefasta(rdr);
-
-
- io·close(fd);
- return err <= 0 ? 0 : 1;
-}
-
-#define asrdr(x) (int (*)(void *, int, int, void *))(x)
-error
-test·fastq()
-{
- error err;
- Stream *fd;
-
- bio·Seq seq;
- bio·FastqReader *rdr;
-
- clock_t t;
-
- fd = io·open("/home/nolln/root/data/test/eg.fq", "r");
-
- rdr = bio·openfastq((io·Reader){.read = asrdr(io·read)}, fd, mem·sys, nil);
-
- t = clock();
- err = 0;
- while (!err) {
- err = bio·readfastq(rdr, &seq);
- }
- t = clock() - t;
- printf("nick's fastq code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC);
- bio·closefastq(rdr);
-
-
- io·close(fd);
- return err <= 0 ? 0 : 1;
-}
-
-error
-test·align()
-{
- double f;
- error err;
- int i, l, n;
-
- uint64 mem[aln·N][arrlen(SEQ)][aln·L];
- uint64 *phi[aln·N];
- int loc[aln·N][arrlen(SEQ)][aln·L];
- int *pos[aln·N];
-
- for (i = 0; i < arrlen(SEQ); i++) {
- for (n = 0; n < aln·N; n++) {
- phi[n] = mem[n][i];
- pos[n] = loc[n][i];
- }
-
- err = aln·sketch(SEQ[i], aln·L, phi, pos);
- }
-
- f = 0;
- for (n = 0; n < aln·N; n++) {
- aln·sort(arrlen(SEQ), aln·L, (uint64*)mem[n]);
-
- if (!memcmp(mem[n][0], mem[n][1], sizeof(uint64)*aln·L)) {
- f += 1.;
- printf("True : ");
- } else {
- printf("False: ");
- }
- for (i = 0; i < arrlen(SEQ); i++) {
- printf("[");
- for (l = 0; l < aln·L; l++) {
- printf("%lu,", mem[n][i][l]);
- }
- printf("]");
- if (i == 0) printf(" ~ ");
- }
- printf("\n");
- }
-
- printf("Fraction hits %f\n", f/aln·N);
- return err;
-
-}
-
-error
-main()
-{
- error err;
-
- if (err = test·newick(), err) {
- errorf("test fail: newick");
- }
-
-#if 0
- if (err = test·fasta(), err) {
- errorf("test fail: fasta");
- }
-
- if (err = test·fastq(), err) {
- errorf("test fail: fastq");
- }
-#endif
-}
-