From c0a7b53baf2a6e7bf9bc1fbec7ac05e43ac59154 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 8 Sep 2021 15:51:40 -0700 Subject: checkin --- sys/libbio/fasta.c | 95 ++++++++++++++++++++++++++++++++++++----------------- sys/libbio/newick.c | 1 + sys/libbio/phylo.c | 32 +++++++++--------- sys/libn/error.c | 5 ++- sys/libn/memory.c | 22 ++++++------- sys/libn/random.c | 35 +++++++++++++++++++- 6 files changed, 128 insertions(+), 62 deletions(-) (limited to 'sys') diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c index 6b40e99..078325a 100644 --- a/sys/libbio/fasta.c +++ b/sys/libbio/fasta.c @@ -40,8 +40,8 @@ grow(struct SeqBuf **sb, int min) newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min)); assert(newcap >= (*sb)->cap+min); - if (new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) { - errorf("memory: could not allocate new buffer"); + if(new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) { + errorf("memory: could not allocate new buffer\n"); return 1; } @@ -63,13 +63,13 @@ put(struct SeqBuf **sb, byte c) struct SeqBuf *sq; sq = *sb; - if (sq->it < (sq->b + sq->cap)) { + if(sq->it < (sq->b + sq->cap)) { *sq->it++ = c; return 0; } - if (err = grow(sb, 1), err) { - errorf("memory fail: could not allocate more buffer"); + if(err = grow(sb, 1), err) { + errorf("memory fail: could not allocate more buffer\n"); sq->mem.free(sq->heap, sq); return 1; } @@ -85,21 +85,11 @@ push(struct SeqBuf **sb, int n, void *buf) int d, err; struct SeqBuf *seq; - char *cb = buf; - for(d=0; d < n; d++) { - if(cb[d] == 0) { - printf("ERROR: zero byte being copied @ pos %d/%d\n", d, n); - printf("ERROR: string afterwards is %s\n", cb+n+1); - printf("ERROR: string afterwards is %s\n", cb+n+2); - exit(1); - } - } - seq = *sb; - if(d = seq->cap - (seq->it - seq->b), d < n) { - assert(d > 0); + if(d = (seq->cap - (seq->it - seq->b)), d < n) { + assert(d >= 0); if (err = grow(sb, n-d), err) { - errorf("memory fail: could not allocate more buffer"); + errorf("memory fail: could not allocate more buffer\n"); seq->mem.free(seq->heap, seq); return 1; } @@ -137,7 +127,7 @@ fill(bio·SeqReader *rdr) n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf); if(n < 0) { - errorf("read: no data obtained from reader"); + errorf("read: no data obtained from reader\n"); return 1; } rdr->b = rdr->buf; @@ -166,7 +156,7 @@ bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap) r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE; if (err=fill(r), err) { - errorf("fill: could not populate buffer"); + errorf("fill: could not populate buffer\n"); goto ERROR; } @@ -209,7 +199,7 @@ readfasta(bio·SeqReader *rdr, bio·Seq *seq, byte hdr, byte stop) // NOTE: Can this case happen? assert(rdr->b != rdr->bend); if(*rdr->b++ != hdr) { - errorf("fasta/q format: expected '%c', found '%c'", hdr, *rdr->b--); + errorf("fasta/q format: expected '%c', found '%c'\n", hdr, *rdr->b--); return 1; } @@ -224,7 +214,7 @@ NAME: push(&rdr->seq, rdr->b - beg, beg); if(err=fill(rdr), err) { - errorf("read: could not populate buffer"); + errorf("read: could not populate buffer\n"); return 1; } goto NAME; @@ -246,16 +236,18 @@ SEQLOOP: rdr->b++; } +#if 0 for(byte *cb = rdr->seq->b+rdr->seq->off; cb != rdr->seq->it; ++cb) { if(*cb == 0) { printf("ERROR @ pos=%ld: Found zero byte\n", cb - rdr->seq->b+rdr->seq->off); } } +#endif push(&rdr->seq, rdr->b - beg, beg); if(err=fill(rdr), err) { - errorf("read: could not populate buffer"); + errorf("read: could not populate buffer\n"); return 1; } goto SEQLOOP; @@ -278,13 +270,13 @@ bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq) err = readfasta(rdr, seq, '>', '>'); if(err && err != EOF) { - errorf("parse fail: could not read sequence of record"); + errorf("parse fail: could not read sequence of record\n"); return err; } seq->name = rdr->seq->b; seq->s = rdr->seq->b + rdr->seq->off; - seq->len = rdr->seq->it - seq->s - 1; + seq->len = rdr->seq->it - seq->s - 1; // shift by 1 as we pushed a '0' to end seq->q = nil; return err; @@ -303,14 +295,14 @@ bio·readfastq(bio·SeqReader *rdr, bio·Seq *seq) err = readfasta(rdr, seq, '@', '+'); if(err) { - errorf("parse fail: could not read sequence of record"); + errorf("parse fail: could not read sequence of record\n"); return err; } seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off); if(*rdr->b++ != '+') { - errorf("format error: no '+' character seperator found"); + errorf("format error: no '+' character seperator found\n"); return -1; } @@ -323,7 +315,7 @@ EATLN: } if(err = fill((bio·SeqReader*)rdr), err) { - errorf("read: could not populate buffer"); + errorf("read: could not populate buffer\n"); return 1; } goto EATLN; @@ -347,7 +339,7 @@ QUAL: push(&rdr->seq, rdr->b - beg, beg); if(err = fill((bio·SeqReader*)rdr), err) { - errorf("read: could not populate buffer"); + errorf("read: could not populate buffer\n"); return 1; } goto QUAL; @@ -358,8 +350,51 @@ SUCCESS: put(&rdr->seq, '\0'); seq->name = rdr->seq->b; - seq->s = rdr->seq->b + rdr->seq->off; + seq->s = rdr->seq->b + rdr->seq->off - 1; seq->q = seq->s + seq->len + 1; return err; } + +// ----------------------------------------------------------------------- +// sequence writing + +error +bio·writefasta(io·Writer io, void *wtr, bio·Seq seq) +{ + int i, j, d; + char buf[2048], *b = buf, *e = arrend(buf); + + *b++ = '>'; + while(*seq.name) { + *b++ = *seq.name++; + if(b == e) { + io.write(wtr, 1, arrlen(buf), buf); + b = buf; + } + } + + for(i=0; iheap.alloc(p->himpl, 1, sizeof(*node)); memset(node, 0, sizeof(*node)); + node->name = str·make(tok.lit.s); phylo·addchild(p->root, node); diff --git a/sys/libbio/phylo.c b/sys/libbio/phylo.c index 1bb5bc7..920102b 100644 --- a/sys/libbio/phylo.c +++ b/sys/libbio/phylo.c @@ -107,7 +107,7 @@ phylo·postorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) { bio·Node *it; - for (it = clade->child; it != nil; it = it->sibling) { + for(it = clade->child; it != nil; it = it->sibling) { ctx = phylo·postorder(it, op, ctx); } @@ -120,13 +120,26 @@ phylo·preorder(bio·Node *clade, void *(*op)(bio·Node*, void*), void *ctx) bio·Node *it; ctx = op(clade, ctx); - for (it = clade->child; it != nil; it = it->sibling) { + for(it = clade->child; it != nil; it = it->sibling) { ctx = phylo·preorder(it, op, ctx); } return ctx; } +int +phylo·collectpostorder(bio·Node *clade, bio·Node **list) +{ + bio·Node *it; + int n; + + for(n = 0, it = clade->child; it != nil; it = it->sibling) { + n += phylo·collectpostorder(it, list+n); + } + + return n; +} + static inline void* @@ -412,18 +425,3 @@ phylo·reroot(bio·Tree *tree, bio·Node *node, double d) return 0; } #undef PREC - -// ----------------------------------------------------------------------- -// ancestral inference - -struct phylo·InferOpts -{ - int nstates; - double *Q; -}; - -error -phylo·inferancestral(bio·Tree *tree, struct phylo·InferOpts opts) -{ - return 0; -} diff --git a/sys/libn/error.c b/sys/libn/error.c index a9d684c..508136c 100644 --- a/sys/libn/error.c +++ b/sys/libn/error.c @@ -17,9 +17,8 @@ errorf(byte* fmt, ...) va_list args; va_start(args, fmt); - printf("error: "); - vprintf(fmt, args); - printf("\n"); + fprintf(stderr, "error: "); + vfprintf(stderr, fmt, args); va_end(args); } diff --git a/sys/libn/memory.c b/sys/libn/memory.c index d7df8e3..1c7ab07 100644 --- a/sys/libn/memory.c +++ b/sys/libn/memory.c @@ -36,23 +36,23 @@ mem·Allocator sys·Memory = { /* Grow to particular size */ void* -bufgrow(void* buf, vlong newLen, vlong eltsize) +·bufgrow(void* buf, vlong newLen, vlong eltsize) { assert(bufcap(buf) <= (SIZE_MAX - 1) / 2); vlong newCap = MAX(16, MAX(1 + 2 * bufcap(buf), newLen)); assert(newLen <= newCap); - assert(newCap <= (SIZE_MAX - offsetof(bufHdr, buf)) / eltsize); + assert(newCap <= (SIZE_MAX - offsetof(BufHdr, buf)) / eltsize); - vlong newSize = offsetof(bufHdr, buf) + newCap * eltsize; + vlong newSize = offsetof(BufHdr, buf) + newCap * eltsize; - bufHdr* newHdr; + BufHdr* newHdr; if (buf) { - newHdr = _bufHdr(buf); - newHdr = (bufHdr*)realloc((void*)newHdr, newSize); + newHdr = bufhdr(buf); + newHdr = (BufHdr*)realloc((void*)newHdr, newSize); } else { - newHdr = (bufHdr*)malloc(newSize); + newHdr = (BufHdr*)malloc(newSize); newHdr->len = 0; } @@ -62,20 +62,20 @@ bufgrow(void* buf, vlong newLen, vlong eltsize) /* Pop out a value */ void -_bufpop(void *buf, int i, vlong eltsize) +·bufdel(void *buf, int i, vlong eltsize) { int n; byte *b; byte stk[1024]; assert(eltsize < sizeof(stk)); - b = (byte*) buf; - if (n = buflen(buf), i < n) { + b = (byte*)buf; + if(n = buflen(buf), i < n) { memcpy(stk, b+eltsize*i, eltsize); memcpy(b+eltsize*i, b+eltsize*(i+1), eltsize*(n-i-1)); memcpy(b+eltsize*(n-1), stk, eltsize); } - _bufHdr(buf)->len--; + bufhdr(buf)->len--; } // ------------------------------------------------------------------------- diff --git a/sys/libn/random.c b/sys/libn/random.c index 558641e..551d1e9 100644 --- a/sys/libn/random.c +++ b/sys/libn/random.c @@ -1,4 +1,5 @@ #include +#include // ---------------------------------------------------------------------------- // Internal structure @@ -66,12 +67,29 @@ rng·init(uint64 seed) /* Returns a random float64 between 0 and 1 */ double -rng·random() +rng·random(void) { uint64 r = xoshiro256ss(&RNG); return (double)r / (double)UINT64_MAX; } +double +rng·exponential(double lambda) +{ + double f; + + f = rng·random(); + return -log(1 - f)/lambda; +} + +double +rng·normal(void) +{ + double f; + f = rng·random(); + +} + /* Returns true or false on success of trial */ bool rng·bernoulli(double f) @@ -88,3 +106,18 @@ rng·randi(int max) uint64 r = xoshiro256ss(&RNG); return r % max; } + +uint64 +rng·poisson(double mean) +{ + uint64 n; + double c; + + if(mean<10.0) { + for(n=0, c=rng·exponential(1.0); c