From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/libbio/fasta.c | 393 ----------------------------------------------------- 1 file changed, 393 deletions(-) delete mode 100644 sys/libbio/fasta.c (limited to 'sys/libbio/fasta.c') diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c deleted file mode 100644 index 3788544..0000000 --- a/sys/libbio/fasta.c +++ /dev/null @@ -1,393 +0,0 @@ -#include -#include -#include - -#define INIT_NM_SIZE 128 -#define INIT_SQ_SIZE 4096 - -struct SeqBuf -{ - mem·Allocator mem; - void *heap; - - int cap, off; - byte *it, b[]; -}; - -static -void -reset(struct SeqBuf *sb) -{ - sb->off = 0; - sb->it = sb->b; -} - -static -error -grow(struct SeqBuf **sb, int min) -{ - void* heap; - mem·Allocator mem; - - vlong newcap; - struct SeqBuf *old, *new; - - old = *sb; - mem = old->mem; - heap = old->heap; - - assert((*sb)->cap <= (SIZE_MAX - 1) / 2); - newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min)); - assert(newcap >= (*sb)->cap+min); - - if(new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) { - errorf("memory: could not allocate new buffer\n"); - return 1; - } - - memcpy(new, old, sizeof(*new) + (*sb)->cap); - - new->cap = newcap; - new->it = new->b + (old->it - old->b); - mem.free(heap, old); - - *sb = new; - return 0; -} - -static -error -put(struct SeqBuf **sb, byte c) -{ - int err; - struct SeqBuf *sq; - - sq = *sb; - if(sq->it < (sq->b + sq->cap)) { - *sq->it++ = c; - return 0; - } - - if(err = grow(sb, 1), err) { - errorf("memory fail: could not allocate more buffer\n"); - sq->mem.free(sq->heap, sq); - return 1; - } - - *((*sb)->it++) = c; - return 0; -} - -static -error -push(struct SeqBuf **sb, int n, void *buf) -{ - int d, err; - struct SeqBuf *seq; - - seq = *sb; - if(d = (seq->cap - (seq->it - seq->b)), d < n) { - assert(d >= 0); - if (err = grow(sb, n-d), err) { - errorf("memory fail: could not allocate more buffer\n"); - seq->mem.free(seq->heap, seq); - return 1; - } - } - seq = *sb; - - memcpy(seq->it, buf, n); - seq->it += n; - - return 0; -} - -// ----------------------------------------------------------------------- -// sequence data - -struct bio·SeqReader { - byte eof; - io·Reader rdr; - void *io; - - struct SeqBuf *seq; - - /* read buffer */ - byte *b, *bend, buf[4*4098]; -}; - -static -error -fill(bio·SeqReader *rdr) -{ - int n; - // NOTE: This could lead to an infinite loop. - if(rdr->eof) - return 0; - - n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf); - if(n < 0) { - errorf("read: no data obtained from reader\n"); - return 1; - } - rdr->b = rdr->buf; - rdr->bend = rdr->b + n; - if(rdr->eof = (n < arrlen(rdr->buf)), rdr->eof) - *rdr->bend++ = '\0'; - - return 0; -} - -bio·SeqReader* -bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap) -{ - error err; - bio·SeqReader *r; - - r = mem.alloc(heap, 1, sizeof(bio·SeqReader)); - r->rdr = rdr; - r->io = io; - r->eof = 0; - - r->seq = mem.alloc(heap, 1, sizeof(*r->seq) + INIT_NM_SIZE + INIT_SQ_SIZE); - r->seq->mem = mem; - r->seq->heap = heap; - r->seq->it = r->seq->b; - r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE; - - if (err=fill(r), err) { - errorf("fill: could not populate buffer\n"); - goto ERROR; - } - - return r; - -ERROR: - mem.free(heap, r->seq); - mem.free(heap, r); - return nil; -} - -error -bio·closeseq(bio·SeqReader *rdr) -{ - mem·Allocator mem; - void *heap; - - mem = rdr->seq->mem; - heap = rdr->seq->heap; - - mem.free(heap, rdr->seq); - mem.free(heap, rdr); - - return 0; -} - - -static -error -readfasta(bio·SeqReader *rdr, bio·Seq *seq, byte hdr, byte stop) -{ - error err; - byte *beg; - - if(rdr->eof && rdr->b == rdr->bend-1) - return EOF; - - reset(rdr->seq); - - // NOTE: Can this case happen? - assert(rdr->b != rdr->bend); - if(*rdr->b++ != hdr) { - errorf("fasta/q format: expected '%c', found '%c'\n", hdr, *rdr->b--); - return 1; - } - -NAME: - beg = rdr->b; - while(rdr->b != rdr->bend) { - if(*rdr->b++ == '\n') { - push(&rdr->seq, (rdr->b - 1) - beg, beg); - goto SEQ; - } - } - push(&rdr->seq, rdr->b - beg, beg); - - if(err=fill(rdr), err) { - errorf("read: could not populate buffer\n"); - return 1; - } - goto NAME; - -SEQ: - put(&rdr->seq, '\0'); - rdr->seq->off = rdr->seq->it - rdr->seq->b; - -SEQLOOP: - beg = rdr->b; - while(rdr->b != rdr->bend) { - if(*rdr->b == '\n') { - push(&rdr->seq, rdr->b - beg, beg); - beg = rdr->b + 1; - } - - if(*rdr->b == stop || *rdr->b == '\0') - goto SUCCESS; - - rdr->b++; - } - - push(&rdr->seq, rdr->b - beg, beg); - - if(err=fill(rdr), err) { - errorf("read: could not populate buffer\n"); - return 1; - } - goto SEQLOOP; - -SUCCESS: - push(&rdr->seq, rdr->b - beg, beg); - put(&rdr->seq, '\0'); - - return 0; -} - -/* - * fasta files - */ - -error -bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq) -{ - error err; - - err = readfasta(rdr, seq, '>', '>'); - if(err && err != EOF) { - errorf("parse fail: could not read sequence of record\n"); - return err; - } - - seq->name = rdr->seq->b; - seq->s = rdr->seq->b + rdr->seq->off; - seq->len = rdr->seq->it - seq->s - 1; // shift by 1 as we pushed a '0' to end - seq->q = nil; - - return err; -} - -/* - * fastq files - */ - -error -bio·readfastq(bio·SeqReader *rdr, bio·Seq *seq) -{ - int n; - byte *beg; - error err; - - err = readfasta(rdr, seq, '@', '+'); - if(err) { - errorf("parse fail: could not read sequence of record\n"); - return err; - } - - seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off); - - if(*rdr->b++ != '+') { - errorf("format error: no '+' character seperator found\n"); - return -1; - } - -EATLN: - while(rdr->b != rdr->bend) { - if (*rdr->b++ == '\n') { - n = 0; - goto QUAL; - } - } - - if(err = fill((bio·SeqReader*)rdr), err) { - errorf("read: could not populate buffer\n"); - return 1; - } - goto EATLN; - -QUAL: - beg = rdr->b; - while(rdr->b != rdr->bend) { - if(*rdr->b == '\n') { - push(&rdr->seq, rdr->b - beg, beg); - beg = rdr->b + 1; - } - - if(n++ == seq->len || *rdr->b == '\0') { - err = *rdr->b == '\0' ? EOF : 0; - goto SUCCESS; - } - - rdr->b++; - } - - push(&rdr->seq, rdr->b - beg, beg); - - if(err = fill((bio·SeqReader*)rdr), err) { - errorf("read: could not populate buffer\n"); - return 1; - } - goto QUAL; - - -SUCCESS: - push(&rdr->seq, rdr->b - beg, beg); - put(&rdr->seq, '\0'); - - seq->name = rdr->seq->b; - seq->s = rdr->seq->b + rdr->seq->off - 1; - seq->q = seq->s + seq->len + 1; - - return err; -} - -// ----------------------------------------------------------------------- -// sequence writing - -error -bio·writefasta(io·Writer io, void *wtr, bio·Seq seq) -{ - int i, j, d; - char buf[2048], *b = buf, *e = arrend(buf); - - *b++ = '>'; - while(*seq.name) { - *b++ = *seq.name++; - if(b == e) { - io.write(wtr, 1, arrlen(buf), buf); - b = buf; - } - } - - for(i=0; i