From 5d3642b8ef920316693031d2ea34b9def0b1abc5 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 22 Apr 2021 08:55:35 -0700 Subject: chore: rm unfinished projects --- sys/libbio/io/fasta.c | 360 -------------------------------------------------- 1 file changed, 360 deletions(-) delete mode 100644 sys/libbio/io/fasta.c (limited to 'sys/libbio/io/fasta.c') diff --git a/sys/libbio/io/fasta.c b/sys/libbio/io/fasta.c deleted file mode 100644 index 28c9ca4..0000000 --- a/sys/libbio/io/fasta.c +++ /dev/null @@ -1,360 +0,0 @@ -#include -#include -#include - -#define INIT_NM_SIZE 128 -#define INIT_SQ_SIZE 4096 - -struct Seqbuf -{ - mem·Allocator heap; - void *h; - - int len; - int off; - byte *it; - byte b[]; -}; - -static -void -reset(struct Seqbuf *sb) -{ - sb->off = 0; - sb->it = sb->b; -} - -static -error -grow(struct Seqbuf **sb, int min) -{ - struct Seqbuf *old, *new; - vlong newlen; - - old = *sb; - assert((*sb)->len <= (SIZE_MAX - 1) / 2); - newlen = MAX(16, MAX(1 + 2*(*sb)->len, (*sb)->len+min)); - assert(newlen >= (*sb)->len+min); - - if (new = old->heap.alloc(old->h, 1, sizeof(*new)+newlen), !new) { - errorf("memory: could not allocate new buffer"); - return 1; - } - - memcpy(new, old, sizeof(*sb) + (*sb)->len); - new->len = newlen; - new->it = new->b + (old->len); - old->heap.free(old->h, old); - - *sb = new; - return 0; -} - -static -error -put(struct Seqbuf **sb, byte c) -{ - int err; - struct Seqbuf *sq; - - sq = *sb; - if (sq->it < (sq->b + sq->len)) { - *sq->it++ = c; - return 0; - } - - if (err = grow(sb, 1), err) { - errorf("memory fail: could not allocate more buffer"); - sq->heap.free(sq->h, sq); - return 1; - } - - *((*sb)->it++) = c; - return 0; -} - -static -error -push(struct Seqbuf **sb, int n, void *buf) -{ - int d, err; - struct Seqbuf *seq; - - seq = *sb; - if (d = seq->len - (seq->it - seq->b), d < n) { - assert(d > 0); - if (err = grow(sb, n-d), err) { - errorf("memory fail: could not allocate more buffer"); - seq->heap.free(seq->h, seq); - return 1; - } - } - - memcpy((*sb)->it, buf, n); - (*sb)->it += n; - - return 0; -} - -// ----------------------------------------------------------------------- -// Fasta files - -struct bio·FastaReader { - byte eof; - io·Reader file; - void *f; - - struct Seqbuf *seq; - - /* read buffer */ - byte *b, *bend; - byte buf[4*4098]; -}; - -static -error -fill(bio·FastaReader *rdr) -{ - int n; - // NOTE: This could lead to an infinite loop. - if (rdr->eof) { - return 0; - } - - n = rdr->file.read(rdr->f, 1, arrlen(rdr->buf), rdr->buf); - if (n < 0) { - errorf("read: no data obtained from reader"); - return 1; - } - rdr->b = rdr->buf; - rdr->bend = rdr->b + n; - if (rdr->eof = n < arrlen(rdr->buf), rdr->eof) { - *rdr->bend++ = '\0'; - } - - return 0; -} - -bio·FastaReader* -bio·openfasta(io·Reader file, void *f, mem·Allocator heap, void *h) -{ - error err; - bio·FastaReader *rdr; - - rdr = heap.alloc(h, 1, sizeof(bio·FastaReader)); - rdr->file = file; - rdr->f = f; - rdr->eof = 0; - - rdr->seq = heap.alloc(h, 1, sizeof(*rdr->seq) + INIT_NM_SIZE + INIT_SQ_SIZE); - rdr->seq->heap = heap; - rdr->seq->h = h; - rdr->seq->it = rdr->seq->b; - rdr->seq->len = INIT_NM_SIZE + INIT_SQ_SIZE; - - if (err = fill(rdr), err) { - errorf("fill: could not populate buffer"); - goto ERROR; - } - - return rdr; - -ERROR: - heap.free(h, rdr->seq); - heap.free(h, rdr); - return nil; -} - -static -error -readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte hdr, byte stop) -{ - error err; - byte *beg; - - if (rdr->eof && rdr->b == rdr->bend-1) { - return EOF; - } - - reset(rdr->seq); - - // NOTE: Can this case happen? - assert(rdr->b != rdr->bend); - if (*rdr->b++ != hdr) { - errorf("fasta/q format: expected '%c', found '%c'", hdr, *rdr->b--); - return 1; - } - -NAME: - beg = rdr->b; - while (rdr->b != rdr->bend) { - if (*rdr->b++ == '\n') { - push(&rdr->seq, (rdr->b - 1) - beg, beg); - goto SEQ; - } - } - - push(&rdr->seq, rdr->b - beg, beg); - - if (err = fill(rdr), err) { - errorf("read: could not populate buffer"); - return 1; - } - goto NAME; - -SEQ: - put(&rdr->seq, '\0'); - rdr->seq->off = rdr->seq->it - rdr->seq->b; - -SEQL: - beg = rdr->b; - while (rdr->b != rdr->bend) { - if (*rdr->b == '\n') { - push(&rdr->seq, rdr->b - beg, beg); - beg = rdr->b + 1; - } - - if (*rdr->b == stop || *rdr->b == '\0') { - goto SUCCESS; - } - - rdr->b++; - } - - push(&rdr->seq, rdr->b - beg, beg); - - if (err = fill(rdr), err) { - errorf("read: could not populate buffer"); - return 1; - } - goto SEQL; - -SUCCESS: - push(&rdr->seq, rdr->b - beg, beg); - put(&rdr->seq, '\0'); - - return 0; -} - -error -bio·readfasta(bio·FastaReader *rdr, bio·Seq *seq) -{ - error err; - - err = readfasta(rdr, seq, '>', '>'); - if (err && err != EOF) { - errorf("parse fail: could not read sequence of record"); - return err; - } - - seq->name = rdr->seq->b; - seq->s = rdr->seq->b + rdr->seq->off; - seq->len = rdr->seq->it - seq->s; - seq->q = nil; - - return err; -} - -error -bio·closefasta(bio·FastaReader *rdr) -{ - mem·Allocator heap; - void *h; - - heap = rdr->seq->heap; - h = rdr->seq->h; - - heap.free(h, rdr->seq); - heap.free(h, rdr); - - return 0; -} - -// ----------------------------------------------------------------------- -// Fastq files - -struct bio·FastqReader { - struct bio·FastaReader; -}; - -bio·FastqReader* -bio·openfastq(io·Reader file, void *f, mem·Allocator heap, void *h) -{ - return (bio·FastqReader*)bio·openfasta(file, f, heap, h); -} - -error -bio·closefastq(bio·FastqReader *rdr) -{ - return bio·closefasta((bio·FastaReader*)rdr); -} - -error -bio·readfastq(bio·FastqReader *rdr, bio·Seq *seq) -{ - int n; - byte *beg; - error err; - - err = readfasta((bio·FastaReader*)rdr, seq, '@', '+'); - if (err) { - errorf("parse fail: could not read sequence of record"); - return err; - } - - seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off); - - if (*rdr->b++ != '+') { - errorf("format error: no '+' character seperator found"); - return -1; - } - -EATLN: - while (rdr->b != rdr->bend) { - if (*rdr->b++ == '\n') { - n = 0; - goto QUAL; - } - } - - if (err = fill((bio·FastaReader*)rdr), err) { - errorf("read: could not populate buffer"); - return 1; - } - goto EATLN; - -QUAL: - beg = rdr->b; - while (rdr->b != rdr->bend) { - if (*rdr->b == '\n') { - push(&rdr->seq, rdr->b - beg, beg); - beg = rdr->b + 1; - } - - if (n++ == seq->len || *rdr->b == '\0') { - err = *rdr->b == '\0' ? EOF : 0; - goto SUCCESS; - } - - rdr->b++; - } - - push(&rdr->seq, rdr->b - beg, beg); - - if (err = fill((bio·FastaReader*)rdr), err) { - errorf("read: could not populate buffer"); - return 1; - } - goto QUAL; - - -SUCCESS: - push(&rdr->seq, rdr->b - beg, beg); - put(&rdr->seq, '\0'); - - seq->name = rdr->seq->b; - seq->s = rdr->seq->b + rdr->seq->off; - seq->q = seq->s + seq->len + 1; - - return err; -} -- cgit v1.2.1