From a5d15fbccff504461b824b130f9cbc27905264a8 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Mon, 26 Apr 2021 16:05:17 -0700 Subject: fix(bio): buffer copying error --- sys/libbio/fasta.c | 149 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 65 deletions(-) (limited to 'sys/libbio/fasta.c') diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c index 484ebb2..6b40e99 100644 --- a/sys/libbio/fasta.c +++ b/sys/libbio/fasta.c @@ -5,18 +5,18 @@ #define INIT_NM_SIZE 128 #define INIT_SQ_SIZE 4096 -struct Seqbuf +struct SeqBuf { - mem·Allocator heap; - void *h; + mem·Allocator mem; + void *heap; - int len, off; + int cap, off; byte *it, b[]; }; static void -reset(struct Seqbuf *sb) +reset(struct SeqBuf *sb) { sb->off = 0; sb->it = sb->b; @@ -24,25 +24,32 @@ reset(struct Seqbuf *sb) static error -grow(struct Seqbuf **sb, int min) +grow(struct SeqBuf **sb, int min) { - struct Seqbuf *old, *new; - vlong newlen; + void* heap; + mem·Allocator mem; - old = *sb; - assert((*sb)->len <= (SIZE_MAX - 1) / 2); - newlen = MAX(16, MAX(1 + 2*(*sb)->len, (*sb)->len+min)); - assert(newlen >= (*sb)->len+min); + vlong newcap; + struct SeqBuf *old, *new; - if (new = old->heap.alloc(old->h, 1, sizeof(*new)+newlen), !new) { + old = *sb; + mem = old->mem; + heap = old->heap; + + assert((*sb)->cap <= (SIZE_MAX - 1) / 2); + newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min)); + assert(newcap >= (*sb)->cap+min); + + if (new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) { errorf("memory: could not allocate new buffer"); return 1; } - memcpy(new, old, sizeof(*sb) + (*sb)->len); - new->len = newlen; - new->it = new->b + (old->len); - old->heap.free(old->h, old); + memcpy(new, old, sizeof(*new) + (*sb)->cap); + + new->cap = newcap; + new->it = new->b + (old->it - old->b); + mem.free(heap, old); *sb = new; return 0; @@ -50,20 +57,20 @@ grow(struct Seqbuf **sb, int min) static error -put(struct Seqbuf **sb, byte c) +put(struct SeqBuf **sb, byte c) { int err; - struct Seqbuf *sq; + struct SeqBuf *sq; sq = *sb; - if (sq->it < (sq->b + sq->len)) { + if (sq->it < (sq->b + sq->cap)) { *sq->it++ = c; return 0; } if (err = grow(sb, 1), err) { errorf("memory fail: could not allocate more buffer"); - sq->heap.free(sq->h, sq); + sq->mem.free(sq->heap, sq); return 1; } @@ -73,23 +80,34 @@ put(struct Seqbuf **sb, byte c) static error -push(struct Seqbuf **sb, int n, void *buf) +push(struct SeqBuf **sb, int n, void *buf) { int d, err; - struct Seqbuf *seq; + struct SeqBuf *seq; + + char *cb = buf; + for(d=0; d < n; d++) { + if(cb[d] == 0) { + printf("ERROR: zero byte being copied @ pos %d/%d\n", d, n); + printf("ERROR: string afterwards is %s\n", cb+n+1); + printf("ERROR: string afterwards is %s\n", cb+n+2); + exit(1); + } + } seq = *sb; - if (d = seq->len - (seq->it - seq->b), d < n) { + if(d = seq->cap - (seq->it - seq->b), d < n) { assert(d > 0); if (err = grow(sb, n-d), err) { errorf("memory fail: could not allocate more buffer"); - seq->heap.free(seq->h, seq); + seq->mem.free(seq->heap, seq); return 1; } } + seq = *sb; - memcpy((*sb)->it, buf, n); - (*sb)->it += n; + memcpy(seq->it, buf, n); + seq->it += n; return 0; } @@ -99,14 +117,13 @@ push(struct Seqbuf **sb, int n, void *buf) struct bio·SeqReader { byte eof; - io·Reader file; - void *f; + io·Reader rdr; + void *io; - struct Seqbuf *seq; + struct SeqBuf *seq; /* read buffer */ - byte *b, *bend; - byte buf[4*4098]; + byte *b, *bend, buf[4*4098]; }; static @@ -115,65 +132,63 @@ fill(bio·SeqReader *rdr) { int n; // NOTE: This could lead to an infinite loop. - if (rdr->eof) { + if(rdr->eof) return 0; - } - n = rdr->file.read(rdr->f, 1, arrlen(rdr->buf), rdr->buf); - if (n < 0) { + n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf); + if(n < 0) { errorf("read: no data obtained from reader"); return 1; } rdr->b = rdr->buf; rdr->bend = rdr->b + n; - if (rdr->eof = n < arrlen(rdr->buf), rdr->eof) { + if(rdr->eof = (n < arrlen(rdr->buf)), rdr->eof) *rdr->bend++ = '\0'; - } return 0; } bio·SeqReader* -bio·openseq(io·Reader file, void *f, mem·Allocator heap, void *h) +bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap) { error err; - bio·SeqReader *rdr; + bio·SeqReader *r; - rdr = heap.alloc(h, 1, sizeof(bio·SeqReader)); - rdr->file = file; - rdr->f = f; - rdr->eof = 0; + r = mem.alloc(heap, 1, sizeof(bio·SeqReader)); + r->rdr = rdr; + r->io = io; + r->eof = 0; - rdr->seq = heap.alloc(h, 1, sizeof(*rdr->seq) + INIT_NM_SIZE + INIT_SQ_SIZE); - rdr->seq->heap = heap; - rdr->seq->h = h; - rdr->seq->it = rdr->seq->b; - rdr->seq->len = INIT_NM_SIZE + INIT_SQ_SIZE; + r->seq = mem.alloc(heap, 1, sizeof(*r->seq) + INIT_NM_SIZE + INIT_SQ_SIZE); + r->seq->mem = mem; + r->seq->heap = heap; + r->seq->it = r->seq->b; + r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE; - if (err = fill(rdr), err) { + if (err=fill(r), err) { errorf("fill: could not populate buffer"); goto ERROR; } - return rdr; + return r; ERROR: - heap.free(h, rdr->seq); - heap.free(h, rdr); + mem.free(heap, r->seq); + mem.free(heap, r); return nil; } error bio·closeseq(bio·SeqReader *rdr) { - mem·Allocator heap; - void *h; + mem·Allocator mem; + void *heap; - heap = rdr->seq->heap; - h = rdr->seq->h; + mem = rdr->seq->mem; + heap = rdr->seq->heap; - heap.free(h, rdr->seq); - heap.free(h, rdr); + mem.free(heap, rdr->seq); + mem.free(heap, rdr); return 0; } @@ -206,10 +221,9 @@ NAME: goto SEQ; } } - push(&rdr->seq, rdr->b - beg, beg); - if(err = fill(rdr), err) { + if(err=fill(rdr), err) { errorf("read: could not populate buffer"); return 1; } @@ -219,7 +233,7 @@ SEQ: put(&rdr->seq, '\0'); rdr->seq->off = rdr->seq->it - rdr->seq->b; -SEQL: +SEQLOOP: beg = rdr->b; while(rdr->b != rdr->bend) { if(*rdr->b == '\n') { @@ -232,14 +246,19 @@ SEQL: rdr->b++; } + for(byte *cb = rdr->seq->b+rdr->seq->off; cb != rdr->seq->it; ++cb) { + if(*cb == 0) { + printf("ERROR @ pos=%ld: Found zero byte\n", cb - rdr->seq->b+rdr->seq->off); + } + } push(&rdr->seq, rdr->b - beg, beg); - if(err = fill(rdr), err) { + if(err=fill(rdr), err) { errorf("read: could not populate buffer"); return 1; } - goto SEQL; + goto SEQLOOP; SUCCESS: push(&rdr->seq, rdr->b - beg, beg); @@ -265,7 +284,7 @@ bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq) seq->name = rdr->seq->b; seq->s = rdr->seq->b + rdr->seq->off; - seq->len = rdr->seq->it - seq->s; + seq->len = rdr->seq->it - seq->s - 1; seq->q = nil; return err; -- cgit v1.2.1