From a5d15fbccff504461b824b130f9cbc27905264a8 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Mon, 26 Apr 2021 16:05:17 -0700 Subject: fix(bio): buffer copying error --- Makefile | 4 +- include/libn/macro/map.h | 20 ++----- rules.mk | 2 +- sys/libbio/fasta.c | 149 ++++++++++++++++++++++++++--------------------- 4 files changed, 93 insertions(+), 82 deletions(-) diff --git a/Makefile b/Makefile index 13473fa..19db73a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Compiler, Linker, and Assembler -CC := gcc +CC := clang AR := ar AS := nasm PKG := pkg-config @@ -18,7 +18,7 @@ CINIT := $(LIB_DIR)/crt/crt1.o $(LIB_DIR)/crt/x86_64/crti.o `gcc --print-file-na CFINI := `gcc --print-file-name=crtendS.o` $(LIB_DIR)/crt/x86_64/crtn.o # Flags, Libraries and Includes -CFLAGS := -g -march=native -fno-strict-aliasing -fwrapv -fms-extensions +CFLAGS := -g -march=native -fno-strict-aliasing -fwrapv -fms-extensions -Wno-microsoft-anon-tag STATIC := -nodefaultlibs -nostartfiles -static AFLAGS := -f elf64 INCS := -I $(INC_DIR) -isystem $(INC_DIR)/vendor/libc diff --git a/include/libn/macro/map.h b/include/libn/macro/map.h index fae50ee..0743c1b 100644 --- a/include/libn/macro/map.h +++ b/include/libn/macro/map.h @@ -161,7 +161,6 @@ static const double __ac_HASH_UPPER = 0.77; return 0; #define MAP_PUT(map, key, val, hashfunc, equalfunc, resizefunc, err) \ - { \ int32 x = 0; \ if (map->n_occupied >= map->upper_bound) { \ if (map->n_buckets > (map->size << 1)) { \ @@ -214,16 +213,13 @@ static const double __ac_HASH_UPPER = 0.77; ++map->size; \ } else \ *err = 0; \ - return x; \ - } + return x; #define MAP_DEL(map, x) \ - { \ if (x != map->n_buckets && !__ac_iseither(map->flags, x)) { \ __ac_set_isdel_true(map->flags, x); \ --map->size; \ - } \ - } + } #define KEY_EXIST(m, x) (!__ac_iseither((m)->flags, (x))) @@ -232,7 +228,7 @@ static const double __ac_HASH_UPPER = 0.77; #define SET_STRUCT_BODY(key_t) \ int32 n_buckets, size, n_occupied, upper_bound; \ int32 *flags; \ - key_t *keys; \ + key_t *keys \ #define SET_MAKE(type, h, alloc) \ type *set; \ @@ -242,7 +238,7 @@ static const double __ac_HASH_UPPER = 0.77; #define SET_FREE(set, free, h) \ free(h, set->keys); \ free(h, set->flags); \ - free(h, set); + free(h, set) #define SET_RESET(set) \ if (set && set->flags) { \ @@ -343,7 +339,6 @@ static const double __ac_HASH_UPPER = 0.77; return 0; #define SET_PUT(set, key, hashfunc, equalfunc, resizefunc, err) \ - { \ int32 x = 0; \ if (set->n_occupied >= set->upper_bound) { \ if (set->n_buckets > (set->size << 1)) { \ @@ -396,13 +391,10 @@ static const double __ac_HASH_UPPER = 0.77; ++set->size; \ } else \ *err = 0; \ - return x; \ - } + return x #define SET_DEL(set, x) \ - { \ if (x != set->n_buckets && !__ac_iseither(set->flags, x)) { \ __ac_set_isdel_true(set->flags, x); \ --set->size; \ - } \ - } + } diff --git a/rules.mk b/rules.mk index 71444f5..7e5afa1 100644 --- a/rules.mk +++ b/rules.mk @@ -4,7 +4,7 @@ all: targets -debug: CFLAGS += -DDEBUG +debug: CFLAGS += -DDEBUG -g -fsanitize=address debug: targets release: CFLAGS += -O3 -mtune=native -flto -ffast-math #-DNDEBUG diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c index 484ebb2..6b40e99 100644 --- a/sys/libbio/fasta.c +++ b/sys/libbio/fasta.c @@ -5,18 +5,18 @@ #define INIT_NM_SIZE 128 #define INIT_SQ_SIZE 4096 -struct Seqbuf +struct SeqBuf { - mem·Allocator heap; - void *h; + mem·Allocator mem; + void *heap; - int len, off; + int cap, off; byte *it, b[]; }; static void -reset(struct Seqbuf *sb) +reset(struct SeqBuf *sb) { sb->off = 0; sb->it = sb->b; @@ -24,25 +24,32 @@ reset(struct Seqbuf *sb) static error -grow(struct Seqbuf **sb, int min) +grow(struct SeqBuf **sb, int min) { - struct Seqbuf *old, *new; - vlong newlen; + void* heap; + mem·Allocator mem; - old = *sb; - assert((*sb)->len <= (SIZE_MAX - 1) / 2); - newlen = MAX(16, MAX(1 + 2*(*sb)->len, (*sb)->len+min)); - assert(newlen >= (*sb)->len+min); + vlong newcap; + struct SeqBuf *old, *new; - if (new = old->heap.alloc(old->h, 1, sizeof(*new)+newlen), !new) { + old = *sb; + mem = old->mem; + heap = old->heap; + + assert((*sb)->cap <= (SIZE_MAX - 1) / 2); + newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min)); + assert(newcap >= (*sb)->cap+min); + + if (new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) { errorf("memory: could not allocate new buffer"); return 1; } - memcpy(new, old, sizeof(*sb) + (*sb)->len); - new->len = newlen; - new->it = new->b + (old->len); - old->heap.free(old->h, old); + memcpy(new, old, sizeof(*new) + (*sb)->cap); + + new->cap = newcap; + new->it = new->b + (old->it - old->b); + mem.free(heap, old); *sb = new; return 0; @@ -50,20 +57,20 @@ grow(struct Seqbuf **sb, int min) static error -put(struct Seqbuf **sb, byte c) +put(struct SeqBuf **sb, byte c) { int err; - struct Seqbuf *sq; + struct SeqBuf *sq; sq = *sb; - if (sq->it < (sq->b + sq->len)) { + if (sq->it < (sq->b + sq->cap)) { *sq->it++ = c; return 0; } if (err = grow(sb, 1), err) { errorf("memory fail: could not allocate more buffer"); - sq->heap.free(sq->h, sq); + sq->mem.free(sq->heap, sq); return 1; } @@ -73,23 +80,34 @@ put(struct Seqbuf **sb, byte c) static error -push(struct Seqbuf **sb, int n, void *buf) +push(struct SeqBuf **sb, int n, void *buf) { int d, err; - struct Seqbuf *seq; + struct SeqBuf *seq; + + char *cb = buf; + for(d=0; d < n; d++) { + if(cb[d] == 0) { + printf("ERROR: zero byte being copied @ pos %d/%d\n", d, n); + printf("ERROR: string afterwards is %s\n", cb+n+1); + printf("ERROR: string afterwards is %s\n", cb+n+2); + exit(1); + } + } seq = *sb; - if (d = seq->len - (seq->it - seq->b), d < n) { + if(d = seq->cap - (seq->it - seq->b), d < n) { assert(d > 0); if (err = grow(sb, n-d), err) { errorf("memory fail: could not allocate more buffer"); - seq->heap.free(seq->h, seq); + seq->mem.free(seq->heap, seq); return 1; } } + seq = *sb; - memcpy((*sb)->it, buf, n); - (*sb)->it += n; + memcpy(seq->it, buf, n); + seq->it += n; return 0; } @@ -99,14 +117,13 @@ push(struct Seqbuf **sb, int n, void *buf) struct bio·SeqReader { byte eof; - io·Reader file; - void *f; + io·Reader rdr; + void *io; - struct Seqbuf *seq; + struct SeqBuf *seq; /* read buffer */ - byte *b, *bend; - byte buf[4*4098]; + byte *b, *bend, buf[4*4098]; }; static @@ -115,65 +132,63 @@ fill(bio·SeqReader *rdr) { int n; // NOTE: This could lead to an infinite loop. - if (rdr->eof) { + if(rdr->eof) return 0; - } - n = rdr->file.read(rdr->f, 1, arrlen(rdr->buf), rdr->buf); - if (n < 0) { + n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf); + if(n < 0) { errorf("read: no data obtained from reader"); return 1; } rdr->b = rdr->buf; rdr->bend = rdr->b + n; - if (rdr->eof = n < arrlen(rdr->buf), rdr->eof) { + if(rdr->eof = (n < arrlen(rdr->buf)), rdr->eof) *rdr->bend++ = '\0'; - } return 0; } bio·SeqReader* -bio·openseq(io·Reader file, void *f, mem·Allocator heap, void *h) +bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap) { error err; - bio·SeqReader *rdr; + bio·SeqReader *r; - rdr = heap.alloc(h, 1, sizeof(bio·SeqReader)); - rdr->file = file; - rdr->f = f; - rdr->eof = 0; + r = mem.alloc(heap, 1, sizeof(bio·SeqReader)); + r->rdr = rdr; + r->io = io; + r->eof = 0; - rdr->seq = heap.alloc(h, 1, sizeof(*rdr->seq) + INIT_NM_SIZE + INIT_SQ_SIZE); - rdr->seq->heap = heap; - rdr->seq->h = h; - rdr->seq->it = rdr->seq->b; - rdr->seq->len = INIT_NM_SIZE + INIT_SQ_SIZE; + r->seq = mem.alloc(heap, 1, sizeof(*r->seq) + INIT_NM_SIZE + INIT_SQ_SIZE); + r->seq->mem = mem; + r->seq->heap = heap; + r->seq->it = r->seq->b; + r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE; - if (err = fill(rdr), err) { + if (err=fill(r), err) { errorf("fill: could not populate buffer"); goto ERROR; } - return rdr; + return r; ERROR: - heap.free(h, rdr->seq); - heap.free(h, rdr); + mem.free(heap, r->seq); + mem.free(heap, r); return nil; } error bio·closeseq(bio·SeqReader *rdr) { - mem·Allocator heap; - void *h; + mem·Allocator mem; + void *heap; - heap = rdr->seq->heap; - h = rdr->seq->h; + mem = rdr->seq->mem; + heap = rdr->seq->heap; - heap.free(h, rdr->seq); - heap.free(h, rdr); + mem.free(heap, rdr->seq); + mem.free(heap, rdr); return 0; } @@ -206,10 +221,9 @@ NAME: goto SEQ; } } - push(&rdr->seq, rdr->b - beg, beg); - if(err = fill(rdr), err) { + if(err=fill(rdr), err) { errorf("read: could not populate buffer"); return 1; } @@ -219,7 +233,7 @@ SEQ: put(&rdr->seq, '\0'); rdr->seq->off = rdr->seq->it - rdr->seq->b; -SEQL: +SEQLOOP: beg = rdr->b; while(rdr->b != rdr->bend) { if(*rdr->b == '\n') { @@ -232,14 +246,19 @@ SEQL: rdr->b++; } + for(byte *cb = rdr->seq->b+rdr->seq->off; cb != rdr->seq->it; ++cb) { + if(*cb == 0) { + printf("ERROR @ pos=%ld: Found zero byte\n", cb - rdr->seq->b+rdr->seq->off); + } + } push(&rdr->seq, rdr->b - beg, beg); - if(err = fill(rdr), err) { + if(err=fill(rdr), err) { errorf("read: could not populate buffer"); return 1; } - goto SEQL; + goto SEQLOOP; SUCCESS: push(&rdr->seq, rdr->b - beg, beg); @@ -265,7 +284,7 @@ bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq) seq->name = rdr->seq->b; seq->s = rdr->seq->b + rdr->seq->off; - seq->len = rdr->seq->it - seq->s; + seq->len = rdr->seq->it - seq->s - 1; seq->q = nil; return err; -- cgit v1.2.1