aboutsummaryrefslogtreecommitdiff
path: root/sys/libbio/fasta.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2021-09-08 15:51:40 -0700
committerNicholas Noll <nbnoll@eml.cc>2021-09-08 15:51:53 -0700
commitc0a7b53baf2a6e7bf9bc1fbec7ac05e43ac59154 (patch)
tree385b83f4d505da960820932f7357eb46b31abac3 /sys/libbio/fasta.c
parent8c0475f97675245d1bcbb112dc79c9f490fad361 (diff)
checkin
Diffstat (limited to 'sys/libbio/fasta.c')
-rw-r--r--sys/libbio/fasta.c95
1 files changed, 65 insertions, 30 deletions
diff --git a/sys/libbio/fasta.c b/sys/libbio/fasta.c
index 6b40e99..078325a 100644
--- a/sys/libbio/fasta.c
+++ b/sys/libbio/fasta.c
@@ -40,8 +40,8 @@ grow(struct SeqBuf **sb, int min)
newcap = MAX(16, MAX(1 + 2*(*sb)->cap, (*sb)->cap+min));
assert(newcap >= (*sb)->cap+min);
- if (new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) {
- errorf("memory: could not allocate new buffer");
+ if(new = mem.alloc(heap, 1, sizeof(*new)+newcap), !new) {
+ errorf("memory: could not allocate new buffer\n");
return 1;
}
@@ -63,13 +63,13 @@ put(struct SeqBuf **sb, byte c)
struct SeqBuf *sq;
sq = *sb;
- if (sq->it < (sq->b + sq->cap)) {
+ if(sq->it < (sq->b + sq->cap)) {
*sq->it++ = c;
return 0;
}
- if (err = grow(sb, 1), err) {
- errorf("memory fail: could not allocate more buffer");
+ if(err = grow(sb, 1), err) {
+ errorf("memory fail: could not allocate more buffer\n");
sq->mem.free(sq->heap, sq);
return 1;
}
@@ -85,21 +85,11 @@ push(struct SeqBuf **sb, int n, void *buf)
int d, err;
struct SeqBuf *seq;
- char *cb = buf;
- for(d=0; d < n; d++) {
- if(cb[d] == 0) {
- printf("ERROR: zero byte being copied @ pos %d/%d\n", d, n);
- printf("ERROR: string afterwards is %s\n", cb+n+1);
- printf("ERROR: string afterwards is %s\n", cb+n+2);
- exit(1);
- }
- }
-
seq = *sb;
- if(d = seq->cap - (seq->it - seq->b), d < n) {
- assert(d > 0);
+ if(d = (seq->cap - (seq->it - seq->b)), d < n) {
+ assert(d >= 0);
if (err = grow(sb, n-d), err) {
- errorf("memory fail: could not allocate more buffer");
+ errorf("memory fail: could not allocate more buffer\n");
seq->mem.free(seq->heap, seq);
return 1;
}
@@ -137,7 +127,7 @@ fill(bio·SeqReader *rdr)
n = rdr->rdr.read(rdr->io, 1, arrlen(rdr->buf), rdr->buf);
if(n < 0) {
- errorf("read: no data obtained from reader");
+ errorf("read: no data obtained from reader\n");
return 1;
}
rdr->b = rdr->buf;
@@ -166,7 +156,7 @@ bio·openseq(io·Reader rdr, void *io, mem·Allocator mem, void *heap)
r->seq->cap = INIT_NM_SIZE + INIT_SQ_SIZE;
if (err=fill(r), err) {
- errorf("fill: could not populate buffer");
+ errorf("fill: could not populate buffer\n");
goto ERROR;
}
@@ -209,7 +199,7 @@ readfasta(bio·SeqReader *rdr, bio·Seq *seq, byte hdr, byte stop)
// NOTE: Can this case happen?
assert(rdr->b != rdr->bend);
if(*rdr->b++ != hdr) {
- errorf("fasta/q format: expected '%c', found '%c'", hdr, *rdr->b--);
+ errorf("fasta/q format: expected '%c', found '%c'\n", hdr, *rdr->b--);
return 1;
}
@@ -224,7 +214,7 @@ NAME:
push(&rdr->seq, rdr->b - beg, beg);
if(err=fill(rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto NAME;
@@ -246,16 +236,18 @@ SEQLOOP:
rdr->b++;
}
+#if 0
for(byte *cb = rdr->seq->b+rdr->seq->off; cb != rdr->seq->it; ++cb) {
if(*cb == 0) {
printf("ERROR @ pos=%ld: Found zero byte\n", cb - rdr->seq->b+rdr->seq->off);
}
}
+#endif
push(&rdr->seq, rdr->b - beg, beg);
if(err=fill(rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto SEQLOOP;
@@ -278,13 +270,13 @@ bio·readfasta(bio·SeqReader *rdr, bio·Seq *seq)
err = readfasta(rdr, seq, '>', '>');
if(err && err != EOF) {
- errorf("parse fail: could not read sequence of record");
+ errorf("parse fail: could not read sequence of record\n");
return err;
}
seq->name = rdr->seq->b;
seq->s = rdr->seq->b + rdr->seq->off;
- seq->len = rdr->seq->it - seq->s - 1;
+ seq->len = rdr->seq->it - seq->s - 1; // shift by 1 as we pushed a '0' to end
seq->q = nil;
return err;
@@ -303,14 +295,14 @@ bio·readfastq(bio·SeqReader *rdr, bio·Seq *seq)
err = readfasta(rdr, seq, '@', '+');
if(err) {
- errorf("parse fail: could not read sequence of record");
+ errorf("parse fail: could not read sequence of record\n");
return err;
}
seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off);
if(*rdr->b++ != '+') {
- errorf("format error: no '+' character seperator found");
+ errorf("format error: no '+' character seperator found\n");
return -1;
}
@@ -323,7 +315,7 @@ EATLN:
}
if(err = fill((bio·SeqReader*)rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto EATLN;
@@ -347,7 +339,7 @@ QUAL:
push(&rdr->seq, rdr->b - beg, beg);
if(err = fill((bio·SeqReader*)rdr), err) {
- errorf("read: could not populate buffer");
+ errorf("read: could not populate buffer\n");
return 1;
}
goto QUAL;
@@ -358,8 +350,51 @@ SUCCESS:
put(&rdr->seq, '\0');
seq->name = rdr->seq->b;
- seq->s = rdr->seq->b + rdr->seq->off;
+ seq->s = rdr->seq->b + rdr->seq->off - 1;
seq->q = seq->s + seq->len + 1;
return err;
}
+
+// -----------------------------------------------------------------------
+// sequence writing
+
+error
+bio·writefasta(io·Writer io, void *wtr, bio·Seq seq)
+{
+ int i, j, d;
+ char buf[2048], *b = buf, *e = arrend(buf);
+
+ *b++ = '>';
+ while(*seq.name) {
+ *b++ = *seq.name++;
+ if(b == e) {
+ io.write(wtr, 1, arrlen(buf), buf);
+ b = buf;
+ }
+ }
+
+ for(i=0; i<seq.len; i = j) {
+ j = MIN(i+70, seq.len);
+ d = j - i;
+ if((e-b) <= d+1) {
+ io.write(wtr, 1, b-buf, buf);
+ b = buf;
+ }
+ *b++ = '\n';
+ memcpy(b, seq.s+i, d);
+ b += d;
+ }
+
+ *b++ = '\n';
+ io.write(wtr, 1, b-buf, buf);
+
+ return 0;
+}
+
+error
+bio·writefastq(io·Writer io, void *wtr, bio·Seq seq)
+{
+ panicf("need to implement");
+ return 1;
+}