about | summary | refs | log | tree | commit | diff
path: root/sys/libbio/io/fasta.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libbio/io/fasta.c')
-rw-r--r-- sys/libbio/io/fasta.c 98
1 files changed, 92 insertions, 6 deletions
diff --git a/sys/libbio/io/fasta.c b/sys/libbio/io/fasta.c
index e26fd70..bb6bfc7 100644
--- a/sys/libbio/io/fasta.c
+++ b/sys/libbio/io/fasta.c
@@ -167,7 +167,7 @@ ERROR:
static
error
-readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte tok)
+readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte hdr, byte stop)
{
error err;
byte *beg;
@@ -180,8 +180,8 @@ readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte tok)
// NOTE: Can this case happen?
Assert(rdr->b != rdr->bend);
- if (*rdr->b++ != tok) {
- errorf("fasta format: expected '>', found '%c'", *rdr->b--);
+ if (*rdr->b++ != hdr) {
+ errorf("fasta/q format: expected '%c', found '%c'", hdr, *rdr->b--);
return 1;
}
@@ -214,7 +214,7 @@ SEQL:
beg = rdr->b + 1;
}
- if (*rdr->b == tok || *rdr->b == '\0') {
+ if (*rdr->b == stop || *rdr->b == '\0') {
goto SUCCESS;
}
@@ -241,9 +241,9 @@ bio·readfasta(bio·FastaReader *rdr, bio·Seq *seq)
{
error err;
- err = readfasta(rdr, seq, '>');
+ err = readfasta(rdr, seq, '>', '>');
if (err && err != EOF) {
- errorf("parse fail: could not read record");
+ errorf("parse fail: could not read sequence of record");
return err;
}
@@ -272,3 +272,89 @@ bio·closefasta(bio·FastaReader *rdr)
// -----------------------------------------------------------------------
// Fastq files
+
+struct bio·FastqReader { // Fastq reader: wraps a bio·FastaReader so the Fasta open/close/read helpers in this file can be reused through pointer casts.
+ struct bio·FastaReader; // unnamed member; its fields (b, bend, seq) are accessed directly on a bio·FastqReader in bio·readfastq. NOTE(review): a tagged unnamed struct member is a compiler extension (Plan 9 / MS style) — confirm build flags.
+};
+
+bio·FastqReader* // Opens a Fastq reader by delegating to bio·openfasta with the same four arguments.
+bio·openfastq(io·Reader file, void *f, mem·Allocator heap, void *h)
+{
+ return (bio·FastqReader*)bio·openfasta(file, f, heap, h); // the pointer returned by bio·openfasta is cast to the Fastq reader type; no Fastq-specific state is set up here
+}
+
+error // Closes a Fastq reader by delegating to bio·closefasta; returns whatever bio·closefasta returns.
+bio·closefastq(bio·FastqReader *rdr)
+{
+ return bio·closefasta((bio·FastaReader*)rdr); // cast back to the Fasta reader type that bio·closefasta takes
+}
+
+error // Reads one Fastq record from rdr into seq (name, s, q, len). Returns 0 on success, EOF if a NUL byte ends the quality data, nonzero on failure.
+bio·readfastq(bio·FastqReader *rdr, bio·Seq *seq)
+{
+ int n; // count of quality-section bytes examined so far; set to 0 once the '+' line has been skipped
+ byte *beg; // start of the run of buffer bytes not yet pushed into rdr->seq
+ error err;
+ // Header and sequence lines are parsed by readfasta, with '@' as the header byte and '+' as the stop byte.
+ err = readfasta((bio·FastaReader*)rdr, seq, '@', '+');
+ if (err) { // any nonzero result is treated as a failure here, EOF included (bio·readfasta, by contrast, passes EOF through without logging)
+ errorf("parse fail: could not read sequence of record");
+ return err;
+ }
+
+ seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off); // bytes between the sequence start and the accumulator's cursor `it`. NOTE(review): whether this includes a terminator written by readfasta is not visible here — confirm
+
+ if (*rdr->b++ != '+') { // the current byte must be the '+' separator; the pointer is advanced past it either way
+ errorf("format error: no '+' character seperator found"); // NOTE(review): "seperator" is misspelled in this runtime message
+ return -1; // NOTE(review): the other failure paths in this function return 1 or err
+ }
+ // EATLN: discard the remainder of the '+' line, up to and including its newline.
+EATLN:
+ while (rdr->b != rdr->bend) {
+ if (*rdr->b++ == '\n') {
+ n = 0;
+ goto QUAL;
+ }
+ }
+ // The buffer ran out before a newline was seen; fill is presumably the buffer refill (per the error message below) — then keep skipping.
+ if (err = fill((bio·FastaReader*)rdr), err) { // comma expression: assign fill's result to err, then test err
+ errorf("read: could not populate buffer");
+ return 1; // NOTE(review): fill's own error code is dropped here
+ }
+ goto EATLN;
+ // QUAL: copy quality bytes into rdr->seq, skipping newlines, until n reaches seq->len or a NUL byte is seen.
+QUAL:
+ beg = rdr->b;
+ while (rdr->b != rdr->bend) {
+ if (*rdr->b == '\n') {
+ push(&rdr->seq, rdr->b - beg, beg); // flush the run that precedes the newline
+ beg = rdr->b + 1; // the next run starts after the newline, so the newline itself is not stored
+ }
+ // n is incremented for every byte examined here, newline bytes included. NOTE(review): confirm that is intended for multi-line quality data.
+ if (n++ == seq->len || *rdr->b == '\0') {
+ err = *rdr->b == '\0' ? EOF : 0; // a NUL byte is reported as EOF; reaching the count is success
+ goto SUCCESS;
+ }
+
+ rdr->b++;
+ }
+
+ push(&rdr->seq, rdr->b - beg, beg); // flush the pending run before the buffer is repopulated
+
+ if (err = fill((bio·FastaReader*)rdr), err) {
+ errorf("read: could not populate buffer");
+ return 1; // NOTE(review): fill's own error code is dropped here
+ }
+ goto QUAL;
+
+ // SUCCESS: flush the final run, terminate the accumulator with NUL, and point seq at the stored name, sequence and quality.
+SUCCESS:
+ push(&rdr->seq, rdr->b - beg, beg); // NOTE(review): if the stop fired on a '\n' byte, beg is already rdr->b + 1 and this length is -1 — confirm push tolerates that or that the case cannot occur
+ put(&rdr->seq, '\0');
+
+ seq->name = rdr->seq->b; // name is the start of the accumulator
+ seq->s = rdr->seq->b + rdr->seq->off; // sequence starts `off` bytes into the accumulator
+ seq->q = seq->s + seq->len + 1; // quality is taken to start len + 1 bytes after the sequence start
+
+ return err;
+}