diff options
Diffstat (limited to 'sys/libbio/io')
-rw-r--r-- | sys/libbio/io/fasta.c | 124 |
1 files changed, 118 insertions, 6 deletions
diff --git a/sys/libbio/io/fasta.c b/sys/libbio/io/fasta.c
index e26fd70..bb6bfc7 100644
--- a/sys/libbio/io/fasta.c
+++ b/sys/libbio/io/fasta.c
@@ -167,7 +167,9 @@ ERROR:
 
 
+// Parses one record's header and sequence: the record must open with hdr, and
+// the sequence scan ends at the first stop byte or NUL.
 static error
-readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte tok)
+readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte hdr, byte stop)
 {
     error err;
     byte *beg;
@@ -180,8 +182,9 @@ readfasta(bio·FastaReader *rdr, bio·Seq *seq, byte tok)
     // NOTE: Can this case happen?
     Assert(rdr->b != rdr->bend);
 
-    if (*rdr->b++ != tok) {
-        errorf("fasta format: expected '>', found '%c'", *rdr->b--);
+    if (*rdr->b++ != hdr) {
+        // Step back first so the byte reported is the one that was tested.
+        errorf("fasta/q format: expected '%c', found '%c'", hdr, *--rdr->b);
         return 1;
     }
 
@@ -214,7 +217,7 @@ SEQL:
             beg = rdr->b + 1;
         }
 
-        if (*rdr->b == tok || *rdr->b == '\0') {
+        if (*rdr->b == stop || *rdr->b == '\0') {
             goto SUCCESS;
         }
 
@@ -241,9 +244,9 @@ bio·readfasta(bio·FastaReader *rdr, bio·Seq *seq)
 {
     error err;
 
-    err = readfasta(rdr, seq, '>');
+    err = readfasta(rdr, seq, '>', '>');
     if (err && err != EOF) {
-        errorf("parse fail: could not read record");
+        errorf("parse fail: could not read sequence of record");
         return err;
     }
 
@@ -272,3 +275,112 @@ bio·closefasta(bio·FastaReader *rdr)
 
 // -----------------------------------------------------------------------
 // Fastq files
+
+// Reader for fastq files. The fasta reader is embedded as an unnamed member,
+// so a bio·FastqReader* is handed to the fasta routines through a cast and its
+// fields (b, bend, seq) are used directly below.
+// NOTE(review): an unnamed tagged struct member is a Plan 9/MS extension, not
+// ISO C11 -- confirm the build enables it (e.g. -fplan9-extensions).
+struct bio·FastqReader {
+    struct bio·FastaReader;
+};
+
+// Opens a fastq reader by forwarding every argument to bio·openfasta.
+bio·FastqReader*
+bio·openfastq(io·Reader file, void *f, mem·Allocator heap, void *h)
+{
+    return (bio·FastqReader*)bio·openfasta(file, f, heap, h);
+}
+
+// Closes a fastq reader via bio·closefasta and returns its result.
+error
+bio·closefastq(bio·FastqReader *rdr)
+{
+    return bio·closefasta((bio·FastaReader*)rdr);
+}
+
+// Reads one fastq record into seq: header and sequence via readfasta, then the
+// '+' separator line, then the quality bytes, which are appended to rdr->seq.
+// On return seq->name, seq->s and seq->q point into rdr->seq's buffer.
+// Returns 0 on success, EOF when a NUL byte ends the quality scan, readfasta's
+// code when the sequence cannot be read, and 1 on a missing '+' or failed fill.
+error
+bio·readfastq(bio·FastqReader *rdr, bio·Seq *seq)
+{
+    int n;
+    byte *beg;
+    error err;
+
+    // '@' opens the record; readfasta stops at the '+' that ends the sequence.
+    err = readfasta((bio·FastaReader*)rdr, seq, '@', '+');
+    if (err) {
+        errorf("parse fail: could not read sequence of record");
+        return err;
+    }
+
+    // Distance from the sequence start (b + off) to rdr->seq's it pointer.
+    seq->len = rdr->seq->it - (rdr->seq->b + rdr->seq->off);
+
+    if (*rdr->b++ != '+') {
+        errorf("format error: no '+' character seperator found");
+        // Same code as the other failures here; -1 may collide with EOF.
+        return 1;
+    }
+
+    // Discard the rest of the '+' line, refilling the buffer as needed.
+EATLN:
+    while (rdr->b != rdr->bend) {
+        if (*rdr->b++ == '\n') {
+            n = 0;
+            goto QUAL;
+        }
+    }
+
+    if (err = fill((bio·FastaReader*)rdr), err) {
+        errorf("read: could not populate buffer");
+        return 1;
+    }
+    goto EATLN;
+
+    // Copy quality bytes into rdr->seq, one push per line, newlines dropped.
+    // NOTE(review): n advances on every byte, newlines included. If the byte
+    // that satisfies n == seq->len is a newline, beg is already rdr->b + 1 and
+    // the push under SUCCESS receives length -1 -- verify against what readfasta
+    // leaves in rdr->seq, and check multi-line quality strings.
+QUAL:
+    beg = rdr->b;
+    while (rdr->b != rdr->bend) {
+        if (*rdr->b == '\n') {
+            push(&rdr->seq, rdr->b - beg, beg);
+            beg = rdr->b + 1;
+        }
+
+        if (n++ == seq->len || *rdr->b == '\0') {
+            err = *rdr->b == '\0' ? EOF : 0;
+            goto SUCCESS;
+        }
+
+        rdr->b++;
+    }
+
+    // Buffer exhausted mid-quality: save what was scanned, then refill.
+    push(&rdr->seq, rdr->b - beg, beg);
+
+    if (err = fill((bio·FastaReader*)rdr), err) {
+        errorf("read: could not populate buffer");
+        return 1;
+    }
+    goto QUAL;
+
+
+SUCCESS:
+    push(&rdr->seq, rdr->b - beg, beg);
+    put(&rdr->seq, '\0');
+
+    seq->name = rdr->seq->b;
+    seq->s = rdr->seq->b + rdr->seq->off;
+    // Quality is taken to start one byte past the end of the sequence.
+    seq->q = seq->s + seq->len + 1;
+
+    return err;
+}