aboutsummaryrefslogtreecommitdiff
path: root/src/libbio/test.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libbio/test.c')
-rw-r--r--src/libbio/test.c283
1 files changed, 283 insertions, 0 deletions
diff --git a/src/libbio/test.c b/src/libbio/test.c
new file mode 100644
index 0000000..9926764
--- /dev/null
+++ b/src/libbio/test.c
@@ -0,0 +1,283 @@
+#include <u.h>
+#include <libn.h>
+#include <libbio.h>
+
+#include <time.h>
+
+// -----------------------------------------------------------------------
+// Global data
+
+static byte *SEQ[] = {
+"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC"
+"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
+"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
+"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
+"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA"
+"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
+"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
+"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA"
+"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
+"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC"
+"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
+"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
+"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT"
+"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
+"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
+"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
+"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
+"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
+"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
+"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
+"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
+"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT"
+"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
+"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC"
+"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
+"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
+"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
+"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
+"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
+"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
+"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
+"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT",
+
+"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAAATAC"
+"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
+"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
+"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
+"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCAAAGTCA"
+"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
+"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
+"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
+"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCCCCTGCC"
+"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
+"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
+"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGCGGCCTT"
+"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
+"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
+"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
+"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
+"ACAGTCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
+"ATCCTCTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
+"CCGCTGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
+"CTGTTAATCGCAAGCTCCAGCAGCCCCCCCCCCCCTTTTCTGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
+"TCCATGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATAAAAAGAGCTCAGT"
+"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
+"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATTATTAGCGAAGAAC"
+"AAGGTTCCAACTCTCATTCCCGCCATTTACTGGGAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
+"CTCTTCAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
+"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
+"TGCATTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
+"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
+"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
+"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
+"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT",
+};
+
+
+static
+int
+my_read(Stream *s, void *buf, int n)
+{
+ return io·read(s, 1, n, buf);
+}
+
+// -----------------------------------------------------------------------
+// Point of entry for testing
+
+error
+test·newick()
+{
+ error err;
+ bio·Tree t;
+ mem·Arena *heap;
+ Stream *fd[2];
+
+ io·Peeker rdr;
+ io·Putter wtr;
+
+ bio·Node **end, **it, **list;
+
+ heap = mem·makearena(mem·sys, nil);
+ rdr = (io·Peeker){.get = (byte (*)(void *))io·getbyte, .unget = (error (*)(void *, byte))io·ungetbyte};
+ wtr = (io·Putter){.put = (error (*)(void *, byte))io·putbyte, .putstr = (int (*)(void *, string))io·putstring};
+
+ fd[0] = io·open("/home/nolln/root/data/test/zika.nwk", "r");
+ fd[1] = io·open("/home/nolln/root/data/test/zika.proc.nwk", "w");
+
+ t.h = heap;
+ t.heap = (mem·Allocator){ .alloc = (void *(*)(void *, uint, ulong))mem·arenaalloc, .free = nil, };
+
+ if (err = bio·readnewick(rdr, fd[0], &t), err) {
+ errorf("failed to read newick");
+ return 1;
+ }
+ printf("number of children: %d\n", t.root->nchild);
+
+ phylo·ladderize(t.root);
+
+ list = mem·arenaalloc(heap, t.nleaf, sizeof(**list));
+ phylo·getleafs(t, list);
+ for (it = list, end = list + t.nleaf; it != end; ++it) {
+ printf("Leaf '%s'\n", (*it)->name);
+ }
+
+ bio·Node *path[100];
+ // phylo·diameter(t, path);
+
+ printf("Loaded tree with %d leafs and %d nodes\n", t.nleaf, t.root->nnode);
+ err = bio·writenewick(t, wtr, fd[1]);
+
+ io·flush(fd[1]);
+
+ io·close(fd[0]);
+ io·close(fd[1]);
+
+ mem·freearena(heap);
+ return 0;
+}
+
+error
+test·fasta()
+{
+ error err;
+ Stream *fd;
+
+ bio·Seq seq;
+ bio·FastaReader *rdr;
+
+ clock_t t;
+
+ fd = io·open("/home/nolln/root/data/test/zika.fa", "r");
+
+ /* Benchmark against Heng */
+#if 0
+ int n, slen;
+ kseq_t *kseq;
+
+ t = clock();
+ kseq = kseq_init(fd);
+ while (kseq_read(kseq) >= 0) {
+ ++n, slen += kseq->seq.l;
+ }
+ t = clock() - t;
+ printf("heng's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC);
+
+ kseq_destroy(kseq);
+
+ io·seek(fd, 0, seek·set);
+#endif
+
+ rdr = bio·openfasta((io·Reader){.read = (int (*)(void *, int, int, void *))io·read}, fd, mem·sys, nil);
+
+ t = clock();
+ err = 0;
+ while (!err) {
+ err = bio·readfasta(rdr, &seq);
+ }
+ t = clock() - t;
+ printf("nick's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC);
+ bio·closefasta(rdr);
+
+
+ io·close(fd);
+ return err <= 0 ? 0 : 1;
+}
+
+#define asrdr(x) (int (*)(void *, int, int, void *))(x)
+error
+test·fastq()
+{
+ error err;
+ Stream *fd;
+
+ bio·Seq seq;
+ bio·FastqReader *rdr;
+
+ clock_t t;
+
+ fd = io·open("/home/nolln/root/data/test/eg.fq", "r");
+
+ rdr = bio·openfastq((io·Reader){.read = asrdr(io·read)}, fd, mem·sys, nil);
+
+ t = clock();
+ err = 0;
+ while (!err) {
+ err = bio·readfastq(rdr, &seq);
+ }
+ t = clock() - t;
+ printf("nick's fastq code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC);
+ bio·closefastq(rdr);
+
+
+ io·close(fd);
+ return err <= 0 ? 0 : 1;
+}
+
+error
+test·align()
+{
+ double f;
+ error err;
+ int i, l, n;
+
+ uint64 mem[aln·N][arrlen(SEQ)][aln·L];
+ uint64 *phi[aln·N];
+ int loc[aln·N][arrlen(SEQ)][aln·L];
+ int *pos[aln·N];
+
+ for (i = 0; i < arrlen(SEQ); i++) {
+ for (n = 0; n < aln·N; n++) {
+ phi[n] = mem[n][i];
+ pos[n] = loc[n][i];
+ }
+
+ err = aln·sketch(SEQ[i], aln·L, phi, pos);
+ }
+
+ f = 0;
+ for (n = 0; n < aln·N; n++) {
+ aln·sort(arrlen(SEQ), aln·L, (uint64*)mem[n]);
+
+ if (!memcmp(mem[n][0], mem[n][1], sizeof(uint64)*aln·L)) {
+ f += 1.;
+ printf("True : ");
+ } else {
+ printf("False: ");
+ }
+ for (i = 0; i < arrlen(SEQ); i++) {
+ printf("[");
+ for (l = 0; l < aln·L; l++) {
+ printf("%lu,", mem[n][i][l]);
+ }
+ printf("]");
+ if (i == 0) printf(" ~ ");
+ }
+ printf("\n");
+ }
+
+ printf("Fraction hits %f\n", f/aln·N);
+ return err;
+
+}
+
+error
+main()
+{
+ error err;
+
+ if (err = test·newick(), err) {
+ errorf("test fail: newick");
+ }
+
+#if 0
+ if (err = test·fasta(), err) {
+ errorf("test fail: fasta");
+ }
+
+ if (err = test·fastq(), err) {
+ errorf("test fail: fastq");
+ }
+#endif
+}
+