#include #include #include #include // ----------------------------------------------------------------------- // Global data static byte *SEQ[] = { "GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC" "CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" "TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" "ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" "TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA" "GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" "GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" "CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA" "ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" "CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC" "TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" "CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" "TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT" "GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" "GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" "TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" "GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" "ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" "ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" "CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" "CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" "TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT" "TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" "GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC" "AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" "CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" "AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" "TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" "ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" "TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" "TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" "CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT", "GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAAATAC" "CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" "TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" "ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" "TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCAAAGTCA" "GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" "GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" "ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" "CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCCCCTGCC" "TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" "CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" "TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGCGGCCTT" "GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" "GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" "TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" "GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" "ACAGTCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" "ATCCTCTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" "CCGCTGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" "CTGTTAATCGCAAGCTCCAGCAGCCCCCCCCCCCCTTTTCTGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" "TCCATGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATAAAAAGAGCTCAGT" "TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" "GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATTATTAGCGAAGAAC" "AAGGTTCCAACTCTCATTCCCGCCATTTACTGGGAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" "CTCTTCAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" "AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" "TGCATTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" "ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" "TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" "TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" "CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT", }; static int my_read(Stream *s, void *buf, int n) { return io·read(s, 1, n, buf); } // ----------------------------------------------------------------------- // Point of entry for testing error test·newick() { error err; bio·Tree t; mem·Arena *heap; Stream *fd[2]; io·Peeker rdr; io·Putter wtr; bio·Node **end, **it, **list; heap = mem·makearena(mem·sys, nil); rdr = (io·Peeker){.get = (byte (*)(void *))io·getbyte, .unget = (error (*)(void *, byte))io·ungetbyte}; wtr = (io·Putter){.put = (error (*)(void *, byte))io·putbyte, .putstr = (int (*)(void *, string))io·putstring}; fd[0] = io·open("/home/nolln/root/data/test/zika.nwk", "r"); fd[1] = io·open("/home/nolln/root/data/test/zika.proc.nwk", "w"); t.h = heap; t.heap = (mem·Allocator){ .alloc = (void *(*)(void *, uint, ulong))mem·arenaalloc, .free = nil, }; if (err = bio·readnewick(rdr, fd[0], &t), err) { errorf("failed to read newick"); return 1; } printf("number of children: %d\n", t.root->nchild); phylo·ladderize(t.root); list = mem·arenaalloc(heap, t.nleaf, sizeof(**list)); phylo·getleafs(t, list); for (it = list, end = list + t.nleaf; it != end; ++it) { printf("Leaf '%s'\n", (*it)->name); } bio·Node *path[100]; // phylo·diameter(t, path); printf("Loaded tree with %d leafs and %d nodes\n", t.nleaf, t.root->nnode); err = bio·writenewick(t, wtr, fd[1]); io·flush(fd[1]); io·close(fd[0]); io·close(fd[1]); mem·freearena(heap); return 0; } error test·fasta() { error err; Stream *fd; bio·Seq seq; bio·FastaReader *rdr; clock_t t; fd = io·open("/home/nolln/root/data/test/zika.fa", "r"); /* Benchmark against Heng */ #if 0 int n, slen; kseq_t *kseq; t = clock(); kseq = kseq_init(fd); while (kseq_read(kseq) >= 0) { ++n, slen += kseq->seq.l; } t = clock() - t; printf("heng's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); kseq_destroy(kseq); io·seek(fd, 0, seek·set); #endif rdr = bio·openfasta((io·Reader){.read = (int (*)(void *, int, int, void *))io·read}, fd, mem·sys, nil); t = clock(); err = 0; while (!err) { err = bio·readfasta(rdr, &seq); } t = clock() - t; printf("nick's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); bio·closefasta(rdr); io·close(fd); return err <= 0 ? 0 : 1; } #define asrdr(x) (int (*)(void *, int, int, void *))(x) error test·fastq() { error err; Stream *fd; bio·Seq seq; bio·FastqReader *rdr; clock_t t; fd = io·open("/home/nolln/root/data/test/eg.fq", "r"); rdr = bio·openfastq((io·Reader){.read = asrdr(io·read)}, fd, mem·sys, nil); t = clock(); err = 0; while (!err) { err = bio·readfastq(rdr, &seq); } t = clock() - t; printf("nick's fastq code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); bio·closefastq(rdr); io·close(fd); return err <= 0 ? 0 : 1; } error test·align() { double f; error err; int i, l, n; uint64 mem[aln·N][arrlen(SEQ)][aln·L]; uint64 *phi[aln·N]; int loc[aln·N][arrlen(SEQ)][aln·L]; int *pos[aln·N]; for (i = 0; i < arrlen(SEQ); i++) { for (n = 0; n < aln·N; n++) { phi[n] = mem[n][i]; pos[n] = loc[n][i]; } err = aln·sketch(SEQ[i], aln·L, phi, pos); } f = 0; for (n = 0; n < aln·N; n++) { aln·sort(arrlen(SEQ), aln·L, (uint64*)mem[n]); if (!memcmp(mem[n][0], mem[n][1], sizeof(uint64)*aln·L)) { f += 1.; printf("True : "); } else { printf("False: "); } for (i = 0; i < arrlen(SEQ); i++) { printf("["); for (l = 0; l < aln·L; l++) { printf("%lu,", mem[n][i][l]); } printf("]"); if (i == 0) printf(" ~ "); } printf("\n"); } printf("Fraction hits %f\n", f/aln·N); return err; } error main() { error err; if (err = test·newick(), err) { errorf("test fail: newick"); } #if 0 if (err = test·fasta(), err) { errorf("test fail: fasta"); } if (err = test·fastq(), err) { errorf("test fail: fastq"); } #endif }