From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libbio/test.c | 283 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 283 insertions(+) create mode 100644 src/libbio/test.c (limited to 'src/libbio/test.c') diff --git a/src/libbio/test.c b/src/libbio/test.c new file mode 100644 index 0000000..9926764 --- /dev/null +++ b/src/libbio/test.c @@ -0,0 +1,283 @@ +#include +#include +#include + +#include + +// ----------------------------------------------------------------------- +// Global data + +static byte *SEQ[] = { +"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC" +"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" +"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" +"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" +"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA" +"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" +"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" +"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA" +"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" +"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC" +"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" +"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" +"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT" +"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" +"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" +"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" +"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" +"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" +"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" +"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" +"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" +"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT" +"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" +"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC" +"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" +"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" +"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" +"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" +"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" +"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" +"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" +"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT", + +"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAAATAC" +"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" +"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" +"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" +"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCAAAGTCA" +"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" +"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" +"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" +"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCCCCTGCC" +"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" +"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" +"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGCGGCCTT" +"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" +"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" +"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" +"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" +"ACAGTCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" +"ATCCTCTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" +"CCGCTGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" +"CTGTTAATCGCAAGCTCCAGCAGCCCCCCCCCCCCTTTTCTGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" +"TCCATGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATAAAAAGAGCTCAGT" +"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" +"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATTATTAGCGAAGAAC" +"AAGGTTCCAACTCTCATTCCCGCCATTTACTGGGAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" +"CTCTTCAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" +"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" +"TGCATTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" +"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" +"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" +"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" +"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT", +}; + + +static +int +my_read(Stream *s, void *buf, int n) +{ + return io·read(s, 1, n, buf); +} + +// ----------------------------------------------------------------------- +// Point of entry for testing + +error +test·newick() +{ + error err; + bio·Tree t; + mem·Arena *heap; + Stream *fd[2]; + + io·Peeker rdr; + io·Putter wtr; + + bio·Node **end, **it, **list; + + heap = mem·makearena(mem·sys, nil); + rdr = (io·Peeker){.get = (byte (*)(void *))io·getbyte, .unget = (error (*)(void *, byte))io·ungetbyte}; + wtr = (io·Putter){.put = (error (*)(void *, byte))io·putbyte, .putstr = (int (*)(void *, string))io·putstring}; + + fd[0] = io·open("/home/nolln/root/data/test/zika.nwk", "r"); + fd[1] = io·open("/home/nolln/root/data/test/zika.proc.nwk", "w"); + + t.h = heap; + t.heap = (mem·Allocator){ .alloc = (void *(*)(void *, uint, ulong))mem·arenaalloc, .free = nil, }; + + if (err = bio·readnewick(rdr, fd[0], &t), err) { + errorf("failed to read newick"); + return 1; + } + printf("number of children: %d\n", t.root->nchild); + + phylo·ladderize(t.root); + + list = mem·arenaalloc(heap, t.nleaf, sizeof(**list)); + phylo·getleafs(t, list); + for (it = list, end = list + t.nleaf; it != end; ++it) { + printf("Leaf '%s'\n", (*it)->name); + } + + bio·Node *path[100]; + // phylo·diameter(t, path); + + printf("Loaded tree with %d leafs and %d nodes\n", t.nleaf, t.root->nnode); + err = bio·writenewick(t, wtr, fd[1]); + + io·flush(fd[1]); + + io·close(fd[0]); + io·close(fd[1]); + + mem·freearena(heap); + return 0; +} + +error +test·fasta() +{ + error err; + Stream *fd; + + bio·Seq seq; + bio·FastaReader *rdr; + + clock_t t; + + fd = io·open("/home/nolln/root/data/test/zika.fa", "r"); + + /* Benchmark against Heng */ +#if 0 + int n, slen; + kseq_t *kseq; + + t = clock(); + kseq = kseq_init(fd); + while (kseq_read(kseq) >= 0) { + ++n, slen += kseq->seq.l; + } + t = clock() - t; + printf("heng's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); + + kseq_destroy(kseq); + + io·seek(fd, 0, seek·set); +#endif + + rdr = bio·openfasta((io·Reader){.read = (int (*)(void *, int, int, void *))io·read}, fd, mem·sys, nil); + + t = clock(); + err = 0; + while (!err) { + err = bio·readfasta(rdr, &seq); + } + t = clock() - t; + printf("nick's code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); + bio·closefasta(rdr); + + + io·close(fd); + return err <= 0 ? 0 : 1; +} + +#define asrdr(x) (int (*)(void *, int, int, void *))(x) +error +test·fastq() +{ + error err; + Stream *fd; + + bio·Seq seq; + bio·FastqReader *rdr; + + clock_t t; + + fd = io·open("/home/nolln/root/data/test/eg.fq", "r"); + + rdr = bio·openfastq((io·Reader){.read = asrdr(io·read)}, fd, mem·sys, nil); + + t = clock(); + err = 0; + while (!err) { + err = bio·readfastq(rdr, &seq); + } + t = clock() - t; + printf("nick's fastq code took %f ms to execute\n", 1000.*t/CLOCKS_PER_SEC); + bio·closefastq(rdr); + + + io·close(fd); + return err <= 0 ? 0 : 1; +} + +error +test·align() +{ + double f; + error err; + int i, l, n; + + uint64 mem[aln·N][arrlen(SEQ)][aln·L]; + uint64 *phi[aln·N]; + int loc[aln·N][arrlen(SEQ)][aln·L]; + int *pos[aln·N]; + + for (i = 0; i < arrlen(SEQ); i++) { + for (n = 0; n < aln·N; n++) { + phi[n] = mem[n][i]; + pos[n] = loc[n][i]; + } + + err = aln·sketch(SEQ[i], aln·L, phi, pos); + } + + f = 0; + for (n = 0; n < aln·N; n++) { + aln·sort(arrlen(SEQ), aln·L, (uint64*)mem[n]); + + if (!memcmp(mem[n][0], mem[n][1], sizeof(uint64)*aln·L)) { + f += 1.; + printf("True : "); + } else { + printf("False: "); + } + for (i = 0; i < arrlen(SEQ); i++) { + printf("["); + for (l = 0; l < aln·L; l++) { + printf("%lu,", mem[n][i][l]); + } + printf("]"); + if (i == 0) printf(" ~ "); + } + printf("\n"); + } + + printf("Fraction hits %f\n", f/aln·N); + return err; + +} + +error +main() +{ + error err; + + if (err = test·newick(), err) { + errorf("test fail: newick"); + } + +#if 0 + if (err = test·fasta(), err) { + errorf("test fail: fasta"); + } + + if (err = test·fastq(), err) { + errorf("test fail: fastq"); + } +#endif +} + -- cgit v1.2.1