From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libbio/simulate.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 src/libbio/simulate.c (limited to 'src/libbio/simulate.c') diff --git a/src/libbio/simulate.c b/src/libbio/simulate.c new file mode 100644 index 0000000..0f5a97e --- /dev/null +++ b/src/libbio/simulate.c @@ -0,0 +1,120 @@ +#include +#include +#include + +#define SEQLEN 2560 +static byte *SEQ = +"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC" +"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG" +"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG" +"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA" +"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA" +"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA" +"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA" +"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA" +"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG" +"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC" +"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT" +"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA" +"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT" +"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA" +"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG" +"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA" +"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT" +"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT" +"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG" +"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC" +"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA" +"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT" +"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG" +"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC" +"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC" +"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA" +"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG" +"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG" +"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA" +"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA" +"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG" +"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT"; + +byte* +modify(byte *seq, int *len, double p) +{ + byte *head, *new; + + head = calloc(SEQLEN+1, sizeof(byte)); + new = head; + for (; *seq != '\0'; seq++) { + if (rng·bernoulli(p)) { + switch (rng·randi(5)) { + case 0: *new++ = 'A'; break; + case 1: *new++ = 'C'; break; + case 2: *new++ = 'G'; break; + case 3: *new++ = 'T'; break; + case 4: continue; + } + } else { + *new++ = *seq; + } + } + *new = '\0'; + *len = new - head; + return head; +} + +#define NSEQS 20 +int +main() +{ + int n, i, l, lens[NSEQS]; + byte *seqs[NSEQS]; + + int locs[aln·N][NSEQS][aln·L]; + int *loc[aln·N]; + uint64 vals[aln·N][NSEQS][aln·L]; + uint64 *val[aln·N]; + + rng·init(0); + + seqs[0] = SEQ; + lens[0] = SEQLEN; + + for (n = 0; n < aln·N; n++) { + for (i = 0; i < NSEQS; i++) { + for (l = 0; l < aln·L; l++) { + vals[n][i][l] = 0; + } + } + } + + for (i = 1; i < NSEQS; i++) { + seqs[i] = modify(SEQ, lens + i, .01*i); + } + + for (i = 0; i < NSEQS; i++) { + for (n = 0; n < aln·N; n++) { + val[n] = vals[n][i]; + loc[n] = locs[n][i]; + } + aln·sketch(seqs[i], aln·L, val, loc); + } + + // for (n = 0; n < aln·N; n++) { + // printf("iteration %d\n", n); + // printf("[\n"); + // for (i = 0; i < NSEQS; i++) { + // printf(" ["); + // for (l = 0; l < aln·L; l++) { + // printf("%lu,", vals[n][i][l]); + // } + // printf("],\n"); + // } + // printf("]\n"); + // } + + for (n = 0; n < aln·N; n++) { + aln·sort(NSEQS, aln·L, (uint64*)vals[n]); + } + + return 0; +} -- cgit v1.2.1