aboutsummaryrefslogtreecommitdiff
path: root/src/libbio/simulate.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libbio/simulate.c')
-rw-r--r--src/libbio/simulate.c120
1 files changed, 120 insertions, 0 deletions
diff --git a/src/libbio/simulate.c b/src/libbio/simulate.c
new file mode 100644
index 0000000..0f5a97e
--- /dev/null
+++ b/src/libbio/simulate.c
@@ -0,0 +1,120 @@
+#include <u.h>
+#include <libn.h>
+#include <libbio.h>
+
+#define SEQLEN 2560
+static byte *SEQ =
+"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC"
+"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
+"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
+"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
+"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA"
+"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
+"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
+"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA"
+"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
+"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC"
+"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
+"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
+"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT"
+"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
+"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
+"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
+"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
+"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
+"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
+"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
+"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
+"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT"
+"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
+"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC"
+"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
+"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
+"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
+"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
+"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
+"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
+"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
+"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT";
+
+byte*
+modify(byte *seq, int *len, double p)
+{
+ byte *head, *new;
+
+ head = calloc(SEQLEN+1, sizeof(byte));
+ new = head;
+ for (; *seq != '\0'; seq++) {
+ if (rng·bernoulli(p)) {
+ switch (rng·randi(5)) {
+ case 0: *new++ = 'A'; break;
+ case 1: *new++ = 'C'; break;
+ case 2: *new++ = 'G'; break;
+ case 3: *new++ = 'T'; break;
+ case 4: continue;
+ }
+ } else {
+ *new++ = *seq;
+ }
+ }
+ *new = '\0';
+ *len = new - head;
+ return head;
+}
+
+#define NSEQS 20
+int
+main()
+{
+ int n, i, l, lens[NSEQS];
+ byte *seqs[NSEQS];
+
+ int locs[aln·N][NSEQS][aln·L];
+ int *loc[aln·N];
+ uint64 vals[aln·N][NSEQS][aln·L];
+ uint64 *val[aln·N];
+
+ rng·init(0);
+
+ seqs[0] = SEQ;
+ lens[0] = SEQLEN;
+
+ for (n = 0; n < aln·N; n++) {
+ for (i = 0; i < NSEQS; i++) {
+ for (l = 0; l < aln·L; l++) {
+ vals[n][i][l] = 0;
+ }
+ }
+ }
+
+ for (i = 1; i < NSEQS; i++) {
+ seqs[i] = modify(SEQ, lens + i, .01*i);
+ }
+
+ for (i = 0; i < NSEQS; i++) {
+ for (n = 0; n < aln·N; n++) {
+ val[n] = vals[n][i];
+ loc[n] = locs[n][i];
+ }
+ aln·sketch(seqs[i], aln·L, val, loc);
+ }
+
+ // for (n = 0; n < aln·N; n++) {
+ // printf("iteration %d\n", n);
+ // printf("[\n");
+ // for (i = 0; i < NSEQS; i++) {
+ // printf(" [");
+ // for (l = 0; l < aln·L; l++) {
+ // printf("%lu,", vals[n][i][l]);
+ // }
+ // printf("],\n");
+ // }
+ // printf("]\n");
+ // }
+
+ for (n = 0; n < aln·N; n++) {
+ aln·sort(NSEQS, aln·L, (uint64*)vals[n]);
+ }
+
+ return 0;
+}