aboutsummaryrefslogtreecommitdiff
path: root/sys/libbio/simulate.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-03 20:20:22 -0700
commit4b2ea2ca00eea7feea036b7642c0c1443b8f77a1 (patch)
tree5cdd635bd8240a6857258a056e3932e00966bfff /sys/libbio/simulate.c
parent6b739739968a0cc9b4d9909d8f4ffec30f4461dd (diff)
removed buggy qsort header and implemented myself
Diffstat (limited to 'sys/libbio/simulate.c')
-rw-r--r--sys/libbio/simulate.c117
1 files changed, 117 insertions, 0 deletions
diff --git a/sys/libbio/simulate.c b/sys/libbio/simulate.c
new file mode 100644
index 0000000..a4567d5
--- /dev/null
+++ b/sys/libbio/simulate.c
@@ -0,0 +1,117 @@
+#include <u.h>
+#include <libn.h>
+#include <libbio.h>
+
+#define SEQLEN 2560
+static byte *SEQ =
+"GGCGGCTTCGGTGCGCTGTGTGCATTGCCGCAAAAATATCGTGAACCCGTGCTGGTTTCCGGCACTGACGGCGTAGGTAC"
+"CAAGCTGCGTCTGGCAATGGACTTAAAACGTCACGACACCATTGGTATTGATCTGGTCGCCATGTGCGTTAATGACCTGG"
+"TGGTGCAAGGTGCGGAACCGCTGTTTTTCCTCGACTATTACGCAACCGGAAAACTGGATGTTGATACCGCTTCAGCGGTG"
+"ATCAGCGGCATTGCGGAAGGTTGTCTGCAATCGGGCTGTTCTCTGGTGGGTGGCGAAACGGCAGAAATGCCGGGGATGTA"
+"TCACGGTGAAGATTACGATGTCGCGGGTTTCTGCGTGGGCGTGGTAGAAAAATCAGAAATCATCGACGGCTCTAAAGTCA"
+"GCGACGGCGATGTGCTGATTGCACTCGGTTCCAGCGGTCCGCACTCGAACGGTTATTCGCTGGTGCGCAAAATTCTTGAA"
+"GTCAGCGGTTGTGATCCGCAAACCACCGAACTTGATGGTAAGCCATTAGCCGATCATCTGCTGGCACCGACCCGCATTTA"
+"CGTGAAGTCAGTGCTGGAGTTGATTGAAAAGGTCGATGTGCATGCCATTGCGCACCTGACCGGCGGCGGCTTCTGGGAAA"
+"ACATTCCGCGCGTATTGCCAGATAATACCCAGGCAGTGATTGATGAATCTTCCTGGCAGTGGCCGGAAGTGTTCAACTGG"
+"CTGCAAACGGCAGGTAACGTTGAGCGCCATGAAATGTATCGCACCTTCAACTGCGGCGTCGGGATGATTATCGCCCTGCC"
+"TGCTCCGGAAGTGGACAAAGCCCTCGCCCTGCTCAATGCCAACGGTGAAAACGCGTGGAAAATCGGTATCATCAAAGCCT"
+"CTGATTCCGAACAACGCGTGGTTATCGAATAATGAATATTGTGGTGCTTATTTCCGGCAACGGAAGTAATTTACAGGCAA"
+"TTATTGACGCCTGTAAAACCAACAAAATTAAAGGCACCGTACGGGCAGTTTTCAGCAATAAGGCCGACGCGTTCGGCCTT"
+"GAACGCGCCCGCCAGGCGGGTATTGCAACGCATACGCTCATCGCCAGCGCGTTTGACAGTCGTGAAGCCTATGACCGGGA"
+"GTTGATTCATGAAATCGACATGTACGCACCCGATGTGGTCGTGCTGGCTGGTTTTATGCGCATTCTCAGCCCGGCGTTTG"
+"TCTCCCACTATGCCGGGCGTTTGCTGAACATTCACCCTTCTCTGCTGCCGAAATATCCCGGATTACACACCCATCGTCAA"
+"GCGCTGGAAAATGGCGATGAAGAGCACGGTACATCGGTGCATTTCGTCACCGATGAACTGGACGGTGGCCCGGTTATTTT"
+"ACAGGCGAAAGTCCCGGTATTTGCTGGTGATACGGAAGATGACGTCACCGCCCGCGTGCAAACCCAGGAACACGCCATTT"
+"ATCCACTGGTGATTAGCTGGTTTGCCGATGGTCGTCTGAAAATGCACGAAAACGCCGCGTGGCTGGATGGTCAACGTCTG"
+"CCGCCGCAGGGCTACGCTGCCGACGAGTAATGCCCCCGTAGTTAAAGCGCCAGCTCTGCCGCTGGCGTTTTTCAATTCAC"
+"CTGTAAATCGCAAGCTCCAGCAGTTTTTTTCCCCCTTTTCTGGCATAGTTGGACATCTGCCAATATTGCTCGCCATAATA"
+"TCCAGGCAGTGTCCCGTGAATAAAACGGAGTAAAAGTGGTAATGGGTCAGGAAAAGCTATACATCGAAAAAGAGCTCAGT"
+"TGGTTATCGTTCAATGAACGCGTGCTTCAGGAAGCGGCGGACAAATCTAACCCGCTGATTGAAAGGATGCGTTTCCTGGG"
+"GATCTATTCCAATAACCTTGATGAGTTCTATAAAGTCCGCTTCGCTGAACTGAAGCGACGCATCATTATTAGCGAAGAAC"
+"AAGGCTCCAACTCTCATTCCCGCCATTTACTGGGCAAAATTCAGTCCCGGGTGCTGAAAGCCGATCAGGAATTCGACGGC"
+"CTCTACAACGAGCTATTGCTGGAGATGGCGCGCAACCAGATCTTCCTGATTAATGAACGCCAGCTCTCCGTCAATCAACA"
+"AAACTGGCTGCGTCATTATTTTAAGCAGTATCTGCGTCAGCACATTACGCCGATTTTAATCAATCCTGACACTGACTTAG"
+"TGCAGTTCCTGAAAGATGATTACACCTATCTGGCGGTGGAAATTATCCGTGGCGATACCATCCGTTACGCGCTTCTGGAG"
+"ATCCCATCAGATAAAGTGCCGCGCTTTGTGAATTTACCGCCAGAAGCGCCGCGTCGACGCAAGCCGATGATTCTTCTGGA"
+"TAACATTCTGCGTTACTGCCTTGATGATATTTTCAAAGGCTTCTTTGATTATGACGCGCTGAATGCCTATTCAATGAAGA"
+"TGACCCGCGATGCCGAATACGATTTAGTGCATGAGATGGAAGCCAGCCTGATGGAGTTGATGTCTTCCAGTCTCAAGCAG"
+"CGTTTAACTGCTGAGCCGGTGCGTTTTGTTTATCAGCGCGATATGCCCAATGCGCTGGTTGAAGTTTTACGCGAAAAACT";
+
+byte*
+modify(byte *seq, int *len, double p)
+{
+ byte *head, *new;
+
+ head = calloc(SEQLEN+1, sizeof(byte));
+ new = head;
+ for (; *seq != '\0'; seq++) {
+ if (rng·bernoulli(p)) {
+ switch (rng·randi(5)) {
+ case 0: *new++ = 'A'; break;
+ case 1: *new++ = 'C'; break;
+ case 2: *new++ = 'G'; break;
+ case 3: *new++ = 'T'; break;
+ case 4: continue;
+ }
+ } else {
+ *new++ = *seq;
+ }
+ }
+ *new = '\0';
+ *len = new - head;
+ return head;
+}
+
+#define NSEQS 20
+int
+main()
+{
+ int n, i, l, lens[NSEQS];
+ byte *seqs[NSEQS];
+
+ int locs[aln·N][NSEQS][aln·L];
+ int *loc[aln·N];
+ uint64 vals[aln·N][NSEQS][aln·L];
+ uint64 *val[aln·N];
+ rng·init(0);
+
+ seqs[0] = SEQ;
+ lens[0] = SEQLEN;
+
+ for (n = 0; n < aln·N; n++) {
+ for (i = 0; i < NSEQS; i++) {
+ for (l = 0; l < aln·L; l++) {
+ vals[n][i][l] = 0;
+ }
+ }
+ }
+
+ for (i = 1; i < NSEQS; i++) {
+ seqs[i] = modify(SEQ, lens + i, .01*i);
+ }
+
+ for (i = 0; i < NSEQS; i++) {
+ for (n = 0; n < aln·N; n++) {
+ val[n] = vals[n][i];
+ loc[n] = locs[n][i];
+ }
+ aln·sketch(seqs[i], aln·L, val, loc);
+ }
+
+ // for (n = 0; n < aln·N; n++) {
+ // printf("iteration %d\n", n);
+ // printf("[\n");
+ // for (i = 0; i < NSEQS; i++) {
+ // printf(" [");
+ // for (l = 0; l < aln·L; l++) {
+ // printf("%lu,", vals[n][i][l]);
+ // }
+ // printf("],\n");
+ // }
+ // printf("]\n");
+ // }
+
+ for (n = 0; n < aln·N; n++) {
+ aln·sort(NSEQS, aln·L, (uint64*)vals[n]);
+ }
+}