aboutsummaryrefslogtreecommitdiff
path: root/sys/libbio/align.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libbio/align.c')
-rw-r--r--sys/libbio/align.c178
1 files changed, 0 insertions, 178 deletions
diff --git a/sys/libbio/align.c b/sys/libbio/align.c
deleted file mode 100644
index 20a57df..0000000
--- a/sys/libbio/align.c
+++ /dev/null
@@ -1,178 +0,0 @@
-#include <u.h>
-#include <libn.h>
-#include <libn/macro/qsort.h>
-#include <libbio.h>
-
-// -----------------------------------------------------------------------
-// globals
-
-uint64 aln·shft = (2ULL * (aln·K - 1ULL));
-uint64 aln·mask = (1ULL << (2*aln·K)) - 1ULL;
-
-// -----------------------------------------------------------------------
-// static data
-
-static uint64 nuctab[256] = {
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-};
-
-// -----------------------------------------------------------------------
-// hash functions
-
-enum
-{
- MURM641 = 0xff51afd7ed558ccd,
- MURM642 = 0xc4ceb9fe1a85ec53
-};
-
-static
-uint64
-minihash(uint64 x, uint64 mask)
-{
- x = (~x + (x << 21)) & mask;
- x = x ^ (x >> 24);
- x = (x + (x << 3) + (x << 8)) & mask;
- x = x ^ (x >> 14);
- x = (x + (x << 2) + (x << 4)) & mask;
- x = x ^ (x >> 28);
- x = (x + (x << 31));
-
- return x;
-}
-
-static
-uint64
-murmurhash(uint64 x, uint64 mask)
-{
- x = x ^ (x >> 33);
- x = (x * MURM641);
- x = x ^ (x >> 33);
- x = (x * MURM642);
- x = x ^ (x >> 33);
-
- return x;
-}
-
-// -----------------------------------------------------------------------
-// locality sensitive hashing (with spatial extent)
-
-static
-void
-sortpos(uintptr len, uint64 vals[], int locs[])
-{
- int tmpi;
- uint64 tmpu64;
-
-#define LESS(i, j) (locs[i] < locs[j])
-#define SWAP(i, j) (tmpu64 = vals[i], tmpi = locs[i], \
- vals[i] = vals[j], locs[i] = locs[j], \
- vals[j] = tmpu64 , locs[j] = tmpi)
- QSORT(len, LESS, SWAP);
-#undef LESS
-#undef SWAP
-}
-
-/*
- * sketch
- * @param seq: '0' terminated string
- * @param len: number of sequential sketches to keep
- * @param vals: buffer to store hashes of sketch.
- * @param locs: buffer to store location of sketch hashes
- */
-error
-aln·sketch(byte *seq, int len, uint64 *vals[aln·N], int *locs[aln·N])
-{
- int i, n, l, *loc;
- uint64 kmer, h[3], *val;
- int tmpi[2];
- uint64 tmpu[2];
-
- for(n = 0; n < aln·N; n++) {
- for(l = 0; l < len; l++) {
- vals[n][l] = UINT64_MAX;
- }
- }
-
- kmer = UINT64_MAX;
- for(l = 0; *seq != '\0'; seq++, l++) {
- kmer = ((kmer << 2) | nuctab[*seq]) & aln·mask;
-
- h[0] = minihash(kmer, aln·mask);
- h[1] = murmurhash(kmer, aln·mask);
-
- for(n = 0; n < aln·N; n++) {
- val = vals[n];
- loc = locs[n];
-
- h[2] = (h[0] + n * h[1]) & aln·mask;
- for (i = 0; i < len && h[2] < val[i]; i++) {
- ;
- }
-
- tmpu[1] = h[2];
- tmpi[1] = l;
- for(i -= 1; i >= 0; i--) {
- tmpu[0] = tmpu[1], tmpu[1] = val[i], val[i] = tmpu[0];
- tmpi[0] = tmpi[1], tmpi[1] = loc[i], loc[i] = tmpi[0];
- }
- }
- }
-
- for(n = 0; n < aln·N; n++) {
- sortpos(len, vals[n], locs[n]);
- }
-
- return 0;
-}
-
-static
-int
-lessarrs(int len, uint64 a[], uint64 b[])
-{
- int l;
-
- for(l = 0; l < len; l++) {
- if (a[l] < b[l]) return 1;
- if (a[l] > b[l]) return 0;
- }
-
- return 0;
-}
-
-static
-void
-swaparrs(int len, uint64 a[], uint64 b[])
-{
- int l;
- uint64 tmp;
-
- for(l = 0; l < len; l++) {
- tmp = a[l], a[l] = b[l], b[l] = tmp;
- }
-}
-
-error
-aln·sort(uintptr n, int len, uint64 *vals)
-{
-#define LESS(i, j) (lessarrs(len, vals+((i)*len), vals+((j)*len)))
-#define SWAP(i, j) (swaparrs(len, vals+((i)*len), vals+((j)*len)))
- QSORT(n, LESS, SWAP);
-#undef LESS
-#undef SWAP
- return 0;
-}