From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/libbio/align.c | 178 ----------------------------------------------------- 1 file changed, 178 deletions(-) delete mode 100644 sys/libbio/align.c (limited to 'sys/libbio/align.c') diff --git a/sys/libbio/align.c b/sys/libbio/align.c deleted file mode 100644 index 20a57df..0000000 --- a/sys/libbio/align.c +++ /dev/null @@ -1,178 +0,0 @@ -#include -#include -#include -#include - -// ----------------------------------------------------------------------- -// globals - -uint64 aln·shft = (2ULL * (aln·K - 1ULL)); -uint64 aln·mask = (1ULL << (2*aln·K)) - 1ULL; - -// ----------------------------------------------------------------------- -// static data - -static uint64 nuctab[256] = { - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, -}; - -// ----------------------------------------------------------------------- -// hash functions - -enum -{ - MURM641 = 0xff51afd7ed558ccd, - MURM642 = 0xc4ceb9fe1a85ec53 -}; - -static -uint64 -minihash(uint64 x, uint64 mask) -{ - x = (~x + (x << 21)) & mask; - x = x ^ (x >> 24); - x = (x + (x << 3) + (x << 8)) & mask; - x = x ^ (x >> 14); - x = (x + (x << 2) + (x << 4)) & mask; - x = x ^ (x >> 28); - x = (x + (x << 31)); - - return x; -} - -static -uint64 -murmurhash(uint64 x, uint64 mask) -{ - x = x ^ (x >> 33); - x = (x * MURM641); - x = x ^ (x >> 33); - x = (x * MURM642); - x = x ^ (x >> 33); - - return x; -} - -// ----------------------------------------------------------------------- -// locality sensitive hashing (with spatial extent) - -static -void -sortpos(uintptr len, uint64 vals[], int locs[]) -{ - int tmpi; - uint64 tmpu64; - -#define LESS(i, j) (locs[i] < locs[j]) -#define SWAP(i, j) (tmpu64 = vals[i], tmpi = locs[i], \ - vals[i] = vals[j], locs[i] = locs[j], \ - vals[j] = tmpu64 , locs[j] = tmpi) - QSORT(len, LESS, SWAP); -#undef LESS -#undef SWAP -} - -/* - * sketch - * @param seq: '0' terminated string - * @param len: number of sequential sketches to keep - * @param vals: buffer to store hashes of sketch. - * @param locs: buffer to store location of sketch hashes - */ -error -aln·sketch(byte *seq, int len, uint64 *vals[aln·N], int *locs[aln·N]) -{ - int i, n, l, *loc; - uint64 kmer, h[3], *val; - int tmpi[2]; - uint64 tmpu[2]; - - for(n = 0; n < aln·N; n++) { - for(l = 0; l < len; l++) { - vals[n][l] = UINT64_MAX; - } - } - - kmer = UINT64_MAX; - for(l = 0; *seq != '\0'; seq++, l++) { - kmer = ((kmer << 2) | nuctab[*seq]) & aln·mask; - - h[0] = minihash(kmer, aln·mask); - h[1] = murmurhash(kmer, aln·mask); - - for(n = 0; n < aln·N; n++) { - val = vals[n]; - loc = locs[n]; - - h[2] = (h[0] + n * h[1]) & aln·mask; - for (i = 0; i < len && h[2] < val[i]; i++) { - ; - } - - tmpu[1] = h[2]; - tmpi[1] = l; - for(i -= 1; i >= 0; i--) { - tmpu[0] = tmpu[1], tmpu[1] = val[i], val[i] = tmpu[0]; - tmpi[0] = tmpi[1], tmpi[1] = loc[i], loc[i] = tmpi[0]; - } - } - } - - for(n = 0; n < aln·N; n++) { - sortpos(len, vals[n], locs[n]); - } - - return 0; -} - -static -int -lessarrs(int len, uint64 a[], uint64 b[]) -{ - int l; - - for(l = 0; l < len; l++) { - if (a[l] < b[l]) return 1; - if (a[l] > b[l]) return 0; - } - - return 0; -} - -static -void -swaparrs(int len, uint64 a[], uint64 b[]) -{ - int l; - uint64 tmp; - - for(l = 0; l < len; l++) { - tmp = a[l], a[l] = b[l], b[l] = tmp; - } -} - -error -aln·sort(uintptr n, int len, uint64 *vals) -{ -#define LESS(i, j) (lessarrs(len, vals+((i)*len), vals+((j)*len))) -#define SWAP(i, j) (swaparrs(len, vals+((i)*len), vals+((j)*len))) - QSORT(n, LESS, SWAP); -#undef LESS -#undef SWAP - return 0; -} -- cgit v1.2.1