#include #include #include #include // ----------------------------------------------------------------------- // Temporary globals uint64 aln·shft = (2ULL * (aln·K - 1ULL)); uint64 aln·mask = (1ULL << (2*aln·K)) - 1ULL; // ----------------------------------------------------------------------- // Static data static uint64 nuctab[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, }; // ----------------------------------------------------------------------- // Hash functions enum { MURM641 = 0xff51afd7ed558ccd, MURM642 = 0xc4ceb9fe1a85ec53 }; static uint64 minihash(uint64 x, uint64 mask) { x = (~x + (x << 21)) & mask; x = x ^ (x >> 24); x = (x + (x << 3) + (x << 8)) & mask; x = x ^ (x >> 14); x = (x + (x << 2) + (x << 4)) & mask; x = x ^ (x >> 28); x = (x + (x << 31)); return x; } static uint64 murmurhash(uint64 x, uint64 mask) { x = x ^ (x >> 33); x = (x * MURM641); x = x ^ (x >> 33); x = (x * MURM642); x = x ^ (x >> 33); return x; } // ----------------------------------------------------------------------- // Locality sensitive hashing (with spatial extent) static void sortpos(uintptr len, uint64 vals[], int locs[]) { int tmpi; uint64 tmpu64; #define LESS(i, j) (locs[i] < locs[j]) #define SWAP(i, j) (tmpu64 = vals[i], tmpi = locs[i], \ vals[i] = vals[j], locs[i] = locs[j], \ vals[j] = tmpu64 , locs[j] = tmpi) QSORT(len, LESS, SWAP); #undef LESS #undef SWAP } error aln·sketch(byte *seq, int L, uint64 *vals[aln·N], int *locs[aln·N]) { int i, n, l, *loc; uint64 kmer, tmp[4], h[3], *val; for (n = 0; n < aln·N; n++) { val = vals[n]; for (l = 0; l < L; l++) { val[l] = UINT64_MAX; } } kmer = UINT64_MAX; for (l = 0; *seq != '\0'; seq++, l++) { kmer = ((kmer << 2) | nuctab[*seq]) & aln·mask; h[0] = minihash(kmer, aln·mask); h[1] = murmurhash(kmer, aln·mask); for (n = 0; n < aln·N; n++) { val = vals[n]; loc = locs[n]; h[2] = (h[0] + n * h[1]) & aln·mask; for (i = 0; i < L && h[2] < val[i]; i++) { ; } tmp[1] = h[2]; tmp[3] = l; for (i -= 1; i >= 0; i--) { tmp[0] = tmp[1]; tmp[1] = val[i]; val[i] = tmp[0]; tmp[2] = tmp[3]; tmp[3] = loc[i]; loc[i] = tmp[2]; } } } for (n = 0; n < aln·N; n++) { sortpos(L, vals[n], locs[n]); } return 0; } static int cmparrs(int L, uint64 a[], uint64 b[]) { int l; for (l = 0; l < L; l++) { if (a[l] < b[l]) return -1; } return +1; } // TODO: replace with inlined version error aln·sort(uintptr len, int L, uint64 *vals) { uint64 tmp[128]; #define LESS(i, j) (cmparrs((L), (vals+(i)*L), (vals+(j)*L))) #define SWAP(i, j) (memcpy(tmp, (vals+(i)*L), (sizeof(uint64)*L)), \ memcpy((vals+(i)*L), (vals+(j)*L), (sizeof(uint64)*L)), \ memcpy((vals+(j)*L), tmp, (sizeof(uint64)*L))) QSORT(len, LESS, SWAP); #undef LESS #undef SWAP // qsort(vals, len, l*sizeof(uint64), &compare); return 0; }