#include #include #include #include // ----------------------------------------------------------------------- // globals uint64 aln·shft = (2ULL * (aln·K - 1ULL)); uint64 aln·mask = (1ULL << (2*aln·K)) - 1ULL; // ----------------------------------------------------------------------- // static data static uint64 nuctab[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, }; // ----------------------------------------------------------------------- // hash functions enum { MURM641 = 0xff51afd7ed558ccd, MURM642 = 0xc4ceb9fe1a85ec53 }; static uint64 minihash(uint64 x, uint64 mask) { x = (~x + (x << 21)) & mask; x = x ^ (x >> 24); x = (x + (x << 3) + (x << 8)) & mask; x = x ^ (x >> 14); x = (x + (x << 2) + (x << 4)) & mask; x = x ^ (x >> 28); x = (x + (x << 31)); return x; } static uint64 murmurhash(uint64 x, uint64 mask) { x = x ^ (x >> 33); x = (x * MURM641); x = x ^ (x >> 33); x = (x * MURM642); x = x ^ (x >> 33); return x; } // ----------------------------------------------------------------------- // locality sensitive hashing (with spatial extent) static void sortpos(uintptr len, uint64 vals[], int locs[]) { int tmpi; uint64 tmpu64; #define LESS(i, j) (locs[i] < locs[j]) #define SWAP(i, j) (tmpu64 = vals[i], tmpi = locs[i], \ vals[i] = vals[j], locs[i] = locs[j], \ vals[j] = tmpu64 , locs[j] = tmpi) QSORT(len, LESS, SWAP); #undef LESS #undef SWAP } /* * sketch * @param seq: '0' terminated string * @param len: number of sequential sketches to keep * @param vals: buffer to store hashes of sketch. * @param locs: buffer to store location of sketch hashes */ error aln·sketch(byte *seq, int len, uint64 *vals[aln·N], int *locs[aln·N]) { int i, n, l, *loc; uint64 kmer, h[3], *val; int tmpi[2]; uint64 tmpu[2]; for(n = 0; n < aln·N; n++) { for(l = 0; l < len; l++) { vals[n][l] = UINT64_MAX; } } kmer = UINT64_MAX; for(l = 0; *seq != '\0'; seq++, l++) { kmer = ((kmer << 2) | nuctab[*seq]) & aln·mask; h[0] = minihash(kmer, aln·mask); h[1] = murmurhash(kmer, aln·mask); for(n = 0; n < aln·N; n++) { val = vals[n]; loc = locs[n]; h[2] = (h[0] + n * h[1]) & aln·mask; for (i = 0; i < len && h[2] < val[i]; i++) { ; } tmpu[1] = h[2]; tmpi[1] = l; for(i -= 1; i >= 0; i--) { tmpu[0] = tmpu[1], tmpu[1] = val[i], val[i] = tmpu[0]; tmpi[0] = tmpi[1], tmpi[1] = loc[i], loc[i] = tmpi[0]; } } } for(n = 0; n < aln·N; n++) { sortpos(len, vals[n], locs[n]); } return 0; } static int lessarrs(int len, uint64 a[], uint64 b[]) { int l; for(l = 0; l < len; l++) { if (a[l] < b[l]) return 1; if (a[l] > b[l]) return 0; } return 0; } static void swaparrs(int len, uint64 a[], uint64 b[]) { int l; uint64 tmp; for(l = 0; l < len; l++) { tmp = a[l], a[l] = b[l], b[l] = tmp; } } error aln·sort(uintptr n, int len, uint64 *vals) { #define LESS(i, j) (lessarrs(len, vals+((i)*len), vals+((j)*len))) #define SWAP(i, j) (swaparrs(len, vals+((i)*len), vals+((j)*len))) QSORT(n, LESS, SWAP); #undef LESS #undef SWAP return 0; }