feat: added sort macro to libn. moved macro based templates to more obvious directory

author: Nicholas Noll <nbnoll@eml.cc> 2020-04-26 17:34:43 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2020-04-26 17:34:43 -0700
commit: 238b944f5986e7deaa2aa2d5925c1fa2c683c1c9 (patch)
tree: 7eb4f2d21ab03888ccda436bf36a68c3f13b09e6 /include
parent: 98375c9c5e4bf26f8d238666d7233177dba787eb (diff)
4 files changed, 197 insertions, 0 deletions
diff --git a/include/libbio.h b/include/libbio.h
index b405810..3db742d 100644
--- a/include/libbio.h
+++ b/include/libbio.h
@@ -31,6 +31,7 @@ error    bio·writenewick(bio·Tree tree, io·Putter out, void*);
 // -----------------------------------------------------------------------
 // Sequences
 
+/* i/o */
 typedef struct bio·FastaReader bio·FastaReader;
 typedef struct bio·FastqReader bio·FastqReader;
 
@@ -49,3 +50,14 @@ error            bio·closefasta(bio·FastaReader *rdr);
 bio·FastqReader *bio·openfastq(io·Reader stream, void *s, mem·Allocator heap, void *h);
 error            bio·readfastq(bio·FastqReader *rdr, bio·Seq *seq);
 error            bio·closefastq(bio·FastqReader *rdr);
+
+/* alignment */
+enum
+{
+    aln·K = 20,     // kmer size (k <= 32)
+    aln·L = 3,      // number of kmers / hash
+    aln·N = 1000,   // number of hashes
+};
+
+error aln·sketch(byte *seq, int l, uint64 *phi[aln·N], int *locs[aln·N]);
+error aln·sort(int len, int l, uint64 *vals);
diff --git a/include/libn.h b/include/libn.h
index bfeae3e..fee0b92 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -83,12 +83,16 @@ mem·Allocator mem·sys = {
     .free    = ·free
 };
 
+/* simple memory arena */
 typedef struct mem·Arena mem·Arena;
 
 mem·Arena *mem·newarena(mem·Allocator from, void*);
 void      *mem·arenaalloc(mem·Arena *A, uint n, ulong size); 
 void       mem·freearena(mem·Arena *A);
 
+/* generalized memxxx functions */
+void       memset64(void *dst, uint64 val, uintptr size);
+
 // -----------------------------------------------------------------------------
 // coroutines
 
diff --git a/include/.include/map.h b/include/libn/macro/map.h
index f81dc0f..f81dc0f 100644
--- a/include/.include/map.h
+++ b/include/libn/macro/map.h
diff --git a/include/libn/macro/qsort.h b/include/libn/macro/qsort.h
new file mode 100644
index 0000000..8daf0fa
--- /dev/null
+++ b/include/libn/macro/qsort.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2013, 2017 Alexey Tourbin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This is a traditional Quicksort implementation which mostly follows
+ * [Sedgewick 1978].  Sorting is performed entirely on array indices,
+ * while actual access to the array elements is abstracted out with the
+ * user-defined `LESS` and `SWAP` primitives.
+ *
+ * Synopsis:
+ *	QSORT(N, LESS, SWAP);
+ * where
+ *	N - the number of elements in A[];
+ *	LESS(i, j) - compares A[i] to A[j];
+ *	SWAP(i, j) - exchanges A[i] with A[j].
+ */
+
+#pragma once
+
+/* Sort 3 elements. */
+#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP)   \
+do {					                            \
+    if (Q_LESS(q_a2, q_a1)) {		                \
+	if (Q_LESS(q_a3, q_a2))		                    \
+	    Q_SWAP(q_a1, q_a3);		                    \
+	else {				                            \
+	    Q_SWAP(q_a1, q_a2);		                    \
+	    if (Q_LESS(q_a3, q_a2))	                    \
+		Q_SWAP(q_a2, q_a3);	                        \
+	}				                                \
+    }					                            \
+    else if (Q_LESS(q_a3, q_a2)) {	                \
+	Q_SWAP(q_a2, q_a3);		                        \
+	if (Q_LESS(q_a2, q_a1))		                    \
+	    Q_SWAP(q_a1, q_a2);		                    \
+    }					                            \
+} while (0)
+
+/* Partition [q_l,q_r] around a pivot.  After partitioning,
+ * [q_l,q_j] are the elements that are less than or equal to the pivot,
+ * while [q_i,q_r] are the elements greater than or equal to the pivot. */
+#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP)		\
+do {									                            \
+    /* The middle element, not to be confused with the median. */	\
+    Q_UINT q_m = q_l + ((q_r - q_l) >> 1);				            \
+    /* Reorder the second, the middle, and the last items.		    \
+     * As [Edelkamp Weiss 2016] explain, using the second element	\
+     * instead of the first one helps avoid bad behaviour for		\
+     * decreasingly sorted arrays.  This method is used in recent	\
+     * versions of gcc's std::sort, see gcc bug 58437#c13, although	\
+     * the details are somewhat different (cf. #c14). */		    \
+    Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP);				        \
+    /* Place the median at the beginning. */				        \
+    Q_SWAP(q_l, q_m);							                    \
+    /* Partition [q_l+2, q_r-1] around the median which is in q_l.	\
+     * q_i and q_j are initially off by one, they get decremented	\
+     * in the do-while loops. */					                \
+    q_i = q_l + 1; q_j = q_r;						                \
+    while (1) {								                        \
+	do q_i++; while (Q_LESS(q_i, q_l));				                \
+	do q_j--; while (Q_LESS(q_l, q_j));				                \
+	if (q_i >= q_j) break; /* Sedgewick says "until j < i" */	    \
+	Q_SWAP(q_i, q_j);						                        \
+    }									                            \
+    /* Compensate for the i==j case. */					            \
+    q_i = q_j + 1;							                        \
+    /* Put the median to its final place. */				        \
+    Q_SWAP(q_l, q_j);							                    \
+    /* The median is not part of the left subfile. */			    \
+    q_j--;								                            \
+} while (0)
+
+/* Insertion sort is applied to small subfiles - this is contrary to
+ * Sedgewick's suggestion to run a separate insertion sort pass after
+ * the partitioning is done.  The reason I don't like a separate pass
+ * is that it triggers extra comparisons, because it can't see that the
+ * medians are already in their final positions and need not be rechecked.
+ * Since I do not assume that comparisons are cheap, I also do not try
+ * to eliminate the (q_j > q_l) boundary check. */
+#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP)		    \
+do {									                            \
+    Q_UINT q_i, q_j;							                    \
+    /* For each item starting with the second... */			        \
+    for (q_i = q_l + 1; q_i <= q_r; q_i++)				            \
+    /* move it down the array so that the first part is sorted. */	\
+    for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--)		\
+	Q_SWAP(q_j, q_j - 1);						                    \
+} while (0)
+
+/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
+ * Q_THRESH, the algorithm performs recursive partitioning.  When the size
+ * drops below Q_THRESH, the algorithm switches to insertion sort.
+ * The minimum valid value is probably 5 (with 5 items, the second and
+ * the middle items, the middle itself being rounded down, are distinct). */
+#define Q_THRESH 16
+
+/* The main loop. */
+#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP)				            \
+do {									                            \
+    Q_UINT q_l = 0;							                        \
+    Q_UINT q_r = (Q_N) - 1;						                    \
+    Q_UINT q_sp = 0; /* the number of frames pushed to the stack */	\
+    struct { Q_UINT q_l, q_r; }						                \
+	/* On 32-bit platforms, to sort a "char[3GB+]" array,		    \
+	 * it may take full 32 stack frames.  On 64-bit CPUs,		    \
+	 * though, the address space is limited to 48 bits.		        \
+	 * The usage is further reduced if Q_N has a 32-bit type. */	\
+	q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32];		    \
+    while (1) {								                        \
+	if (q_r - q_l + 1 >= Q_THRESH) {				                \
+	    Q_UINT q_i, q_j;						                    \
+	    Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP);	\
+	    /* Now have two subfiles: [q_l,q_j] and [q_i,q_r].		    \
+	     * Dealing with them depends on which one is bigger. */	    \
+	    if (q_j - q_l >= q_r - q_i)					                \
+		Q_SUBFILES(q_l, q_j, q_i, q_r);				                \
+	    else							                            \
+		Q_SUBFILES(q_i, q_r, q_l, q_j);				                \
+	}								                                \
+	else {								                            \
+	    Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP);		    \
+	    /* Pop subfiles from the stack, until it gets empty. */	    \
+	    if (q_sp == 0) break;					                    \
+	    q_sp--;							                            \
+	    q_l = q_st[q_sp].q_l;					                    \
+	    q_r = q_st[q_sp].q_r;					                    \
+	}								                                \
+    }									                            \
+} while (0)
+
+/* The missing part: dealing with subfiles.
+ * Assumes that the first subfile is not smaller than the second. */
+#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2)				            \
+do {									                            \
+    /* If the second subfile is only a single element, it needs		\
+     * no further processing.  The first subfile will be processed	\
+     * on the next iteration (both subfiles cannot be only a single	\
+     * element, due to Q_THRESH). */					            \
+    if (q_l2 == q_r2) {							                    \
+	q_l = q_l1;							                            \
+	q_r = q_r1;							                            \
+    }									                            \
+    else {								                            \
+	/* Otherwise, both subfiles need processing.			        \
+	 * Push the larger subfile onto the stack. */			        \
+	q_st[q_sp].q_l = q_l1;						                    \
+	q_st[q_sp].q_r = q_r1;						                    \
+	q_sp++;								                            \
+	/* Process the smaller subfile on the next iteration. */	    \
+	q_l = q_l2;							                            \
+	q_r = q_r2;							                            \
+    }									                            \
+} while (0)
+
+/* And now, ladies and gentlemen, may I proudly present to you... */
+#define QSORT(Q_N, Q_LESS, Q_SWAP)					                \
+do {									                            \
+    if ((Q_N) > 1)							                        \
+	/* We could check sizeof(Q_N) and use "unsigned", but at least	\
+	 * on x86_64, this has the performance penalty of up to 5%. */	\
+	Q_LOOP(ulong, Q_N, Q_LESS, Q_SWAP);			                    \
+} while (0)
author	Nicholas Noll <nbnoll@eml.cc>	2020-04-26 17:34:43 -0700
committer	Nicholas Noll <nbnoll@eml.cc>	2020-04-26 17:34:43 -0700
commit	238b944f5986e7deaa2aa2d5925c1fa2c683c1c9 (patch)
tree	7eb4f2d21ab03888ccda436bf36a68c3f13b09e6 /include
parent	98375c9c5e4bf26f8d238666d7233177dba787eb (diff)