aboutsummaryrefslogtreecommitdiff
path: root/include/libn/macro/qsort.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/libn/macro/qsort.h')
-rw-r--r--include/libn/macro/qsort.h261
1 files changed, 84 insertions, 177 deletions
diff --git a/include/libn/macro/qsort.h b/include/libn/macro/qsort.h
index 8daf0fa..cc38bca 100644
--- a/include/libn/macro/qsort.h
+++ b/include/libn/macro/qsort.h
@@ -1,181 +1,88 @@
-/*
- * Copyright (c) 2013, 2017 Alexey Tourbin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
+#pragma once
-/*
- * This is a traditional Quicksort implementation which mostly follows
- * [Sedgewick 1978]. Sorting is performed entirely on array indices,
- * while actual access to the array elements is abstracted out with the
- * user-defined `LESS` and `SWAP` primitives.
+/*
+ * Straight implementation of Sedgewick's median qsort
+ *
+ * @LEN: name of parameter length
+ * @QLESS: name of function that computes array[i] < array[j]
+ * should return a boolean
+ * @QSWAP: name of function that swaps array[i] <-> array[j]
*
- * Synopsis:
- * QSORT(N, LESS, SWAP);
- * where
- * N - the number of elements in A[];
- * LESS(i, j) - compares A[i] to A[j];
- * SWAP(i, j) - exchanges A[i] with A[j].
+ * NOTE: This can perform on strided arrays.
+ * Make sure to use parens liberally to ensure hygeine!
*/
-#pragma once
-
-/* Sort 3 elements. */
-#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \
-do { \
- if (Q_LESS(q_a2, q_a1)) { \
- if (Q_LESS(q_a3, q_a2)) \
- Q_SWAP(q_a1, q_a3); \
- else { \
- Q_SWAP(q_a1, q_a2); \
- if (Q_LESS(q_a3, q_a2)) \
- Q_SWAP(q_a2, q_a3); \
- } \
- } \
- else if (Q_LESS(q_a3, q_a2)) { \
- Q_SWAP(q_a2, q_a3); \
- if (Q_LESS(q_a2, q_a1)) \
- Q_SWAP(q_a1, q_a2); \
- } \
-} while (0)
-
-/* Partition [q_l,q_r] around a pivot. After partitioning,
- * [q_l,q_j] are the elements that are less than or equal to the pivot,
- * while [q_i,q_r] are the elements greater than or equal to the pivot. */
-#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \
-do { \
- /* The middle element, not to be confused with the median. */ \
- Q_UINT q_m = q_l + ((q_r - q_l) >> 1); \
- /* Reorder the second, the middle, and the last items. \
- * As [Edelkamp Weiss 2016] explain, using the second element \
- * instead of the first one helps avoid bad behaviour for \
- * decreasingly sorted arrays. This method is used in recent \
- * versions of gcc's std::sort, see gcc bug 58437#c13, although \
- * the details are somewhat different (cf. #c14). */ \
- Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP); \
- /* Place the median at the beginning. */ \
- Q_SWAP(q_l, q_m); \
- /* Partition [q_l+2, q_r-1] around the median which is in q_l. \
- * q_i and q_j are initially off by one, they get decremented \
- * in the do-while loops. */ \
- q_i = q_l + 1; q_j = q_r; \
- while (1) { \
- do q_i++; while (Q_LESS(q_i, q_l)); \
- do q_j--; while (Q_LESS(q_l, q_j)); \
- if (q_i >= q_j) break; /* Sedgewick says "until j < i" */ \
- Q_SWAP(q_i, q_j); \
- } \
- /* Compensate for the i==j case. */ \
- q_i = q_j + 1; \
- /* Put the median to its final place. */ \
- Q_SWAP(q_l, q_j); \
- /* The median is not part of the left subfile. */ \
- q_j--; \
-} while (0)
-
-/* Insertion sort is applied to small subfiles - this is contrary to
- * Sedgewick's suggestion to run a separate insertion sort pass after
- * the partitioning is done. The reason I don't like a separate pass
- * is that it triggers extra comparisons, because it can't see that the
- * medians are already in their final positions and need not be rechecked.
- * Since I do not assume that comparisons are cheap, I also do not try
- * to eliminate the (q_j > q_l) boundary check. */
-#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \
-do { \
- Q_UINT q_i, q_j; \
- /* For each item starting with the second... */ \
- for (q_i = q_l + 1; q_i <= q_r; q_i++) \
- /* move it down the array so that the first part is sorted. */ \
- for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--) \
- Q_SWAP(q_j, q_j - 1); \
-} while (0)
-
-/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to
- * Q_THRESH, the algorithm performs recursive partitioning. When the size
- * drops below Q_THRESH, the algorithm switches to insertion sort.
- * The minimum valid value is probably 5 (with 5 items, the second and
- * the middle items, the middle itself being rounded down, are distinct). */
-#define Q_THRESH 16
-
-/* The main loop. */
-#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \
-do { \
- Q_UINT q_l = 0; \
- Q_UINT q_r = (Q_N) - 1; \
- Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \
- struct { Q_UINT q_l, q_r; } \
- /* On 32-bit platforms, to sort a "char[3GB+]" array, \
- * it may take full 32 stack frames. On 64-bit CPUs, \
- * though, the address space is limited to 48 bits. \
- * The usage is further reduced if Q_N has a 32-bit type. */ \
- q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \
- while (1) { \
- if (q_r - q_l + 1 >= Q_THRESH) { \
- Q_UINT q_i, q_j; \
- Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \
- /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \
- * Dealing with them depends on which one is bigger. */ \
- if (q_j - q_l >= q_r - q_i) \
- Q_SUBFILES(q_l, q_j, q_i, q_r); \
- else \
- Q_SUBFILES(q_i, q_r, q_l, q_j); \
- } \
- else { \
- Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \
- /* Pop subfiles from the stack, until it gets empty. */ \
- if (q_sp == 0) break; \
- q_sp--; \
- q_l = q_st[q_sp].q_l; \
- q_r = q_st[q_sp].q_r; \
- } \
- } \
-} while (0)
-
-/* The missing part: dealing with subfiles.
- * Assumes that the first subfile is not smaller than the second. */
-#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \
-do { \
- /* If the second subfile is only a single element, it needs \
- * no further processing. The first subfile will be processed \
- * on the next iteration (both subfiles cannot be only a single \
- * element, due to Q_THRESH). */ \
- if (q_l2 == q_r2) { \
- q_l = q_l1; \
- q_r = q_r1; \
- } \
- else { \
- /* Otherwise, both subfiles need processing. \
- * Push the larger subfile onto the stack. */ \
- q_st[q_sp].q_l = q_l1; \
- q_st[q_sp].q_r = q_r1; \
- q_sp++; \
- /* Process the smaller subfile on the next iteration. */ \
- q_l = q_l2; \
- q_r = q_r2; \
- } \
-} while (0)
-
-/* And now, ladies and gentlemen, may I proudly present to you... */
-#define QSORT(Q_N, Q_LESS, Q_SWAP) \
-do { \
- if ((Q_N) > 1) \
- /* We could check sizeof(Q_N) and use "unsigned", but at least \
- * on x86_64, this has the performance penalty of up to 5%. */ \
- Q_LOOP(ulong, Q_N, Q_LESS, Q_SWAP); \
-} while (0)
+#define QSORT(LEN, QLESS, QSWAP) \
+ int i, j, m, r, l; \
+ struct { \
+ int i, j; \
+ } *sp, base[48]; \
+ sp = base; \
+ \
+ l = 1; \
+ r = LEN-1; \
+ \
+ if (LEN <= 14) goto ENDOUTER; \
+OUTERLOOP: \
+ SWAP((l+r)/2, l+1); \
+ \
+ if (QLESS(r, l+1)) QSWAP(r, l+1); \
+ if (QLESS(r, l)) QSWAP(r, l); \
+ if (QLESS(l, l+1)) QSWAP(l, l+1); \
+ \
+ i = l+1; \
+ j = r; \
+ \
+INNERLOOP: \
+ do ++i; while(QLESS(i, l)); \
+ do --j; while(QLESS(l, j)); \
+ if (j < i) goto ENDINNER; \
+ QSWAP(i, j); \
+ goto INNERLOOP; \
+ \
+ENDINNER: \
+ QSWAP(l, j); \
+ \
+ if (MAX(j-l, r-i+1) <= 14) { \
+ if (sp == base) { \
+ goto ENDOUTER; \
+ } else { \
+ l = sp[-1].i; \
+ r = sp[-1].j; \
+ sp--; \
+ } \
+ } else { \
+ if (MIN(j-l, r-i+1) <= 14) { \
+ if (j-l > r-i+1) { \
+ l = l; \
+ r = j-1; \
+ } else { \
+ l = i; \
+ r = r; \
+ } \
+ } else { \
+ if (j-l > r-i+1) { \
+ sp->i = l; \
+ sp->j = j-1; \
+ sp++; \
+ \
+ l = i; \
+ r = r; \
+ } else { \
+ sp->i = i; \
+ sp->j = r; \
+ sp++; \
+ \
+ l = l; \
+ r = j-1; \
+ } \
+ } \
+ } \
+ goto OUTERLOOP; \
+ \
+ENDOUTER: \
+ for (i = 1; i < LEN; i++) { \
+ for (j = i; j > 0 && QLESS(j, j-1); j--) { \
+ QSWAP(j, j-1); \
+ } \
+ } \