From 4b2ea2ca00eea7feea036b7642c0c1443b8f77a1 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sun, 3 May 2020 20:20:22 -0700 Subject: removed buggy qsort header and implemented myself --- include/libn/macro/qsort.h | 261 +++++++++++++++------------------------------ 1 file changed, 84 insertions(+), 177 deletions(-) (limited to 'include/libn/macro/qsort.h') diff --git a/include/libn/macro/qsort.h b/include/libn/macro/qsort.h index 8daf0fa..cc38bca 100644 --- a/include/libn/macro/qsort.h +++ b/include/libn/macro/qsort.h @@ -1,181 +1,88 @@ -/* - * Copyright (c) 2013, 2017 Alexey Tourbin - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ +#pragma once -/* - * This is a traditional Quicksort implementation which mostly follows - * [Sedgewick 1978]. Sorting is performed entirely on array indices, - * while actual access to the array elements is abstracted out with the - * user-defined `LESS` and `SWAP` primitives. +/* + * Straight implementation of Sedgewick's median qsort + * + * @LEN: name of parameter length + * @QLESS: name of function that computes array[i] < array[j] + * should return a boolean + * @QSWAP: name of function that swaps array[i] <-> array[j] * - * Synopsis: - * QSORT(N, LESS, SWAP); - * where - * N - the number of elements in A[]; - * LESS(i, j) - compares A[i] to A[j]; - * SWAP(i, j) - exchanges A[i] with A[j]. + * NOTE: This can perform on strided arrays. + * Make sure to use parens liberally to ensure hygeine! */ -#pragma once - -/* Sort 3 elements. */ -#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \ -do { \ - if (Q_LESS(q_a2, q_a1)) { \ - if (Q_LESS(q_a3, q_a2)) \ - Q_SWAP(q_a1, q_a3); \ - else { \ - Q_SWAP(q_a1, q_a2); \ - if (Q_LESS(q_a3, q_a2)) \ - Q_SWAP(q_a2, q_a3); \ - } \ - } \ - else if (Q_LESS(q_a3, q_a2)) { \ - Q_SWAP(q_a2, q_a3); \ - if (Q_LESS(q_a2, q_a1)) \ - Q_SWAP(q_a1, q_a2); \ - } \ -} while (0) - -/* Partition [q_l,q_r] around a pivot. After partitioning, - * [q_l,q_j] are the elements that are less than or equal to the pivot, - * while [q_i,q_r] are the elements greater than or equal to the pivot. */ -#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \ -do { \ - /* The middle element, not to be confused with the median. */ \ - Q_UINT q_m = q_l + ((q_r - q_l) >> 1); \ - /* Reorder the second, the middle, and the last items. \ - * As [Edelkamp Weiss 2016] explain, using the second element \ - * instead of the first one helps avoid bad behaviour for \ - * decreasingly sorted arrays. This method is used in recent \ - * versions of gcc's std::sort, see gcc bug 58437#c13, although \ - * the details are somewhat different (cf. #c14). */ \ - Q_SORT3(q_l + 1, q_m, q_r, Q_LESS, Q_SWAP); \ - /* Place the median at the beginning. */ \ - Q_SWAP(q_l, q_m); \ - /* Partition [q_l+2, q_r-1] around the median which is in q_l. \ - * q_i and q_j are initially off by one, they get decremented \ - * in the do-while loops. */ \ - q_i = q_l + 1; q_j = q_r; \ - while (1) { \ - do q_i++; while (Q_LESS(q_i, q_l)); \ - do q_j--; while (Q_LESS(q_l, q_j)); \ - if (q_i >= q_j) break; /* Sedgewick says "until j < i" */ \ - Q_SWAP(q_i, q_j); \ - } \ - /* Compensate for the i==j case. */ \ - q_i = q_j + 1; \ - /* Put the median to its final place. */ \ - Q_SWAP(q_l, q_j); \ - /* The median is not part of the left subfile. */ \ - q_j--; \ -} while (0) - -/* Insertion sort is applied to small subfiles - this is contrary to - * Sedgewick's suggestion to run a separate insertion sort pass after - * the partitioning is done. The reason I don't like a separate pass - * is that it triggers extra comparisons, because it can't see that the - * medians are already in their final positions and need not be rechecked. - * Since I do not assume that comparisons are cheap, I also do not try - * to eliminate the (q_j > q_l) boundary check. */ -#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \ -do { \ - Q_UINT q_i, q_j; \ - /* For each item starting with the second... */ \ - for (q_i = q_l + 1; q_i <= q_r; q_i++) \ - /* move it down the array so that the first part is sorted. */ \ - for (q_j = q_i; q_j > q_l && (Q_LESS(q_j, q_j - 1)); q_j--) \ - Q_SWAP(q_j, q_j - 1); \ -} while (0) - -/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to - * Q_THRESH, the algorithm performs recursive partitioning. When the size - * drops below Q_THRESH, the algorithm switches to insertion sort. - * The minimum valid value is probably 5 (with 5 items, the second and - * the middle items, the middle itself being rounded down, are distinct). */ -#define Q_THRESH 16 - -/* The main loop. */ -#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \ -do { \ - Q_UINT q_l = 0; \ - Q_UINT q_r = (Q_N) - 1; \ - Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \ - struct { Q_UINT q_l, q_r; } \ - /* On 32-bit platforms, to sort a "char[3GB+]" array, \ - * it may take full 32 stack frames. On 64-bit CPUs, \ - * though, the address space is limited to 48 bits. \ - * The usage is further reduced if Q_N has a 32-bit type. */ \ - q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \ - while (1) { \ - if (q_r - q_l + 1 >= Q_THRESH) { \ - Q_UINT q_i, q_j; \ - Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \ - /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \ - * Dealing with them depends on which one is bigger. */ \ - if (q_j - q_l >= q_r - q_i) \ - Q_SUBFILES(q_l, q_j, q_i, q_r); \ - else \ - Q_SUBFILES(q_i, q_r, q_l, q_j); \ - } \ - else { \ - Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \ - /* Pop subfiles from the stack, until it gets empty. */ \ - if (q_sp == 0) break; \ - q_sp--; \ - q_l = q_st[q_sp].q_l; \ - q_r = q_st[q_sp].q_r; \ - } \ - } \ -} while (0) - -/* The missing part: dealing with subfiles. - * Assumes that the first subfile is not smaller than the second. */ -#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \ -do { \ - /* If the second subfile is only a single element, it needs \ - * no further processing. The first subfile will be processed \ - * on the next iteration (both subfiles cannot be only a single \ - * element, due to Q_THRESH). */ \ - if (q_l2 == q_r2) { \ - q_l = q_l1; \ - q_r = q_r1; \ - } \ - else { \ - /* Otherwise, both subfiles need processing. \ - * Push the larger subfile onto the stack. */ \ - q_st[q_sp].q_l = q_l1; \ - q_st[q_sp].q_r = q_r1; \ - q_sp++; \ - /* Process the smaller subfile on the next iteration. */ \ - q_l = q_l2; \ - q_r = q_r2; \ - } \ -} while (0) - -/* And now, ladies and gentlemen, may I proudly present to you... */ -#define QSORT(Q_N, Q_LESS, Q_SWAP) \ -do { \ - if ((Q_N) > 1) \ - /* We could check sizeof(Q_N) and use "unsigned", but at least \ - * on x86_64, this has the performance penalty of up to 5%. */ \ - Q_LOOP(ulong, Q_N, Q_LESS, Q_SWAP); \ -} while (0) +#define QSORT(LEN, QLESS, QSWAP) \ + int i, j, m, r, l; \ + struct { \ + int i, j; \ + } *sp, base[48]; \ + sp = base; \ + \ + l = 1; \ + r = LEN-1; \ + \ + if (LEN <= 14) goto ENDOUTER; \ +OUTERLOOP: \ + SWAP((l+r)/2, l+1); \ + \ + if (QLESS(r, l+1)) QSWAP(r, l+1); \ + if (QLESS(r, l)) QSWAP(r, l); \ + if (QLESS(l, l+1)) QSWAP(l, l+1); \ + \ + i = l+1; \ + j = r; \ + \ +INNERLOOP: \ + do ++i; while(QLESS(i, l)); \ + do --j; while(QLESS(l, j)); \ + if (j < i) goto ENDINNER; \ + QSWAP(i, j); \ + goto INNERLOOP; \ + \ +ENDINNER: \ + QSWAP(l, j); \ + \ + if (MAX(j-l, r-i+1) <= 14) { \ + if (sp == base) { \ + goto ENDOUTER; \ + } else { \ + l = sp[-1].i; \ + r = sp[-1].j; \ + sp--; \ + } \ + } else { \ + if (MIN(j-l, r-i+1) <= 14) { \ + if (j-l > r-i+1) { \ + l = l; \ + r = j-1; \ + } else { \ + l = i; \ + r = r; \ + } \ + } else { \ + if (j-l > r-i+1) { \ + sp->i = l; \ + sp->j = j-1; \ + sp++; \ + \ + l = i; \ + r = r; \ + } else { \ + sp->i = i; \ + sp->j = r; \ + sp++; \ + \ + l = l; \ + r = j-1; \ + } \ + } \ + } \ + goto OUTERLOOP; \ + \ +ENDOUTER: \ + for (i = 1; i < LEN; i++) { \ + for (j = i; j > 0 && QLESS(j, j-1); j--) { \ + QSWAP(j, j-1); \ + } \ + } \ -- cgit v1.2.1