aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-25 15:34:25 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-25 15:34:25 -0700
commit3eff49e57b9d122a1da1103988731c9ace69c304 (patch)
tree1302554b20a6739946cd19dbd843201be84dbb3f /sys
parent5e53685221576ad6ec53bafffd14bc7d5e01fa30 (diff)
feat: added preprocessor macros to manually unroll loops
Diffstat (limited to 'sys')
-rw-r--r--sys/libmath/blas1body215
-rw-r--r--sys/libmath/loop.h37
2 files changed, 252 insertions, 0 deletions
diff --git a/sys/libmath/blas1body b/sys/libmath/blas1body
new file mode 100644
index 0000000..de4b637
--- /dev/null
+++ b/sys/libmath/blas1body
@@ -0,0 +1,215 @@
+/* vim: set ft=c */
+// -----------------------------------------------------------------------
+// Function implementations
+
+FLOAT
+func(dot)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy)
+{
+#define INIT(I,...) reg[I] = 0;
+#define KERNEL(I, INCX, INCY) reg[I] += x[(INCX)*(i + I)] * y[(INCY)*(i + I)];
+ INT i, n;
+ FLOAT reg[UNROLL];
+
+ BODY_XY()
+
+ for (i = 1; i < UNROLL; i++)
+ reg[0] += reg[i];
+
+ return reg[0];
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(copy)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy)
+{
+#define INIT(I,...)
+#define KERNEL(I, INCX, INCY) y[(INCY)*(i + I)] = x[(INCX)*(i + I)];
+ INT i, n;
+
+ BODY_XY();
+
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(swap)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy)
+{
+#define INIT(I,...)
+#define KERNEL(I, INCX, INCY) tmp[I] = x[(INCX)*(i + I)], x[(INCX)*(i + I)] = y[(INCY)*(i + I)], y[(INCY)*(i + I)] = tmp[I];
+ INT i, n;
+ FLOAT tmp[UNROLL];
+
+ BODY_XY();
+
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(axpy)(INT len, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy)
+{
+#define INIT(I,...)
+#define KERNEL(I, INCX, INCY) y[(INCY)*(i + I)] += a*x[(INCX)*(i + I)];
+ INT i, n;
+
+ BODY_XY();
+
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(axpby)(INT len, FLOAT a, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy)
+{
+#define INIT(I,...)
+#define KERNEL(I, INCX, INCY) y[(INCY)*(i + I)] = a*x[(INCX)*(i + I)] + b*y[(INCY)*(i + I)];
+ INT i, n;
+
+ BODY_XY();
+
+#undef INIT
+#undef KERNEL
+}
+
+INT
+func(argmax)(INT len, FLOAT *x, INT incx)
+{
+#define INIT(I,...) max[I] = x[I], idx[I] = I;
+#define KERNEL(I, INCX) if (x[(INCX)*(i+I)] > max[I]) {max[i] = x[INCX*(i+I)]; idx[I] = i+I;}
+ INT i, n;
+ FLOAT max[UNROLL];
+ INT idx[UNROLL];
+
+ BODY_X();
+
+ for (i = 1; i < UNROLL; i++)
+ if (max[i] > max[0])
+ idx[0] = idx[i];
+
+ return idx[0];
+#undef INIT
+#undef KERNEL
+}
+
+INT
+func(argmin)(INT len, FLOAT *x, INT incx)
+{
+#define INIT(I,...) min[I] = x[I], idx[I] = I;
+#define KERNEL(I, INCX) if (x[INCX*(i+I)] < min[I]) {min[i] = x[INCX*(i+I)]; idx[I] = i+I;}
+ INT i, n;
+ FLOAT min[UNROLL];
+ INT idx[UNROLL];
+
+ BODY_X();
+
+ for (i = 1; i < UNROLL; i++)
+ if (min[i] < min[0])
+ idx[0] = idx[i];
+
+ return idx[0];
+#undef INIT
+#undef KERNEL
+}
+
+FLOAT
+func(asum)(INT len, FLOAT *x, INT incx)
+{
+#define INIT(I,...) sum[I] = 0;
+#define KERNEL(I, INCX) sum[I] += x[INCX*(i+I)] > 0 ? x[INCX*(i+I)] : -x[INCX*(i+I)];
+ INT i, n;
+ FLOAT sum[UNROLL];
+
+ BODY_X();
+
+ for (i = 1; i < UNROLL; i++)
+ sum[0] += sum[i];
+
+ return sum[0];
+
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(scale)(INT len, FLOAT a, FLOAT *x, INT incx)
+{
+#define INIT(I, ...)
+#define KERNEL(I, INCX) x[INCX*(i+I)] *= a;
+ INT i, n;
+
+ BODY_X();
+
+#undef INIT
+#undef KERNEL
+}
+
+FLOAT
+func(norm)(INT len, FLOAT *x, INT incx)
+{
+#define INIT(I, ...)
+#define KERNEL(I, INCX) norm[I] += x[INCX*(i+I)] * x[INCX*(i+I)];
+ INT i, n;
+ FLOAT norm[UNROLL];
+
+ BODY_X();
+
+ for (i = 1; i < UNROLL; i++)
+ norm[0] += norm[i];
+
+ return math·sqrt(norm[0]);
+
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(drot)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT cos, FLOAT sin)
+{
+#define INIT(I, ...)
+#define KERNEL(I, INCX, INCY) tmp[I] = x[INCX*(i+I)], x[INCX*(i+I)] = cos*x[INCX*(i+I)] + sin*y[INCY*(i+I)], y[INCY*(i+I)] = cos*y[INCY*(i+I)] - sin*tmp[I];
+ INT i, n;
+ FLOAT tmp[UNROLL];
+
+ BODY_XY();
+
+#undef INIT
+#undef KERNEL
+}
+
+void
+func(rotm)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT H[5])
+{
+#define INIT(I, ...)
+#define KERNEL(I, INCX, INCY) tmp[I] = x[INCX*(i+I)], x[INCX*(i+I)] = H[1]*x[INCX*(i+I)] + H[2]*y[INCY*(i+I)], y[INCY*(i+I)] = H[3]*tmp[I] + H[4]*y[INCY*(i+I)];
+ INT i, n, f;
+ FLOAT tmp[UNROLL];
+
+ f = (INT)H[0];
+ switch (f) {
+ case -2:
+ H[1] = +1;
+ H[2] = +0;
+ H[3] = +0;
+ H[4] = +1;
+ break;
+ case -1:
+ break;
+ case +0:
+ H[1] = +1;
+ H[4] = +1;
+ break;
+ case +1:
+ H[2] = +1;
+ H[3] = -1;
+ break;
+ default:
+ return;
+ }
+
+ BODY_XY();
+
+#undef INIT
+#undef KERNEL
+}
diff --git a/sys/libmath/loop.h b/sys/libmath/loop.h
new file mode 100644
index 0000000..9a425a7
--- /dev/null
+++ b/sys/libmath/loop.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#define ROUNDBY(x, n) ((x) & ~((n)-1))
+
+/* loop unrolling (vertical) */
+#define LOOP1(I,STMT,...) STMT(I,__VA_ARGS__)
+#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1((I+1),STMT,__VA_ARGS__)
+#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2((I+1),STMT,__VA_ARGS__)
+#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3((I+1),STMT,__VA_ARGS__)
+#define LOOP5(I,STMT,...) STMT(I,__VA_ARGS__) LOOP4((I+1),STMT,__VA_ARGS__)
+#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5((I+1),STMT,__VA_ARGS__)
+#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6((I+1),STMT,__VA_ARGS__)
+#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7((I+1),STMT,__VA_ARGS__)
+#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8((I+1),STMT,__VA_ARGS__)
+#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9((I+1),STMT,__VA_ARGS__)
+#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10((I+1),STMT,__VA_ARGS__)
+#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11((I+1),STMT,__VA_ARGS__)
+#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12((I+1),STMT,__VA_ARGS__)
+#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13((I+1),STMT,__VA_ARGS__)
+#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14((I+1),STMT,__VA_ARGS__)
+#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15((I+1),STMT,__VA_ARGS__)
+
+#define _LOOP_(n,I,STMT,...) LOOP##n(I,STMT,__VA_ARGS__)
+#define LOOP(n,I,STMT,...) _LOOP_(n,I,STMT,__VA_ARGS__)
+
+/* loop expansion (horizontal) */
+#define EXPAND1(I,TERM,OP,...) TERM(I,__VA_ARGS__)
+#define EXPAND2(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND1((I+1),TERM,OP,__VA_ARGS__)
+#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2((I+1),TERM,OP,__VA_ARGS__)
+#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3((I+1),TERM,OP,__VA_ARGS__)
+#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4((I+1),TERM,OP,__VA_ARGS__)
+#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5((I+1),TERM,OP,__VA_ARGS__)
+#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6((I+1),TERM,OP,__VA_ARGS__)
+#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7((I+1),TERM,OP,__VA_ARGS__)
+
+#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__)
+#define EXPAND(n,I,TERM,OP, ...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__)