aboutsummaryrefslogtreecommitdiff
path: root/src/libmath/blas2.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/libmath/blas2.c')
-rw-r--r--src/libmath/blas2.c222
1 files changed, 222 insertions, 0 deletions
diff --git a/src/libmath/blas2.c b/src/libmath/blas2.c
new file mode 100644
index 0000000..7e4b08e
--- /dev/null
+++ b/src/libmath/blas2.c
@@ -0,0 +1,222 @@
+#include <u.h>
+#include <libmath/blas.h>
+#include "loop.h"
+
+// -----------------------------------------------------------------------
+// Templates
+
+#define BODY_RECT() /* Row/column loop skeleton over an nrow x ncol rectangle.
+ * Expands in a scope that must already provide r, c, nr, nc, nrow, ncol,
+ * incx and incy; the macro assigns r, c, nr, nc and only reads the rest.
+ * INIT, KERN and FINI are handed to LOOP as stage names and are not defined
+ * in the lines shown here. For each row group the order is: INIT once,
+ * KERN once per column step, FINI once.
+ * NOTE(review): ROUNDBY presumably rounds down to a multiple of its second
+ * argument, so nr and nc bound the unrolled part; confirm where it is defined.
+ */ \
+ nr = ROUNDBY(nrow, UNROW); \
+ nc = ROUNDBY(ncol, UNCOL); \
+ if (incx == 1 && incy == 1) { /* both strides are 1: LOOP gets literal 1s */ \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,1,1); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,1,1,UNCOL); \
+ } \
+ for (; c < ncol; c++) { /* leftover columns, one at a time */ \
+ LOOP(UNROW,0,KERN,1,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1,1); \
+ } \
+ } else { /* any other stride combination: LOOP gets incx and incy */ \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,incx,incy); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < ncol; c++) { /* leftover columns, one at a time */ \
+ LOOP(UNROW,0,KERN,incx,incy,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx,incy); \
+ } \
+ } \
+ \
+ for (; r < nrow; r++) { /* leftover rows: r continues from the loops above, always with incx and incy */ \
+ LOOP(1,0,INIT,incx,incy); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(1,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < ncol; c++) { \
+ LOOP(1,0,KERN,incx,incy,1); \
+ } \
+ LOOP(1,0,FINI,incx,incy); \
+ }
+
+#define BODY_LOTRI() /* Loop skeleton for a lower-triangular sweep over n rows
+ * with a single stride incx. Expands in a scope that must already provide
+ * r, c, nr, nc, n and incx; the macro assigns r, c, nr, nc.
+ * r ascends from 0. For the row group starting at r, KERN is applied only for
+ * columns 0 .. r-1; columns at or right of r are never visited by these loops
+ * (presumably INIT or FINI deals with them; confirm in the file that defines
+ * them). INIT, KERN and FINI are stage names passed to LOOP and are not
+ * defined in the lines shown here.
+ */ \
+ nr = ROUNDBY(n, UNROW); \
+ if (incx == 1) { /* stride is 1: LOOP gets a literal 1 */ \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,1); \
+ nc = ROUNDBY(r, UNCOL); /* column bound depends on the current row */ \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,1,UNCOL); \
+ } \
+ for (; c < r; c++) { /* leftover columns up to, not including, r */ \
+ LOOP(UNROW,0,KERN,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1); \
+ } \
+ } else { /* any other stride: LOOP gets incx */ \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,incx); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(UNROW,0,KERN,incx,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx); \
+ } \
+ } \
+ \
+ for (; r < n; r++) { /* leftover rows: r continues from the loops above, always with incx */ \
+ LOOP(1,0,INIT,incx); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(1,0,KERN,incx,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(1,0,KERN,incx,1); \
+ } \
+ LOOP(1,0,FINI,incx); \
+ }
+
+#define BODY_UPTRI() /* Loop skeleton for an upper-triangular sweep over n rows
+ * with a single stride incx. Expands in a scope that must already provide
+ * r, c, nr, nc, n and incx; the macro assigns r, c, nr, nc.
+ * r descends from n-1 in steps of UNROW while r >= nr, then one row at a time
+ * down to 0. Columns descend from n-1; the single-step tail stops once c is
+ * no longer greater than r. INIT, KERN and FINI are stage names passed to
+ * LOOP and are not defined in the lines shown here.
+ * NOTE(review): the column bound nc = n - ROUNDBY(r, UNCOL) is not the mirror
+ * image of the bound BODY_LOTRI uses. If ROUNDBY rounds down to a multiple,
+ * then n=10, r=9, UNCOL=4 gives nc=2 and the unrolled column loop runs at
+ * c=9 and c=5, both at or below r. Confirm against KERN and ROUNDBY before
+ * relying on this template.
+ */ \
+ nr = n - ROUNDBY(n, UNROW); /* lower row limit for the unrolled loop */ \
+ if (incx == 1) { /* stride is 1: LOOP gets a literal 1 */ \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,1); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,1,UNCOL); \
+ } \
+ for (; c > r; c--) { /* leftover columns strictly right of r */ \
+ LOOP(UNROW,0,KERN,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1); \
+ } \
+ } else { /* any other stride: LOOP gets incx */ \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,incx); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(UNROW,0,KERN,incx,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx); \
+ } \
+ } \
+ \
+ for (; r >= 0; r--) { /* leftover rows: r continues from the loops above; r must be a signed type for this test to end */ \
+ LOOP(1,0,INIT,incx); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(1,0,KERN,incx,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(1,0,KERN,incx,1); \
+ } \
+ LOOP(1,0,FINI,incx); \
+ }
+
+#define BODY_LOTRI_XY() /* Loop skeleton for a lower-triangular sweep over n
+ * rows with two strides, incx and incy. Expands in a scope that must already
+ * provide r, c, nr, nc, n, incx and incy; the macro assigns r, c, nr, nc.
+ * The loop shape matches BODY_LOTRI: r ascends from 0 and KERN is applied for
+ * columns 0 .. r-1 of each row group. The difference is that every LOOP call
+ * carries both strides. INIT, KERN and FINI are stage names passed to LOOP
+ * and are not defined in the lines shown here.
+ */ \
+ nr = ROUNDBY(n, UNROW); \
+ if (incx == 1 && incy == 1) { /* both strides are 1: LOOP gets literal 1s */ \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,1,1); \
+ nc = ROUNDBY(r, UNCOL); /* column bound depends on the current row */ \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,1,1,UNCOL); \
+ } \
+ for (; c < r; c++) { /* leftover columns up to, not including, r */ \
+ LOOP(UNROW,0,KERN,1,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1,1); \
+ } \
+ } else { /* any other stride combination: LOOP gets incx and incy */ \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,incx,incy); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(UNROW,0,KERN,incx,incy,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx, incy); \
+ } \
+ } \
+ \
+ for (; r < n; r++) { /* leftover rows: r continues from the loops above, always with incx and incy */ \
+ LOOP(1,0,INIT,incx,incy); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(1,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(1,0,KERN,incx,incy,1); \
+ } \
+ LOOP(1,0,FINI,incx,incy); \
+ }
+
+#define BODY_UPTRI_XY() /* Loop skeleton for an upper-triangular sweep over n
+ * rows with two strides, incx and incy. Expands in a scope that must already
+ * provide r, c, nr, nc, n, incx and incy; the macro assigns r, c, nr, nc.
+ * r descends from n-1 in steps of UNROW while r >= nr, then one row at a time
+ * down to 0. Columns descend from n-1; the single-step tail stops once c is
+ * no longer greater than r. INIT, KERN and FINI are stage names passed to
+ * LOOP and are not defined in the lines shown here.
+ * NOTE(review): the column bound nc = n - ROUNDBY(r, UNCOL) is not the mirror
+ * image of the bound BODY_LOTRI_XY uses. If ROUNDBY rounds down to a
+ * multiple, then n=10, r=9, UNCOL=4 gives nc=2 and the unrolled column loop
+ * runs at c=9 and c=5, both at or below r. Confirm against KERN and ROUNDBY
+ * before relying on this template.
+ */ \
+ nr = n - ROUNDBY(n, UNROW); /* lower row limit for the unrolled loop */ \
+ if (incx == 1 && incy == 1) { /* both strides are 1: LOOP gets literal 1s */ \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,1,1); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,1,1,UNCOL); \
+ } \
+ for (; c > r; c--) { /* leftover columns strictly right of r */ \
+ LOOP(UNROW,0,KERN,1,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1,1); \
+ } \
+ } else { /* any other stride combination: LOOP gets incx and incy */ \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,incx,incy); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(UNROW,0,KERN,incx,incy,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx,incy); \
+ } \
+ } \
+ \
+ for (; r >= 0; r--) { /* leftover rows: r continues from the loops above; r must be a signed type for this test to end */ \
+ LOOP(1,0,INIT,incx,incy); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(1,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(1,0,KERN,incx,incy,1); \
+ } \
+ LOOP(1,0,FINI,incx,incy); \
+ }
+
+// -----------------------------------------------------------------------
+// implementation
+
+#define UNROW 4 /* row step used by the unrolled loops of the BODY_* templates */
+#define UNCOL 4 /* column step used by the unrolled loops of the BODY_* templates */
+
+#define INT int
+#define FLOAT double /* first pass: FLOAT is double */
+#define func(name) blas·d##name /* pastes name onto the blas·d prefix */
+#include "blas2body" /* NOTE(review): presumably consumes INT, FLOAT, func and the BODY_* templates; file not shown */
+
+#undef FLOAT /* only FLOAT and func are reset; INT, UNROW and UNCOL stay defined for the second pass */
+#undef func
+
+#define FLOAT float /* second pass: FLOAT is float */
+#define func(name) blas·f##name /* pastes name onto the blas·f prefix */
+#include "blas2body" /* same body included again under the float definitions */