From 6db18fdf24d4f91f208618de03a9ade8d21dc999 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 12 Jun 2020 12:34:22 -0700 Subject: straglers --- sys/libmath/blas2.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 sys/libmath/blas2.c (limited to 'sys/libmath/blas2.c') diff --git a/sys/libmath/blas2.c b/sys/libmath/blas2.c new file mode 100644 index 0000000..7e4b08e --- /dev/null +++ b/sys/libmath/blas2.c @@ -0,0 +1,222 @@ +#include +#include +#include "loop.h" + +// ----------------------------------------------------------------------- +// Templates + +#define BODY_RECT() \ + nr = ROUNDBY(nrow, UNROW); \ + nc = ROUNDBY(ncol, UNCOL); \ + if (incx == 1 && incy == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx,incy); \ + } \ + } \ + \ + for (; r < nrow; r++) { \ + LOOP(1,0,INIT,incx,incy); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +#define BODY_LOTRI() \ + nr = ROUNDBY(n, UNROW); \ + if (incx == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,1,1); \ + } \ + LOOP(UNROW,0,FINI,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,incx,1); \ + } \ + LOOP(UNROW,0,FINI,incx); \ + } \ + } \ + \ + for (; r < n; r++) { \ + LOOP(1,0,INIT,incx); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(1,0,KERN,incx,1); \ + } \ + LOOP(1,0,FINI,incx); \ + } + +#define BODY_UPTRI() \ + nr = n - ROUNDBY(n, UNROW); \ + if (incx == 1) { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,1); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,1,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,1,1); \ + } \ + LOOP(UNROW,0,FINI,1); \ + } \ + } else { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,incx); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,incx,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,incx,1); \ + } \ + LOOP(UNROW,0,FINI,incx); \ + } \ + } \ + \ + for (; r >= 0; r--) { \ + LOOP(1,0,INIT,incx); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(1,0,KERN,incx,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(1,0,KERN,incx,1); \ + } \ + LOOP(1,0,FINI,incx); \ + } + +#define BODY_LOTRI_XY() \ + nr = ROUNDBY(n, UNROW); \ + if (incx == 1 && incy == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx, incy); \ + } \ + } \ + \ + for (; r < n; r++) { \ + LOOP(1,0,INIT,incx,incy); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +#define BODY_UPTRI_XY() \ + nr = n - ROUNDBY(n, UNROW); \ + if (incx == 1 && incy == 1) { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx,incy); \ + } \ + } \ + \ + for (; r >= 0; r--) { \ + LOOP(1,0,INIT,incx,incy); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +// ----------------------------------------------------------------------- +// implementation + +#define UNROW 4 +#define UNCOL 4 + +#define INT int +#define FLOAT double +#define func(name) blas·d##name +#include "blas2body" + +#undef FLOAT +#undef func + +#define FLOAT float +#define func(name) blas·f##name +#include "blas2body" -- cgit v1.2.1