From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/libmath/blas2.c | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 src/libmath/blas2.c (limited to 'src/libmath/blas2.c') diff --git a/src/libmath/blas2.c b/src/libmath/blas2.c new file mode 100644 index 0000000..7e4b08e --- /dev/null +++ b/src/libmath/blas2.c @@ -0,0 +1,222 @@ +#include +#include +#include "loop.h" + +// ----------------------------------------------------------------------- +// Templates + +#define BODY_RECT() \ + nr = ROUNDBY(nrow, UNROW); \ + nc = ROUNDBY(ncol, UNCOL); \ + if (incx == 1 && incy == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx,incy); \ + } \ + } \ + \ + for (; r < nrow; r++) { \ + LOOP(1,0,INIT,incx,incy); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +#define BODY_LOTRI() \ + nr = ROUNDBY(n, UNROW); \ + if (incx == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,1,1); \ + } \ + LOOP(UNROW,0,FINI,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,incx,1); \ + } \ + LOOP(UNROW,0,FINI,incx); \ + } \ + } \ + \ + for (; r < n; r++) { \ + LOOP(1,0,INIT,incx); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(1,0,KERN,incx,1); \ + } \ + LOOP(1,0,FINI,incx); \ + } + +#define BODY_UPTRI() \ + nr = n - ROUNDBY(n, UNROW); \ + if (incx == 1) { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,1); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,1,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,1,1); \ + } \ + LOOP(UNROW,0,FINI,1); \ + } \ + } else { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,incx); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,incx,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,incx,1); \ + } \ + LOOP(UNROW,0,FINI,incx); \ + } \ + } \ + \ + for (; r >= 0; r--) { \ + LOOP(1,0,INIT,incx); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(1,0,KERN,incx,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(1,0,KERN,incx,1); \ + } \ + LOOP(1,0,FINI,incx); \ + } + +#define BODY_LOTRI_XY() \ + nr = ROUNDBY(n, UNROW); \ + if (incx == 1 && incy == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx, incy); \ + } \ + } \ + \ + for (; r < n; r++) { \ + LOOP(1,0,INIT,incx,incy); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +#define BODY_UPTRI_XY() \ + nr = n - ROUNDBY(n, UNROW); \ + if (incx == 1 && incy == 1) { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx,incy); \ + } \ + } \ + \ + for (; r >= 0; r--) { \ + LOOP(1,0,INIT,incx,incy); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +// ----------------------------------------------------------------------- +// implementation + +#define UNROW 4 +#define UNCOL 4 + +#define INT int +#define FLOAT double +#define func(name) blas·d##name +#include "blas2body" + +#undef FLOAT +#undef func + +#define FLOAT float +#define func(name) blas·f##name +#include "blas2body" -- cgit v1.2.1