// NOTE(review): the header names of the next two #include directives are
// missing in this copy of the file; restore them from the upstream source.
#include
#include
#include "loop.h"

// -----------------------------------------------------------------------
// Templates
//
// Each BODY_* macro expands to a complete pair of nested row/column loops.
// They are statement sequences, not expressions, and rely on names that must
// already exist where the macro is expanded:
//   - loop state:  r, c, nr, nc
//   - extents:     nrow and ncol (BODY_RECT) or n (triangular variants)
//   - strides:     incx, plus incy for BODY_RECT and the *_XY variants
//   - helpers:     ROUNDBY and LOOP, and the INIT / KERN / FINI tokens that
//                  are forwarded to LOOP (none of these are defined here)
//
// Common shape of every template:
//   1. Rows are visited in groups of UNROW. For each group LOOP is invoked
//      with INIT, then with KERN once per column step, then with FINI.
//   2. Inside a group, columns are first stepped by UNCOL (last LOOP argument
//      is UNCOL) and the leftover columns are stepped one at a time (last
//      LOOP argument is 1).
//   3. When every stride equals 1, a separate copy of the row-group loop
//      passes the literal 1 to LOOP instead of the stride variables.
//   4. Rows left over after the grouped loop are handled one at a time with a
//      row count of 1; this tail always passes the stride variables.
//
// ROUNDBY(x, k) presumably rounds x down to a multiple of k -- TODO confirm
// against its definition.

// Rectangular traversal: all nrow rows, and for each row group all ncol
// columns in ascending order. nc is computed once and reused by the
// single-row tail loop.
#define BODY_RECT() \
    nr = ROUNDBY(nrow, UNROW); \
    nc = ROUNDBY(ncol, UNCOL); \
    if (incx == 1 && incy == 1) { \
        for (r = 0; r < nr; r += UNROW) { \
            LOOP(UNROW,0,INIT,1,1); \
            for (c = 0; c < nc; c += UNCOL) { \
                LOOP(UNROW,0,KERN,1,1,UNCOL); \
            } \
            for (; c < ncol; c++) { \
                LOOP(UNROW,0,KERN,1,1,1); \
            } \
            LOOP(UNROW,0,FINI,1,1); \
        } \
    } else { \
        for (r = 0; r < nr; r += UNROW) { \
            LOOP(UNROW,0,INIT,incx,incy); \
            for (c = 0; c < nc; c += UNCOL) { \
                LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
            } \
            for (; c < ncol; c++) { \
                LOOP(UNROW,0,KERN,incx,incy,1); \
            } \
            LOOP(UNROW,0,FINI,incx,incy); \
        } \
    } \
    \
    for (; r < nrow; r++) { \
        LOOP(1,0,INIT,incx,incy); \
        for (c = 0; c < nc; c += UNCOL) { \
            LOOP(1,0,KERN,incx,incy,UNCOL); \
        } \
        for (; c < ncol; c++) { \
            LOOP(1,0,KERN,incx,incy,1); \
        } \
        LOOP(1,0,FINI,incx,incy); \
    }

// Lower-triangular traversal with a single stride (incx): rows ascend from 0
// to n-1 and, for the row index r, KERN is applied to columns 0 .. r-1 only
// (the column loops stop at c < r). nc is recomputed for every r because the
// column extent depends on the row.
#define BODY_LOTRI() \
    nr = ROUNDBY(n, UNROW); \
    if (incx == 1) { \
        for (r = 0; r < nr; r += UNROW) { \
            LOOP(UNROW,0,INIT,1); \
            nc = ROUNDBY(r, UNCOL); \
            for (c = 0; c < nc; c += UNCOL) { \
                LOOP(UNROW,0,KERN,1,UNCOL); \
            } \
            for (; c < r; c++) { \
                LOOP(UNROW,0,KERN,1,1); \
            } \
            LOOP(UNROW,0,FINI,1); \
        } \
    } else { \
        for (r = 0; r < nr; r += UNROW) { \
            LOOP(UNROW,0,INIT,incx); \
            nc = ROUNDBY(r, UNCOL); \
            for (c = 0; c < nc; c += UNCOL) { \
                LOOP(UNROW,0,KERN,incx,UNCOL); \
            } \
            for (; c < r; c++) { \
                LOOP(UNROW,0,KERN,incx,1); \
            } \
            LOOP(UNROW,0,FINI,incx); \
        } \
    } \
    \
    for (; r < n; r++) { \
        LOOP(1,0,INIT,incx); \
        nc = ROUNDBY(r, UNCOL); \
        for (c = 0; c < nc; c += UNCOL) { \
            LOOP(1,0,KERN,incx,UNCOL); \
        } \
        for (; c < r; c++) { \
            LOOP(1,0,KERN,incx,1); \
        } \
        LOOP(1,0,FINI,incx); \
    }

// Upper-triangular traversal with a single stride (incx): rows descend from
// n-1 to 0 and columns descend from n-1; the one-at-a-time column loop stops
// at c > r. The grouped row loop runs while r >= n - ROUNDBY(n, UNROW) and
// the tail loop continues down to r == 0, so r must be a signed type for the
// r >= 0 test to terminate.
//
// NOTE(review): the UNCOL-stepped column loop is bounded by
// nc = n - ROUNDBY(r, UNCOL), which is derived from r itself, whereas the
// number of columns satisfying c > r is n-1-r. If ROUNDBY rounds down, then
// for r = n-1 (no column satisfies c > r) the stepped loop still executes
// whenever n-1 >= UNCOL, and for r = 0 it never executes. The bound looks
// carried over from BODY_LOTRI; n - ROUNDBY(n-1-r, UNCOL) may have been
// intended. Confirm against the LOOP/KERN definitions before changing.
#define BODY_UPTRI() \
    nr = n - ROUNDBY(n, UNROW); \
    if (incx == 1) { \
        for (r = n-1; r >= nr; r -= UNROW) { \
            LOOP(UNROW,0,INIT,1); \
            nc = n - ROUNDBY(r, UNCOL); \
            for (c = n-1; c >= nc; c -= UNCOL) { \
                LOOP(UNROW,0,KERN,1,UNCOL); \
            } \
            for (; c > r; c--) { \
                LOOP(UNROW,0,KERN,1,1); \
            } \
            LOOP(UNROW,0,FINI,1); \
        } \
    } else { \
        for (r = n-1; r >= nr; r -= UNROW) { \
            LOOP(UNROW,0,INIT,incx); \
            nc = n - ROUNDBY(r, UNCOL); \
            for (c = n-1; c >= nc; c -= UNCOL) { \
                LOOP(UNROW,0,KERN,incx,UNCOL); \
            } \
            for (; c > r; c--) { \
                LOOP(UNROW,0,KERN,incx,1); \
            } \
            LOOP(UNROW,0,FINI,incx); \
        } \
    } \
    \
    for (; r >= 0; r--) { \
        LOOP(1,0,INIT,incx); \
        nc = n - ROUNDBY(r, UNCOL); \
        for (c = n-1; c >= nc; c -= UNCOL) { \
            LOOP(1,0,KERN,incx,UNCOL); \
        } \
        for (; c > r; c--) { \
            LOOP(1,0,KERN,incx,1); \
        } \
        LOOP(1,0,FINI,incx); \
    }

// Same traversal as BODY_LOTRI, but every LOOP invocation also receives a
// second stride (incy), and the literal-1 copy is selected only when both
// incx and incy equal 1.
#define BODY_LOTRI_XY() \
    nr = ROUNDBY(n, UNROW); \
    if (incx == 1 && incy == 1) { \
        for (r = 0; r < nr; r += UNROW) { \
            LOOP(UNROW,0,INIT,1,1); \
            nc = ROUNDBY(r, UNCOL); \
            for (c = 0; c < nc; c += UNCOL) { \
                LOOP(UNROW,0,KERN,1,1,UNCOL); \
            } \
            for (; c < r; c++) { \
                LOOP(UNROW,0,KERN,1,1,1); \
            } \
            LOOP(UNROW,0,FINI,1,1); \
        } \
    } else { \
        for (r = 0; r < nr; r += UNROW) { \
            LOOP(UNROW,0,INIT,incx,incy); \
            nc = ROUNDBY(r, UNCOL); \
            for (c = 0; c < nc; c += UNCOL) { \
                LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
            } \
            for (; c < r; c++) { \
                LOOP(UNROW,0,KERN,incx,incy,1); \
            } \
            LOOP(UNROW,0,FINI,incx, incy); \
        } \
    } \
    \
    for (; r < n; r++) { \
        LOOP(1,0,INIT,incx,incy); \
        nc = ROUNDBY(r, UNCOL); \
        for (c = 0; c < nc; c += UNCOL) { \
            LOOP(1,0,KERN,incx,incy,UNCOL); \
        } \
        for (; c < r; c++) { \
            LOOP(1,0,KERN,incx,incy,1); \
        } \
        LOOP(1,0,FINI,incx,incy); \
    }

// Same traversal as BODY_UPTRI, but every LOOP invocation also receives a
// second stride (incy), and the literal-1 copy is selected only when both
// incx and incy equal 1.
//
// NOTE(review): uses the same nc = n - ROUNDBY(r, UNCOL) bound as
// BODY_UPTRI; the stepped column loop is therefore sized from r rather than
// from the n-1-r columns that satisfy c > r. Confirm whether
// n - ROUNDBY(n-1-r, UNCOL) was intended.
#define BODY_UPTRI_XY() \
    nr = n - ROUNDBY(n, UNROW); \
    if (incx == 1 && incy == 1) { \
        for (r = n-1; r >= nr; r -= UNROW) { \
            LOOP(UNROW,0,INIT,1,1); \
            nc = n - ROUNDBY(r, UNCOL); \
            for (c = n-1; c >= nc; c -= UNCOL) { \
                LOOP(UNROW,0,KERN,1,1,UNCOL); \
            } \
            for (; c > r; c--) { \
                LOOP(UNROW,0,KERN,1,1,1); \
            } \
            LOOP(UNROW,0,FINI,1,1); \
        } \
    } else { \
        for (r = n-1; r >= nr; r -= UNROW) { \
            LOOP(UNROW,0,INIT,incx,incy); \
            nc = n - ROUNDBY(r, UNCOL); \
            for (c = n-1; c >= nc; c -= UNCOL) { \
                LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
            } \
            for (; c > r; c--) { \
                LOOP(UNROW,0,KERN,incx,incy,1); \
            } \
            LOOP(UNROW,0,FINI,incx,incy); \
        } \
    } \
    \
    for (; r >= 0; r--) { \
        LOOP(1,0,INIT,incx,incy); \
        nc = n - ROUNDBY(r, UNCOL); \
        for (c = n-1; c >= nc; c -= UNCOL) { \
            LOOP(1,0,KERN,incx,incy,UNCOL); \
        } \
        for (; c > r; c--) { \
            LOOP(1,0,KERN,incx,incy,1); \
        } \
        LOOP(1,0,FINI,incx,incy); \
    }

// -----------------------------------------------------------------------
// implementation
//
// "blas2body" is included twice with different FLOAT / func definitions, so
// whatever it defines through func(name) is emitted once with the blas·d
// prefix (FLOAT = double) and once with the blas·f prefix (FLOAT = float).
// UNROW and UNCOL are the row and column step sizes used by the templates
// above; INT is not referenced in this file and is presumably consumed by
// "blas2body".

#define UNROW 4
#define UNCOL 4

#define INT   int
#define FLOAT double
#define func(name) blas·d##name

#include "blas2body"

// Second pass: only FLOAT and func are redefined; UNROW, UNCOL and INT keep
// their values from the first pass.
#undef  FLOAT
#undef  func

#define FLOAT float
#define func(name) blas·f##name

#include "blas2body"