/* vim: set ft=c */ // ----------------------------------------------------------------------- // Function implementations FLOAT func(dot)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy) { #define INIT(I,...) reg[I] = 0; #define KERNEL(I, INCX, INCY) reg[I] += x[(INCX)*(i + I)] * y[(INCY)*(i + I)]; INT i, n; FLOAT reg[UNROLL]; BODY_XY() for (i = 1; i < UNROLL; i++) reg[0] += reg[i]; return reg[0]; #undef INIT #undef KERNEL } void func(copy)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy) { #define INIT(I,...) #define KERNEL(I, INCX, INCY) y[(INCY)*(i + I)] = x[(INCX)*(i + I)]; INT i, n; BODY_XY(); #undef INIT #undef KERNEL } void func(swap)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy) { #define INIT(I,...) #define KERNEL(I, INCX, INCY) tmp[I] = x[(INCX)*(i + I)], x[(INCX)*(i + I)] = y[(INCY)*(i + I)], y[(INCY)*(i + I)] = tmp[I]; INT i, n; FLOAT tmp[UNROLL]; BODY_XY(); #undef INIT #undef KERNEL } void func(axpy)(INT len, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy) { #define INIT(I,...) #define KERNEL(I, INCX, INCY) y[(INCY)*(i + I)] += a*x[(INCX)*(i + I)]; INT i, n; BODY_XY(); #undef INIT #undef KERNEL } void func(axpby)(INT len, FLOAT a, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) { #define INIT(I,...) #define KERNEL(I, INCX, INCY) y[(INCY)*(i + I)] = a*x[(INCX)*(i + I)] + b*y[(INCY)*(i + I)]; INT i, n; BODY_XY(); #undef INIT #undef KERNEL } INT func(argmax)(INT len, FLOAT *x, INT incx) { #define INIT(I,...) max[I] = x[I], idx[I] = I; #define KERNEL(I, INCX) if (x[(INCX)*(i+I)] > max[I]) {max[i] = x[INCX*(i+I)]; idx[I] = i+I;} INT i, n; FLOAT max[UNROLL]; INT idx[UNROLL]; BODY_X(); for (i = 1; i < UNROLL; i++) if (max[i] > max[0]) idx[0] = idx[i]; return idx[0]; #undef INIT #undef KERNEL } INT func(argmin)(INT len, FLOAT *x, INT incx) { #define INIT(I,...) min[I] = x[I], idx[I] = I; #define KERNEL(I, INCX) if (x[INCX*(i+I)] < min[I]) {min[i] = x[INCX*(i+I)]; idx[I] = i+I;} INT i, n; FLOAT min[UNROLL]; INT idx[UNROLL]; BODY_X(); for (i = 1; i < UNROLL; i++) if (min[i] < min[0]) idx[0] = idx[i]; return idx[0]; #undef INIT #undef KERNEL } FLOAT func(asum)(INT len, FLOAT *x, INT incx) { #define INIT(I,...) sum[I] = 0; #define KERNEL(I, INCX) sum[I] += x[INCX*(i+I)] > 0 ? x[INCX*(i+I)] : -x[INCX*(i+I)]; INT i, n; FLOAT sum[UNROLL]; BODY_X(); for (i = 1; i < UNROLL; i++) sum[0] += sum[i]; return sum[0]; #undef INIT #undef KERNEL } void func(scale)(INT len, FLOAT a, FLOAT *x, INT incx) { #define INIT(I, ...) #define KERNEL(I, INCX) x[INCX*(i+I)] *= a; INT i, n; BODY_X(); #undef INIT #undef KERNEL } FLOAT func(norm)(INT len, FLOAT *x, INT incx) { #define INIT(I, ...) #define KERNEL(I, INCX) norm[I] += x[INCX*(i+I)] * x[INCX*(i+I)]; INT i, n; FLOAT norm[UNROLL]; BODY_X(); for (i = 1; i < UNROLL; i++) norm[0] += norm[i]; return math·sqrt(norm[0]); #undef INIT #undef KERNEL } void func(drot)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT cos, FLOAT sin) { #define INIT(I, ...) #define KERNEL(I, INCX, INCY) tmp[I] = x[INCX*(i+I)], x[INCX*(i+I)] = cos*x[INCX*(i+I)] + sin*y[INCY*(i+I)], y[INCY*(i+I)] = cos*y[INCY*(i+I)] - sin*tmp[I]; INT i, n; FLOAT tmp[UNROLL]; BODY_XY(); #undef INIT #undef KERNEL } void func(rotm)(INT len, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT H[5]) { #define INIT(I, ...) #define KERNEL(I, INCX, INCY) tmp[I] = x[INCX*(i+I)], x[INCX*(i+I)] = H[1]*x[INCX*(i+I)] + H[2]*y[INCY*(i+I)], y[INCY*(i+I)] = H[3]*tmp[I] + H[4]*y[INCY*(i+I)]; INT i, n, f; FLOAT tmp[UNROLL]; f = (INT)H[0]; switch (f) { case -2: H[1] = +1; H[2] = +0; H[3] = +0; H[4] = +1; break; case -1: break; case +0: H[1] = +1; H[4] = +1; break; case +1: H[2] = +1; H[3] = -1; break; default: return; } BODY_XY(); #undef INIT #undef KERNEL }