/* general matrix multiply */ error func(gemv)(uint flag, INT nrow, INT ncol, FLOAT a, FLOAT *m, INT incm, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) { INT r, c, nr, nc; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = 0; #define TERM(J, I, INCX, INCY) row[I][c+J] * x[INCX*(c+J)] #define KERN(I, INCX, INCY, LENGTH) reg[I] += EXPAND(LENGTH,0,TERM,+,I,INCX,INCY); #define FINI(I, INCX, INCY) y[INCY*(r+I)] = b*y[INCY*(r+I)] + a*reg[I]; if (!flag) { BODY_RECT(); } else { func(scale)(ncol, b, y, incy); #undef KERN #undef FINI #undef INIT #undef TERM #define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = a*x[INCX*(r+I)]; #define TERM(J, I, INCX, INCY) row[I][c+J] * reg[I] #define KERN(I, INCX, INCY, LENGTH) y[INCY*(c+I)] += EXPAND(LENGTH,0,TERM,+,I,INCX,INCY); #define FINI(I, INCX, INCY) BODY_RECT(); } return 0; #undef INIT #undef TERM #undef KERN #undef FINI } /* symmetric matrix vector multiply (different layouts) */ void func(symv)(uint upper, INT n, FLOAT a, FLOAT *m, INT incm, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) { INT r, c, nr, nc, i; FLOAT *row[UNROW], reg1[UNROW], reg2[UNROW]; #define INIT(I, INCX, INCY) row[I] = m + (r XX I)*incm, reg1[I] = 0, reg2[I] = 0; #define TERM1(J, I, INCX, INCY) row[I][c XX J]*x[INCX*(c XX J)] #define TERM2(J, I, INCX, INCY) row[I][c XX J]*x[INCX*((n-c-1) XX J)] #define KERN(I, INCX, INCY, REPEAT) reg1[I] += EXPAND(REPEAT,0,TERM1,+,I,INCX,INCY), \ reg2[I] += EXPAND(REPEAT,0,TERM2,+,I,INCX,INCY); #define FINI(I, INCX, INCY) y[INCY*(r+I)] += a*(reg1[I] + row[I][r]*x[INCX*r]), \ y[INCY*(n-r-1+I)] += a*reg2[I]; func(scale)(n, b, y, incy); #define XX + if (!upper) { BODY_LOTRI_XY(); } else { #undef XX #define XX - BODY_UPTRI_XY(); } #undef XX #undef INIT } void func(spmv)(uint upper, INT n, FLOAT a, FLOAT *m, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) { INT r, c, nr, nc, i; FLOAT *row[UNROW], reg1[UNROW], reg2[UNROW]; #define INIT(I, INCX, INCY) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg1[I] = 0, reg2[I] = 0; func(scale)(n, b, y, incy); #define XX + if (!upper) { BODY_LOTRI_XY(); } else { #undef XX #undef INIT #define INIT(I, INCX, INCY) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg1[I] = 0, reg2[I] = 0; #define XX - BODY_UPTRI_XY(); } #undef XX #undef TERM #undef INIT #undef KERN #undef FINI } /* triangular multiply (different layouts) */ void func(trmv)(uint upper, INT n, FLOAT *m, INT incm, FLOAT *x, INT incx) { INT r, c, nr, nc, i; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = 0; #define TERM(J, I, INCX) row[I][c XX J]*x[INCX*(c XX J)] #define KERN(I, INCX, REPEAT) reg[I] += EXPAND(REPEAT,0,TERM,+,I,INCX); #define FINI(I, INCX) x[INCX*(r XX I)] = row[I][r XX I]*x[INCX*(r XX I)] + reg[I]; #define XX + if (!upper) { BODY_LOTRI(); } else { #undef XX #define XX - BODY_UPTRI(); } #undef XX #undef INIT } void func(tpmv)(uint upper, INT n, FLOAT *m, FLOAT *x, INT incx) { INT r, c, nr, nc, i; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0; #define XX + if (!upper) { BODY_LOTRI(); } else { #undef XX #undef INIT #define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0; #define XX - BODY_UPTRI(); } #undef XX #undef INIT #undef TERM #undef KERN #undef FINI } /* triangular solve (different layouts) */ void func(trsv)(uint upper, INT n, FLOAT *m, INT incm, FLOAT *x, INT incx) { INT r, c, nr, nc, i; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = 0; #define TERM(J, I, INCX) row[I][c XX J]*x[c XX J] #define KERN(I, INCX, REPEAT) reg[I] += EXPAND(REPEAT,0,TERM,+,I,INCX); #define SOLN(J, I, INCX) reg[J] += row[I][r XX I]*x[INCX*(r XX I)] #define FINI(I, INCX) x[INCX*(r XX I)] = reg[I] / row[I][r XX I]; EXPAND_TRI(UNROW,INC(I),SOLN,;,I,INCX); #define XX + if (!upper) { BODY_LOTRI(); } else { #undef XX #define XX - BODY_UPTRI(); } #undef XX #undef INIT } void func(tpsv)(uint upper, INT n, FLOAT *m, FLOAT *x, INT incx) { INT r, c, nr, nc, i; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0; #define XX + if (!upper) { BODY_LOTRI(); } else { #undef XX #undef INIT #define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0; #define XX - BODY_UPTRI(); } #undef XX #undef INIT #undef TERM #undef KERN #undef SOLN #undef FINI } /* rank 1 update */ void func(ger)(INT nrow, INT ncol, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT *m, INT incm) { INT r, c, nr, nc; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = a*x[INCX*(r+I)]; #define TERM(J, I, INCX, INCY) row[I][c+J] += reg[I] * y[INCY*(c+J)] #define KERN(I, INCX, INCY, LENGTH) EXPAND(LENGTH,0,TERM,;,I,INCX, INCY); #define FINI(I, ...) BODY_RECT(); #undef INIT #undef TERM #undef KERN #undef FINI } /* symmetric rank 1 update (different memory layouts) */ void func(syr)(uint upper, INT n, FLOAT a, FLOAT *x, INT incx, FLOAT *m, INT incm) { INT r, c, nr, nc; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = a*x[INCX*(r XX I)]; #define TERM(J, I, INCX) row[I][c XX J] += reg[I] * x[INCX*(c XX J)] #define KERN(I, INCX, LENGTH) EXPAND(LENGTH,0,TERM,;,I,INCX); #define FINI(I, ...) #define XX + if (!upper) { BODY_LOTRI(); } else { #undef XX #define XX - BODY_UPTRI(); } #undef XX #undef INIT } void func(spr)(uint upper, INT n, FLOAT a, FLOAT *x, INT incx, FLOAT *m) { INT r, c, nr, nc; FLOAT *row[UNROW], reg[UNROW]; #define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0; #define XX + if (!upper) { BODY_LOTRI(); } else { #undef XX #undef INIT #define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0; #define XX - BODY_UPTRI(); } #undef XX #undef INIT #undef TERM #undef KERN #undef FINI }