From 6db18fdf24d4f91f208618de03a9ade8d21dc999 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 12 Jun 2020 12:34:22 -0700 Subject: straglers --- sys/cmd/menu/menu.h | 38 +++++++ sys/libmath/blas2.c | 222 +++++++++++++++++++++++++++++++++++++++ sys/libmath/blas2body | 256 +++++++++++++++++++++++++++++++++++++++++++++ sys/libmath/blas3.c | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++ sys/libmath/lapack.c | 0 sys/libmath/matrix.c | 176 +++++++++++++++++++++++++++++++ 6 files changed, 971 insertions(+) create mode 100644 sys/cmd/menu/menu.h create mode 100644 sys/libmath/blas2.c create mode 100644 sys/libmath/blas2body create mode 100644 sys/libmath/blas3.c create mode 100644 sys/libmath/lapack.c create mode 100644 sys/libmath/matrix.c (limited to 'sys') diff --git a/sys/cmd/menu/menu.h b/sys/cmd/menu/menu.h new file mode 100644 index 0000000..f558339 --- /dev/null +++ b/sys/cmd/menu/menu.h @@ -0,0 +1,38 @@ +/* See LICENSE file for copyright and license details. */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "drw.h" + +/* macros */ +#define INTERSECT(x,y,w,h,r) (MAX(0, MIN((x)+(w),(r).x_org+(r).width) - MAX((x),(r).x_org)) \ + * MAX(0, MIN((y)+(h),(r).y_org+(r).height) - MAX((y),(r).y_org))) +#define TEXTW(X) (drw_fontset_getwidth(drw, (X)) + lrpad) +#define BETWEEN(X, A, B) ((A) <= (X) && (X) <= (B)) + + +/* enums */ +enum { + SchemeNorm, + SchemeSel, + SchemeOut, + SchemeLast +}; /* color schemes */ + +struct item { + char *text; + struct item *left, *right; + int out; +}; + +/* util.c */ +void fatal(const char *fmt, ...); +void *ecalloc(size_t nmemb, size_t size); diff --git a/sys/libmath/blas2.c b/sys/libmath/blas2.c new file mode 100644 index 0000000..7e4b08e --- /dev/null +++ b/sys/libmath/blas2.c @@ -0,0 +1,222 @@ +#include +#include +#include "loop.h" + +// ----------------------------------------------------------------------- +// Templates + +#define BODY_RECT() \ + nr = ROUNDBY(nrow, UNROW); \ + nc = ROUNDBY(ncol, UNCOL); \ + if (incx == 1 && incy == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx,incy); \ + } \ + } \ + \ + for (; r < nrow; r++) { \ + LOOP(1,0,INIT,incx,incy); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < ncol; c++) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +#define BODY_LOTRI() \ + nr = ROUNDBY(n, UNROW); \ + if (incx == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,1,1); \ + } \ + LOOP(UNROW,0,FINI,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,incx,1); \ + } \ + LOOP(UNROW,0,FINI,incx); \ + } \ + } \ + \ + for (; r < n; r++) { \ + LOOP(1,0,INIT,incx); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(1,0,KERN,incx,1); \ + } \ + LOOP(1,0,FINI,incx); \ + } + +#define BODY_UPTRI() \ + nr = n - ROUNDBY(n, UNROW); \ + if (incx == 1) { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,1); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,1,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,1,1); \ + } \ + LOOP(UNROW,0,FINI,1); \ + } \ + } else { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,incx); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,incx,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,incx,1); \ + } \ + LOOP(UNROW,0,FINI,incx); \ + } \ + } \ + \ + for (; r >= 0; r--) { \ + LOOP(1,0,INIT,incx); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(1,0,KERN,incx,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(1,0,KERN,incx,1); \ + } \ + LOOP(1,0,FINI,incx); \ + } + +#define BODY_LOTRI_XY() \ + nr = ROUNDBY(n, UNROW); \ + if (incx == 1 && incy == 1) { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = 0; r < nr; r += UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx, incy); \ + } \ + } \ + \ + for (; r < n; r++) { \ + LOOP(1,0,INIT,incx,incy); \ + nc = ROUNDBY(r, UNCOL); \ + for (c = 0; c < nc; c += UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c < r; c++) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +#define BODY_UPTRI_XY() \ + nr = n - ROUNDBY(n, UNROW); \ + if (incx == 1 && incy == 1) { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,1,1); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,1,1,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,1,1,1); \ + } \ + LOOP(UNROW,0,FINI,1,1); \ + } \ + } else { \ + for (r = n-1; r >= nr; r -= UNROW) { \ + LOOP(UNROW,0,INIT,incx,incy); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(UNROW,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(UNROW,0,KERN,incx,incy,1); \ + } \ + LOOP(UNROW,0,FINI,incx,incy); \ + } \ + } \ + \ + for (; r >= 0; r--) { \ + LOOP(1,0,INIT,incx,incy); \ + nc = n - ROUNDBY(r, UNCOL); \ + for (c = n-1; c >= nc; c -= UNCOL) { \ + LOOP(1,0,KERN,incx,incy,UNCOL); \ + } \ + for (; c > r; c--) { \ + LOOP(1,0,KERN,incx,incy,1); \ + } \ + LOOP(1,0,FINI,incx,incy); \ + } + +// ----------------------------------------------------------------------- +// implementation + +#define UNROW 4 +#define UNCOL 4 + +#define INT int +#define FLOAT double +#define func(name) blas·d##name +#include "blas2body" + +#undef FLOAT +#undef func + +#define FLOAT float +#define func(name) blas·f##name +#include "blas2body" diff --git a/sys/libmath/blas2body b/sys/libmath/blas2body new file mode 100644 index 0000000..45baf67 --- /dev/null +++ b/sys/libmath/blas2body @@ -0,0 +1,256 @@ +/* general matrix multiply */ +error +func(gemv)(uint flag, INT nrow, INT ncol, FLOAT a, FLOAT *m, INT incm, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) +{ + INT r, c, nr, nc; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = 0; +#define TERM(J, I, INCX, INCY) row[I][c+J] * x[INCX*(c+J)] +#define KERN(I, INCX, INCY, LENGTH) reg[I] += EXPAND(LENGTH,0,TERM,+,I,INCX,INCY); +#define FINI(I, INCX, INCY) y[INCY*(r+I)] = b*y[INCY*(r+I)] + a*reg[I]; + + if (!flag) { + BODY_RECT(); + } else { + func(scale)(ncol, b, y, incy); +#undef KERN +#undef FINI +#undef INIT +#undef TERM +#define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = a*x[INCX*(r+I)]; +#define TERM(J, I, INCX, INCY) row[I][c+J] * reg[I] +#define KERN(I, INCX, INCY, LENGTH) y[INCY*(c+I)] += EXPAND(LENGTH,0,TERM,+,I,INCX,INCY); +#define FINI(I, INCX, INCY) + BODY_RECT(); + } + + return 0; +#undef INIT +#undef TERM +#undef KERN +#undef FINI +} + +/* symmetric matrix vector multiply (different layouts) */ +void +func(symv)(uint upper, INT n, FLOAT a, FLOAT *m, INT incm, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) +{ + INT r, c, nr, nc, i; + FLOAT *row[UNROW], reg1[UNROW], reg2[UNROW]; + +#define INIT(I, INCX, INCY) row[I] = m + (r XX I)*incm, reg1[I] = 0, reg2[I] = 0; +#define TERM1(J, I, INCX, INCY) row[I][c XX J]*x[INCX*(c XX J)] +#define TERM2(J, I, INCX, INCY) row[I][c XX J]*x[INCX*((n-c-1) XX J)] +#define KERN(I, INCX, INCY, REPEAT) reg1[I] += EXPAND(REPEAT,0,TERM1,+,I,INCX,INCY), \ + reg2[I] += EXPAND(REPEAT,0,TERM2,+,I,INCX,INCY); +#define FINI(I, INCX, INCY) y[INCY*(r+I)] += a*(reg1[I] + row[I][r]*x[INCX*r]), \ + y[INCY*(n-r-1+I)] += a*reg2[I]; + + func(scale)(n, b, y, incy); +#define XX + + if (!upper) { + BODY_LOTRI_XY(); + } else { +#undef XX +#define XX - + BODY_UPTRI_XY(); + } +#undef XX + +#undef INIT +} + +void +func(spmv)(uint upper, INT n, FLOAT a, FLOAT *m, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy) +{ + INT r, c, nr, nc, i; + FLOAT *row[UNROW], reg1[UNROW], reg2[UNROW]; + +#define INIT(I, INCX, INCY) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg1[I] = 0, reg2[I] = 0; + + func(scale)(n, b, y, incy); +#define XX + + if (!upper) { + BODY_LOTRI_XY(); + } else { +#undef XX +#undef INIT +#define INIT(I, INCX, INCY) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg1[I] = 0, reg2[I] = 0; +#define XX - + BODY_UPTRI_XY(); + } +#undef XX + +#undef TERM +#undef INIT +#undef KERN +#undef FINI +} + +/* triangular multiply (different layouts) */ +void +func(trmv)(uint upper, INT n, FLOAT *m, INT incm, FLOAT *x, INT incx) +{ + INT r, c, nr, nc, i; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = 0; +#define TERM(J, I, INCX) row[I][c XX J]*x[INCX*(c XX J)] +#define KERN(I, INCX, REPEAT) reg[I] += EXPAND(REPEAT,0,TERM,+,I,INCX); +#define FINI(I, INCX) x[INCX*(r XX I)] = row[I][r XX I]*x[INCX*(r XX I)] + reg[I]; + +#define XX + + if (!upper) { + BODY_LOTRI(); + } else { +#undef XX +#define XX - + BODY_UPTRI(); + } +#undef XX + +#undef INIT +} + +void +func(tpmv)(uint upper, INT n, FLOAT *m, FLOAT *x, INT incx) +{ + INT r, c, nr, nc, i; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0; + +#define XX + + if (!upper) { + BODY_LOTRI(); + } else { +#undef XX +#undef INIT +#define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0; +#define XX - + BODY_UPTRI(); + } +#undef XX + +#undef INIT +#undef TERM +#undef KERN +#undef FINI +} + +/* triangular solve (different layouts) */ +void +func(trsv)(uint upper, INT n, FLOAT *m, INT incm, FLOAT *x, INT incx) +{ + INT r, c, nr, nc, i; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = 0; +#define TERM(J, I, INCX) row[I][c XX J]*x[c XX J] +#define KERN(I, INCX, REPEAT) reg[I] += EXPAND(REPEAT,0,TERM,+,I,INCX); +#define SOLN(J, I, INCX) reg[J] += row[I][r XX I]*x[INCX*(r XX I)] +#define FINI(I, INCX) x[INCX*(r XX I)] = reg[I] / row[I][r XX I]; EXPAND_TRI(UNROW,INC(I),SOLN,;,I,INCX); + +#define XX + + if (!upper) { + BODY_LOTRI(); + } else { +#undef XX +#define XX - + BODY_UPTRI(); + } +#undef XX + +#undef INIT +} + +void +func(tpsv)(uint upper, INT n, FLOAT *m, FLOAT *x, INT incx) +{ + INT r, c, nr, nc, i; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0; + +#define XX + + if (!upper) { + BODY_LOTRI(); + } else { +#undef XX +#undef INIT +#define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0; +#define XX - + BODY_UPTRI(); + } +#undef XX + +#undef INIT +#undef TERM +#undef KERN +#undef SOLN +#undef FINI +} + +/* rank 1 update */ +void +func(ger)(INT nrow, INT ncol, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT *m, INT incm) +{ + INT r, c, nr, nc; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = a*x[INCX*(r+I)]; +#define TERM(J, I, INCX, INCY) row[I][c+J] += reg[I] * y[INCY*(c+J)] +#define KERN(I, INCX, INCY, LENGTH) EXPAND(LENGTH,0,TERM,;,I,INCX, INCY); +#define FINI(I, ...) + + BODY_RECT(); + +#undef INIT +#undef TERM +#undef KERN +#undef FINI +} + +/* symmetric rank 1 update (different memory layouts) */ +void +func(syr)(uint upper, INT n, FLOAT a, FLOAT *x, INT incx, FLOAT *m, INT incm) +{ + INT r, c, nr, nc; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = a*x[INCX*(r XX I)]; +#define TERM(J, I, INCX) row[I][c XX J] += reg[I] * x[INCX*(c XX J)] +#define KERN(I, INCX, LENGTH) EXPAND(LENGTH,0,TERM,;,I,INCX); +#define FINI(I, ...) + +#define XX + + if (!upper) { + BODY_LOTRI(); + } else { +#undef XX +#define XX - + BODY_UPTRI(); + } +#undef XX + +#undef INIT +} + +void +func(spr)(uint upper, INT n, FLOAT a, FLOAT *x, INT incx, FLOAT *m) +{ + INT r, c, nr, nc; + FLOAT *row[UNROW], reg[UNROW]; +#define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0; + +#define XX + + if (!upper) { + BODY_LOTRI(); + } else { +#undef XX +#undef INIT +#define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0; +#define XX - + BODY_UPTRI(); + } +#undef XX + +#undef INIT +#undef TERM +#undef KERN +#undef FINI +} diff --git a/sys/libmath/blas3.c b/sys/libmath/blas3.c new file mode 100644 index 0000000..d821e6e --- /dev/null +++ b/sys/libmath/blas3.c @@ -0,0 +1,279 @@ +#include +#include +#include + +#define INT int +#define FLOAT double +#define func(name) blas·d##name + +#define X(i, j) x[j + incx*(i)] +#define Y(i, j) y[j + incy*(i)] +#define Z(i, j) z[j + incz*(i)] + +void +func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz) +{ + INT jj, jb, kk, kb, dk, i, j, k, end; + FLOAT r0[8], r1[8], r2[8], r3[8], pf; + + for (i = 0; i < ni; i++) { + for (j = 0; j < nj; j++) { + Z(i,j) *= b; + } + } + + jb = MIN(256, nj); + kb = MIN(48, nk); + for (jj = 0; jj < nj; jj += jb) { + for (kk = 0; kk < nk; kk += kb) { + for (i = 0; i < ni; i += 4) { + for (j = jj; j < jj + jb; j += 8) { + r0[0] = Z(i+0, j+0); r0[1] = Z(i+0, j+1); r0[2] = Z(i+0, j+2); r0[3] = Z(i+0, j+3); + r1[0] = Z(i+1, j+0); r1[1] = Z(i+1, j+1); r1[2] = Z(i+1, j+2); r1[3] = Z(i+1, j+3); + r2[0] = Z(i+2, j+0); r2[1] = Z(i+2, j+1); r2[2] = Z(i+2, j+2); r2[3] = Z(i+2, j+3); + r3[0] = Z(i+3, j+0); r3[1] = Z(i+3, j+1); r3[2] = Z(i+3, j+2); r3[3] = Z(i+3, j+3); + end = MIN(nk, kk+kb); + for (k = kk; k < end; k++) { + pf = a * X(i, k); + r0[0] += pf * Y(k, j+0); r0[1] += pf * Y(k, j+1); r0[2] += pf * Y(k, j+2); r0[3] += pf * Y(k, j+3); + + pf = a * X(i+1, k); + r1[0] += pf * Y(k, j+0); r1[1] += pf * Y(k, j+1); r1[2] += pf * Y(k, j+2); r1[3] += pf * Y(k, j+3); + + pf = a * X(i+2, k); + r1[0] += pf * Y(k, j+0); r1[1] += pf * Y(k, j+1); r1[2] += pf * Y(k, j+2); r1[3] += pf * Y(k, j+3); + + pf = a * X(i+3, k); + r1[0] += pf * Y(k, j+0); r1[1] += pf * Y(k, j+1); r1[2] += pf * Y(k, j+2); r1[3] += pf * Y(k, j+3); + } + Z(i+0, j+0) = r0[0]; Z(i+0, j+1) = r0[1]; Z(i+0, j+2) = r0[2]; Z(i+0, j+3) = r0[3]; + Z(i+1, j+0) = r1[0]; Z(i+1, j+1) = r1[1]; Z(i+1, j+2) = r1[2]; Z(i+1, j+3) = r1[3]; + Z(i+2, j+0) = r2[0]; Z(i+2, j+1) = r2[1]; Z(i+2, j+2) = r2[2]; Z(i+2, j+3) = r2[3]; + Z(i+3, j+0) = r3[0]; Z(i+3, j+1) = r3[1]; Z(i+3, j+2) = r3[2]; Z(i+3, j+3) = r3[3]; + } + } + } + } +} + +#if 0 +void +func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz) +{ + int i, j, k; + FLOAT w[nj*nk], acc[4][4]; + + for (i = 0; i < ni; i++) { + for (j = 0; j < nj; j++) { + Z(i,j) *= b; + W(i,j) = Y(j,i); + } + } + + for (i = 0; i < ni; i+=4) { + for (j = 0; j < nj; j+=4) { + memset(acc, 0, sizeof(acc)); + for (k = 0; k < nk; k+=4) { + acc[0][0] += X(i+0,k)*W(j+0,k) + X(i+0,k+1)*W(j+0,k+1) + X(i+0,k+2)*W(j+0,k+2) + X(i+0,k+3)*W(j+0,k+3); + acc[0][1] += X(i+0,k)*W(j+1,k) + X(i+0,k+1)*W(j+1,k+1) + X(i+0,k+2)*W(j+1,k+2) + X(i+0,k+3)*W(j+1,k+3); + acc[0][2] += X(i+0,k)*W(j+2,k) + X(i+0,k+1)*W(j+2,k+1) + X(i+0,k+2)*W(j+2,k+2) + X(i+0,k+3)*W(j+2,k+3); + acc[0][3] += X(i+0,k)*W(j+3,k) + X(i+0,k+1)*W(j+3,k+1) + X(i+0,k+2)*W(j+3,k+2) + X(i+0,k+3)*W(j+3,k+3); + + acc[1][0] += X(i+1,k)*W(j+0,k) + X(i+1,k+1)*W(j+0,k+1) + X(i+1,k+2)*W(j+0,k+2) + X(i+1,k+3)*W(j+0,k+3); + acc[1][1] += X(i+1,k)*W(j+1,k) + X(i+1,k+1)*W(j+1,k+1) + X(i+1,k+2)*W(j+1,k+2) + X(i+1,k+3)*W(j+1,k+3); + acc[1][2] += X(i+1,k)*W(j+2,k) + X(i+1,k+1)*W(j+2,k+1) + X(i+1,k+2)*W(j+2,k+2) + X(i+1,k+3)*W(j+2,k+3); + acc[1][3] += X(i+1,k)*W(j+3,k) + X(i+1,k+1)*W(j+3,k+1) + X(i+1,k+2)*W(j+3,k+2) + X(i+1,k+3)*W(j+3,k+3); + + acc[2][0] += X(i+2,k)*W(j+0,k) + X(i+2,k+1)*W(j+0,k+1) + X(i+2,k+2)*W(j+0,k+2) + X(i+2,k+3)*W(j+0,k+3); + acc[2][1] += X(i+2,k)*W(j+1,k) + X(i+2,k+1)*W(j+1,k+1) + X(i+2,k+2)*W(j+1,k+2) + X(i+2,k+3)*W(j+1,k+3); + acc[2][2] += X(i+2,k)*W(j+2,k) + X(i+2,k+1)*W(j+2,k+1) + X(i+2,k+2)*W(j+2,k+2) + X(i+2,k+3)*W(j+2,k+3); + acc[2][3] += X(i+2,k)*W(j+3,k) + X(i+2,k+1)*W(j+3,k+1) + X(i+2,k+2)*W(j+3,k+2) + X(i+2,k+3)*W(j+3,k+3); + + acc[2][0] += X(i+3,k)*W(j+0,k) + X(i+3,k+1)*W(j+0,k+1) + X(i+3,k+2)*W(j+0,k+2) + X(i+3,k+3)*W(j+0,k+3); + acc[2][1] += X(i+3,k)*W(j+1,k) + X(i+3,k+1)*W(j+1,k+1) + X(i+3,k+2)*W(j+1,k+2) + X(i+3,k+3)*W(j+1,k+3); + acc[2][2] += X(i+3,k)*W(j+2,k) + X(i+3,k+1)*W(j+2,k+1) + X(i+3,k+2)*W(j+2,k+2) + X(i+3,k+3)*W(j+2,k+3); + acc[2][3] += X(i+3,k)*W(j+3,k) + X(i+3,k+1)*W(j+3,k+1) + X(i+3,k+2)*W(j+3,k+2) + X(i+3,k+3)*W(j+3,k+3); + // Z(i,j) += X(i,k)*Y(k,j); + } + Z(i+0,j+1) = a*acc[0][0]; + Z(i+0,j+2) = a*acc[0][1]; + Z(i+0,j+3) = a*acc[0][2]; + Z(i+0,j+4) = a*acc[0][3]; + + Z(i+1,j+1) = a*acc[1][0]; + Z(i+1,j+2) = a*acc[1][1]; + Z(i+1,j+3) = a*acc[1][2]; + Z(i+1,j+4) = a*acc[1][3]; + + Z(i+2,j+1) = a*acc[2][0]; + Z(i+2,j+2) = a*acc[2][1]; + Z(i+2,j+3) = a*acc[2][2]; + Z(i+2,j+4) = a*acc[2][3]; + + Z(i+3,j+1) = a*acc[3][0]; + Z(i+3,j+2) = a*acc[3][1]; + Z(i+3,j+3) = a*acc[3][2]; + Z(i+3,j+4) = a*acc[3][3]; + } + } +} +#endif + +#if 0 +void +func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz) +{ + int i, j, k, ri, rj, rk; + FLOAT reg[4][4], *xrow[4], *yrow[4]; + + for (i = 0; i < ni; i++) { + for (j = 0; j < nj; j++) { + z[j + incz*i] *= b; + } + } + + for (i = 0; i < ni; i += 4) { + xrow[0] = x + incx*(i+0); + xrow[1] = x + incx*(i+1); + xrow[2] = x + incx*(i+2); + xrow[3] = x + incx*(i+3); + for (k = 0; k < nk; k+=4) { + yrow[0] = y + incy*(k+0); + yrow[1] = y + incy*(k+1); + yrow[2] = y + incy*(k+2); + yrow[3] = y + incy*(k+3); + reg[0][0] = a * xrow[0][k+0]; reg[0][1] = a * xrow[0][k+1]; reg[0][2] = a * xrow[0][k+2]; reg[0][3] = a * xrow[0][k+3]; + reg[1][0] = a * xrow[1][k+0]; reg[1][1] = a * xrow[1][k+1]; reg[1][2] = a * xrow[1][k+2]; reg[1][3] = a * xrow[1][k+3]; + reg[2][0] = a * xrow[2][k+0]; reg[2][1] = a * xrow[2][k+1]; reg[2][2] = a * xrow[2][k+2]; reg[2][3] = a * xrow[2][k+3]; + reg[3][0] = a * xrow[3][k+0]; reg[3][1] = a * xrow[3][k+1]; reg[3][2] = a * xrow[3][k+2]; reg[3][3] = a * xrow[3][k+3]; + for (j = 0; j < nj; j += 1) { + z[j + incz*(i+0)] += (reg[0][0]*yrow[0][j]+reg[0][1]*yrow[1][j]+reg[0][2]*yrow[2][j]+reg[0][3]*yrow[3][j]); + z[j + incz*(i+1)] += (reg[1][0]*yrow[0][j]+reg[1][1]*yrow[1][j]+reg[1][2]*yrow[2][j]+reg[1][3]*yrow[3][j]); + z[j + incz*(i+2)] += (reg[2][0]*yrow[0][j]+reg[2][1]*yrow[1][j]+reg[2][2]*yrow[2][j]+reg[2][3]*yrow[3][j]); + z[j + incz*(i+3)] += (reg[3][0]*yrow[0][j]+reg[3][1]*yrow[1][j]+reg[3][2]*yrow[2][j]+reg[3][3]*yrow[3][j]); + } + } + } +} +#endif + +#if 0 +void +func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz) +{ + int i, j, k, ri, rj, rk; + FLOAT r[4][4], *row[4]; + + for (i = 0; i < ni; i++) { + for (j = 0; j < nj; j++) { + Z(i, j) *= b; + } + } + + for (i = 0; i < ni; i+=4) { + for (j = 0; j < nj; j+=4) { + r[0][0] = 0; r[0][1] = 0; r[0][2] = 0; r[0][3] = 0; + r[1][0] = 0; r[1][1] = 0; r[1][2] = 0; r[1][3] = 0; + r[2][0] = 0; r[2][1] = 0; r[2][2] = 0; r[2][3] = 0; + r[3][0] = 0; r[3][1] = 0; r[3][2] = 0; r[3][3] = 0; + row[0] = &X(i+0, 0); + row[1] = &X(i+1, 0); + row[2] = &X(i+2, 0); + row[3] = &X(i+3, 0); + for (k = 0; k < nk; k++) { + r[0][0] += row[0][k]*Y(k,0); r[0][1] += row[0][k]*Y(k,1); r[0][2] += row[0][k]*Y(k,2); r[0][3] += row[0][k]*Y(k,3); + r[1][0] += row[1][k]*Y(k,0); r[1][1] += row[1][k]*Y(k,1); r[1][2] += row[1][k]*Y(k,2); r[1][3] += row[1][k]*Y(k,3); + r[2][0] += row[2][k]*Y(k,0); r[2][1] += row[2][k]*Y(k,1); r[2][2] += row[2][k]*Y(k,2); r[2][3] += row[2][k]*Y(k,3); + r[3][0] += row[3][k]*Y(k,0); r[3][1] += row[3][k]*Y(k,1); r[3][2] += row[3][k]*Y(k,2); r[3][3] += row[3][k]*Y(k,3); + } + Z(i+0, j+0) += r[0][0]; Z(i+0, j+1) += r[0][1]; Z(i+0, j+2) += r[0][2]; Z(i+0, j+3) += r[0][3]; + Z(i+1, j+0) += r[1][0]; Z(i+1, j+1) += r[1][1]; Z(i+1, j+2) += r[1][2]; Z(i+1, j+3) += r[1][3]; + Z(i+2, j+0) += r[2][0]; Z(i+2, j+1) += r[2][1]; Z(i+2, j+2) += r[2][2]; Z(i+2, j+3) += r[2][3]; + Z(i+3, j+0) += r[3][0]; Z(i+3, j+1) += r[3][1]; Z(i+3, j+2) += r[3][2]; Z(i+3, j+3) += r[3][3]; + } + } +} +#endif + +#if 0 +void +func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz) +{ + int i, j, k, ri, rj, rk; + FLOAT *xrow[8], *yrow[8], reg; + + for (i = 0; i < ni; i++) { + for (j = 0; j < nj; j++) { + z[j + incz*i] *= b; + } + } + + ri = ni & ~7; + rj = nj & ~7; + for (i = 0; i < ri; i += 8) { + xrow[0] = x + incx*(i+0); + xrow[1] = x + incx*(i+1); + xrow[2] = x + incx*(i+2); + xrow[3] = x + incx*(i+3); + xrow[4] = x + incx*(i+4); + xrow[5] = x + incx*(i+5); + xrow[6] = x + incx*(i+6); + xrow[7] = x + incx*(i+7); + for (j = 0; j < rj; j += 8) { + yrow[0] = y + incy*(j+0); + yrow[1] = y + incy*(j+1); + yrow[2] = y + incy*(j+2); + yrow[3] = y + incy*(j+3); + yrow[4] = y + incy*(j+4); + yrow[5] = y + incy*(j+5); + yrow[6] = y + incy*(j+6); + yrow[7] = y + incy*(j+7); + for (k = 0; k < nk; k++) { + reg = a*(yrow[0][k] + yrow[1][k] + yrow[2][k] + yrow[3][k] + yrow[4][k] + yrow[5][k] + yrow[6][k] + yrow[7][k]); + z[k + incz*(i+0)] += xrow[0][k]*reg; + z[k + incz*(i+1)] += xrow[1][k]*reg; + z[k + incz*(i+2)] += xrow[2][k]*reg; + z[k + incz*(i+3)] += xrow[3][k]*reg; + z[k + incz*(i+4)] += xrow[4][k]*reg; + z[k + incz*(i+5)] += xrow[5][k]*reg; + z[k + incz*(i+6)] += xrow[6][k]*reg; + z[k + incz*(i+7)] += xrow[7][k]*reg; + } + } + for (; j < nj; j++) { + for (k = 0; k < nk; k++) { + reg = a*y[k+incy*j]; + z[k + incz*(i+0)] += xrow[0][k]*reg; + z[k + incz*(i+1)] += xrow[1][k]*reg; + z[k + incz*(i+2)] += xrow[2][k]*reg; + z[k + incz*(i+3)] += xrow[3][k]*reg; + z[k + incz*(i+4)] += xrow[4][k]*reg; + z[k + incz*(i+5)] += xrow[5][k]*reg; + z[k + incz*(i+6)] += xrow[6][k]*reg; + z[k + incz*(i+7)] += xrow[7][k]*reg; + } + } + } + + for (; i < ni; i++) { + for (j = 0; j < rj; j += 8) { + yrow[0] = y + incy*(j+0); + yrow[1] = y + incy*(j+1); + yrow[2] = y + incy*(j+2); + yrow[3] = y + incy*(j+3); + yrow[4] = y + incy*(j+4); + yrow[5] = y + incy*(j+5); + yrow[6] = y + incy*(j+6); + yrow[7] = y + incy*(j+7); + for (k = 0; k < nk; k++) { + z[k + incz*(i)] += a*x[k + incx*i]*(yrow[0][k] + yrow[1][k] + yrow[2][k] + yrow[3][k] + yrow[4][k] + yrow[5][k] + yrow[6][k] + yrow[7][k]); + } + } + for (; j < nj; j++) { + for (k = 0; k < nk; k++) { + z[k + incz*i] += a*x[k + incx*i]*y[k + incy*j]; + } + } + } +} +#endif diff --git a/sys/libmath/lapack.c b/sys/libmath/lapack.c new file mode 100644 index 0000000..e69de29 diff --git a/sys/libmath/matrix.c b/sys/libmath/matrix.c new file mode 100644 index 0000000..e8bca0b --- /dev/null +++ b/sys/libmath/matrix.c @@ -0,0 +1,176 @@ +#include +#include +#include + +/* TODO: replace (incrementally) with native C version! */ +#include +#include + +// ----------------------------------------------------------------------- +// level 1 + +error +la·vecslice(math·Vector *x, int min, int max, int inc) +{ + if (max > x->len || min < 0) { + errorf("out of bounds: attempted to access vector past length"); + return 1; + } + x->len = (max - min) / inc; + x->d += x->inc * min; + x->inc *= inc; + + return 0; +} + +/* simple blas wrappers */ +void +la·veccopy(math·Vector *dst, math·Vector *src) +{ + return cblas_dcopy(src->len, src->d, src->inc, dst->d, dst->inc); +} + +double +la·vecnorm(math·Vector *x) +{ + return cblas_dnrm2(x->len, x->d, x->inc); +} + +void +la·vecscale(math·Vector *x, double a) +{ + return cblas_dscal(x->len, a, x->d, x->inc); +} + +double +la·vecdot(math·Vector *x, math·Vector *y) +{ + return cblas_ddot(x->len, x->d, x->inc, y->d, y->inc); +} + +// ----------------------------------------------------------------------- +// level 2 + +error +la·vecmat(math·Vector *x, math·Matrix *M) +{ + if (M->dim[1] != x->len) { + errorf("incompatible matrix dimensions"); + return 1; + } + if (M->state & ~mat·trans) + cblas_dgemv(CblasRowMajor,CblasNoTrans,M->dim[0],M->dim[1],1.,M->d,M->inc,x->d,x->inc,0.,x->d,x->inc); + else + cblas_dgemv(CblasRowMajor,CblasTrans,M->dim[0],M->dim[1],1.,M->d,M->inc,x->d,x->inc,0.,x->d,x->inc); + + return 0; +} + +// ----------------------------------------------------------------------- +// level 3 + +void +la·transpose(math·Matrix *X) +{ + int tmp; + X->state ^= mat·trans; + tmp = X->dim[0], X->dim[0] = X->dim[1], X->dim[1] = tmp; +} + +error +la·matrow(math·Matrix *X, int r, math·Vector *row) +{ + if (r < 0 || r >= X->dim[0]) { + errorf("out of bounds"); + return 1; + } + + row->len = X->dim[1]; + row->inc = 1; + row->d = X->d + X->dim[1] * r; + + return 0; +} + +error +la·matcol(math·Matrix *X, int c, math·Vector *col) +{ + if (c < 0 || c >= X->dim[1]) { + errorf("out of bounds"); + return 1; + } + + col->len = X->dim[0]; + col->inc = X->dim[1]; + col->d = X->d + c; + + return 0; +} + +error +la·matslice(math·Matrix *X, int r[3], int c[3]) +{ + /* TODO */ + return 0; +} + +error +la·eig(math·Matrix *X) +{ + +} + +/* X = A*B */ +error +la·matmul(math·Matrix *X, math·Matrix *A, math·Matrix *B) +{ + if (A->dim[1] != B->dim[0]) { + errorf("number of interior dimensions of A '%d' not equal to that of B '%d'", A->dim[1], B->dim[0]); + return 1; + } + if (X->dim[0] != A->dim[0]) { + errorf("number of exterior dimensions of X '%d' not equal to that of A '%d'", X->dim[0], A->dim[0]); + return 1; + } + if (X->dim[1] != B->dim[1]) { + errorf("number of exterior dimensions of X '%d' not equal to that of B '%d'", X->dim[1], B->dim[1]); + return 1; + } + + if (X->state & ~mat·trans) + if (A->state & ~mat·trans) + cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc); + else + cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc); + else + if (A->state & ~mat·trans) + cblas_dgemm(CblasRowMajor,CblasTrans,CblasNoTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc); + else + cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc); + + return 0; +} + +/* + * solves A*X=B + * pass in B via X + */ +error +la·solve(math·Matrix *X, math·Matrix *A) +{ + error err; + int n, *ipv; + static int buf[512]; + if (n = A->dim[0], n < arrlen(buf)) { + ipv = buf; + n = 0; + } else + ipv = malloc(n*sizeof(*ipv)); + + /* TODO: utilize more specific regimes if applicable */ + err = LAPACKE_dgesv(LAPACK_ROW_MAJOR,A->dim[0],X->dim[1],A->d,A->inc,ipv,X->d,X->inc); + + if (n) + free(ipv); + return err; +} -- cgit v1.2.1