aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-06-12 12:34:22 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-06-12 12:34:22 -0700
commit6db18fdf24d4f91f208618de03a9ade8d21dc999 (patch)
tree95ec49c80a6361bff0ce996e7e0d96b18adb13da
parenta79d1edc9ef2e29597faa723a05088a5d19ea8aa (diff)
straglers
-rw-r--r--sys/cmd/menu/menu.h38
-rw-r--r--sys/libmath/blas2.c222
-rw-r--r--sys/libmath/blas2body256
-rw-r--r--sys/libmath/blas3.c279
-rw-r--r--sys/libmath/lapack.c0
-rw-r--r--sys/libmath/matrix.c176
6 files changed, 971 insertions, 0 deletions
diff --git a/sys/cmd/menu/menu.h b/sys/cmd/menu/menu.h
new file mode 100644
index 0000000..f558339
--- /dev/null
+++ b/sys/cmd/menu/menu.h
@@ -0,0 +1,38 @@
+/* See LICENSE file for copyright and license details. */
+#include <u.h>
+#include <libn.h>
+#include <time.h>
+#include <locale.h>
+
+#include <X11/Xlib.h>
+#include <X11/Xatom.h>
+#include <X11/Xutil.h>
+#include <X11/extensions/Xinerama.h>
+#include <X11/Xft/Xft.h>
+
+#include "drw.h"
+
+/* macros */
+#define INTERSECT(x,y,w,h,r) (MAX(0, MIN((x)+(w),(r).x_org+(r).width) - MAX((x),(r).x_org)) \
+ * MAX(0, MIN((y)+(h),(r).y_org+(r).height) - MAX((y),(r).y_org)))
+#define TEXTW(X) (drw_fontset_getwidth(drw, (X)) + lrpad)
+#define BETWEEN(X, A, B) ((A) <= (X) && (X) <= (B))
+
+
+/* enums */
+enum {
+ SchemeNorm,
+ SchemeSel,
+ SchemeOut,
+ SchemeLast
+}; /* color schemes */
+
+struct item {
+ char *text;
+ struct item *left, *right;
+ int out;
+};
+
+/* util.c */
+void fatal(const char *fmt, ...);
+void *ecalloc(size_t nmemb, size_t size);
diff --git a/sys/libmath/blas2.c b/sys/libmath/blas2.c
new file mode 100644
index 0000000..7e4b08e
--- /dev/null
+++ b/sys/libmath/blas2.c
@@ -0,0 +1,222 @@
+#include <u.h>
+#include <libmath/blas.h>
+#include "loop.h"
+
+// -----------------------------------------------------------------------
+// Templates
+
+#define BODY_RECT() \
+ nr = ROUNDBY(nrow, UNROW); \
+ nc = ROUNDBY(ncol, UNCOL); \
+ if (incx == 1 && incy == 1) { \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,1,1); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,1,1,UNCOL); \
+ } \
+ for (; c < ncol; c++) { \
+ LOOP(UNROW,0,KERN,1,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1,1); \
+ } \
+ } else { \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,incx,incy); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < ncol; c++) { \
+ LOOP(UNROW,0,KERN,incx,incy,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx,incy); \
+ } \
+ } \
+ \
+ for (; r < nrow; r++) { \
+ LOOP(1,0,INIT,incx,incy); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(1,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < ncol; c++) { \
+ LOOP(1,0,KERN,incx,incy,1); \
+ } \
+ LOOP(1,0,FINI,incx,incy); \
+ }
+
+#define BODY_LOTRI() \
+ nr = ROUNDBY(n, UNROW); \
+ if (incx == 1) { \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,1); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,1,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(UNROW,0,KERN,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1); \
+ } \
+ } else { \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,incx); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(UNROW,0,KERN,incx,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx); \
+ } \
+ } \
+ \
+ for (; r < n; r++) { \
+ LOOP(1,0,INIT,incx); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(1,0,KERN,incx,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(1,0,KERN,incx,1); \
+ } \
+ LOOP(1,0,FINI,incx); \
+ }
+
+#define BODY_UPTRI() \
+ nr = n - ROUNDBY(n, UNROW); \
+ if (incx == 1) { \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,1); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,1,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(UNROW,0,KERN,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1); \
+ } \
+ } else { \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,incx); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(UNROW,0,KERN,incx,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx); \
+ } \
+ } \
+ \
+ for (; r >= 0; r--) { \
+ LOOP(1,0,INIT,incx); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(1,0,KERN,incx,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(1,0,KERN,incx,1); \
+ } \
+ LOOP(1,0,FINI,incx); \
+ }
+
+#define BODY_LOTRI_XY() \
+ nr = ROUNDBY(n, UNROW); \
+ if (incx == 1 && incy == 1) { \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,1,1); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,1,1,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(UNROW,0,KERN,1,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1,1); \
+ } \
+ } else { \
+ for (r = 0; r < nr; r += UNROW) { \
+ LOOP(UNROW,0,INIT,incx,incy); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(UNROW,0,KERN,incx,incy,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx, incy); \
+ } \
+ } \
+ \
+ for (; r < n; r++) { \
+ LOOP(1,0,INIT,incx,incy); \
+ nc = ROUNDBY(r, UNCOL); \
+ for (c = 0; c < nc; c += UNCOL) { \
+ LOOP(1,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c < r; c++) { \
+ LOOP(1,0,KERN,incx,incy,1); \
+ } \
+ LOOP(1,0,FINI,incx,incy); \
+ }
+
+#define BODY_UPTRI_XY() \
+ nr = n - ROUNDBY(n, UNROW); \
+ if (incx == 1 && incy == 1) { \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,1,1); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,1,1,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(UNROW,0,KERN,1,1,1); \
+ } \
+ LOOP(UNROW,0,FINI,1,1); \
+ } \
+ } else { \
+ for (r = n-1; r >= nr; r -= UNROW) { \
+ LOOP(UNROW,0,INIT,incx,incy); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(UNROW,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(UNROW,0,KERN,incx,incy,1); \
+ } \
+ LOOP(UNROW,0,FINI,incx,incy); \
+ } \
+ } \
+ \
+ for (; r >= 0; r--) { \
+ LOOP(1,0,INIT,incx,incy); \
+ nc = n - ROUNDBY(r, UNCOL); \
+ for (c = n-1; c >= nc; c -= UNCOL) { \
+ LOOP(1,0,KERN,incx,incy,UNCOL); \
+ } \
+ for (; c > r; c--) { \
+ LOOP(1,0,KERN,incx,incy,1); \
+ } \
+ LOOP(1,0,FINI,incx,incy); \
+ }
+
+// -----------------------------------------------------------------------
+// implementation
+
+#define UNROW 4
+#define UNCOL 4
+
+#define INT int
+#define FLOAT double
+#define func(name) blas·d##name
+#include "blas2body"
+
+#undef FLOAT
+#undef func
+
+#define FLOAT float
+#define func(name) blas·f##name
+#include "blas2body"
diff --git a/sys/libmath/blas2body b/sys/libmath/blas2body
new file mode 100644
index 0000000..45baf67
--- /dev/null
+++ b/sys/libmath/blas2body
@@ -0,0 +1,256 @@
+/* general matrix multiply */
+error
+func(gemv)(uint flag, INT nrow, INT ncol, FLOAT a, FLOAT *m, INT incm, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy)
+{
+ INT r, c, nr, nc;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = 0;
+#define TERM(J, I, INCX, INCY) row[I][c+J] * x[INCX*(c+J)]
+#define KERN(I, INCX, INCY, LENGTH) reg[I] += EXPAND(LENGTH,0,TERM,+,I,INCX,INCY);
+#define FINI(I, INCX, INCY) y[INCY*(r+I)] = b*y[INCY*(r+I)] + a*reg[I];
+
+ if (!flag) {
+ BODY_RECT();
+ } else {
+ func(scale)(ncol, b, y, incy);
+#undef KERN
+#undef FINI
+#undef INIT
+#undef TERM
+#define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = a*x[INCX*(r+I)];
+#define TERM(J, I, INCX, INCY) row[I][c+J] * reg[I]
+#define KERN(I, INCX, INCY, LENGTH) y[INCY*(c+I)] += EXPAND(LENGTH,0,TERM,+,I,INCX,INCY);
+#define FINI(I, INCX, INCY)
+ BODY_RECT();
+ }
+
+ return 0;
+#undef INIT
+#undef TERM
+#undef KERN
+#undef FINI
+}
+
+/* symmetric matrix vector multiply (different layouts) */
+void
+func(symv)(uint upper, INT n, FLOAT a, FLOAT *m, INT incm, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy)
+{
+ INT r, c, nr, nc, i;
+ FLOAT *row[UNROW], reg1[UNROW], reg2[UNROW];
+
+#define INIT(I, INCX, INCY) row[I] = m + (r XX I)*incm, reg1[I] = 0, reg2[I] = 0;
+#define TERM1(J, I, INCX, INCY) row[I][c XX J]*x[INCX*(c XX J)]
+#define TERM2(J, I, INCX, INCY) row[I][c XX J]*x[INCX*((n-c-1) XX J)]
+#define KERN(I, INCX, INCY, REPEAT) reg1[I] += EXPAND(REPEAT,0,TERM1,+,I,INCX,INCY), \
+ reg2[I] += EXPAND(REPEAT,0,TERM2,+,I,INCX,INCY);
+#define FINI(I, INCX, INCY) y[INCY*(r+I)] += a*(reg1[I] + row[I][r]*x[INCX*r]), \
+ y[INCY*(n-r-1+I)] += a*reg2[I];
+
+ func(scale)(n, b, y, incy);
+#define XX +
+ if (!upper) {
+ BODY_LOTRI_XY();
+ } else {
+#undef XX
+#define XX -
+ BODY_UPTRI_XY();
+ }
+#undef XX
+
+#undef INIT
+}
+
+void
+func(spmv)(uint upper, INT n, FLOAT a, FLOAT *m, FLOAT *x, INT incx, FLOAT b, FLOAT *y, INT incy)
+{
+ INT r, c, nr, nc, i;
+ FLOAT *row[UNROW], reg1[UNROW], reg2[UNROW];
+
+#define INIT(I, INCX, INCY) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg1[I] = 0, reg2[I] = 0;
+
+ func(scale)(n, b, y, incy);
+#define XX +
+ if (!upper) {
+ BODY_LOTRI_XY();
+ } else {
+#undef XX
+#undef INIT
+#define INIT(I, INCX, INCY) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg1[I] = 0, reg2[I] = 0;
+#define XX -
+ BODY_UPTRI_XY();
+ }
+#undef XX
+
+#undef TERM
+#undef INIT
+#undef KERN
+#undef FINI
+}
+
+/* triangular multiply (different layouts) */
+void
+func(trmv)(uint upper, INT n, FLOAT *m, INT incm, FLOAT *x, INT incx)
+{
+ INT r, c, nr, nc, i;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = 0;
+#define TERM(J, I, INCX) row[I][c XX J]*x[INCX*(c XX J)]
+#define KERN(I, INCX, REPEAT) reg[I] += EXPAND(REPEAT,0,TERM,+,I,INCX);
+#define FINI(I, INCX) x[INCX*(r XX I)] = row[I][r XX I]*x[INCX*(r XX I)] + reg[I];
+
+#define XX +
+ if (!upper) {
+ BODY_LOTRI();
+ } else {
+#undef XX
+#define XX -
+ BODY_UPTRI();
+ }
+#undef XX
+
+#undef INIT
+}
+
+void
+func(tpmv)(uint upper, INT n, FLOAT *m, FLOAT *x, INT incx)
+{
+ INT r, c, nr, nc, i;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0;
+
+#define XX +
+ if (!upper) {
+ BODY_LOTRI();
+ } else {
+#undef XX
+#undef INIT
+#define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0;
+#define XX -
+ BODY_UPTRI();
+ }
+#undef XX
+
+#undef INIT
+#undef TERM
+#undef KERN
+#undef FINI
+}
+
+/* triangular solve (different layouts) */
+void
+func(trsv)(uint upper, INT n, FLOAT *m, INT incm, FLOAT *x, INT incx)
+{
+ INT r, c, nr, nc, i;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = 0;
+#define TERM(J, I, INCX) row[I][c XX J]*x[c XX J]
+#define KERN(I, INCX, REPEAT) reg[I] += EXPAND(REPEAT,0,TERM,+,I,INCX);
+#define SOLN(J, I, INCX) reg[J] += row[I][r XX I]*x[INCX*(r XX I)]
+#define FINI(I, INCX) x[INCX*(r XX I)] = reg[I] / row[I][r XX I]; EXPAND_TRI(UNROW,INC(I),SOLN,;,I,INCX);
+
+#define XX +
+ if (!upper) {
+ BODY_LOTRI();
+ } else {
+#undef XX
+#define XX -
+ BODY_UPTRI();
+ }
+#undef XX
+
+#undef INIT
+}
+
+void
+func(tpsv)(uint upper, INT n, FLOAT *m, FLOAT *x, INT incx)
+{
+ INT r, c, nr, nc, i;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0;
+
+#define XX +
+ if (!upper) {
+ BODY_LOTRI();
+ } else {
+#undef XX
+#undef INIT
+#define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0;
+#define XX -
+ BODY_UPTRI();
+ }
+#undef XX
+
+#undef INIT
+#undef TERM
+#undef KERN
+#undef SOLN
+#undef FINI
+}
+
+/* rank 1 update */
+void
+func(ger)(INT nrow, INT ncol, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT *m, INT incm)
+{
+ INT r, c, nr, nc;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX, INCY) row[I] = m + (r+I)*incm, reg[I] = a*x[INCX*(r+I)];
+#define TERM(J, I, INCX, INCY) row[I][c+J] += reg[I] * y[INCY*(c+J)]
+#define KERN(I, INCX, INCY, LENGTH) EXPAND(LENGTH,0,TERM,;,I,INCX, INCY);
+#define FINI(I, ...)
+
+ BODY_RECT();
+
+#undef INIT
+#undef TERM
+#undef KERN
+#undef FINI
+}
+
+/* symmetric rank 1 update (different memory layouts) */
+void
+func(syr)(uint upper, INT n, FLOAT a, FLOAT *x, INT incx, FLOAT *m, INT incm)
+{
+ INT r, c, nr, nc;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX) row[I] = m + (r XX I)*incm, reg[I] = a*x[INCX*(r XX I)];
+#define TERM(J, I, INCX) row[I][c XX J] += reg[I] * x[INCX*(c XX J)]
+#define KERN(I, INCX, LENGTH) EXPAND(LENGTH,0,TERM,;,I,INCX);
+#define FINI(I, ...)
+
+#define XX +
+ if (!upper) {
+ BODY_LOTRI();
+ } else {
+#undef XX
+#define XX -
+ BODY_UPTRI();
+ }
+#undef XX
+
+#undef INIT
+}
+
+void
+func(spr)(uint upper, INT n, FLOAT a, FLOAT *x, INT incx, FLOAT *m)
+{
+ INT r, c, nr, nc;
+ FLOAT *row[UNROW], reg[UNROW];
+#define INIT(I, INCX) row[I] = m + ((r+I)*(r+I+1))*(1/2), reg[I] = 0;
+
+#define XX +
+ if (!upper) {
+ BODY_LOTRI();
+ } else {
+#undef XX
+#undef INIT
+#define INIT(I, INCX) row[I] = m + ((2*n-r-I)*(r+I+1))*(1/2), reg[I] = 0;
+#define XX -
+ BODY_UPTRI();
+ }
+#undef XX
+
+#undef INIT
+#undef TERM
+#undef KERN
+#undef FINI
+}
diff --git a/sys/libmath/blas3.c b/sys/libmath/blas3.c
new file mode 100644
index 0000000..d821e6e
--- /dev/null
+++ b/sys/libmath/blas3.c
@@ -0,0 +1,279 @@
+#include <u.h>
+#include <libn.h>
+#include <libmath.h>
+
+#define INT int
+#define FLOAT double
+#define func(name) blas·d##name
+
+#define X(i, j) x[j + incx*(i)]
+#define Y(i, j) y[j + incy*(i)]
+#define Z(i, j) z[j + incz*(i)]
+
+void
+func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz)
+{
+ INT jj, jb, kk, kb, dk, i, j, k, end;
+ FLOAT r0[8], r1[8], r2[8], r3[8], pf;
+
+ for (i = 0; i < ni; i++) {
+ for (j = 0; j < nj; j++) {
+ Z(i,j) *= b;
+ }
+ }
+
+ jb = MIN(256, nj);
+ kb = MIN(48, nk);
+ for (jj = 0; jj < nj; jj += jb) {
+ for (kk = 0; kk < nk; kk += kb) {
+ for (i = 0; i < ni; i += 4) {
+ for (j = jj; j < jj + jb; j += 8) {
+ r0[0] = Z(i+0, j+0); r0[1] = Z(i+0, j+1); r0[2] = Z(i+0, j+2); r0[3] = Z(i+0, j+3);
+ r1[0] = Z(i+1, j+0); r1[1] = Z(i+1, j+1); r1[2] = Z(i+1, j+2); r1[3] = Z(i+1, j+3);
+ r2[0] = Z(i+2, j+0); r2[1] = Z(i+2, j+1); r2[2] = Z(i+2, j+2); r2[3] = Z(i+2, j+3);
+ r3[0] = Z(i+3, j+0); r3[1] = Z(i+3, j+1); r3[2] = Z(i+3, j+2); r3[3] = Z(i+3, j+3);
+ end = MIN(nk, kk+kb);
+ for (k = kk; k < end; k++) {
+ pf = a * X(i, k);
+ r0[0] += pf * Y(k, j+0); r0[1] += pf * Y(k, j+1); r0[2] += pf * Y(k, j+2); r0[3] += pf * Y(k, j+3);
+
+ pf = a * X(i+1, k);
+ r1[0] += pf * Y(k, j+0); r1[1] += pf * Y(k, j+1); r1[2] += pf * Y(k, j+2); r1[3] += pf * Y(k, j+3);
+
+ pf = a * X(i+2, k);
+ r1[0] += pf * Y(k, j+0); r1[1] += pf * Y(k, j+1); r1[2] += pf * Y(k, j+2); r1[3] += pf * Y(k, j+3);
+
+ pf = a * X(i+3, k);
+ r1[0] += pf * Y(k, j+0); r1[1] += pf * Y(k, j+1); r1[2] += pf * Y(k, j+2); r1[3] += pf * Y(k, j+3);
+ }
+ Z(i+0, j+0) = r0[0]; Z(i+0, j+1) = r0[1]; Z(i+0, j+2) = r0[2]; Z(i+0, j+3) = r0[3];
+ Z(i+1, j+0) = r1[0]; Z(i+1, j+1) = r1[1]; Z(i+1, j+2) = r1[2]; Z(i+1, j+3) = r1[3];
+ Z(i+2, j+0) = r2[0]; Z(i+2, j+1) = r2[1]; Z(i+2, j+2) = r2[2]; Z(i+2, j+3) = r2[3];
+ Z(i+3, j+0) = r3[0]; Z(i+3, j+1) = r3[1]; Z(i+3, j+2) = r3[2]; Z(i+3, j+3) = r3[3];
+ }
+ }
+ }
+ }
+}
+
+#if 0
+void
+func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz)
+{
+ int i, j, k;
+ FLOAT w[nj*nk], acc[4][4];
+
+ for (i = 0; i < ni; i++) {
+ for (j = 0; j < nj; j++) {
+ Z(i,j) *= b;
+ W(i,j) = Y(j,i);
+ }
+ }
+
+ for (i = 0; i < ni; i+=4) {
+ for (j = 0; j < nj; j+=4) {
+ memset(acc, 0, sizeof(acc));
+ for (k = 0; k < nk; k+=4) {
+ acc[0][0] += X(i+0,k)*W(j+0,k) + X(i+0,k+1)*W(j+0,k+1) + X(i+0,k+2)*W(j+0,k+2) + X(i+0,k+3)*W(j+0,k+3);
+ acc[0][1] += X(i+0,k)*W(j+1,k) + X(i+0,k+1)*W(j+1,k+1) + X(i+0,k+2)*W(j+1,k+2) + X(i+0,k+3)*W(j+1,k+3);
+ acc[0][2] += X(i+0,k)*W(j+2,k) + X(i+0,k+1)*W(j+2,k+1) + X(i+0,k+2)*W(j+2,k+2) + X(i+0,k+3)*W(j+2,k+3);
+ acc[0][3] += X(i+0,k)*W(j+3,k) + X(i+0,k+1)*W(j+3,k+1) + X(i+0,k+2)*W(j+3,k+2) + X(i+0,k+3)*W(j+3,k+3);
+
+ acc[1][0] += X(i+1,k)*W(j+0,k) + X(i+1,k+1)*W(j+0,k+1) + X(i+1,k+2)*W(j+0,k+2) + X(i+1,k+3)*W(j+0,k+3);
+ acc[1][1] += X(i+1,k)*W(j+1,k) + X(i+1,k+1)*W(j+1,k+1) + X(i+1,k+2)*W(j+1,k+2) + X(i+1,k+3)*W(j+1,k+3);
+ acc[1][2] += X(i+1,k)*W(j+2,k) + X(i+1,k+1)*W(j+2,k+1) + X(i+1,k+2)*W(j+2,k+2) + X(i+1,k+3)*W(j+2,k+3);
+ acc[1][3] += X(i+1,k)*W(j+3,k) + X(i+1,k+1)*W(j+3,k+1) + X(i+1,k+2)*W(j+3,k+2) + X(i+1,k+3)*W(j+3,k+3);
+
+ acc[2][0] += X(i+2,k)*W(j+0,k) + X(i+2,k+1)*W(j+0,k+1) + X(i+2,k+2)*W(j+0,k+2) + X(i+2,k+3)*W(j+0,k+3);
+ acc[2][1] += X(i+2,k)*W(j+1,k) + X(i+2,k+1)*W(j+1,k+1) + X(i+2,k+2)*W(j+1,k+2) + X(i+2,k+3)*W(j+1,k+3);
+ acc[2][2] += X(i+2,k)*W(j+2,k) + X(i+2,k+1)*W(j+2,k+1) + X(i+2,k+2)*W(j+2,k+2) + X(i+2,k+3)*W(j+2,k+3);
+ acc[2][3] += X(i+2,k)*W(j+3,k) + X(i+2,k+1)*W(j+3,k+1) + X(i+2,k+2)*W(j+3,k+2) + X(i+2,k+3)*W(j+3,k+3);
+
+ acc[2][0] += X(i+3,k)*W(j+0,k) + X(i+3,k+1)*W(j+0,k+1) + X(i+3,k+2)*W(j+0,k+2) + X(i+3,k+3)*W(j+0,k+3);
+ acc[2][1] += X(i+3,k)*W(j+1,k) + X(i+3,k+1)*W(j+1,k+1) + X(i+3,k+2)*W(j+1,k+2) + X(i+3,k+3)*W(j+1,k+3);
+ acc[2][2] += X(i+3,k)*W(j+2,k) + X(i+3,k+1)*W(j+2,k+1) + X(i+3,k+2)*W(j+2,k+2) + X(i+3,k+3)*W(j+2,k+3);
+ acc[2][3] += X(i+3,k)*W(j+3,k) + X(i+3,k+1)*W(j+3,k+1) + X(i+3,k+2)*W(j+3,k+2) + X(i+3,k+3)*W(j+3,k+3);
+ // Z(i,j) += X(i,k)*Y(k,j);
+ }
+ Z(i+0,j+1) = a*acc[0][0];
+ Z(i+0,j+2) = a*acc[0][1];
+ Z(i+0,j+3) = a*acc[0][2];
+ Z(i+0,j+4) = a*acc[0][3];
+
+ Z(i+1,j+1) = a*acc[1][0];
+ Z(i+1,j+2) = a*acc[1][1];
+ Z(i+1,j+3) = a*acc[1][2];
+ Z(i+1,j+4) = a*acc[1][3];
+
+ Z(i+2,j+1) = a*acc[2][0];
+ Z(i+2,j+2) = a*acc[2][1];
+ Z(i+2,j+3) = a*acc[2][2];
+ Z(i+2,j+4) = a*acc[2][3];
+
+ Z(i+3,j+1) = a*acc[3][0];
+ Z(i+3,j+2) = a*acc[3][1];
+ Z(i+3,j+3) = a*acc[3][2];
+ Z(i+3,j+4) = a*acc[3][3];
+ }
+ }
+}
+#endif
+
+#if 0
+void
+func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz)
+{
+ int i, j, k, ri, rj, rk;
+ FLOAT reg[4][4], *xrow[4], *yrow[4];
+
+ for (i = 0; i < ni; i++) {
+ for (j = 0; j < nj; j++) {
+ z[j + incz*i] *= b;
+ }
+ }
+
+ for (i = 0; i < ni; i += 4) {
+ xrow[0] = x + incx*(i+0);
+ xrow[1] = x + incx*(i+1);
+ xrow[2] = x + incx*(i+2);
+ xrow[3] = x + incx*(i+3);
+ for (k = 0; k < nk; k+=4) {
+ yrow[0] = y + incy*(k+0);
+ yrow[1] = y + incy*(k+1);
+ yrow[2] = y + incy*(k+2);
+ yrow[3] = y + incy*(k+3);
+ reg[0][0] = a * xrow[0][k+0]; reg[0][1] = a * xrow[0][k+1]; reg[0][2] = a * xrow[0][k+2]; reg[0][3] = a * xrow[0][k+3];
+ reg[1][0] = a * xrow[1][k+0]; reg[1][1] = a * xrow[1][k+1]; reg[1][2] = a * xrow[1][k+2]; reg[1][3] = a * xrow[1][k+3];
+ reg[2][0] = a * xrow[2][k+0]; reg[2][1] = a * xrow[2][k+1]; reg[2][2] = a * xrow[2][k+2]; reg[2][3] = a * xrow[2][k+3];
+ reg[3][0] = a * xrow[3][k+0]; reg[3][1] = a * xrow[3][k+1]; reg[3][2] = a * xrow[3][k+2]; reg[3][3] = a * xrow[3][k+3];
+ for (j = 0; j < nj; j += 1) {
+ z[j + incz*(i+0)] += (reg[0][0]*yrow[0][j]+reg[0][1]*yrow[1][j]+reg[0][2]*yrow[2][j]+reg[0][3]*yrow[3][j]);
+ z[j + incz*(i+1)] += (reg[1][0]*yrow[0][j]+reg[1][1]*yrow[1][j]+reg[1][2]*yrow[2][j]+reg[1][3]*yrow[3][j]);
+ z[j + incz*(i+2)] += (reg[2][0]*yrow[0][j]+reg[2][1]*yrow[1][j]+reg[2][2]*yrow[2][j]+reg[2][3]*yrow[3][j]);
+ z[j + incz*(i+3)] += (reg[3][0]*yrow[0][j]+reg[3][1]*yrow[1][j]+reg[3][2]*yrow[2][j]+reg[3][3]*yrow[3][j]);
+ }
+ }
+ }
+}
+#endif
+
+#if 0
+void
+func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz)
+{
+ int i, j, k, ri, rj, rk;
+ FLOAT r[4][4], *row[4];
+
+ for (i = 0; i < ni; i++) {
+ for (j = 0; j < nj; j++) {
+ Z(i, j) *= b;
+ }
+ }
+
+ for (i = 0; i < ni; i+=4) {
+ for (j = 0; j < nj; j+=4) {
+ r[0][0] = 0; r[0][1] = 0; r[0][2] = 0; r[0][3] = 0;
+ r[1][0] = 0; r[1][1] = 0; r[1][2] = 0; r[1][3] = 0;
+ r[2][0] = 0; r[2][1] = 0; r[2][2] = 0; r[2][3] = 0;
+ r[3][0] = 0; r[3][1] = 0; r[3][2] = 0; r[3][3] = 0;
+ row[0] = &X(i+0, 0);
+ row[1] = &X(i+1, 0);
+ row[2] = &X(i+2, 0);
+ row[3] = &X(i+3, 0);
+ for (k = 0; k < nk; k++) {
+ r[0][0] += row[0][k]*Y(k,0); r[0][1] += row[0][k]*Y(k,1); r[0][2] += row[0][k]*Y(k,2); r[0][3] += row[0][k]*Y(k,3);
+ r[1][0] += row[1][k]*Y(k,0); r[1][1] += row[1][k]*Y(k,1); r[1][2] += row[1][k]*Y(k,2); r[1][3] += row[1][k]*Y(k,3);
+ r[2][0] += row[2][k]*Y(k,0); r[2][1] += row[2][k]*Y(k,1); r[2][2] += row[2][k]*Y(k,2); r[2][3] += row[2][k]*Y(k,3);
+ r[3][0] += row[3][k]*Y(k,0); r[3][1] += row[3][k]*Y(k,1); r[3][2] += row[3][k]*Y(k,2); r[3][3] += row[3][k]*Y(k,3);
+ }
+ Z(i+0, j+0) += r[0][0]; Z(i+0, j+1) += r[0][1]; Z(i+0, j+2) += r[0][2]; Z(i+0, j+3) += r[0][3];
+ Z(i+1, j+0) += r[1][0]; Z(i+1, j+1) += r[1][1]; Z(i+1, j+2) += r[1][2]; Z(i+1, j+3) += r[1][3];
+ Z(i+2, j+0) += r[2][0]; Z(i+2, j+1) += r[2][1]; Z(i+2, j+2) += r[2][2]; Z(i+2, j+3) += r[2][3];
+ Z(i+3, j+0) += r[3][0]; Z(i+3, j+1) += r[3][1]; Z(i+3, j+2) += r[3][2]; Z(i+3, j+3) += r[3][3];
+ }
+ }
+}
+#endif
+
+#if 0
+void
+func(gemm)(uint trm, uint trn, INT ni, INT nj, INT nk, FLOAT a, FLOAT *x, INT incx, FLOAT *y, INT incy, FLOAT b, FLOAT *z, INT incz)
+{
+ int i, j, k, ri, rj, rk;
+ FLOAT *xrow[8], *yrow[8], reg;
+
+ for (i = 0; i < ni; i++) {
+ for (j = 0; j < nj; j++) {
+ z[j + incz*i] *= b;
+ }
+ }
+
+ ri = ni & ~7;
+ rj = nj & ~7;
+ for (i = 0; i < ri; i += 8) {
+ xrow[0] = x + incx*(i+0);
+ xrow[1] = x + incx*(i+1);
+ xrow[2] = x + incx*(i+2);
+ xrow[3] = x + incx*(i+3);
+ xrow[4] = x + incx*(i+4);
+ xrow[5] = x + incx*(i+5);
+ xrow[6] = x + incx*(i+6);
+ xrow[7] = x + incx*(i+7);
+ for (j = 0; j < rj; j += 8) {
+ yrow[0] = y + incy*(j+0);
+ yrow[1] = y + incy*(j+1);
+ yrow[2] = y + incy*(j+2);
+ yrow[3] = y + incy*(j+3);
+ yrow[4] = y + incy*(j+4);
+ yrow[5] = y + incy*(j+5);
+ yrow[6] = y + incy*(j+6);
+ yrow[7] = y + incy*(j+7);
+ for (k = 0; k < nk; k++) {
+ reg = a*(yrow[0][k] + yrow[1][k] + yrow[2][k] + yrow[3][k] + yrow[4][k] + yrow[5][k] + yrow[6][k] + yrow[7][k]);
+ z[k + incz*(i+0)] += xrow[0][k]*reg;
+ z[k + incz*(i+1)] += xrow[1][k]*reg;
+ z[k + incz*(i+2)] += xrow[2][k]*reg;
+ z[k + incz*(i+3)] += xrow[3][k]*reg;
+ z[k + incz*(i+4)] += xrow[4][k]*reg;
+ z[k + incz*(i+5)] += xrow[5][k]*reg;
+ z[k + incz*(i+6)] += xrow[6][k]*reg;
+ z[k + incz*(i+7)] += xrow[7][k]*reg;
+ }
+ }
+ for (; j < nj; j++) {
+ for (k = 0; k < nk; k++) {
+ reg = a*y[k+incy*j];
+ z[k + incz*(i+0)] += xrow[0][k]*reg;
+ z[k + incz*(i+1)] += xrow[1][k]*reg;
+ z[k + incz*(i+2)] += xrow[2][k]*reg;
+ z[k + incz*(i+3)] += xrow[3][k]*reg;
+ z[k + incz*(i+4)] += xrow[4][k]*reg;
+ z[k + incz*(i+5)] += xrow[5][k]*reg;
+ z[k + incz*(i+6)] += xrow[6][k]*reg;
+ z[k + incz*(i+7)] += xrow[7][k]*reg;
+ }
+ }
+ }
+
+ for (; i < ni; i++) {
+ for (j = 0; j < rj; j += 8) {
+ yrow[0] = y + incy*(j+0);
+ yrow[1] = y + incy*(j+1);
+ yrow[2] = y + incy*(j+2);
+ yrow[3] = y + incy*(j+3);
+ yrow[4] = y + incy*(j+4);
+ yrow[5] = y + incy*(j+5);
+ yrow[6] = y + incy*(j+6);
+ yrow[7] = y + incy*(j+7);
+ for (k = 0; k < nk; k++) {
+ z[k + incz*(i)] += a*x[k + incx*i]*(yrow[0][k] + yrow[1][k] + yrow[2][k] + yrow[3][k] + yrow[4][k] + yrow[5][k] + yrow[6][k] + yrow[7][k]);
+ }
+ }
+ for (; j < nj; j++) {
+ for (k = 0; k < nk; k++) {
+ z[k + incz*i] += a*x[k + incx*i]*y[k + incy*j];
+ }
+ }
+ }
+}
+#endif
diff --git a/sys/libmath/lapack.c b/sys/libmath/lapack.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/sys/libmath/lapack.c
diff --git a/sys/libmath/matrix.c b/sys/libmath/matrix.c
new file mode 100644
index 0000000..e8bca0b
--- /dev/null
+++ b/sys/libmath/matrix.c
@@ -0,0 +1,176 @@
+#include <u.h>
+#include <libn.h>
+#include <libmath.h>
+
+/* TODO: replace (incrementally) with native C version! */
+#include <vendor/blas/cblas.h>
+#include <vendor/blas/lapacke.h>
+
+// -----------------------------------------------------------------------
+// level 1
+
+error
+la·vecslice(math·Vector *x, int min, int max, int inc)
+{
+ if (max > x->len || min < 0) {
+ errorf("out of bounds: attempted to access vector past length");
+ return 1;
+ }
+ x->len = (max - min) / inc;
+ x->d += x->inc * min;
+ x->inc *= inc;
+
+ return 0;
+}
+
+/* simple blas wrappers */
+void
+la·veccopy(math·Vector *dst, math·Vector *src)
+{
+ return cblas_dcopy(src->len, src->d, src->inc, dst->d, dst->inc);
+}
+
+double
+la·vecnorm(math·Vector *x)
+{
+ return cblas_dnrm2(x->len, x->d, x->inc);
+}
+
+void
+la·vecscale(math·Vector *x, double a)
+{
+ return cblas_dscal(x->len, a, x->d, x->inc);
+}
+
+double
+la·vecdot(math·Vector *x, math·Vector *y)
+{
+ return cblas_ddot(x->len, x->d, x->inc, y->d, y->inc);
+}
+
+// -----------------------------------------------------------------------
+// level 2
+
+error
+la·vecmat(math·Vector *x, math·Matrix *M)
+{
+ if (M->dim[1] != x->len) {
+ errorf("incompatible matrix dimensions");
+ return 1;
+ }
+ if (M->state & ~mat·trans)
+ cblas_dgemv(CblasRowMajor,CblasNoTrans,M->dim[0],M->dim[1],1.,M->d,M->inc,x->d,x->inc,0.,x->d,x->inc);
+ else
+ cblas_dgemv(CblasRowMajor,CblasTrans,M->dim[0],M->dim[1],1.,M->d,M->inc,x->d,x->inc,0.,x->d,x->inc);
+
+ return 0;
+}
+
+// -----------------------------------------------------------------------
+// level 3
+
+void
+la·transpose(math·Matrix *X)
+{
+ int tmp;
+ X->state ^= mat·trans;
+ tmp = X->dim[0], X->dim[0] = X->dim[1], X->dim[1] = tmp;
+}
+
+error
+la·matrow(math·Matrix *X, int r, math·Vector *row)
+{
+ if (r < 0 || r >= X->dim[0]) {
+ errorf("out of bounds");
+ return 1;
+ }
+
+ row->len = X->dim[1];
+ row->inc = 1;
+ row->d = X->d + X->dim[1] * r;
+
+ return 0;
+}
+
+error
+la·matcol(math·Matrix *X, int c, math·Vector *col)
+{
+ if (c < 0 || c >= X->dim[1]) {
+ errorf("out of bounds");
+ return 1;
+ }
+
+ col->len = X->dim[0];
+ col->inc = X->dim[1];
+ col->d = X->d + c;
+
+ return 0;
+}
+
+error
+la·matslice(math·Matrix *X, int r[3], int c[3])
+{
+ /* TODO */
+ return 0;
+}
+
+error
+la·eig(math·Matrix *X)
+{
+
+}
+
+/* X = A*B */
+error
+la·matmul(math·Matrix *X, math·Matrix *A, math·Matrix *B)
+{
+ if (A->dim[1] != B->dim[0]) {
+ errorf("number of interior dimensions of A '%d' not equal to that of B '%d'", A->dim[1], B->dim[0]);
+ return 1;
+ }
+ if (X->dim[0] != A->dim[0]) {
+ errorf("number of exterior dimensions of X '%d' not equal to that of A '%d'", X->dim[0], A->dim[0]);
+ return 1;
+ }
+ if (X->dim[1] != B->dim[1]) {
+ errorf("number of exterior dimensions of X '%d' not equal to that of B '%d'", X->dim[1], B->dim[1]);
+ return 1;
+ }
+
+ if (X->state & ~mat·trans)
+ if (A->state & ~mat·trans)
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc);
+ else
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc);
+ else
+ if (A->state & ~mat·trans)
+ cblas_dgemm(CblasRowMajor,CblasTrans,CblasNoTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc);
+ else
+ cblas_dgemm(CblasRowMajor,CblasTrans,CblasTrans,A->dim[0],B->dim[1],A->dim[1],1.,A->d,A->inc,B->d,B->inc,0.,X->d,X->inc);
+
+ return 0;
+}
+
+/*
+ * solves A*X=B
+ * pass in B via X
+ */
+error
+la·solve(math·Matrix *X, math·Matrix *A)
+{
+ error err;
+ int n, *ipv;
+ static int buf[512];
+ if (n = A->dim[0], n < arrlen(buf)) {
+ ipv = buf;
+ n = 0;
+ } else
+ ipv = malloc(n*sizeof(*ipv));
+
+ /* TODO: utilize more specific regimes if applicable */
+ err = LAPACKE_dgesv(LAPACK_ROW_MAJOR,A->dim[0],X->dim[1],A->d,A->inc,ipv,X->d,X->inc);
+
+ if (n)
+ free(ipv);
+ return err;
+}