about | summary | refs | log | tree | commit | diff
path: root/sys/libmath
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-05-29 14:41:05 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-05-29 14:41:05 -0700
commit 43ecfce7d20360a5fdc53e5ced266eccc8723242 (patch)
tree c380ce6c3b2e9221a9510e9e829a4d772041ea78 /sys/libmath
parent 680d60678b273f1ff20b013b24954773f76b4e1d (diff)
blas code update
Diffstat (limited to 'sys/libmath')
-rw-r--r-- sys/libmath/blas.c 29
-rw-r--r-- sys/libmath/blas1.c 7
-rw-r--r-- sys/libmath/loop.h 127
-rw-r--r-- sys/libmath/rules.mk 4
4 files changed, 127 insertions, 40 deletions
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index 224480b..4c9e67b 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -6,38 +6,47 @@
#include <vendor/blas/cblas.h>
-#define NCOL 20000
-#define NROW 20000
-#define NIT 2000
+#define NCOL 2*512
+#define NROW 2*512
+#define NSUM 2*512
+#define NIT 10
#define INC 1
error
main()
{
int i, j, nit;
- double *x, *y, res[2];
+ double *x, *y, *z, *w, res[2];
clock_t t;
double tprof[2] = { 0 };
rng·init(0);
- x = malloc(sizeof(*x)*NCOL);
- y = malloc(sizeof(*x)*NROW);
+ x = malloc(sizeof(*x)*NROW*NCOL);
+ y = malloc(sizeof(*x)*NROW*NCOL);
+ z = malloc(sizeof(*x)*NROW*NCOL);
+ w = malloc(sizeof(*x)*NROW*NCOL);
#define DO_0 t = clock(); \
- res[0] += blas·dasum(NROW/INC, x, INC); \
+ blas·dgemm(0,0,NROW,NCOL,NSUM,10.1,x,NROW,y,NROW,1.2,z,NROW);\
t = clock() - t; \
+ res[0] += blas·dasum(NROW*NCOL,z,INC); \
tprof[0] += 1000.*t/CLOCKS_PER_SEC; \
#define DO_1 t = clock(); \
- res[1] += cblas_dasum(NROW/INC, x, INC); \
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,NROW,NCOL,NSUM,10.1,x,NROW,y,NROW,1.2,w,NROW);\
t = clock() - t; \
+ res[1] += cblas_dasum(NROW*NCOL,w,INC); \
tprof[1] += 1000.*t/CLOCKS_PER_SEC;
for (nit = 0; nit < NIT; nit++) {
for (i = 0; i < NROW; i++) {
- x[i] = rng·random();
- y[i] = rng·random();
+ for (j = 0; j < NCOL; j++) {
+ x[j + NROW*i] = rng·random();
+ y[j + NROW*i] = rng·random();
+ z[j + NROW*i] = rng·random();
+ w[j + NROW*i] = z[j + NROW*i];
+ }
}
switch (nit % 2) {
diff --git a/sys/libmath/blas1.c b/sys/libmath/blas1.c
index d9792f6..a8ca085 100644
--- a/sys/libmath/blas1.c
+++ b/sys/libmath/blas1.c
@@ -1,9 +1,6 @@
#include <u.h>
#include <libmath.h>
-#define UNROLL 8
-#define INT uintptr
-
// -----------------------------------------------------------------------
// Templates
@@ -45,10 +42,12 @@
// -----------------------------------------------------------------------
// Implementation
+#define UNROLL 8
+#define INT int
+
#define FLOAT double
#define func(name) blas·d##name
#include "blas1body"
-#undef FLOAT
#undef FLOAT
#undef func
diff --git a/sys/libmath/loop.h b/sys/libmath/loop.h
index 9a425a7..a877d84 100644
--- a/sys/libmath/loop.h
+++ b/sys/libmath/loop.h
@@ -1,37 +1,114 @@
#pragma once
+/* increment operator */
+#define INC2(x) INC_##x
+#define INC1(x) INC2(x)
+#define INC(x) INC1(x)
+
+#define INC_0 1
+#define INC_1 2
+#define INC_2 3
+#define INC_3 4
+#define INC_4 5
+#define INC_5 6
+#define INC_6 7
+#define INC_7 8
+#define INC_8 9
+#define INC_9 10
+#define INC_10 11
+#define INC_11 12
+#define INC_12 13
+#define INC_13 14
+#define INC_14 15
+#define INC_15 16
+
+#define ROUNDBY(x, n) ((x) & ~((n)-1))
+
+/* subtraction tables */
+#define SUB2(x, y) SUB_##x##_##y
+#define SUB1(x, y) SUB2(x, y)
+#define SUB(x, y) SUB1(x, y)
+#define SUB_8_0 8
+#define SUB_8_1 7
+#define SUB_8_2 6
+#define SUB_8_3 5
+#define SUB_8_4 4
+#define SUB_8_5 3
+#define SUB_8_6 2
+#define SUB_8_7 1
+#define SUB_8_8 0
+#define SUB_7_0 7
+#define SUB_7_1 6
+#define SUB_7_2 5
+#define SUB_7_3 4
+#define SUB_7_4 3
+#define SUB_7_5 2
+#define SUB_7_6 1
+#define SUB_7_7 0
+#define SUB_6_0 6
+#define SUB_6_1 5
+#define SUB_6_2 4
+#define SUB_6_3 3
+#define SUB_6_4 2
+#define SUB_6_5 1
+#define SUB_6_6 0
+#define SUB_5_0 5
+#define SUB_5_1 4
+#define SUB_5_2 3
+#define SUB_5_3 2
+#define SUB_5_4 1
+#define SUB_5_5 0
+#define SUB_4_0 4
+#define SUB_4_1 3
+#define SUB_4_2 2
+#define SUB_4_3 1
+#define SUB_4_4 0
+#define SUB_3_0 3
+#define SUB_3_1 2
+#define SUB_3_2 1
+#define SUB_3_3 0
+#define SUB_2_0 2
+#define SUB_2_1 1
+#define SUB_2_2 0
+#define SUB_1_0 1
+#define SUB_1_1 0
+
+/* rounding operator */
#define ROUNDBY(x, n) ((x) & ~((n)-1))
/* loop unrolling (vertical) */
#define LOOP1(I,STMT,...) STMT(I,__VA_ARGS__)
-#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1((I+1),STMT,__VA_ARGS__)
-#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2((I+1),STMT,__VA_ARGS__)
-#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3((I+1),STMT,__VA_ARGS__)
-#define LOOP5(I,STMT,...) STMT(I,__VA_ARGS__) LOOP4((I+1),STMT,__VA_ARGS__)
-#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5((I+1),STMT,__VA_ARGS__)
-#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6((I+1),STMT,__VA_ARGS__)
-#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7((I+1),STMT,__VA_ARGS__)
-#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8((I+1),STMT,__VA_ARGS__)
-#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9((I+1),STMT,__VA_ARGS__)
-#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10((I+1),STMT,__VA_ARGS__)
-#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11((I+1),STMT,__VA_ARGS__)
-#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12((I+1),STMT,__VA_ARGS__)
-#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13((I+1),STMT,__VA_ARGS__)
-#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14((I+1),STMT,__VA_ARGS__)
-#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15((I+1),STMT,__VA_ARGS__)
+#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1(INC(I),STMT,__VA_ARGS__)
+#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2(INC(I),STMT,__VA_ARGS__)
+#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3(INC(I),STMT,__VA_ARGS__)
+#define LOOP5(I,STMT,...) STMT(I,__VA_ARGS__) LOOP4(INC(I),STMT,__VA_ARGS__)
+#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5(INC(I),STMT,__VA_ARGS__)
+#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6(INC(I),STMT,__VA_ARGS__)
+#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7(INC(I),STMT,__VA_ARGS__)
+#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8(INC(I),STMT,__VA_ARGS__)
+#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9(INC(I),STMT,__VA_ARGS__)
+#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10(INC(I),STMT,__VA_ARGS__)
+#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11(INC(I),STMT,__VA_ARGS__)
+#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12(INC(I),STMT,__VA_ARGS__)
+#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13(INC(I),STMT,__VA_ARGS__)
+#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14(INC(I),STMT,__VA_ARGS__)
+#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15(INC(I),STMT,__VA_ARGS__)
#define _LOOP_(n,I,STMT,...) LOOP##n(I,STMT,__VA_ARGS__)
#define LOOP(n,I,STMT,...) _LOOP_(n,I,STMT,__VA_ARGS__)
/* loop expansion (horizontal) */
+#define EXPAND0(I,TERM,OP,...)
#define EXPAND1(I,TERM,OP,...) TERM(I,__VA_ARGS__)
-#define EXPAND2(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND1((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7((I+1),TERM,OP,__VA_ARGS__)
-
-#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__)
-#define EXPAND(n,I,TERM,OP, ...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__)
+#define EXPAND2(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND1(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7(INC(I),TERM,OP,__VA_ARGS__)
+
+#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__)
+#define EXPAND(n,I,TERM,OP,...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__)
+#define EXPAND_TRI1(n,I,TERM,OP,...) EXPAND(n,I,TERM,OP,__VA_ARGS__)
+#define EXPAND_TRI(n,I,TERM,OP,...) EXPAND_TRI1(SUB(n,I),I,TERM,OP,__VA_ARGS__)
diff --git a/sys/libmath/rules.mk b/sys/libmath/rules.mk
index fddd74a..9e44ce4 100644
--- a/sys/libmath/rules.mk
+++ b/sys/libmath/rules.mk
@@ -5,7 +5,9 @@ include share/push.mk
# Local sources
SRCS_$(d) := \
$(d)/basic.c \
- $(d)/blas1.c
+ $(d)/blas1.c \
+ $(d)/blas2.c \
+ $(d)/blas3.c
LIBS_$(d) := $(d)/libmath.a
BINS_$(d) :=
TSTS_$(d) := \