aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-05-29 14:41:05 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-05-29 14:41:05 -0700
commit 43ecfce7d20360a5fdc53e5ced266eccc8723242 (patch)
tree c380ce6c3b2e9221a9510e9e829a4d772041ea78 /sys
parent 680d60678b273f1ff20b013b24954773f76b4e1d (diff)
blas code update
Diffstat (limited to 'sys')
-rw-r--r-- sys/cmd/rules.mk 3
-rw-r--r-- sys/libmath/blas.c 29
-rw-r--r-- sys/libmath/blas1.c 7
-rw-r--r-- sys/libmath/loop.h 127
-rw-r--r-- sys/libmath/rules.mk 4
-rw-r--r-- sys/libn/error.c 56
-rw-r--r-- sys/libn/memory.c 20
-rw-r--r-- sys/libn/string.c 22
8 files changed, 220 insertions, 48 deletions
diff --git a/sys/cmd/rules.mk b/sys/cmd/rules.mk
index 81c1cbe..6e4a3cb 100644
--- a/sys/cmd/rules.mk
+++ b/sys/cmd/rules.mk
@@ -8,6 +8,9 @@ include $(DIR)/rules.mk
# DIR := $(d)/cc
# include $(DIR)/rules.mk
+DIR := $(d)/edo
+include $(DIR)/rules.mk
+
DIR := $(d)/rc
include $(DIR)/rules.mk
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index 224480b..4c9e67b 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -6,38 +6,47 @@
#include <vendor/blas/cblas.h>
-#define NCOL 20000
-#define NROW 20000
-#define NIT 2000
+/* benchmark problem size; parenthesized so the products stay intact inside
+ * larger expressions such as x / NCOL or x % NROW */
+#define NCOL (2*512)
+#define NROW (2*512)
+#define NSUM (2*512)
+#define NIT 10
#define INC 1
error
main()
{
int i, j, nit;
- double *x, *y, res[2];
+ double *x, *y, *z, *w, res[2];
clock_t t;
double tprof[2] = { 0 };
rng·init(0);
- x = malloc(sizeof(*x)*NCOL);
- y = malloc(sizeof(*x)*NROW);
+ x = malloc(sizeof(*x)*NROW*NCOL);
+ y = malloc(sizeof(*x)*NROW*NCOL);
+ z = malloc(sizeof(*x)*NROW*NCOL);
+ w = malloc(sizeof(*x)*NROW*NCOL);
#define DO_0 t = clock(); \
- res[0] += blas·dasum(NROW/INC, x, INC); \
+ blas·dgemm(0,0,NROW,NCOL,NSUM,10.1,x,NROW,y,NROW,1.2,z,NROW);\
t = clock() - t; \
+ res[0] += blas·dasum(NROW*NCOL,z,INC); \
tprof[0] += 1000.*t/CLOCKS_PER_SEC; \
#define DO_1 t = clock(); \
- res[1] += cblas_dasum(NROW/INC, x, INC); \
+ cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,NROW,NCOL,NSUM,10.1,x,NROW,y,NROW,1.2,w,NROW);\
t = clock() - t; \
+ res[1] += cblas_dasum(NROW*NCOL,w,INC); \
tprof[1] += 1000.*t/CLOCKS_PER_SEC;
for (nit = 0; nit < NIT; nit++) {
for (i = 0; i < NROW; i++) {
- x[i] = rng·random();
- y[i] = rng·random();
+ for (j = 0; j < NCOL; j++) {
+ x[j + NROW*i] = rng·random();
+ y[j + NROW*i] = rng·random();
+ z[j + NROW*i] = rng·random();
+ w[j + NROW*i] = z[j + NROW*i];
+ }
}
switch (nit % 2) {
diff --git a/sys/libmath/blas1.c b/sys/libmath/blas1.c
index d9792f6..a8ca085 100644
--- a/sys/libmath/blas1.c
+++ b/sys/libmath/blas1.c
@@ -1,9 +1,6 @@
#include <u.h>
#include <libmath.h>
-#define UNROLL 8
-#define INT uintptr
-
// -----------------------------------------------------------------------
// Templates
@@ -45,10 +42,12 @@
// -----------------------------------------------------------------------
// Implementation
+#define UNROLL 8
+#define INT int
+
#define FLOAT double
#define func(name) blas·d##name
#include "blas1body"
-#undef FLOAT
#undef FLOAT
#undef func
diff --git a/sys/libmath/loop.h b/sys/libmath/loop.h
index 9a425a7..a877d84 100644
--- a/sys/libmath/loop.h
+++ b/sys/libmath/loop.h
@@ -1,37 +1,114 @@
#pragma once
+/* increment operator */
+#define INC2(x) INC_##x
+#define INC1(x) INC2(x)
+#define INC(x) INC1(x)
+
+#define INC_0 1
+#define INC_1 2
+#define INC_2 3
+#define INC_3 4
+#define INC_4 5
+#define INC_5 6
+#define INC_6 7
+#define INC_7 8
+#define INC_8 9
+#define INC_9 10
+#define INC_10 11
+#define INC_11 12
+#define INC_12 13
+#define INC_13 14
+#define INC_14 15
+#define INC_15 16
+
+#define ROUNDBY(x, n) ((x) & ~((n)-1))
+
+/* subtraction tables */
+#define SUB2(x, y) SUB_##x##_##y
+#define SUB1(x, y) SUB2(x, y)
+#define SUB(x, y) SUB1(x, y)
+#define SUB_8_0 8
+#define SUB_8_1 7
+#define SUB_8_2 6
+#define SUB_8_3 5
+#define SUB_8_4 4
+#define SUB_8_5 3
+#define SUB_8_6 2
+#define SUB_8_7 1
+#define SUB_8_8 0
+#define SUB_7_0 7
+#define SUB_7_1 6
+#define SUB_7_2 5
+#define SUB_7_3 4
+#define SUB_7_4 3
+#define SUB_7_5 2
+#define SUB_7_6 1
+#define SUB_7_7 0
+#define SUB_6_0 6
+#define SUB_6_1 5
+#define SUB_6_2 4
+#define SUB_6_3 3
+#define SUB_6_4 2
+#define SUB_6_5 1
+#define SUB_6_6 0
+#define SUB_5_0 5
+#define SUB_5_1 4
+#define SUB_5_2 3
+#define SUB_5_3 2
+#define SUB_5_4 1
+#define SUB_5_5 0
+#define SUB_4_0 4
+#define SUB_4_1 3
+#define SUB_4_2 2
+#define SUB_4_3 1
+#define SUB_4_4 0
+#define SUB_3_0 3
+#define SUB_3_1 2
+#define SUB_3_2 1
+#define SUB_3_3 0
+#define SUB_2_0 2
+#define SUB_2_1 1
+#define SUB_2_2 0
+#define SUB_1_0 1
+#define SUB_1_1 0
+
+/* rounding operator */
#define ROUNDBY(x, n) ((x) & ~((n)-1))
/* loop unrolling (vertical) */
#define LOOP1(I,STMT,...) STMT(I,__VA_ARGS__)
-#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1((I+1),STMT,__VA_ARGS__)
-#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2((I+1),STMT,__VA_ARGS__)
-#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3((I+1),STMT,__VA_ARGS__)
-#define LOOP5(I,STMT,...) STMT(I,__VA_ARGS__) LOOP4((I+1),STMT,__VA_ARGS__)
-#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5((I+1),STMT,__VA_ARGS__)
-#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6((I+1),STMT,__VA_ARGS__)
-#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7((I+1),STMT,__VA_ARGS__)
-#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8((I+1),STMT,__VA_ARGS__)
-#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9((I+1),STMT,__VA_ARGS__)
-#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10((I+1),STMT,__VA_ARGS__)
-#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11((I+1),STMT,__VA_ARGS__)
-#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12((I+1),STMT,__VA_ARGS__)
-#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13((I+1),STMT,__VA_ARGS__)
-#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14((I+1),STMT,__VA_ARGS__)
-#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15((I+1),STMT,__VA_ARGS__)
+#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1(INC(I),STMT,__VA_ARGS__)
+#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2(INC(I),STMT,__VA_ARGS__)
+#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3(INC(I),STMT,__VA_ARGS__)
+#define LOOP5(I,STMT,...) STMT(I,__VA_ARGS__) LOOP4(INC(I),STMT,__VA_ARGS__)
+#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5(INC(I),STMT,__VA_ARGS__)
+#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6(INC(I),STMT,__VA_ARGS__)
+#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7(INC(I),STMT,__VA_ARGS__)
+#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8(INC(I),STMT,__VA_ARGS__)
+#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9(INC(I),STMT,__VA_ARGS__)
+#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10(INC(I),STMT,__VA_ARGS__)
+#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11(INC(I),STMT,__VA_ARGS__)
+#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12(INC(I),STMT,__VA_ARGS__)
+#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13(INC(I),STMT,__VA_ARGS__)
+#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14(INC(I),STMT,__VA_ARGS__)
+#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15(INC(I),STMT,__VA_ARGS__)
#define _LOOP_(n,I,STMT,...) LOOP##n(I,STMT,__VA_ARGS__)
#define LOOP(n,I,STMT,...) _LOOP_(n,I,STMT,__VA_ARGS__)
/* loop expansion (horizontal) */
+#define EXPAND0(I,TERM,OP,...)
#define EXPAND1(I,TERM,OP,...) TERM(I,__VA_ARGS__)
-#define EXPAND2(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND1((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6((I+1),TERM,OP,__VA_ARGS__)
-#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7((I+1),TERM,OP,__VA_ARGS__)
-
-#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__)
-#define EXPAND(n,I,TERM,OP, ...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__)
+#define EXPAND2(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND1(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6(INC(I),TERM,OP,__VA_ARGS__)
+#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7(INC(I),TERM,OP,__VA_ARGS__)
+
+#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__)
+#define EXPAND(n,I,TERM,OP,...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__)
+#define EXPAND_TRI1(n,I,TERM,OP,...) EXPAND(n,I,TERM,OP,__VA_ARGS__)
+#define EXPAND_TRI(n,I,TERM,OP,...) EXPAND_TRI1(SUB(n,I),I,TERM,OP,__VA_ARGS__)
diff --git a/sys/libmath/rules.mk b/sys/libmath/rules.mk
index fddd74a..9e44ce4 100644
--- a/sys/libmath/rules.mk
+++ b/sys/libmath/rules.mk
@@ -5,7 +5,9 @@ include share/push.mk
# Local sources
SRCS_$(d) := \
$(d)/basic.c \
- $(d)/blas1.c
+ $(d)/blas1.c \
+ $(d)/blas2.c \
+ $(d)/blas3.c
LIBS_$(d) := $(d)/libmath.a
BINS_$(d) :=
TSTS_$(d) := \
diff --git a/sys/libn/error.c b/sys/libn/error.c
index 15611a3..9b6a28a 100644
--- a/sys/libn/error.c
+++ b/sys/libn/error.c
@@ -2,7 +2,7 @@
#include <libn.h>
void
-errorf(const byte* fmt, ...)
+errorf(byte* fmt, ...)
{
va_list args;
va_start(args, fmt);
@@ -13,3 +13,57 @@ errorf(const byte* fmt, ...)
va_end(args);
}
+
+/*
+ * panicf formats the printf-style message described by fmt, prefixes it
+ * with "panic: ", reports it through perror (which appends the text for the
+ * current errno) and terminates the process with exit status 1.
+ *
+ * The message is assembled in a static 4 KiB buffer; longer output is
+ * truncated. If formatting itself fails, only the prefix is reported, but
+ * the process still terminates.
+ */
+void
+panicf(byte* fmt, ...)
+{
+    int n;
+    va_list args;
+    /* the prefix is stored once by the initializer; formatting only ever
+     * writes behind it */
+    static byte buf[4*1024] = "panic: ";
+    enum { npre = sizeof("panic: ") - 1 };
+
+    va_start(args, fmt);
+    /* vsnprintf null-terminates within the size it is given, so no manual
+     * terminator is needed (and none can land at the wrong offset) */
+    n = vsnprintf(buf+npre, arrlen(buf)-npre, fmt, args);
+    va_end(args);
+
+    /* formatting error: drop any partial output, keep the bare prefix */
+    if (n < 0)
+        buf[npre] = 0;
+
+    perror(buf);
+
+    /* TODO: portable stack unwinding */
+    exit(1);
+}
+
+/*
+ * vpanicf is the va_list form of panicf: it formats fmt with args, prefixes
+ * the result with "panic: ", reports it through perror (which appends the
+ * text for the current errno) and terminates the process with exit status 1.
+ *
+ * The message is assembled in a static 4 KiB buffer; longer output is
+ * truncated. If formatting itself fails, only the prefix is reported, but
+ * the process still terminates.
+ */
+void
+vpanicf(byte* fmt, va_list args)
+{
+    int n;
+    va_list nargs;
+    /* the prefix is stored once by the initializer; formatting only ever
+     * writes behind it */
+    static byte buf[4*1024] = "panic: ";
+    enum { npre = sizeof("panic: ") - 1 };
+
+    /* format from a private copy so the caller's list is left untouched */
+    va_copy(nargs, args);
+    n = vsnprintf(buf+npre, arrlen(buf)-npre, fmt, nargs);
+    va_end(nargs);
+
+    /* formatting error: drop any partial output, keep the bare prefix */
+    if (n < 0)
+        buf[npre] = 0;
+
+    perror(buf);
+
+    /* TODO: portable stack unwinding */
+    exit(1);
+}
diff --git a/sys/libn/memory.c b/sys/libn/memory.c
index dce0c36..7993ca2 100644
--- a/sys/libn/memory.c
+++ b/sys/libn/memory.c
@@ -1,6 +1,26 @@
#include <u.h>
#include <libn.h>
+/*
+ * ·free releases ptr through the C library free. The first argument is
+ * unused.
+ */
+void
+·free(void* _, void* ptr) {
+    /* a void function may not return an expression in ISO C; just call */
+    free(ptr);
+}
+
+/*
+ * ·alloc returns uninitialized storage for n objects of size bytes each,
+ * obtained from the C library malloc. The first argument is unused.
+ * Returns a null pointer when the request cannot be satisfied, including
+ * when n*size is not representable.
+ */
+void *
+·alloc(void* _, uint n, ulong size) {
+    /* refuse a product that would wrap around and under-allocate
+     * NOTE(review): assumes ulong is an unsigned type - confirm in u.h */
+    if (size != 0 && n > (ulong)-1 / size)
+        return NULL;
+
+    return malloc(n*size);
+}
+
+/*
+ * ·calloc forwards to the C library calloc for n objects of size bytes
+ * each and hands back its result. The first argument is unused.
+ */
+void *
+·calloc(void* _, uint n, ulong size) {
+    void *mem;
+
+    mem = calloc(n, size);
+    return mem;
+}
+
+/*
+ * ·realloc resizes the allocation at ptr to hold n objects of size bytes
+ * each through the C library realloc. The first argument is unused.
+ * Returns a null pointer when the request cannot be satisfied, including
+ * when n*size is not representable; ptr is not passed to realloc in the
+ * latter case.
+ */
+void *
+·realloc(void* _, void *ptr, uint n, ulong size) {
+    /* refuse a product that would wrap around and shrink the block
+     * NOTE(review): assumes ulong is an unsigned type - confirm in u.h */
+    if (size != 0 && n > (ulong)-1 / size)
+        return NULL;
+
+    return realloc(ptr, n*size);
+}
+
// -------------------------------------------------------------------------
// Dynamic buffer.
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 7c152e3..e2cdddf 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -349,25 +349,29 @@ str·fit(string *s)
// structure. This variant can append a substring of length len of the given
// string to our buffer. The result is reallocated if not enough room is present
// in the buffer.
-void
+int
str·appendlen(string *s, vlong n, const byte* b)
{
- vlong bl = strlen(b);
+ /*
+ bl = strlen(b);
if (n > bl) panicf("attempted to make a substring longer than string");
+ */
str·grow(s, n);
- if (*s == nil) return;
+ if (*s == nil) return 0;
Hdr* h = (Hdr*)(*s - sizeof(Hdr));
memcpy(*s + str·len(*s), b, n);
h->len += n;
(*s)[h->len] = '\0';
+
+ return n;
}
// Append will append the given null terminated C string to the string data
// structure. This variant will append the entire string.
-void
+int
str·append(string *s, const byte* b)
{
return str·appendlen(s, strlen(b), b);
@@ -376,17 +380,19 @@ str·append(string *s, const byte* b)
// AppendByte will append the given byte to our string.
// NOTE: As the byte is on the stack, it is not null-terminated.
// Can not pass to the above functions.
-void
+int
str·appendbyte(string *s, const byte b)
{
str·grow(s, 1);
- if (*s == nil) return;
+ if (*s == nil) return 0;
Hdr* h = (Hdr*)(*s - sizeof(Hdr));
*(*s + str·len(*s)) = b;
h->len++;
(*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..?
+
+ return 1;
}
/*
@@ -394,7 +400,7 @@ str·appendbyte(string *s, const byte b)
* Returns the newly minted string
*/
-void
+int
str·appendf(string *s, const byte* fmt, ...)
{
va_list args;
@@ -415,6 +421,8 @@ str·appendf(string *s, const byte* fmt, ...)
Hdr* h = (Hdr*)(*s - sizeof(Hdr));
h->len += n;
+
+ return n;
}
// Equals returns true if string s and t are equivalent.