From 43ecfce7d20360a5fdc53e5ced266eccc8723242 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 29 May 2020 14:41:05 -0700 Subject: blas code update --- sys/cmd/rules.mk | 3 ++ sys/libmath/blas.c | 29 ++++++++---- sys/libmath/blas1.c | 7 ++- sys/libmath/loop.h | 127 +++++++++++++++++++++++++++++++++++++++++---------- sys/libmath/rules.mk | 4 +- sys/libn/error.c | 56 ++++++++++++++++++++++- sys/libn/memory.c | 20 ++++++++ sys/libn/string.c | 22 ++++++--- 8 files changed, 220 insertions(+), 48 deletions(-) (limited to 'sys') diff --git a/sys/cmd/rules.mk b/sys/cmd/rules.mk index 81c1cbe..6e4a3cb 100644 --- a/sys/cmd/rules.mk +++ b/sys/cmd/rules.mk @@ -8,6 +8,9 @@ include $(DIR)/rules.mk # DIR := $(d)/cc # include $(DIR)/rules.mk +DIR := $(d)/edo +include $(DIR)/rules.mk + DIR := $(d)/rc include $(DIR)/rules.mk diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c index 224480b..4c9e67b 100644 --- a/sys/libmath/blas.c +++ b/sys/libmath/blas.c @@ -6,38 +6,47 @@ #include -#define NCOL 20000 -#define NROW 20000 -#define NIT 2000 +#define NCOL 2*512 +#define NROW 2*512 +#define NSUM 2*512 +#define NIT 10 #define INC 1 error main() { int i, j, nit; - double *x, *y, res[2]; + double *x, *y, *z, *w, res[2]; clock_t t; double tprof[2] = { 0 }; rng·init(0); - x = malloc(sizeof(*x)*NCOL); - y = malloc(sizeof(*x)*NROW); + x = malloc(sizeof(*x)*NROW*NCOL); + y = malloc(sizeof(*x)*NROW*NCOL); + z = malloc(sizeof(*x)*NROW*NCOL); + w = malloc(sizeof(*x)*NROW*NCOL); #define DO_0 t = clock(); \ - res[0] += blas·dasum(NROW/INC, x, INC); \ + blas·dgemm(0,0,NROW,NCOL,NSUM,10.1,x,NROW,y,NROW,1.2,z,NROW);\ t = clock() - t; \ + res[0] += blas·dasum(NROW*NCOL,z,INC); \ tprof[0] += 1000.*t/CLOCKS_PER_SEC; \ #define DO_1 t = clock(); \ - res[1] += cblas_dasum(NROW/INC, x, INC); \ + cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,NROW,NCOL,NSUM,10.1,x,NROW,y,NROW,1.2,w,NROW);\ t = clock() - t; \ + res[1] += cblas_dasum(NROW*NCOL,w,INC); \ tprof[1] += 1000.*t/CLOCKS_PER_SEC; for (nit = 
0; nit < NIT; nit++) { for (i = 0; i < NROW; i++) { - x[i] = rng·random(); - y[i] = rng·random(); + for (j = 0; j < NCOL; j++) { + x[j + NROW*i] = rng·random(); + y[j + NROW*i] = rng·random(); + z[j + NROW*i] = rng·random(); + w[j + NROW*i] = z[j + NROW*i]; + } } switch (nit % 2) { diff --git a/sys/libmath/blas1.c b/sys/libmath/blas1.c index d9792f6..a8ca085 100644 --- a/sys/libmath/blas1.c +++ b/sys/libmath/blas1.c @@ -1,9 +1,6 @@ #include #include -#define UNROLL 8 -#define INT uintptr - // ----------------------------------------------------------------------- // Templates @@ -45,10 +42,12 @@ // ----------------------------------------------------------------------- // Implementation +#define UNROLL 8 +#define INT int + #define FLOAT double #define func(name) blas·d##name #include "blas1body" -#undef FLOAT #undef FLOAT #undef func diff --git a/sys/libmath/loop.h b/sys/libmath/loop.h index 9a425a7..a877d84 100644 --- a/sys/libmath/loop.h +++ b/sys/libmath/loop.h @@ -1,37 +1,114 @@ #pragma once +/* increment operator */ +#define INC2(x) INC_##x +#define INC1(x) INC2(x) +#define INC(x) INC1(x) + +#define INC_0 1 +#define INC_1 2 +#define INC_2 3 +#define INC_3 4 +#define INC_4 5 +#define INC_5 6 +#define INC_6 7 +#define INC_7 8 +#define INC_8 9 +#define INC_9 10 +#define INC_10 11 +#define INC_11 12 +#define INC_12 13 +#define INC_13 14 +#define INC_14 15 +#define INC_15 16 + +#define ROUNDBY(x, n) ((x) & ~((n)-1)) + +/* subtraction tables */ +#define SUB2(x, y) SUB_##x##_##y +#define SUB1(x, y) SUB2(x, y) +#define SUB(x, y) SUB1(x, y) +#define SUB_8_0 8 +#define SUB_8_1 7 +#define SUB_8_2 6 +#define SUB_8_3 5 +#define SUB_8_4 4 +#define SUB_8_5 3 +#define SUB_8_6 2 +#define SUB_8_7 1 +#define SUB_8_8 0 +#define SUB_7_0 7 +#define SUB_7_1 6 +#define SUB_7_2 5 +#define SUB_7_3 4 +#define SUB_7_4 3 +#define SUB_7_5 2 +#define SUB_7_6 1 +#define SUB_7_7 0 +#define SUB_6_0 6 +#define SUB_6_1 5 +#define SUB_6_2 4 +#define SUB_6_3 3 +#define SUB_6_4 2 +#define SUB_6_5 1 
+#define SUB_6_6 0 +#define SUB_5_0 5 +#define SUB_5_1 4 +#define SUB_5_2 3 +#define SUB_5_3 2 +#define SUB_5_4 1 +#define SUB_5_5 0 +#define SUB_4_0 4 +#define SUB_4_1 3 +#define SUB_4_2 2 +#define SUB_4_3 1 +#define SUB_4_4 0 +#define SUB_3_0 3 +#define SUB_3_1 2 +#define SUB_3_2 1 +#define SUB_3_3 0 +#define SUB_2_0 2 +#define SUB_2_1 1 +#define SUB_2_2 0 +#define SUB_1_0 1 +#define SUB_1_1 0 + +/* rounding operator */ #define ROUNDBY(x, n) ((x) & ~((n)-1)) /* loop unrolling (vertical) */ #define LOOP1(I,STMT,...) STMT(I,__VA_ARGS__) -#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1((I+1),STMT,__VA_ARGS__) -#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2((I+1),STMT,__VA_ARGS__) -#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3((I+1),STMT,__VA_ARGS__) -#define LOOP5(I,STMT,...) STMT(I,__VA_ARGS__) LOOP4((I+1),STMT,__VA_ARGS__) -#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5((I+1),STMT,__VA_ARGS__) -#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6((I+1),STMT,__VA_ARGS__) -#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7((I+1),STMT,__VA_ARGS__) -#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8((I+1),STMT,__VA_ARGS__) -#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9((I+1),STMT,__VA_ARGS__) -#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10((I+1),STMT,__VA_ARGS__) -#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11((I+1),STMT,__VA_ARGS__) -#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12((I+1),STMT,__VA_ARGS__) -#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13((I+1),STMT,__VA_ARGS__) -#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14((I+1),STMT,__VA_ARGS__) -#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15((I+1),STMT,__VA_ARGS__) +#define LOOP2(I,STMT,...) STMT(I,__VA_ARGS__) LOOP1(INC(I),STMT,__VA_ARGS__) +#define LOOP3(I,STMT,...) STMT(I,__VA_ARGS__) LOOP2(INC(I),STMT,__VA_ARGS__) +#define LOOP4(I,STMT,...) STMT(I,__VA_ARGS__) LOOP3(INC(I),STMT,__VA_ARGS__) +#define LOOP5(I,STMT,...) 
STMT(I,__VA_ARGS__) LOOP4(INC(I),STMT,__VA_ARGS__) +#define LOOP6(I,STMT,...) STMT(I,__VA_ARGS__) LOOP5(INC(I),STMT,__VA_ARGS__) +#define LOOP7(I,STMT,...) STMT(I,__VA_ARGS__) LOOP6(INC(I),STMT,__VA_ARGS__) +#define LOOP8(I,STMT,...) STMT(I,__VA_ARGS__) LOOP7(INC(I),STMT,__VA_ARGS__) +#define LOOP9(I,STMT,...) STMT(I,__VA_ARGS__) LOOP8(INC(I),STMT,__VA_ARGS__) +#define LOOP10(I,STMT,...) STMT(I,__VA_ARGS__) LOOP9(INC(I),STMT,__VA_ARGS__) +#define LOOP11(I,STMT,...) STMT(I,__VA_ARGS__) LOOP10(INC(I),STMT,__VA_ARGS__) +#define LOOP12(I,STMT,...) STMT(I,__VA_ARGS__) LOOP11(INC(I),STMT,__VA_ARGS__) +#define LOOP13(I,STMT,...) STMT(I,__VA_ARGS__) LOOP12(INC(I),STMT,__VA_ARGS__) +#define LOOP14(I,STMT,...) STMT(I,__VA_ARGS__) LOOP13(INC(I),STMT,__VA_ARGS__) +#define LOOP15(I,STMT,...) STMT(I,__VA_ARGS__) LOOP14(INC(I),STMT,__VA_ARGS__) +#define LOOP16(I,STMT,...) STMT(I,__VA_ARGS__) LOOP15(INC(I),STMT,__VA_ARGS__) #define _LOOP_(n,I,STMT,...) LOOP##n(I,STMT,__VA_ARGS__) #define LOOP(n,I,STMT,...) _LOOP_(n,I,STMT,__VA_ARGS__) /* loop expansion (horizontal) */ +#define EXPAND0(I,TERM,OP,...) #define EXPAND1(I,TERM,OP,...) TERM(I,__VA_ARGS__) -#define EXPAND2(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND1((I+1),TERM,OP,__VA_ARGS__) -#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2((I+1),TERM,OP,__VA_ARGS__) -#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3((I+1),TERM,OP,__VA_ARGS__) -#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4((I+1),TERM,OP,__VA_ARGS__) -#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5((I+1),TERM,OP,__VA_ARGS__) -#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6((I+1),TERM,OP,__VA_ARGS__) -#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7((I+1),TERM,OP,__VA_ARGS__) - -#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__) -#define EXPAND(n,I,TERM,OP, ...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__) +#define EXPAND2(I,TERM,OP,...) 
TERM(I,__VA_ARGS__) OP EXPAND1(INC(I),TERM,OP,__VA_ARGS__) +#define EXPAND3(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND2(INC(I),TERM,OP,__VA_ARGS__) +#define EXPAND4(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND3(INC(I),TERM,OP,__VA_ARGS__) +#define EXPAND5(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND4(INC(I),TERM,OP,__VA_ARGS__) +#define EXPAND6(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND5(INC(I),TERM,OP,__VA_ARGS__) +#define EXPAND7(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND6(INC(I),TERM,OP,__VA_ARGS__) +#define EXPAND8(I,TERM,OP,...) TERM(I,__VA_ARGS__) OP EXPAND7(INC(I),TERM,OP,__VA_ARGS__) + +#define _EXPAND_(n,I,TERM,OP,...) EXPAND##n(I,TERM,OP,__VA_ARGS__) +#define EXPAND(n,I,TERM,OP,...) _EXPAND_(n,I,TERM,OP,__VA_ARGS__) +#define EXPAND_TRI1(n,I,TERM,OP,...) EXPAND(n,I,TERM,OP,__VA_ARGS__) +#define EXPAND_TRI(n,I,TERM,OP,...) EXPAND_TRI1(SUB(n,I),I,TERM,OP,__VA_ARGS__) diff --git a/sys/libmath/rules.mk b/sys/libmath/rules.mk index fddd74a..9e44ce4 100644 --- a/sys/libmath/rules.mk +++ b/sys/libmath/rules.mk @@ -5,7 +5,9 @@ include share/push.mk # Local sources SRCS_$(d) := \ $(d)/basic.c \ - $(d)/blas1.c + $(d)/blas1.c \ + $(d)/blas2.c \ + $(d)/blas3.c LIBS_$(d) := $(d)/libmath.a BINS_$(d) := TSTS_$(d) := \ diff --git a/sys/libn/error.c b/sys/libn/error.c index 15611a3..9b6a28a 100644 --- a/sys/libn/error.c +++ b/sys/libn/error.c @@ -2,7 +2,7 @@ #include void -errorf(const byte* fmt, ...) +errorf(byte* fmt, ...) { va_list args; va_start(args, fmt); @@ -13,3 +13,57 @@ errorf(const byte* fmt, ...) va_end(args); } + +void +panicf(byte* fmt, ...) 
+{
+	int n;
+	va_list args;
+	static byte buf[4*1024];
+
+	/* fixed "panic: " prefix fills buf[0..6]; the formatted message follows */
+	buf[0] = 'p';
+	buf[1] = 'a';
+	buf[2] = 'n';
+	buf[3] = 'i';
+	buf[4] = 'c';
+	buf[5] = ':';
+	buf[6] = ' ';
+	/* vsnprintf NUL-terminates (truncating if needed): no manual terminator */
+	va_start(args, fmt);
+	n = vsnprintf(buf+7, arrlen(buf)-8, fmt, args);
+	va_end(args);
+	if (n < 0)
+		buf[7] = 0; /* formatting failed: report the bare prefix, still exit */
+
+	perror(buf);
+	/* TODO: portable stack unwinding */
+	exit(1);
+}
+
+/* vpanicf: panicf for callers that already hold a va_list; never returns */
+void
+vpanicf(byte* fmt, va_list args)
+{
+	int n;
+	va_list nargs;
+	static byte buf[4*1024];
+
+	buf[0] = 'p';
+	buf[1] = 'a';
+	buf[2] = 'n';
+	buf[3] = 'i';
+	buf[4] = 'c';
+	buf[5] = ':';
+	buf[6] = ' ';
+	/* format from a private copy so the caller's va_list is not consumed */
+	va_copy(nargs, args);
+	n = vsnprintf(buf+7, arrlen(buf)-8, fmt, nargs);
+	va_end(nargs);
+	if (n < 0)
+		buf[7] = 0; /* formatting failed: report the bare prefix, still exit */
+
+	perror(buf);
+	/* TODO: portable stack unwinding */
+	exit(1);
+}
diff --git a/sys/libn/memory.c b/sys/libn/memory.c
index dce0c36..7993ca2 100644
--- a/sys/libn/memory.c
+++ b/sys/libn/memory.c
@@ -1,6 +1,34 @@
 #include
 #include
 
+/* ·free releases ptr with free(); the first argument is ignored */
+void
+·free(void* _, void* ptr) {
+	free(ptr);
+}
+
+/* ·alloc returns n*size uninitialized bytes, or NULL if n*size would overflow */
+void *
+·alloc(void* _, uint n, ulong size) {
+	if (size != 0 && n > ~(ulong)0 / size)
+		return NULL;
+	return malloc(n*size);
+}
+
+/* ·calloc returns n zeroed elements of size bytes each */
+void *
+·calloc(void* _, uint n, ulong size) {
+	return calloc(n, size);
+}
+
+/* ·realloc resizes ptr to n*size bytes; on overflow returns NULL, ptr untouched */
+void *
+·realloc(void* _, void *ptr, uint n, ulong size) {
+	if (size != 0 && n > ~(ulong)0 / size)
+		return NULL;
+	return realloc(ptr, n*size);
+}
+
 // -------------------------------------------------------------------------
 // Dynamic buffer.
 
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 7c152e3..e2cdddf 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -349,25 +349,29 @@ str·fit(string *s)
 // structure. This variant can append a substring of length len of the given
 // string to our buffer. The result is reallocated if not enough room is present
 // in the buffer.
-void +int str·appendlen(string *s, vlong n, const byte* b) { - vlong bl = strlen(b); + /* + bl = strlen(b); if (n > bl) panicf("attempted to make a substring longer than string"); + */ str·grow(s, n); - if (*s == nil) return; + if (*s == nil) return 0; Hdr* h = (Hdr*)(*s - sizeof(Hdr)); memcpy(*s + str·len(*s), b, n); h->len += n; (*s)[h->len] = '\0'; + + return n; } // Append will append the given null terminated C string to the string data // structure. This variant will append the entire string. -void +int str·append(string *s, const byte* b) { return str·appendlen(s, strlen(b), b); @@ -376,17 +380,19 @@ str·append(string *s, const byte* b) // AppendByte will append the given byte to our string. // NOTE: As the byte is on the stack, it is not null-terminated. // Can not pass to the above functions. -void +int str·appendbyte(string *s, const byte b) { str·grow(s, 1); - if (*s == nil) return; + if (*s == nil) return 0; Hdr* h = (Hdr*)(*s - sizeof(Hdr)); *(*s + str·len(*s)) = b; h->len++; (*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? + + return 1; } /* @@ -394,7 +400,7 @@ str·appendbyte(string *s, const byte b) * Returns the newly minted string */ -void +int str·appendf(string *s, const byte* fmt, ...) { va_list args; @@ -415,6 +421,8 @@ str·appendf(string *s, const byte* fmt, ...) Hdr* h = (Hdr*)(*s - sizeof(Hdr)); h->len += n; + + return n; } // Equals returns true if string s and t are equivalent. -- cgit v1.2.1