From d982e7c2fdebf560ccce193cb98b85d4fac28a45 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 13 May 2020 17:30:19 -0700 Subject: blas 1 generation code complete --- sys/libmath/blas.c | 127 ++++++++++------------------------------------------- 1 file changed, 24 insertions(+), 103 deletions(-) (limited to 'sys/libmath/blas.c') diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c index f6eb830..f5392c8 100644 --- a/sys/libmath/blas.c +++ b/sys/libmath/blas.c @@ -1,132 +1,53 @@ #include #include +#include +#include #include -#include - -int -argmax2(int len, double *x) -{ - int i, ix[8]; - double *end; - double max[8]; - - max[0] = x[0]; max[1] = x[1]; max[2] = x[2]; max[3] = x[3]; - max[4] = x[4]; max[5] = x[5]; max[6] = x[6]; max[7] = x[7]; - for (i = 0; i < len; i+=8) { - if (x[i+0] > max[0]) { - max[0] = *x; - ix[0] = i; - } - if (x[i+1] > max[1]) { - max[1] = *x; - ix[1] = i; - } - if (x[i+2] > max[2]) { - max[2] = *x; - ix[2] = i; - } - if (x[i+3] > max[3]) { - max[3] = *x; - ix[3] = i; - } - if (x[i+4] > max[4]) { - max[4] = *x; - ix[4] = i; - } - if (x[i+5] > max[5]) { - max[5] = *x; - ix[5] = i; - } - if (x[i+6] > max[6]) { - max[6] = *x; - ix[6] = i; - } - if (x[i+7] > max[7]) { - max[7] = *x; - ix[7] = i; - } - } - for (i = 1; i < 8; i++) { - if (max[i] > max[0]) { - max[0] = max[i]; - ix[0] = ix[i] + i; - } - } - - return ix[0]; -} - -int -argmax(int len, double *x) -{ - int i, ix; - double *end; - double max; - - max = *x; - for (i = 0; i < len; i++) { - if (x[i] > max) { - max = *x; - ix = i; - } - } - - return ix; -} +#include -#define LEN 1000000 +#define LEN 2000000 #define NIT 2000 +#define INC 2 error main() { int i, nit; - double *x, *y, res[3]; + double *x, *y, res[2]; clock_t t; - double tprof[3] = { 0 }; + double tprof[2] = { 0 }; rng·init(0); x = malloc(sizeof(*x)*LEN); - // y = malloc(sizeof(*x)*LEN); - -#define DO_0 t = clock(); \ - res[0] += argmax(LEN, x); \ - t = clock() - t; \ - tprof[0] += 1000.*t/CLOCKS_PER_SEC; \ + y = malloc(sizeof(*x)*LEN); -#define DO_1 t = clock(); \ - res[1] += argmax2(LEN, x); \ - t = clock() - t; \ - tprof[1] += 1000.*t/CLOCKS_PER_SEC; \ +#define DO_0 t = clock(); \ + res[0] += blas·sumd(LEN/INC, x, INC); \ + t = clock() - t; \ + tprof[0] += 1000.*t/CLOCKS_PER_SEC; \ -#define DO_2 t = clock(); \ - res[2] += cblas_idamax(LEN, x, 1); \ - t = clock() - t; \ - tprof[2] += 1000.*t/CLOCKS_PER_SEC; +#define DO_1 t = clock(); \ + res[1] += cblas_dsum(LEN/INC, x, INC); \ + t = clock() - t; \ + tprof[1] += 1000.*t/CLOCKS_PER_SEC; for (nit = 0; nit < NIT; nit++) { for (i = 0; i < LEN; i++) { x[i] = rng·random(); - // y[i] = rng·random(); + y[i] = rng·random(); } - switch (nit % 6) { - case 0: DO_0; DO_1; DO_2; break; - case 1: DO_0; DO_2; DO_1; break; - case 2: DO_1; DO_0; DO_2; break; - case 3: DO_1; DO_2; DO_0; break; - case 4: DO_2; DO_0; DO_1; break; - case 5: DO_2; DO_1; DO_0; break; + switch (nit % 2) { + case 0: DO_0; DO_1; break; + case 1: DO_1; DO_0; break; } } - printf("mean time/iteration (naive): %fms\n", tprof[0]/NIT); - printf("--> result (naive): %f\n", res[0]); - printf("mean time/iteration (unrolled): %fms\n", tprof[1]/NIT); - printf("--> result (unrolled): %f\n", res[1]); - printf("mean time/iteration (openblas): %fms\n", tprof[2]/NIT); - printf("--> result (openblas): %f\n", res[2]); + printf("mean time/iteration (mine): %fms\n", tprof[0]/NIT); + printf("--> result (mine): %f\n", res[0]); + printf("mean time/iteration (openblas): %fms\n", tprof[1]/NIT); + printf("--> result (openblas): %f\n", res[1]); return 0; } -- cgit v1.2.1