aboutsummaryrefslogtreecommitdiff
path: root/sys/libmath/blas.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-13 17:30:19 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-13 17:30:19 -0700
commitd982e7c2fdebf560ccce193cb98b85d4fac28a45 (patch)
treeb18902eea12a2d55a24994ca0681ca1a369631aa /sys/libmath/blas.c
parentc9d4b2d7dd1d9a46571e5d2b2cf6ce10a9d9ebea (diff)
blas 1 generation code complete
Diffstat (limited to 'sys/libmath/blas.c')
-rw-r--r--sys/libmath/blas.c127
1 files changed, 24 insertions, 103 deletions
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index f6eb830..f5392c8 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -1,132 +1,53 @@
#include <u.h>
#include <libn.h>
+#include <libmath.h>
+#include <libmath/blas.h>
#include <time.h>
-#include <vendor/blas/cblas.h>
-
-int
-argmax2(int len, double *x)
-{
- int i, ix[8];
- double *end;
- double max[8];
-
- max[0] = x[0]; max[1] = x[1]; max[2] = x[2]; max[3] = x[3];
- max[4] = x[4]; max[5] = x[5]; max[6] = x[6]; max[7] = x[7];
- for (i = 0; i < len; i+=8) {
- if (x[i+0] > max[0]) {
- max[0] = *x;
- ix[0] = i;
- }
- if (x[i+1] > max[1]) {
- max[1] = *x;
- ix[1] = i;
- }
- if (x[i+2] > max[2]) {
- max[2] = *x;
- ix[2] = i;
- }
- if (x[i+3] > max[3]) {
- max[3] = *x;
- ix[3] = i;
- }
- if (x[i+4] > max[4]) {
- max[4] = *x;
- ix[4] = i;
- }
- if (x[i+5] > max[5]) {
- max[5] = *x;
- ix[5] = i;
- }
- if (x[i+6] > max[6]) {
- max[6] = *x;
- ix[6] = i;
- }
- if (x[i+7] > max[7]) {
- max[7] = *x;
- ix[7] = i;
- }
- }
- for (i = 1; i < 8; i++) {
- if (max[i] > max[0]) {
- max[0] = max[i];
- ix[0] = ix[i] + i;
- }
- }
-
- return ix[0];
-}
-
-int
-argmax(int len, double *x)
-{
- int i, ix;
- double *end;
- double max;
-
- max = *x;
- for (i = 0; i < len; i++) {
- if (x[i] > max) {
- max = *x;
- ix = i;
- }
- }
-
- return ix;
-}
+#include <vendor/blas/cblas.h>
-#define LEN 1000000
+#define LEN 2000000
#define NIT 2000
+#define INC 2
error
main()
{
int i, nit;
- double *x, *y, res[3];
+ double *x, *y, res[2];
clock_t t;
- double tprof[3] = { 0 };
+ double tprof[2] = { 0 };
rng·init(0);
x = malloc(sizeof(*x)*LEN);
- // y = malloc(sizeof(*x)*LEN);
-
-#define DO_0 t = clock(); \
- res[0] += argmax(LEN, x); \
- t = clock() - t; \
- tprof[0] += 1000.*t/CLOCKS_PER_SEC; \
+ y = malloc(sizeof(*x)*LEN);
-#define DO_1 t = clock(); \
- res[1] += argmax2(LEN, x); \
- t = clock() - t; \
- tprof[1] += 1000.*t/CLOCKS_PER_SEC; \
+#define DO_0 t = clock(); \
+ res[0] += blas·sumd(LEN/INC, x, INC); \
+ t = clock() - t; \
+ tprof[0] += 1000.*t/CLOCKS_PER_SEC; \
-#define DO_2 t = clock(); \
- res[2] += cblas_idamax(LEN, x, 1); \
- t = clock() - t; \
- tprof[2] += 1000.*t/CLOCKS_PER_SEC;
+#define DO_1 t = clock(); \
+ res[1] += cblas_dsum(LEN/INC, x, INC); \
+ t = clock() - t; \
+ tprof[1] += 1000.*t/CLOCKS_PER_SEC;
for (nit = 0; nit < NIT; nit++) {
for (i = 0; i < LEN; i++) {
x[i] = rng·random();
- // y[i] = rng·random();
+ y[i] = rng·random();
}
- switch (nit % 6) {
- case 0: DO_0; DO_1; DO_2; break;
- case 1: DO_0; DO_2; DO_1; break;
- case 2: DO_1; DO_0; DO_2; break;
- case 3: DO_1; DO_2; DO_0; break;
- case 4: DO_2; DO_0; DO_1; break;
- case 5: DO_2; DO_1; DO_0; break;
+ switch (nit % 2) {
+ case 0: DO_0; DO_1; break;
+ case 1: DO_1; DO_0; break;
}
}
- printf("mean time/iteration (naive): %fms\n", tprof[0]/NIT);
- printf("--> result (naive): %f\n", res[0]);
- printf("mean time/iteration (unrolled): %fms\n", tprof[1]/NIT);
- printf("--> result (unrolled): %f\n", res[1]);
- printf("mean time/iteration (openblas): %fms\n", tprof[2]/NIT);
- printf("--> result (openblas): %f\n", res[2]);
+ printf("mean time/iteration (mine): %fms\n", tprof[0]/NIT);
+ printf("--> result (mine): %f\n", res[0]);
+ printf("mean time/iteration (openblas): %fms\n", tprof[1]/NIT);
+ printf("--> result (openblas): %f\n", res[1]);
return 0;
}