aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--sys/libmath/blas.c96
2 files changed, 69 insertions, 29 deletions
diff --git a/Makefile b/Makefile
index 952595e..0821ef6 100644
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ LIB_DIR := lib
OBJ_DIR := build
# Flags, Libraries and Includes
-CFLAGS := -g -O1 -march=native -ffast-math -fno-strict-aliasing -fwrapv -fms-extensions -Wno-microsoft-anon-tag -Wno-incompatible-function-pointer-types
+CFLAGS := -g -O3 -march=native -ffast-math -fno-strict-aliasing -fwrapv -fms-extensions -Wno-microsoft-anon-tag -Wno-incompatible-function-pointer-types
AFLAGS := -f elf64
INCS := -I$(INC_DIR)
ELIBS :=
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index a008ffb..b42a3b8 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -326,21 +326,50 @@ blas·gemv(int nrow, int ncol, double a, double *m, double *x, double b, double
/*
* rank one addition
- * M = axy + M
+ * M = ax(y^T) + M
*/
-#define NITER 50
-#define NCOL 1000
-#define NROW 1000
+void
+blas·ger(int nrow, int ncol, double a, double *x, double *y, double *m)
+{
+ int i, j;
+
+ for (i = 0; i < nrow; i++) {
+ for (j = 0; j < ncol; j++) {
+ m[i+ncol*j] += a * x[i] * y[j];
+ }
+ }
+}
+
+/*
+ * symmetric rank one addition
+ * M = ax(x^T) + M
+ */
+
+void
+blas·syr(int nrow, int ncol, double a, double *x, double *m)
+{
+ int i, j;
+
+ for (i = 0; i < nrow; i++) {
+ for (j = 0; j < ncol; j++) {
+ m[i+ncol*j] += a * x[i] * x[j];
+ }
+ }
+}
+
+#define NITER 500
+#define NCOL 1007
+#define NROW 1007
error
main()
{
- int i;
+ int i, n;
clock_t t;
- double res;
double *x, *y, *m;
+ double res[2], tprof[2];
openblas_set_num_threads(1);
@@ -348,33 +377,44 @@ main()
y = malloc(sizeof(*x)*NCOL);
m = malloc(sizeof(*x)*NCOL*NROW);
- for (i = 0; i < NCOL; i++) {
- y[i] = i;
- }
+ tprof[0] = 0;
+ tprof[1] = 0;
- t = clock();
- for (i = 0; i < NITER; i++) {
- cblas_dgemv(CblasRowMajor, CblasNoTrans, NROW, NCOL, 1.5, m, NCOL, x, 1, 2.5, y, 1);
- }
- t = clock() - t;
- res = cblas_ddot(NROW, y, 1, y, 1);
+ for (n = 0; n < NITER; n++) {
+ for (i = 0; i < NCOL; i++) {
+ x[i] = i*i+1;
+ y[i] = i+1;
+ }
- printf("the result is %f\n", res);
- printf("time elapsed (blas): %fms\n", 1000.*t/CLOCKS_PER_SEC);
+ for (i = 0; i < NCOL*NROW; i++) {
+ m[i] = .1;
+ }
- for (i = 0; i < NCOL; i++) {
- y[i] = i;
- }
+ t = clock();
+ cblas_dger(CblasRowMajor, NROW, NCOL, 1.2, x, 1, y, 1, m, NCOL);
+ t = clock() - t;
+ tprof[1] += 1000.*t/CLOCKS_PER_SEC;
+ res[1] = cblas_ddot(NROW*NCOL, m, 1, m, 1);
- t = clock();
- for (i = 0; i < NITER; i++) {
- blas·gemv(NROW, NCOL, 1.5, m, x, 2.5, y);
- }
- t = clock() - t;
- res = blas·dot(NCOL, y, y);
+ for (i = 0; i < NCOL; i++) {
+ x[i] = i*i+1;
+ y[i] = i+1;
+ }
- printf("the dot product is %f\n", res);
- printf("time elapsed (naive): %fms\n", 1000.*t/CLOCKS_PER_SEC);
+ for (i = 0; i < NCOL*NROW; i++) {
+ m[i] = .1;
+ }
+
+ t = clock();
+ blas·ger(NROW, NCOL, 1.2, x, y, m);
+ t = clock() - t;
+ tprof[0] += 1000.*t/CLOCKS_PER_SEC;
+ res[0] = blas·dot(NROW*NCOL, m, m);
+ }
+ printf("avg time elapsed/iteration (naive): %fms\n", tprof[0]/NITER);
+ printf("--> result (naive): %f\n", res[0]);
+ printf("avg time elapsed/iteration (oblas): %fms\n", tprof[1]/NITER);
+ printf("--> result (oblas): %f\n", res[1]);
return 0;
}