From 36117f59ec77784c9ef77801d7c1cbf03a4c4a8b Mon Sep 17 00:00:00 2001
From: Nicholas Noll <nbnoll@eml.cc>
Date: Thu, 7 May 2020 15:35:06 -0700
Subject: wrap: elementary math functions for libmath

---
 sys/libmath/basic.c | 531 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/libmath/blas.c  | 237 ++++++++++++++++++++++-
 sys/libn/random.c   |   2 +-
 sys/libn/string.c   |   1 +
 4 files changed, 766 insertions(+), 5 deletions(-)
 create mode 100644 sys/libmath/basic.c

(limited to 'sys')

diff --git a/sys/libmath/basic.c b/sys/libmath/basic.c
new file mode 100644
index 0000000..50a4ec2
--- /dev/null
+++ b/sys/libmath/basic.c
@@ -0,0 +1,531 @@
+#include <u.h>
+#include <libn.h>
+#include <libmath.h>
+
+#include <math.h>
+
+// TODO(nnoll): Replace implementations with your own.
+
+double
+math·acos(double x)
+{
+    return acos(x);
+}
+
+float
+math·acosf(float x)
+{
+    return acosf(x);
+}
+
+
+double
+math·acosh(double x)
+{
+    return acosh(x);
+}
+
+float
+math·acoshf(float x)
+{
+    return acoshf(x);
+}
+
+
+double
+math·asin(double x)
+{
+    return asin(x);
+}
+
+float
+math·asinf(float x)
+{
+    return asinf(x);
+}
+
+
+double
+math·asinh(double x)
+{
+    return asinh(x);
+}
+
+float
+math·asinhf(float x)
+{
+    return asinhf(x);
+}
+
+
+double
+math·atan(double x)
+{
+    return atan(x);
+}
+
+float
+math·atanf(float x)
+{
+    return atanf(x);
+}
+
+
+double
+math·atan2(double x, double y)
+{
+    return atan2(x, y);
+}
+
+float
+math·atan2f(float x, float y)
+{
+    return atan2f(x, y);
+}
+
+double
+math·atanh(double x)
+{
+    return atanh(x);
+}
+
+float
+math·atanhf(float x)
+{
+    return atanhf(x);
+}
+
+
+double
+math·cbrt(double x)
+{
+    return cbrt(x);
+}
+
+float
+math·cbrtf(float x)
+{
+    return cbrtf(x);
+}
+
+
+double
+math·ceil(double x)
+{
+    return ceil(x);
+}
+
+float
+math·ceilf(float x)
+{
+    return ceilf(x);
+}
+
+double
+math·cos(double x)
+{
+    return cos(x);
+}
+
+float
+math·cosf(float x)
+{
+    return cosf(x);
+}
+
+
+double
+math·cosh(double x)
+{
+    return cosh(x);
+}
+
+float
+math·coshf(float x)
+{
+    return coshf(x);
+}
+
+
+double
+math·erf(double x)
+{
+    return erf(x);
+}
+
+float
+math·erff(float x)
+{
+    return erff(x);
+}
+
+
+double
+math·erfc(double x)
+{
+    return erfc(x);
+}
+
+float
+math·erfcf(float x)
+{
+    return erfcf(x);
+}
+
+
+double
+math·exp(double x)
+{
+    return exp(x);
+}
+
+float
+math·expf(float x)
+{
+    return expf(x);
+}
+
+
+double
+math·exp2(double x)
+{
+    return exp2(x);
+}
+
+float
+math·exp2f(float x)
+{
+    return exp2f(x);
+}
+
+
+double
+math·expm1(double x)
+{
+    return expm1(x);
+}
+
+float
+math·expm1f(float x)
+{
+    return expm1f(x);
+}
+
+
+double
+math·floor(double x)
+{
+    return floor(x);
+}
+
+float
+math·floorf(float x)
+{
+    return floorf(x);
+}
+
+
+int
+math·ilogb(double x)
+{
+    return ilogb(x);
+}
+
+int
+math·ilogbf(float x)
+{
+    return ilogbf(x);
+}
+
+double
+math·lgamma(double x)
+{
+    return lgamma(x);
+}
+
+float
+math·lgammaf(float x)
+{
+    return lgammaf(x);
+}
+
+
+vlong   
+math·llrint(double x)
+{
+      return math·llrint(x);
+}
+
+vlong
+math·llrintf(float x)
+{
+      return math·llrintf(x);
+}
+
+
+vlong
+math·llround(double x)
+{
+    return llround(x);
+}
+
+vlong
+math·llroundf(float x)
+{
+    return llroundf(x);
+}
+
+
+double
+math·log(double x)
+{
+    return log(x);
+}
+
+float
+math·logf(float x)
+{
+    return logf(x);
+}
+
+
+double
+math·log10(double x)
+{
+    return log10(x);
+}
+
+float
+math·log10f(float x)
+{
+    return log10f(x);
+}
+
+
+double
+math·log1p(double x)
+{
+    return log1p(x);
+}
+
+float
+math·log1pf(float x)
+{
+    return log1pf(x);
+}
+
+
+double
+math·log2(double x)
+{
+    return log2(x);
+}
+
+float
+math·log2f(float x)
+{
+    return log2f(x);
+}
+
+
+double
+math·logb(double x)
+{
+    return logb(x);
+}
+
+float
+math·logbf(float x)
+{
+    return logbf(x);
+}
+
+
+long
+math·lrint(double x)
+{
+    return lrint(x);
+}
+
+long
+math·lrintf(float x)
+{
+    return lrintf(x);
+}
+
+
+long
+math·lround(double x)
+{
+    return lround(x);
+}
+
+long
+math·lroundf(float x)
+{
+    return lroundf(x);
+}
+
+
+double      math·modf(double, double *);
+float       math·modff(float, float *);
+
+double
+math·nan(const char * x)
+{
+    return nan(x);
+}
+
+float
+math·nanf(const char * x)
+{
+    return nanf(x);
+}
+
+
+double
+math·nearbyint(double x)
+{
+    return nearbyint(x);
+}
+
+float
+math·nearbyintf(float x)
+{
+    return nearbyintf(x);
+}
+
+
+double
+math·pow(double x, double exp)
+{
+    return pow(x, exp);
+}
+
+float       
+math·powf(float x, float exp)
+{
+    return powf(x, exp);
+}
+
+double
+math·rint(double x)
+{
+    return rint(x);
+}
+
+float
+math·rintf(float x)
+{
+    return rintf(x);
+}
+
+
+double
+math·round(double x)
+{
+    return round(x);
+}
+
+float
+math·roundf(float x)
+{
+    return roundf(x);
+}
+
+
+double      math·scalbln(double, long);
+float       math·scalblnf(float, long);
+
+double      math·scalbn(double, int);
+float       math·scalbnf(float, int);
+
+double
+math·sin(double x)
+{
+    return sin(x);
+}
+
+float
+math·sinf(float x)
+{
+    return sinf(x);
+}
+
+
+double
+math·sinh(double x)
+{
+    return sinh(x);
+}
+
+float
+math·sinhf(float x)
+{
+    return sinhf(x);
+}
+
+
+double
+math·sqrt(double x)
+{
+    return sqrt(x);
+}
+
+float
+math·sqrtf(float x)
+{
+    return sqrtf(x);
+}
+
+
+double
+math·tan(double x)
+{
+    return tan(x);
+}
+
+float
+math·tanf(float x)
+{
+    return tanf(x);
+}
+
+
+double
+math·tanh(double x)
+{
+    return tanh(x);
+}
+
+float
+math·tanhf(float x)
+{
+    return tanhf(x);
+}
+
+
+double
+math·tgamma(double x)
+{
+    return tgamma(x);
+}
+
+float
+math·tgammaf(float x)
+{
+    return tgammaf(x);
+}
+
+
+double
+math·trunc(double x)
+{
+    return trunc(x);
+}
+
+float
+math·truncf(float x)
+{
+    return truncf(x);
+}
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index bbbe1c8..8343a42 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -1,5 +1,6 @@
 #include <u.h>
 #include <libn.h>
+#include <libmath.h>
 #include <vendor/blas/cblas.h>
 
 #include <x86intrin.h>
@@ -8,6 +9,176 @@
 // -----------------------------------------------------------------------
 // level one
 
+/*
+ * Rotate vector
+ * x = cos*x + sin*y
+ * y = cos*x - sin*y
+ */ 
+
+static
+void
+rot_kernel8_avx2(int n, double *x, double *y, double cos, double sin)
+{
+    register int i;
+    __m256d x256, y256;
+    __m128d cos128, sin128;
+    __m256d cos256, sin256;
+
+    cos128 = _mm_load_sd(&cos);
+    cos256 = _mm256_broadcastsd_pd(cos128);
+
+    sin128 = _mm_load_sd(&sin);
+    sin256 = _mm256_broadcastsd_pd(sin128);
+
+    for (i = 0; i < n; i+=8) {
+        x256 = _mm256_loadu_pd(x+i+0);
+        y256 = _mm256_loadu_pd(y+i+0);
+        _mm256_storeu_pd(x+i+0, cos256 * x256 + sin256 * y256);
+        _mm256_storeu_pd(y+i+0, cos256 * y256 - sin256 * x256);
+
+        x256 = _mm256_loadu_pd(x+i+4);
+        y256 = _mm256_loadu_pd(y+i+4);
+        _mm256_storeu_pd(x+i+4, cos256 * x256 + sin256 * y256);
+        _mm256_storeu_pd(y+i+4, cos256 * y256 - sin256 * x256);
+    }
+}
+
+static
+void
+rot_kernel8(int n, double *x, double *y, double cos, double sin)
+{
+    register int i;
+    register double tmp;
+
+    for (i = 0; i < n; i+=8) {
+        tmp = x[i+0], x[i+0] = cos*x[i+0] + sin*y[i+0], y[i+0] = cos*y[i+0] - sin*tmp;
+        tmp = x[i+1], x[i+1] = cos*x[i+1] + sin*y[i+1], y[i+1] = cos*y[i+1] - sin*tmp;
+        tmp = x[i+2], x[i+2] = cos*x[i+2] + sin*y[i+2], y[i+2] = cos*y[i+2] - sin*tmp;
+        tmp = x[i+3], x[i+3] = cos*x[i+3] + sin*y[i+3], y[i+3] = cos*y[i+3] - sin*tmp;
+        tmp = x[i+4], x[i+4] = cos*x[i+4] + sin*y[i+4], y[i+4] = cos*y[i+4] - sin*tmp;
+        tmp = x[i+5], x[i+5] = cos*x[i+5] + sin*y[i+5], y[i+5] = cos*y[i+5] - sin*tmp;
+        tmp = x[i+6], x[i+6] = cos*x[i+6] + sin*y[i+6], y[i+6] = cos*y[i+6] - sin*tmp;
+        tmp = x[i+7], x[i+7] = cos*x[i+7] + sin*y[i+7], y[i+7] = cos*y[i+7] - sin*tmp;
+    }
+}
+
+void
+blas·rot(int len, double *x, double *y, double cos, double sin)
+{
+    register int n;
+    register double tmp;
+
+    n = len & ~7;
+    rot_kernel8_avx2(n, x, y, cos, sin);
+
+    for (; n < len; n++) {
+        tmp = x[n], x[n] = cos*x[n] + sin*y[n], y[n] = cos*y[n]- sin*tmp;
+    }
+}
+
+/*
+ * compute givens rotate vector
+ * --      --
+ * |+cos -sin| | a | = | r |
+ * |-sin +cos| | b | = | 0 |
+ * --      --
+ */ 
+
+void
+blas·rotg(double *a, double *b, double *cos, double *sin)
+{
+    double abs_a, abs_b, r, rho, scale, z;
+
+    abs_a = math·abs(*a);
+    abs_b = math·abs(*b);
+    rho   = abs_a > abs_b ? *a : *b;
+    scale = abs_a + abs_b;
+
+    if (scale == 0) {
+        *cos = 1, *sin = 0;
+        r = 0.;
+        z = 0.;
+    } else {
+        r    = math·sgn(rho) * scale * math·sqrt(math·pow(abs_a/scale, 2) + math·pow(abs_b/scale, 2));
+        *cos = *a / r;
+        *sin = *b / r;
+        if (abs_a > abs_b)
+            z = *sin;
+        else if (abs_b >= abs_a && *cos != 0)
+            z = 1/(*cos);
+        else 
+            z = 1.;
+    }
+    *a = r;
+    *b = z;
+}
+
+/*
+ * modified Givens rotation of points in plane
+ * operates on len points
+ *
+ * params = [flag, h11, h12, h21, h22]
+ * @flag = -1:
+ *      H -> [ [h11, h12], [h21, h22] ]
+ * @flag = 0.0:
+ *      H -> [ [1, h12], [h21, 1] ]
+ * @flag = +1:
+ *      H -> [ [h11, 1], [-1, h22] ]
+ * @flag = -2:
+ *      H -> [ [1, 0], [0, 1] ]
+ *
+ *  Replaces:
+ *      x -> H11 * x + H12 * y
+ *      y -> H21 * x + H22 * y
+ */
+
+static
+void
+rotm_kernel8(int n, double *x, double *y, double H[4])
+{
+    register int i;
+    register double tmp;
+
+    for (i = 0; i < n; i+=8) {
+        tmp = x[i+0], x[i+0] = H[0]*x[i+0] + H[1]*y[i+0], y[i+0] = H[2]*y[i+0] + H[3]*tmp;
+        tmp = x[i+1], x[i+1] = H[0]*x[i+1] + H[1]*y[i+1], y[i+1] = H[2]*y[i+1] + H[3]*tmp;
+        tmp = x[i+2], x[i+2] = H[0]*x[i+2] + H[1]*y[i+2], y[i+2] = H[2]*y[i+2] + H[3]*tmp;
+        tmp = x[i+3], x[i+3] = H[0]*x[i+3] + H[1]*y[i+3], y[i+3] = H[2]*y[i+3] + H[3]*tmp;
+        tmp = x[i+4], x[i+4] = H[0]*x[i+4] + H[1]*y[i+4], y[i+4] = H[2]*y[i+4] + H[3]*tmp;
+        tmp = x[i+5], x[i+5] = H[0]*x[i+5] + H[1]*y[i+5], y[i+5] = H[2]*y[i+5] + H[3]*tmp;
+        tmp = x[i+6], x[i+6] = H[0]*x[i+6] + H[1]*y[i+6], y[i+6] = H[2]*y[i+6] + H[3]*tmp;
+        tmp = x[i+7], x[i+7] = H[0]*x[i+7] + H[1]*y[i+7], y[i+7] = H[2]*y[i+7] + H[3]*tmp;
+    }
+}
+
+error
+blas·rotm(int len, double *x, double *y, double p[5])
+{
+    int    n, flag;
+    double tmp, H[4];
+
+    flag = math·round(p[0]);
+    switch (flag) {
+        case  0: H[0] = p[1], H[1] = p[2], H[2] = p[3], H[3] = p[4]; break;
+        case -1: H[0] = +1,   H[1] = p[2], H[2] = p[3], H[3] = +1;   break;
+        case +1: H[0] = p[1], H[1] = +1,   H[2] = -1,   H[3] = p[4]; break;
+        case -2: H[0] = +1,   H[1] = 0,    H[2] = 0,    H[3] = +1;   break;
+        default:
+            errorf("rotm: flag '%d' unrecognized", flag);
+            return 1;
+    }
+
+    n = len & ~7;
+    rotm_kernel8(n, x, y, H);
+
+    for (; n < len; n++) {
+        tmp = x[n], x[n] = H[0]*x[n] + H[1]*y[n], y[n] = H[2]*y[n] + H[3]*tmp;
+    }
+
+    return 0;
+}
+
+
 /*
  * Scale vector
  * x = ax
@@ -47,7 +218,7 @@ scale_kernel8(int n, double *x, double a)
 }
 
 void
-blas·scalevec(int len, double *x, double a)
+blas·scale(int len, double *x, double a)
 {
     int n;
 
@@ -403,11 +574,11 @@ blas·gemm(int n1, int n2, int n3, double a, double *m1, double *m2, double b, d
 }
 
 #define NITER 5000
-#define NCOL  57
-#define NROW  57
+#define NCOL  10007
+#define NROW  10007
 
 error
-main()
+test·gemm()
 {
     int i, n;
     clock_t t;
@@ -468,3 +639,61 @@ main()
 
     return 0;
 }
+
+void
+print·array(int n, double *x)
+{
+    double *end;
+    printf("[");
+    for (end=x+n; x != end; ++x) {
+        printf("%f,", *x);
+    }
+    printf("]\n");
+}
+
+error
+main()
+{
+    int i, n;
+    double *x, *y;
+    double tprof[2];
+    clock_t t;
+
+    x = malloc(sizeof(*x)*NCOL);
+    y = malloc(sizeof(*x)*NCOL);
+
+    for (n = 0; n < NITER; n++) {
+        for (i = 0; i < NCOL; i++) {
+            x[i] = i*i+1;
+            y[i] = i+1;
+        }
+
+        t = clock();
+        blas·rot(NCOL, x, y, .707, .707); 
+        t = clock() - t;
+        tprof[0] += 1000.*t/CLOCKS_PER_SEC;
+
+        for (i = 0; i < NCOL; i++) {
+            x[i] = i*i+1;
+            y[i] = i+1;
+        }
+
+        t = clock();
+        cblas_drot(NCOL, x, 1, y, 1, .707, .707);
+        t = clock() - t;
+        tprof[1] += 1000.*t/CLOCKS_PER_SEC;
+    }
+
+    printf("mean time/iteration (naive): %fms\n", tprof[0]/NITER);
+    printf("mean time/iteration (oblas): %fms\n", tprof[1]/NITER);
+
+    double a, b, c, s;
+
+    a = 10.234, b = 2.;
+    cblas_drotg(&a, &b, &c, &s);
+    printf("%f, %f, %f, %f\n", a, b, c, s);
+
+    a = 10.234, b = 2.;
+    blas·rotg(&a, &b, &c, &s);
+    printf("%f, %f, %f, %f\n", a, b, c, s);
+}
diff --git a/sys/libn/random.c b/sys/libn/random.c
index 1e2c2d5..558641e 100644
--- a/sys/libn/random.c
+++ b/sys/libn/random.c
@@ -69,7 +69,7 @@ double
 rng·random()
 {
     uint64 r = xoshiro256ss(&RNG); 
-    return (double)r / UINT64_MAX;
+    return (double)r / (double)UINT64_MAX;
 }
 
 /* Returns true or false on success of trial */
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 4280e27..ca53bdc 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -180,6 +180,7 @@ str·makecap(const byte* s, vlong len, vlong cap)
 cleanup:
     free(h);
     panicf("Attempted to create a string with less capacity than length");
+    return nil;
 }
 
 // New returns a new dynamic string object, initialized from the given C string.
-- 
cgit v1.2.1