From 36117f59ec77784c9ef77801d7c1cbf03a4c4a8b Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 7 May 2020 15:35:06 -0700 Subject: wrap: elementary math functions for libmath --- sys/libmath/basic.c | 531 ++++++++++++++++++++++++++++++++++++++++++++++++++++ sys/libmath/blas.c | 237 ++++++++++++++++++++++- sys/libn/random.c | 2 +- sys/libn/string.c | 1 + 4 files changed, 766 insertions(+), 5 deletions(-) create mode 100644 sys/libmath/basic.c (limited to 'sys') diff --git a/sys/libmath/basic.c b/sys/libmath/basic.c new file mode 100644 index 0000000..50a4ec2 --- /dev/null +++ b/sys/libmath/basic.c @@ -0,0 +1,531 @@ +#include +#include +#include + +#include + +// TODO(nnoll): Replace implementations with your own. + +double +math·acos(double x) +{ + return acos(x); +} + +float +math·acosf(float x) +{ + return acosf(x); +} + + +double +math·acosh(double x) +{ + return acosh(x); +} + +float +math·acoshf(float x) +{ + return acoshf(x); +} + + +double +math·asin(double x) +{ + return asin(x); +} + +float +math·asinf(float x) +{ + return asinf(x); +} + + +double +math·asinh(double x) +{ + return asinh(x); +} + +float +math·asinhf(float x) +{ + return asinhf(x); +} + + +double +math·atan(double x) +{ + return atan(x); +} + +float +math·atanf(float x) +{ + return atanf(x); +} + + +double +math·atan2(double x, double y) +{ + return atan2(x, y); +} + +float +math·atan2f(float x, float y) +{ + return atan2f(x, y); +} + +double +math·atanh(double x) +{ + return atanh(x); +} + +float +math·atanhf(float x) +{ + return atanhf(x); +} + + +double +math·cbrt(double x) +{ + return cbrt(x); +} + +float +math·cbrtf(float x) +{ + return cbrtf(x); +} + + +double +math·ceil(double x) +{ + return ceil(x); +} + +float +math·ceilf(float x) +{ + return ceilf(x); +} + +double +math·cos(double x) +{ + return cos(x); +} + +float +math·cosf(float x) +{ + return cosf(x); +} + + +double +math·cosh(double x) +{ + return cosh(x); +} + +float +math·coshf(float x) +{ + return coshf(x); +} + + +double +math·erf(double x) +{ + return erf(x); +} + +float +math·erff(float x) +{ + return erff(x); +} + + +double +math·erfc(double x) +{ + return erfc(x); +} + +float +math·erfcf(float x) +{ + return erfcf(x); +} + + +double +math·exp(double x) +{ + return exp(x); +} + +float +math·expf(float x) +{ + return expf(x); +} + + +double +math·exp2(double x) +{ + return exp2(x); +} + +float +math·exp2f(float x) +{ + return exp2f(x); +} + + +double +math·expm1(double x) +{ + return expm1(x); +} + +float +math·expm1f(float x) +{ + return expm1f(x); +} + + +double +math·floor(double x) +{ + return floor(x); +} + +float +math·floorf(float x) +{ + return floorf(x); +} + + +int +math·ilogb(double x) +{ + return ilogb(x); +} + +int +math·ilogbf(float x) +{ + return ilogbf(x); +} + +double +math·lgamma(double x) +{ + return lgamma(x); +} + +float +math·lgammaf(float x) +{ + return lgammaf(x); +} + + +vlong +math·llrint(double x) +{ + return math·llrint(x); +} + +vlong +math·llrintf(float x) +{ + return math·llrintf(x); +} + + +vlong +math·llround(double x) +{ + return llround(x); +} + +vlong +math·llroundf(float x) +{ + return llroundf(x); +} + + +double +math·log(double x) +{ + return log(x); +} + +float +math·logf(float x) +{ + return logf(x); +} + + +double +math·log10(double x) +{ + return log10(x); +} + +float +math·log10f(float x) +{ + return log10f(x); +} + + +double +math·log1p(double x) +{ + return log1p(x); +} + +float +math·log1pf(float x) +{ + return log1pf(x); +} + + +double +math·log2(double x) +{ + return log2(x); +} + +float +math·log2f(float x) +{ + return log2f(x); +} + + +double +math·logb(double x) +{ + return logb(x); +} + +float +math·logbf(float x) +{ + return logbf(x); +} + + +long +math·lrint(double x) +{ + return lrint(x); +} + +long +math·lrintf(float x) +{ + return lrintf(x); +} + + +long +math·lround(double x) +{ + return lround(x); +} + +long +math·lroundf(float x) +{ + return lroundf(x); +} + + +double math·modf(double, double *); +float math·modff(float, float *); + +double +math·nan(const char * x) +{ + return nan(x); +} + +float +math·nanf(const char * x) +{ + return nanf(x); +} + + +double +math·nearbyint(double x) +{ + return nearbyint(x); +} + +float +math·nearbyintf(float x) +{ + return nearbyintf(x); +} + + +double +math·pow(double x, double exp) +{ + return pow(x, exp); +} + +float +math·powf(float x, float exp) +{ + return powf(x, exp); +} + +double +math·rint(double x) +{ + return rint(x); +} + +float +math·rintf(float x) +{ + return rintf(x); +} + + +double +math·round(double x) +{ + return round(x); +} + +float +math·roundf(float x) +{ + return roundf(x); +} + + +double math·scalbln(double, long); +float math·scalblnf(float, long); + +double math·scalbn(double, int); +float math·scalbnf(float, int); + +double +math·sin(double x) +{ + return sin(x); +} + +float +math·sinf(float x) +{ + return sinf(x); +} + + +double +math·sinh(double x) +{ + return sinh(x); +} + +float +math·sinhf(float x) +{ + return sinhf(x); +} + + +double +math·sqrt(double x) +{ + return sqrt(x); +} + +float +math·sqrtf(float x) +{ + return sqrtf(x); +} + + +double +math·tan(double x) +{ + return tan(x); +} + +float +math·tanf(float x) +{ + return tanf(x); +} + + +double +math·tanh(double x) +{ + return tanh(x); +} + +float +math·tanhf(float x) +{ + return tanhf(x); +} + + +double +math·tgamma(double x) +{ + return tgamma(x); +} + +float +math·tgammaf(float x) +{ + return tgammaf(x); +} + + +double +math·trunc(double x) +{ + return trunc(x); +} + +float +math·truncf(float x) +{ + return truncf(x); +} diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c index bbbe1c8..8343a42 100644 --- a/sys/libmath/blas.c +++ b/sys/libmath/blas.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -8,6 +9,176 @@ // ----------------------------------------------------------------------- // level one +/* + * Rotate vector + * x = cos*x + sin*y + * y = cos*x - sin*y + */ + +static +void +rot_kernel8_avx2(int n, double *x, double *y, double cos, double sin) +{ + register int i; + __m256d x256, y256; + __m128d cos128, sin128; + __m256d cos256, sin256; + + cos128 = _mm_load_sd(&cos); + cos256 = _mm256_broadcastsd_pd(cos128); + + sin128 = _mm_load_sd(&sin); + sin256 = _mm256_broadcastsd_pd(sin128); + + for (i = 0; i < n; i+=8) { + x256 = _mm256_loadu_pd(x+i+0); + y256 = _mm256_loadu_pd(y+i+0); + _mm256_storeu_pd(x+i+0, cos256 * x256 + sin256 * y256); + _mm256_storeu_pd(y+i+0, cos256 * y256 - sin256 * x256); + + x256 = _mm256_loadu_pd(x+i+4); + y256 = _mm256_loadu_pd(y+i+4); + _mm256_storeu_pd(x+i+4, cos256 * x256 + sin256 * y256); + _mm256_storeu_pd(y+i+4, cos256 * y256 - sin256 * x256); + } +} + +static +void +rot_kernel8(int n, double *x, double *y, double cos, double sin) +{ + register int i; + register double tmp; + + for (i = 0; i < n; i+=8) { + tmp = x[i+0], x[i+0] = cos*x[i+0] + sin*y[i+0], y[i+0] = cos*y[i+0] - sin*tmp; + tmp = x[i+1], x[i+1] = cos*x[i+1] + sin*y[i+1], y[i+1] = cos*y[i+1] - sin*tmp; + tmp = x[i+2], x[i+2] = cos*x[i+2] + sin*y[i+2], y[i+2] = cos*y[i+2] - sin*tmp; + tmp = x[i+3], x[i+3] = cos*x[i+3] + sin*y[i+3], y[i+3] = cos*y[i+3] - sin*tmp; + tmp = x[i+4], x[i+4] = cos*x[i+4] + sin*y[i+4], y[i+4] = cos*y[i+4] - sin*tmp; + tmp = x[i+5], x[i+5] = cos*x[i+5] + sin*y[i+5], y[i+5] = cos*y[i+5] - sin*tmp; + tmp = x[i+6], x[i+6] = cos*x[i+6] + sin*y[i+6], y[i+6] = cos*y[i+6] - sin*tmp; + tmp = x[i+7], x[i+7] = cos*x[i+7] + sin*y[i+7], y[i+7] = cos*y[i+7] - sin*tmp; + } +} + +void +blas·rot(int len, double *x, double *y, double cos, double sin) +{ + register int n; + register double tmp; + + n = len & ~7; + rot_kernel8_avx2(n, x, y, cos, sin); + + for (; n < len; n++) { + tmp = x[n], x[n] = cos*x[n] + sin*y[n], y[n] = cos*y[n]- sin*tmp; + } +} + +/* + * compute givens rotate vector + * -- -- + * |+cos -sin| | a | = | r | + * |-sin +cos| | b | = | 0 | + * -- -- + */ + +void +blas·rotg(double *a, double *b, double *cos, double *sin) +{ + double abs_a, abs_b, r, rho, scale, z; + + abs_a = math·abs(*a); + abs_b = math·abs(*b); + rho = abs_a > abs_b ? *a : *b; + scale = abs_a + abs_b; + + if (scale == 0) { + *cos = 1, *sin = 0; + r = 0.; + z = 0.; + } else { + r = math·sgn(rho) * scale * math·sqrt(math·pow(abs_a/scale, 2) + math·pow(abs_b/scale, 2)); + *cos = *a / r; + *sin = *b / r; + if (abs_a > abs_b) + z = *sin; + else if (abs_b >= abs_a && *cos != 0) + z = 1/(*cos); + else + z = 1.; + } + *a = r; + *b = z; +} + +/* + * modified Givens rotation of points in plane + * operates on len points + * + * params = [flag, h11, h12, h21, h22] + * @flag = -1: + * H -> [ [h11, h12], [h21, h22] ] + * @flag = 0.0: + * H -> [ [1, h12], [h21, 1] ] + * @flag = +1: + * H -> [ [h11, 1], [-1, h22] ] + * @flag = -2: + * H -> [ [1, 0], [0, 1] ] + * + * Replaces: + * x -> H11 * x + H12 * y + * y -> H21 * x + H22 * y + */ + +static +void +rotm_kernel8(int n, double *x, double *y, double H[4]) +{ + register int i; + register double tmp; + + for (i = 0; i < n; i+=8) { + tmp = x[i+0], x[i+0] = H[0]*x[i+0] + H[1]*y[i+0], y[i+0] = H[2]*y[i+0] + H[3]*tmp; + tmp = x[i+1], x[i+1] = H[0]*x[i+1] + H[1]*y[i+1], y[i+1] = H[2]*y[i+1] + H[3]*tmp; + tmp = x[i+2], x[i+2] = H[0]*x[i+2] + H[1]*y[i+2], y[i+2] = H[2]*y[i+2] + H[3]*tmp; + tmp = x[i+3], x[i+3] = H[0]*x[i+3] + H[1]*y[i+3], y[i+3] = H[2]*y[i+3] + H[3]*tmp; + tmp = x[i+4], x[i+4] = H[0]*x[i+4] + H[1]*y[i+4], y[i+4] = H[2]*y[i+4] + H[3]*tmp; + tmp = x[i+5], x[i+5] = H[0]*x[i+5] + H[1]*y[i+5], y[i+5] = H[2]*y[i+5] + H[3]*tmp; + tmp = x[i+6], x[i+6] = H[0]*x[i+6] + H[1]*y[i+6], y[i+6] = H[2]*y[i+6] + H[3]*tmp; + tmp = x[i+7], x[i+7] = H[0]*x[i+7] + H[1]*y[i+7], y[i+7] = H[2]*y[i+7] + H[3]*tmp; + } +} + +error +blas·rotm(int len, double *x, double *y, double p[5]) +{ + int n, flag; + double tmp, H[4]; + + flag = math·round(p[0]); + switch (flag) { + case 0: H[0] = p[1], H[1] = p[2], H[2] = p[3], H[3] = p[4]; break; + case -1: H[0] = +1, H[1] = p[2], H[2] = p[3], H[3] = +1; break; + case +1: H[0] = p[1], H[1] = +1, H[2] = -1, H[3] = p[4]; break; + case -2: H[0] = +1, H[1] = 0, H[2] = 0, H[3] = +1; break; + default: + errorf("rotm: flag '%d' unrecognized", flag); + return 1; + } + + n = len & ~7; + rotm_kernel8(n, x, y, H); + + for (; n < len; n++) { + tmp = x[n], x[n] = H[0]*x[n] + H[1]*y[n], y[n] = H[2]*y[n] + H[3]*tmp; + } + + return 0; +} + + /* * Scale vector * x = ax @@ -47,7 +218,7 @@ scale_kernel8(int n, double *x, double a) } void -blas·scalevec(int len, double *x, double a) +blas·scale(int len, double *x, double a) { int n; @@ -403,11 +574,11 @@ blas·gemm(int n1, int n2, int n3, double a, double *m1, double *m2, double b, d } #define NITER 5000 -#define NCOL 57 -#define NROW 57 +#define NCOL 10007 +#define NROW 10007 error -main() +test·gemm() { int i, n; clock_t t; @@ -468,3 +639,61 @@ main() return 0; } + +void +print·array(int n, double *x) +{ + double *end; + printf("["); + for (end=x+n; x != end; ++x) { + printf("%f,", *x); + } + printf("]\n"); +} + +error +main() +{ + int i, n; + double *x, *y; + double tprof[2]; + clock_t t; + + x = malloc(sizeof(*x)*NCOL); + y = malloc(sizeof(*x)*NCOL); + + for (n = 0; n < NITER; n++) { + for (i = 0; i < NCOL; i++) { + x[i] = i*i+1; + y[i] = i+1; + } + + t = clock(); + blas·rot(NCOL, x, y, .707, .707); + t = clock() - t; + tprof[0] += 1000.*t/CLOCKS_PER_SEC; + + for (i = 0; i < NCOL; i++) { + x[i] = i*i+1; + y[i] = i+1; + } + + t = clock(); + cblas_drot(NCOL, x, 1, y, 1, .707, .707); + t = clock() - t; + tprof[1] += 1000.*t/CLOCKS_PER_SEC; + } + + printf("mean time/iteration (naive): %fms\n", tprof[0]/NITER); + printf("mean time/iteration (oblas): %fms\n", tprof[1]/NITER); + + double a, b, c, s; + + a = 10.234, b = 2.; + cblas_drotg(&a, &b, &c, &s); + printf("%f, %f, %f, %f\n", a, b, c, s); + + a = 10.234, b = 2.; + blas·rotg(&a, &b, &c, &s); + printf("%f, %f, %f, %f\n", a, b, c, s); +} diff --git a/sys/libn/random.c b/sys/libn/random.c index 1e2c2d5..558641e 100644 --- a/sys/libn/random.c +++ b/sys/libn/random.c @@ -69,7 +69,7 @@ double rng·random() { uint64 r = xoshiro256ss(&RNG); - return (double)r / UINT64_MAX; + return (double)r / (double)UINT64_MAX; } /* Returns true or false on success of trial */ diff --git a/sys/libn/string.c b/sys/libn/string.c index 4280e27..ca53bdc 100644 --- a/sys/libn/string.c +++ b/sys/libn/string.c @@ -180,6 +180,7 @@ str·makecap(const byte* s, vlong len, vlong cap) cleanup: free(h); panicf("Attempted to create a string with less capacity than length"); + return nil; } // New returns a new dynamic string object, initialized from the given C string. -- cgit v1.2.1