From 36117f59ec77784c9ef77801d7c1cbf03a4c4a8b Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 7 May 2020 15:35:06 -0700 Subject: wrap: elementary math functions for libmath --- compile_commands.json | 170 +++++++++------- include/libmath.h | 147 ++++++++++++-- include/libn.h | 4 + rules.mk | 2 +- sys/libmath/basic.c | 531 ++++++++++++++++++++++++++++++++++++++++++++++++++ sys/libmath/blas.c | 237 +++++++++++++++++++++- sys/libn/random.c | 2 +- sys/libn/string.c | 1 + 8 files changed, 998 insertions(+), 96 deletions(-) create mode 100644 sys/libmath/basic.c diff --git a/compile_commands.json b/compile_commands.json index acb788e..cbb7712 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -11,19 +11,16 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", - "-ffreestanding", - "-fno-builtin", - "-nostdlib", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libc/string.o", - "sys/libc/string.c" + "build/libn/coro.o", + "sys/libn/coro.c" ], "directory": "/home/nolln/root", - "file": "sys/libc/string.c" + "file": "sys/libn/coro.c" }, { "arguments": [ @@ -42,11 +39,11 @@ "-I", "include", "-o", - "build/libn/bufio.o", - "sys/libn/bufio.c" + "build/libn/memory.o", + "sys/libn/memory.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/bufio.c" + "file": "sys/libn/memory.c" }, { "arguments": [ @@ -60,16 +57,19 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-ffreestanding", + "-fno-builtin", + "-nostdlib", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libbio/phylo.o", - "sys/libbio/phylo.c" + "build/libc/string.o", + "sys/libc/string.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/phylo.c" + "file": "sys/libc/string.c" }, { "arguments": [ @@ -83,16 +83,17 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libn/flate.o", - "sys/libn/flate.c" + "build/libmath/lapack.o", + "sys/libmath/lapack.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/flate.c" + "file": "sys/libmath/lapack.c" }, { "arguments": [ @@ -111,11 +112,11 @@ "-I", "include", "-o", - "build/libn/sort.o", - "sys/libn/sort.c" + "build/libn/random.o", + "sys/libn/random.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/sort.c" + "file": "sys/libn/random.c" }, { "arguments": [ @@ -134,11 +135,11 @@ "-I", "include", "-o", - "build/libbio/test.o", - "sys/libbio/test.c" + "build/libn/test.o", + "sys/libn/test.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/test.c" + "file": "sys/libn/test.c" }, { "arguments": [ @@ -152,16 +153,17 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libbio/align.o", - "sys/libbio/align.c" + "build/libmath/blas.o", + "sys/libmath/blas.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/align.c" + "file": "sys/libmath/blas.c" }, { "arguments": [ @@ -181,11 +183,11 @@ "-I", "include", "-o", - "build/libmath/blas.o", - "sys/libmath/blas.c" + "build/libmath/test.o", + "sys/libmath/test.c" ], "directory": "/home/nolln/root", - "file": "sys/libmath/blas.c" + "file": "sys/libmath/test.c" }, { "arguments": [ @@ -204,11 +206,11 @@ "-I", "include", "-o", - "build/libn/memory.o", - "sys/libn/memory.c" + "build/libbio/test.o", + "sys/libbio/test.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/memory.c" + "file": "sys/libbio/test.c" }, { "arguments": [ @@ -227,11 +229,11 @@ "-I", "include", "-o", - "build/libn/mmap.o", - "sys/libn/mmap.c" + "build/libn/io.o", + "sys/libn/io.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/mmap.c" + "file": "sys/libn/io.c" }, { "arguments": [ @@ -250,11 +252,11 @@ "-I", "include", "-o", - "build/libbio/io/fasta.o", - "sys/libbio/io/fasta.c" + "build/libbio/phylo.o", + "sys/libbio/phylo.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/io/fasta.c" + "file": "sys/libbio/phylo.c" }, { "arguments": [ @@ -273,11 +275,11 @@ "-I", "include", "-o", - "build/libbio/simulate.o", - "sys/libbio/simulate.c" + "build/libn/sort.o", + "sys/libn/sort.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/simulate.c" + "file": "sys/libn/sort.c" }, { "arguments": [ @@ -291,16 +293,19 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", + "-ffreestanding", + "-fno-builtin", + "-nostdlib", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libn/random.o", - "sys/libn/random.c" + "build/libc/stdio.o", + "sys/libc/stdio.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/random.c" + "file": "sys/libc/stdio.c" }, { "arguments": [ @@ -314,17 +319,16 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", - "-D_GNU_SOURCE", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libmath/test.o", - "sys/libmath/test.c" + "build/libn/gz.o", + "sys/libn/gz.c" ], "directory": "/home/nolln/root", - "file": "sys/libmath/test.c" + "file": "sys/libn/gz.c" }, { "arguments": [ @@ -343,11 +347,11 @@ "-I", "include", "-o", - "build/libbio/io/newick.o", - "sys/libbio/io/newick.c" + "build/libn/error.o", + "sys/libn/error.c" ], "directory": "/home/nolln/root", - "file": "sys/libbio/io/newick.c" + "file": "sys/libn/error.c" }, { "arguments": [ @@ -366,11 +370,11 @@ "-I", "include", "-o", - "build/libn/string.o", - "sys/libn/string.c" + "build/libn/bufio.o", + "sys/libn/bufio.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/string.c" + "file": "sys/libn/bufio.c" }, { "arguments": [ @@ -384,19 +388,16 @@ "-fms-extensions", "-Wno-microsoft-anon-tag", "-Wno-incompatible-function-pointer-types", - "-ffreestanding", - "-fno-builtin", - "-nostdlib", "-isystem", "include/vendor/libc", "-I", "include", "-o", - "build/libc/stdio.o", - "sys/libc/stdio.c" + "build/libbio/align.o", + "sys/libbio/align.c" ], "directory": "/home/nolln/root", - "file": "sys/libc/stdio.c" + "file": "sys/libbio/align.c" }, { "arguments": [ @@ -415,11 +416,11 @@ "-I", "include", "-o", - "build/libn/test.o", - "sys/libn/test.c" + "build/libbio/io/newick.o", + "sys/libbio/io/newick.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/test.c" + "file": "sys/libbio/io/newick.c" }, { "arguments": [ @@ -438,11 +439,11 @@ "-I", "include", "-o", - "build/libn/gz.o", - "sys/libn/gz.c" + "build/libn/mmap.o", + "sys/libn/mmap.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/gz.c" + "file": "sys/libn/mmap.c" }, { "arguments": [ @@ -461,11 +462,11 @@ "-I", "include", "-o", - "build/libn/coro.o", - "sys/libn/coro.c" + "build/libn/flate.o", + "sys/libn/flate.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/coro.c" + "file": "sys/libn/flate.c" }, { "arguments": [ @@ -484,11 +485,11 @@ "-I", "include", "-o", - "build/libn/io.o", - "sys/libn/io.c" + "build/libbio/io/fasta.o", + "sys/libbio/io/fasta.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/io.c" + "file": "sys/libbio/io/fasta.c" }, { "arguments": [ @@ -507,10 +508,33 @@ "-I", "include", "-o", - "build/libn/error.o", - "sys/libn/error.c" + "build/libbio/simulate.o", + "sys/libbio/simulate.c" ], "directory": "/home/nolln/root", - "file": "sys/libn/error.c" + "file": "sys/libbio/simulate.c" + }, + { + "arguments": [ + "clang", + "-c", + "-g", + "-march=native", + "-ffast-math", + "-fno-strict-aliasing", + "-fwrapv", + "-fms-extensions", + "-Wno-microsoft-anon-tag", + "-Wno-incompatible-function-pointer-types", + "-isystem", + "include/vendor/libc", + "-I", + "include", + "-o", + "build/libn/string.o", + "sys/libn/string.c" + ], + "directory": "/home/nolln/root", + "file": "sys/libn/string.c" } ] \ No newline at end of file diff --git a/include/libmath.h b/include/libmath.h index 03e8e1a..ec15f6f 100644 --- a/include/libmath.h +++ b/include/libmath.h @@ -1,20 +1,133 @@ #pragma once // ----------------------------------------------------------------------- -// Linear algebra - -/* - * Vector (double precision) - * len : dimension of space - * inc : stride to access in buffer - * buf : pointer to start of data - * NOTE: minimum buffer size MUST be len*inc - */ -typedef struct math·Vecd -{ - int len; - int inc; - double *buf; -} math·Vecd; - -double linalg·dotd(math·Vecd a, math·Vecd b); +// basic macros + +#define math·abs(x) (((x) > 0) ? (x) : (-(x))) +#define math·sgn(x) (((x) > 0) ? (+1) : (((x) == 0) ? (0) : (-1) )) + +// ----------------------------------------------------------------------- +// elementary functions + +double math·acos(double); +float math·acosf(float); + +double math·acosh(double); +float math·acoshf(float); + +double math·asin(double); +float math·asinf(float); + +double math·asinh(double); +float math·asinhf(float); + +double math·atan(double); +float math·atanf(float); + +double math·atan2(double, double); +float math·atan2f(float, float); + +double math·atanh(double); +float math·atanhf(float); + +double math·cbrt(double); +float math·cbrtf(float); + +double math·ceil(double); +float math·ceilf(float); + +double math·cos(double); +float math·cosf(float); + +double math·cosh(double); +float math·coshf(float); + +double math·erf(double); +float math·erff(float); + +double math·erfc(double); +float math·erfcf(float); + +double math·exp(double); +float math·expf(float); + +double math·exp2(double); +float math·exp2f(float); + +double math·expm1(double); +float math·expm1f(float); + +double math·floor(double); +float math·floorf(float); + +int math·ilogb(double); +int math·ilogbf(float); + +double math·lgamma(double); +float math·lgammaf(float); + +long long math·llrint(double); +long long math·llrintf(float); + +long long math·llround(double); +long long math·llroundf(float); + +double math·log(double); +float math·logf(float); + +double math·log10(double); +float math·log10f(float); + +double math·log1p(double); +float math·log1pf(float); + +double math·log2(double); +float math·log2f(float); + +double math·logb(double); +float math·logbf(float); + +long math·lrint(double); +long math·lrintf(float); + +long math·lround(double); +long math·lroundf(float); + +double math·nan(const char *); +float math·nanf(const char *); + +double math·nearbyint(double); +float math·nearbyintf(float); + +double math·pow(double, double); +float math·powf(float, float); + +double math·rint(double); +float math·rintf(float); + +double math·round(double); +float math·roundf(float); + +double math·sin(double); +float math·sinf(float); + +double math·sinh(double); +float math·sinhf(float); + +double math·sqrt(double); +float math·sqrtf(float); + +double math·tan(double); +float math·tanf(float); + +double math·tanh(double); +float math·tanhf(float); + +double math·tgamma(double); +float math·tgammaf(float); + +double math·trunc(double); +float math·truncf(float); + +// ----------------------------------------------------------------------- +// linear algebra diff --git a/include/libn.h b/include/libn.h index 8e30eae..4efe3c1 100644 --- a/include/libn.h +++ b/include/libn.h @@ -285,6 +285,10 @@ int gz·printf(gz·Stream *s, byte *fmt, ...); error gz·flush(gz·Stream *s); vlong gz·seek(gz·Stream *s, long off, enum SeekPos whence); +// ----------------------------------------------------------------------------- +// libjson +// NOTE: Experimental! + // ----------------------------------------------------------------------------- // error handling functions diff --git a/rules.mk b/rules.mk index 7fc6a1f..ce6f3e4 100644 --- a/rules.mk +++ b/rules.mk @@ -8,7 +8,7 @@ all: targets debug: CFLAGS += -DDEBUG debug: targets -release: CFLAGS += -DNDEBUG -O3 +release: CFLAGS += -DNDEBUG -O3 -flto release: targets # Targets & array of sources & intermediates diff --git a/sys/libmath/basic.c b/sys/libmath/basic.c new file mode 100644 index 0000000..50a4ec2 --- /dev/null +++ b/sys/libmath/basic.c @@ -0,0 +1,531 @@ +#include +#include +#include + +#include + +// TODO(nnoll): Replace implementations with your own. + +double +math·acos(double x) +{ + return acos(x); +} + +float +math·acosf(float x) +{ + return acosf(x); +} + + +double +math·acosh(double x) +{ + return acosh(x); +} + +float +math·acoshf(float x) +{ + return acoshf(x); +} + + +double +math·asin(double x) +{ + return asin(x); +} + +float +math·asinf(float x) +{ + return asinf(x); +} + + +double +math·asinh(double x) +{ + return asinh(x); +} + +float +math·asinhf(float x) +{ + return asinhf(x); +} + + +double +math·atan(double x) +{ + return atan(x); +} + +float +math·atanf(float x) +{ + return atanf(x); +} + + +double +math·atan2(double x, double y) +{ + return atan2(x, y); +} + +float +math·atan2f(float x, float y) +{ + return atan2f(x, y); +} + +double +math·atanh(double x) +{ + return atanh(x); +} + +float +math·atanhf(float x) +{ + return atanhf(x); +} + + +double +math·cbrt(double x) +{ + return cbrt(x); +} + +float +math·cbrtf(float x) +{ + return cbrtf(x); +} + + +double +math·ceil(double x) +{ + return ceil(x); +} + +float +math·ceilf(float x) +{ + return ceilf(x); +} + +double +math·cos(double x) +{ + return cos(x); +} + +float +math·cosf(float x) +{ + return cosf(x); +} + + +double +math·cosh(double x) +{ + return cosh(x); +} + +float +math·coshf(float x) +{ + return coshf(x); +} + + +double +math·erf(double x) +{ + return erf(x); +} + +float +math·erff(float x) +{ + return erff(x); +} + + +double +math·erfc(double x) +{ + return erfc(x); +} + +float +math·erfcf(float x) +{ + return erfcf(x); +} + + +double +math·exp(double x) +{ + return exp(x); +} + +float +math·expf(float x) +{ + return expf(x); +} + + +double +math·exp2(double x) +{ + return exp2(x); +} + +float +math·exp2f(float x) +{ + return exp2f(x); +} + + +double +math·expm1(double x) +{ + return expm1(x); +} + +float +math·expm1f(float x) +{ + return expm1f(x); +} + + +double +math·floor(double x) +{ + return floor(x); +} + +float +math·floorf(float x) +{ + return floorf(x); +} + + +int +math·ilogb(double x) +{ + return ilogb(x); +} + +int +math·ilogbf(float x) +{ + return ilogbf(x); +} + +double +math·lgamma(double x) +{ + return lgamma(x); +} + +float +math·lgammaf(float x) +{ + return lgammaf(x); +} + + +vlong +math·llrint(double x) +{ + return math·llrint(x); +} + +vlong +math·llrintf(float x) +{ + return math·llrintf(x); +} + + +vlong +math·llround(double x) +{ + return llround(x); +} + +vlong +math·llroundf(float x) +{ + return llroundf(x); +} + + +double +math·log(double x) +{ + return log(x); +} + +float +math·logf(float x) +{ + return logf(x); +} + + +double +math·log10(double x) +{ + return log10(x); +} + +float +math·log10f(float x) +{ + return log10f(x); +} + + +double +math·log1p(double x) +{ + return log1p(x); +} + +float +math·log1pf(float x) +{ + return log1pf(x); +} + + +double +math·log2(double x) +{ + return log2(x); +} + +float +math·log2f(float x) +{ + return log2f(x); +} + + +double +math·logb(double x) +{ + return logb(x); +} + +float +math·logbf(float x) +{ + return logbf(x); +} + + +long +math·lrint(double x) +{ + return lrint(x); +} + +long +math·lrintf(float x) +{ + return lrintf(x); +} + + +long +math·lround(double x) +{ + return lround(x); +} + +long +math·lroundf(float x) +{ + return lroundf(x); +} + + +double math·modf(double, double *); +float math·modff(float, float *); + +double +math·nan(const char * x) +{ + return nan(x); +} + +float +math·nanf(const char * x) +{ + return nanf(x); +} + + +double +math·nearbyint(double x) +{ + return nearbyint(x); +} + +float +math·nearbyintf(float x) +{ + return nearbyintf(x); +} + + +double +math·pow(double x, double exp) +{ + return pow(x, exp); +} + +float +math·powf(float x, float exp) +{ + return powf(x, exp); +} + +double +math·rint(double x) +{ + return rint(x); +} + +float +math·rintf(float x) +{ + return rintf(x); +} + + +double +math·round(double x) +{ + return round(x); +} + +float +math·roundf(float x) +{ + return roundf(x); +} + + +double math·scalbln(double, long); +float math·scalblnf(float, long); + +double math·scalbn(double, int); +float math·scalbnf(float, int); + +double +math·sin(double x) +{ + return sin(x); +} + +float +math·sinf(float x) +{ + return sinf(x); +} + + +double +math·sinh(double x) +{ + return sinh(x); +} + +float +math·sinhf(float x) +{ + return sinhf(x); +} + + +double +math·sqrt(double x) +{ + return sqrt(x); +} + +float +math·sqrtf(float x) +{ + return sqrtf(x); +} + + +double +math·tan(double x) +{ + return tan(x); +} + +float +math·tanf(float x) +{ + return tanf(x); +} + + +double +math·tanh(double x) +{ + return tanh(x); +} + +float +math·tanhf(float x) +{ + return tanhf(x); +} + + +double +math·tgamma(double x) +{ + return tgamma(x); +} + +float +math·tgammaf(float x) +{ + return tgammaf(x); +} + + +double +math·trunc(double x) +{ + return trunc(x); +} + +float +math·truncf(float x) +{ + return truncf(x); +} diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c index bbbe1c8..8343a42 100644 --- a/sys/libmath/blas.c +++ b/sys/libmath/blas.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -8,6 +9,176 @@ // ----------------------------------------------------------------------- // level one +/* + * Rotate vector + * x = cos*x + sin*y + * y = cos*x - sin*y + */ + +static +void +rot_kernel8_avx2(int n, double *x, double *y, double cos, double sin) +{ + register int i; + __m256d x256, y256; + __m128d cos128, sin128; + __m256d cos256, sin256; + + cos128 = _mm_load_sd(&cos); + cos256 = _mm256_broadcastsd_pd(cos128); + + sin128 = _mm_load_sd(&sin); + sin256 = _mm256_broadcastsd_pd(sin128); + + for (i = 0; i < n; i+=8) { + x256 = _mm256_loadu_pd(x+i+0); + y256 = _mm256_loadu_pd(y+i+0); + _mm256_storeu_pd(x+i+0, cos256 * x256 + sin256 * y256); + _mm256_storeu_pd(y+i+0, cos256 * y256 - sin256 * x256); + + x256 = _mm256_loadu_pd(x+i+4); + y256 = _mm256_loadu_pd(y+i+4); + _mm256_storeu_pd(x+i+4, cos256 * x256 + sin256 * y256); + _mm256_storeu_pd(y+i+4, cos256 * y256 - sin256 * x256); + } +} + +static +void +rot_kernel8(int n, double *x, double *y, double cos, double sin) +{ + register int i; + register double tmp; + + for (i = 0; i < n; i+=8) { + tmp = x[i+0], x[i+0] = cos*x[i+0] + sin*y[i+0], y[i+0] = cos*y[i+0] - sin*tmp; + tmp = x[i+1], x[i+1] = cos*x[i+1] + sin*y[i+1], y[i+1] = cos*y[i+1] - sin*tmp; + tmp = x[i+2], x[i+2] = cos*x[i+2] + sin*y[i+2], y[i+2] = cos*y[i+2] - sin*tmp; + tmp = x[i+3], x[i+3] = cos*x[i+3] + sin*y[i+3], y[i+3] = cos*y[i+3] - sin*tmp; + tmp = x[i+4], x[i+4] = cos*x[i+4] + sin*y[i+4], y[i+4] = cos*y[i+4] - sin*tmp; + tmp = x[i+5], x[i+5] = cos*x[i+5] + sin*y[i+5], y[i+5] = cos*y[i+5] - sin*tmp; + tmp = x[i+6], x[i+6] = cos*x[i+6] + sin*y[i+6], y[i+6] = cos*y[i+6] - sin*tmp; + tmp = x[i+7], x[i+7] = cos*x[i+7] + sin*y[i+7], y[i+7] = cos*y[i+7] - sin*tmp; + } +} + +void +blas·rot(int len, double *x, double *y, double cos, double sin) +{ + register int n; + register double tmp; + + n = len & ~7; + rot_kernel8_avx2(n, x, y, cos, sin); + + for (; n < len; n++) { + tmp = x[n], x[n] = cos*x[n] + sin*y[n], y[n] = cos*y[n]- sin*tmp; + } +} + +/* + * compute givens rotate vector + * -- -- + * |+cos -sin| | a | = | r | + * |-sin +cos| | b | = | 0 | + * -- -- + */ + +void +blas·rotg(double *a, double *b, double *cos, double *sin) +{ + double abs_a, abs_b, r, rho, scale, z; + + abs_a = math·abs(*a); + abs_b = math·abs(*b); + rho = abs_a > abs_b ? *a : *b; + scale = abs_a + abs_b; + + if (scale == 0) { + *cos = 1, *sin = 0; + r = 0.; + z = 0.; + } else { + r = math·sgn(rho) * scale * math·sqrt(math·pow(abs_a/scale, 2) + math·pow(abs_b/scale, 2)); + *cos = *a / r; + *sin = *b / r; + if (abs_a > abs_b) + z = *sin; + else if (abs_b >= abs_a && *cos != 0) + z = 1/(*cos); + else + z = 1.; + } + *a = r; + *b = z; +} + +/* + * modified Givens rotation of points in plane + * operates on len points + * + * params = [flag, h11, h12, h21, h22] + * @flag = -1: + * H -> [ [h11, h12], [h21, h22] ] + * @flag = 0.0: + * H -> [ [1, h12], [h21, 1] ] + * @flag = +1: + * H -> [ [h11, 1], [-1, h22] ] + * @flag = -2: + * H -> [ [1, 0], [0, 1] ] + * + * Replaces: + * x -> H11 * x + H12 * y + * y -> H21 * x + H22 * y + */ + +static +void +rotm_kernel8(int n, double *x, double *y, double H[4]) +{ + register int i; + register double tmp; + + for (i = 0; i < n; i+=8) { + tmp = x[i+0], x[i+0] = H[0]*x[i+0] + H[1]*y[i+0], y[i+0] = H[2]*y[i+0] + H[3]*tmp; + tmp = x[i+1], x[i+1] = H[0]*x[i+1] + H[1]*y[i+1], y[i+1] = H[2]*y[i+1] + H[3]*tmp; + tmp = x[i+2], x[i+2] = H[0]*x[i+2] + H[1]*y[i+2], y[i+2] = H[2]*y[i+2] + H[3]*tmp; + tmp = x[i+3], x[i+3] = H[0]*x[i+3] + H[1]*y[i+3], y[i+3] = H[2]*y[i+3] + H[3]*tmp; + tmp = x[i+4], x[i+4] = H[0]*x[i+4] + H[1]*y[i+4], y[i+4] = H[2]*y[i+4] + H[3]*tmp; + tmp = x[i+5], x[i+5] = H[0]*x[i+5] + H[1]*y[i+5], y[i+5] = H[2]*y[i+5] + H[3]*tmp; + tmp = x[i+6], x[i+6] = H[0]*x[i+6] + H[1]*y[i+6], y[i+6] = H[2]*y[i+6] + H[3]*tmp; + tmp = x[i+7], x[i+7] = H[0]*x[i+7] + H[1]*y[i+7], y[i+7] = H[2]*y[i+7] + H[3]*tmp; + } +} + +error +blas·rotm(int len, double *x, double *y, double p[5]) +{ + int n, flag; + double tmp, H[4]; + + flag = math·round(p[0]); + switch (flag) { + case 0: H[0] = p[1], H[1] = p[2], H[2] = p[3], H[3] = p[4]; break; + case -1: H[0] = +1, H[1] = p[2], H[2] = p[3], H[3] = +1; break; + case +1: H[0] = p[1], H[1] = +1, H[2] = -1, H[3] = p[4]; break; + case -2: H[0] = +1, H[1] = 0, H[2] = 0, H[3] = +1; break; + default: + errorf("rotm: flag '%d' unrecognized", flag); + return 1; + } + + n = len & ~7; + rotm_kernel8(n, x, y, H); + + for (; n < len; n++) { + tmp = x[n], x[n] = H[0]*x[n] + H[1]*y[n], y[n] = H[2]*y[n] + H[3]*tmp; + } + + return 0; +} + + /* * Scale vector * x = ax @@ -47,7 +218,7 @@ scale_kernel8(int n, double *x, double a) } void -blas·scalevec(int len, double *x, double a) +blas·scale(int len, double *x, double a) { int n; @@ -403,11 +574,11 @@ blas·gemm(int n1, int n2, int n3, double a, double *m1, double *m2, double b, d } #define NITER 5000 -#define NCOL 57 -#define NROW 57 +#define NCOL 10007 +#define NROW 10007 error -main() +test·gemm() { int i, n; clock_t t; @@ -468,3 +639,61 @@ main() return 0; } + +void +print·array(int n, double *x) +{ + double *end; + printf("["); + for (end=x+n; x != end; ++x) { + printf("%f,", *x); + } + printf("]\n"); +} + +error +main() +{ + int i, n; + double *x, *y; + double tprof[2]; + clock_t t; + + x = malloc(sizeof(*x)*NCOL); + y = malloc(sizeof(*x)*NCOL); + + for (n = 0; n < NITER; n++) { + for (i = 0; i < NCOL; i++) { + x[i] = i*i+1; + y[i] = i+1; + } + + t = clock(); + blas·rot(NCOL, x, y, .707, .707); + t = clock() - t; + tprof[0] += 1000.*t/CLOCKS_PER_SEC; + + for (i = 0; i < NCOL; i++) { + x[i] = i*i+1; + y[i] = i+1; + } + + t = clock(); + cblas_drot(NCOL, x, 1, y, 1, .707, .707); + t = clock() - t; + tprof[1] += 1000.*t/CLOCKS_PER_SEC; + } + + printf("mean time/iteration (naive): %fms\n", tprof[0]/NITER); + printf("mean time/iteration (oblas): %fms\n", tprof[1]/NITER); + + double a, b, c, s; + + a = 10.234, b = 2.; + cblas_drotg(&a, &b, &c, &s); + printf("%f, %f, %f, %f\n", a, b, c, s); + + a = 10.234, b = 2.; + blas·rotg(&a, &b, &c, &s); + printf("%f, %f, %f, %f\n", a, b, c, s); +} diff --git a/sys/libn/random.c b/sys/libn/random.c index 1e2c2d5..558641e 100644 --- a/sys/libn/random.c +++ b/sys/libn/random.c @@ -69,7 +69,7 @@ double rng·random() { uint64 r = xoshiro256ss(&RNG); - return (double)r / UINT64_MAX; + return (double)r / (double)UINT64_MAX; } /* Returns true or false on success of trial */ diff --git a/sys/libn/string.c b/sys/libn/string.c index 4280e27..ca53bdc 100644 --- a/sys/libn/string.c +++ b/sys/libn/string.c @@ -180,6 +180,7 @@ str·makecap(const byte* s, vlong len, vlong cap) cleanup: free(h); panicf("Attempted to create a string with less capacity than length"); + return nil; } // New returns a new dynamic string object, initialized from the given C string. -- cgit v1.2.1