aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compile_commands.json170
-rw-r--r--include/libmath.h147
-rw-r--r--include/libn.h4
-rw-r--r--rules.mk2
-rw-r--r--sys/libmath/basic.c531
-rw-r--r--sys/libmath/blas.c237
-rw-r--r--sys/libn/random.c2
-rw-r--r--sys/libn/string.c1
8 files changed, 998 insertions, 96 deletions
diff --git a/compile_commands.json b/compile_commands.json
index acb788e..cbb7712 100644
--- a/compile_commands.json
+++ b/compile_commands.json
@@ -11,19 +11,16 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-ffreestanding",
- "-fno-builtin",
- "-nostdlib",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libc/string.o",
- "sys/libc/string.c"
+ "build/libn/coro.o",
+ "sys/libn/coro.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libc/string.c"
+ "file": "sys/libn/coro.c"
},
{
"arguments": [
@@ -42,11 +39,11 @@
"-I",
"include",
"-o",
- "build/libn/bufio.o",
- "sys/libn/bufio.c"
+ "build/libn/memory.o",
+ "sys/libn/memory.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/bufio.c"
+ "file": "sys/libn/memory.c"
},
{
"arguments": [
@@ -60,16 +57,19 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
+ "-ffreestanding",
+ "-fno-builtin",
+ "-nostdlib",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libbio/phylo.o",
- "sys/libbio/phylo.c"
+ "build/libc/string.o",
+ "sys/libc/string.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libbio/phylo.c"
+ "file": "sys/libc/string.c"
},
{
"arguments": [
@@ -83,16 +83,17 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
+ "-D_GNU_SOURCE",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libn/flate.o",
- "sys/libn/flate.c"
+ "build/libmath/lapack.o",
+ "sys/libmath/lapack.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/flate.c"
+ "file": "sys/libmath/lapack.c"
},
{
"arguments": [
@@ -111,11 +112,11 @@
"-I",
"include",
"-o",
- "build/libn/sort.o",
- "sys/libn/sort.c"
+ "build/libn/random.o",
+ "sys/libn/random.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/sort.c"
+ "file": "sys/libn/random.c"
},
{
"arguments": [
@@ -134,11 +135,11 @@
"-I",
"include",
"-o",
- "build/libbio/test.o",
- "sys/libbio/test.c"
+ "build/libn/test.o",
+ "sys/libn/test.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libbio/test.c"
+ "file": "sys/libn/test.c"
},
{
"arguments": [
@@ -152,16 +153,17 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
+ "-D_GNU_SOURCE",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libbio/align.o",
- "sys/libbio/align.c"
+ "build/libmath/blas.o",
+ "sys/libmath/blas.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libbio/align.c"
+ "file": "sys/libmath/blas.c"
},
{
"arguments": [
@@ -181,11 +183,11 @@
"-I",
"include",
"-o",
- "build/libmath/blas.o",
- "sys/libmath/blas.c"
+ "build/libmath/test.o",
+ "sys/libmath/test.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libmath/blas.c"
+ "file": "sys/libmath/test.c"
},
{
"arguments": [
@@ -204,11 +206,11 @@
"-I",
"include",
"-o",
- "build/libn/memory.o",
- "sys/libn/memory.c"
+ "build/libbio/test.o",
+ "sys/libbio/test.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/memory.c"
+ "file": "sys/libbio/test.c"
},
{
"arguments": [
@@ -227,11 +229,11 @@
"-I",
"include",
"-o",
- "build/libn/mmap.o",
- "sys/libn/mmap.c"
+ "build/libn/io.o",
+ "sys/libn/io.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/mmap.c"
+ "file": "sys/libn/io.c"
},
{
"arguments": [
@@ -250,11 +252,11 @@
"-I",
"include",
"-o",
- "build/libbio/io/fasta.o",
- "sys/libbio/io/fasta.c"
+ "build/libbio/phylo.o",
+ "sys/libbio/phylo.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libbio/io/fasta.c"
+ "file": "sys/libbio/phylo.c"
},
{
"arguments": [
@@ -273,11 +275,11 @@
"-I",
"include",
"-o",
- "build/libbio/simulate.o",
- "sys/libbio/simulate.c"
+ "build/libn/sort.o",
+ "sys/libn/sort.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libbio/simulate.c"
+ "file": "sys/libn/sort.c"
},
{
"arguments": [
@@ -291,16 +293,19 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
+ "-ffreestanding",
+ "-fno-builtin",
+ "-nostdlib",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libn/random.o",
- "sys/libn/random.c"
+ "build/libc/stdio.o",
+ "sys/libc/stdio.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/random.c"
+ "file": "sys/libc/stdio.c"
},
{
"arguments": [
@@ -314,17 +319,16 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-D_GNU_SOURCE",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libmath/test.o",
- "sys/libmath/test.c"
+ "build/libn/gz.o",
+ "sys/libn/gz.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libmath/test.c"
+ "file": "sys/libn/gz.c"
},
{
"arguments": [
@@ -343,11 +347,11 @@
"-I",
"include",
"-o",
- "build/libbio/io/newick.o",
- "sys/libbio/io/newick.c"
+ "build/libn/error.o",
+ "sys/libn/error.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libbio/io/newick.c"
+ "file": "sys/libn/error.c"
},
{
"arguments": [
@@ -366,11 +370,11 @@
"-I",
"include",
"-o",
- "build/libn/string.o",
- "sys/libn/string.c"
+ "build/libn/bufio.o",
+ "sys/libn/bufio.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/string.c"
+ "file": "sys/libn/bufio.c"
},
{
"arguments": [
@@ -384,19 +388,16 @@
"-fms-extensions",
"-Wno-microsoft-anon-tag",
"-Wno-incompatible-function-pointer-types",
- "-ffreestanding",
- "-fno-builtin",
- "-nostdlib",
"-isystem",
"include/vendor/libc",
"-I",
"include",
"-o",
- "build/libc/stdio.o",
- "sys/libc/stdio.c"
+ "build/libbio/align.o",
+ "sys/libbio/align.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libc/stdio.c"
+ "file": "sys/libbio/align.c"
},
{
"arguments": [
@@ -415,11 +416,11 @@
"-I",
"include",
"-o",
- "build/libn/test.o",
- "sys/libn/test.c"
+ "build/libbio/io/newick.o",
+ "sys/libbio/io/newick.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/test.c"
+ "file": "sys/libbio/io/newick.c"
},
{
"arguments": [
@@ -438,11 +439,11 @@
"-I",
"include",
"-o",
- "build/libn/gz.o",
- "sys/libn/gz.c"
+ "build/libn/mmap.o",
+ "sys/libn/mmap.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/gz.c"
+ "file": "sys/libn/mmap.c"
},
{
"arguments": [
@@ -461,11 +462,11 @@
"-I",
"include",
"-o",
- "build/libn/coro.o",
- "sys/libn/coro.c"
+ "build/libn/flate.o",
+ "sys/libn/flate.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/coro.c"
+ "file": "sys/libn/flate.c"
},
{
"arguments": [
@@ -484,11 +485,11 @@
"-I",
"include",
"-o",
- "build/libn/io.o",
- "sys/libn/io.c"
+ "build/libbio/io/fasta.o",
+ "sys/libbio/io/fasta.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/io.c"
+ "file": "sys/libbio/io/fasta.c"
},
{
"arguments": [
@@ -507,10 +508,33 @@
"-I",
"include",
"-o",
- "build/libn/error.o",
- "sys/libn/error.c"
+ "build/libbio/simulate.o",
+ "sys/libbio/simulate.c"
],
"directory": "/home/nolln/root",
- "file": "sys/libn/error.c"
+ "file": "sys/libbio/simulate.c"
+ },
+ {
+ "arguments": [
+ "clang",
+ "-c",
+ "-g",
+ "-march=native",
+ "-ffast-math",
+ "-fno-strict-aliasing",
+ "-fwrapv",
+ "-fms-extensions",
+ "-Wno-microsoft-anon-tag",
+ "-Wno-incompatible-function-pointer-types",
+ "-isystem",
+ "include/vendor/libc",
+ "-I",
+ "include",
+ "-o",
+ "build/libn/string.o",
+ "sys/libn/string.c"
+ ],
+ "directory": "/home/nolln/root",
+ "file": "sys/libn/string.c"
}
] \ No newline at end of file
diff --git a/include/libmath.h b/include/libmath.h
index 03e8e1a..ec15f6f 100644
--- a/include/libmath.h
+++ b/include/libmath.h
@@ -1,20 +1,133 @@
#pragma once
// -----------------------------------------------------------------------
-// Linear algebra
-
-/*
- * Vector (double precision)
- * len : dimension of space
- * inc : stride to access in buffer
- * buf : pointer to start of data
- * NOTE: minimum buffer size MUST be len*inc
- */
-typedef struct math·Vecd
-{
- int len;
- int inc;
- double *buf;
-} math·Vecd;
-
-double linalg·dotd(math·Vecd a, math·Vecd b);
+// basic macros
+
+#define math·abs(x) (((x) > 0) ? (x) : (-(x)))
+#define math·sgn(x) (((x) > 0) ? (+1) : (((x) == 0) ? (0) : (-1) ))
+
+// -----------------------------------------------------------------------
+// elementary functions
+
+double math·acos(double);
+float math·acosf(float);
+
+double math·acosh(double);
+float math·acoshf(float);
+
+double math·asin(double);
+float math·asinf(float);
+
+double math·asinh(double);
+float math·asinhf(float);
+
+double math·atan(double);
+float math·atanf(float);
+
+double math·atan2(double, double);
+float math·atan2f(float, float);
+
+double math·atanh(double);
+float math·atanhf(float);
+
+double math·cbrt(double);
+float math·cbrtf(float);
+
+double math·ceil(double);
+float math·ceilf(float);
+
+double math·cos(double);
+float math·cosf(float);
+
+double math·cosh(double);
+float math·coshf(float);
+
+double math·erf(double);
+float math·erff(float);
+
+double math·erfc(double);
+float math·erfcf(float);
+
+double math·exp(double);
+float math·expf(float);
+
+double math·exp2(double);
+float math·exp2f(float);
+
+double math·expm1(double);
+float math·expm1f(float);
+
+double math·floor(double);
+float math·floorf(float);
+
+int math·ilogb(double);
+int math·ilogbf(float);
+
+double math·lgamma(double);
+float math·lgammaf(float);
+
+long long math·llrint(double);
+long long math·llrintf(float);
+
+long long math·llround(double);
+long long math·llroundf(float);
+
+double math·log(double);
+float math·logf(float);
+
+double math·log10(double);
+float math·log10f(float);
+
+double math·log1p(double);
+float math·log1pf(float);
+
+double math·log2(double);
+float math·log2f(float);
+
+double math·logb(double);
+float math·logbf(float);
+
+long math·lrint(double);
+long math·lrintf(float);
+
+long math·lround(double);
+long math·lroundf(float);
+
+double math·nan(const char *);
+float math·nanf(const char *);
+
+double math·nearbyint(double);
+float math·nearbyintf(float);
+
+double math·pow(double, double);
+float math·powf(float, float);
+
+double math·rint(double);
+float math·rintf(float);
+
+double math·round(double);
+float math·roundf(float);
+
+double math·sin(double);
+float math·sinf(float);
+
+double math·sinh(double);
+float math·sinhf(float);
+
+double math·sqrt(double);
+float math·sqrtf(float);
+
+double math·tan(double);
+float math·tanf(float);
+
+double math·tanh(double);
+float math·tanhf(float);
+
+double math·tgamma(double);
+float math·tgammaf(float);
+
+double math·trunc(double);
+float math·truncf(float);
+
+// -----------------------------------------------------------------------
+// linear algebra
diff --git a/include/libn.h b/include/libn.h
index 8e30eae..4efe3c1 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -286,6 +286,10 @@ error gz·flush(gz·Stream *s);
vlong gz·seek(gz·Stream *s, long off, enum SeekPos whence);
// -----------------------------------------------------------------------------
+// libjson
+// NOTE: Experimental!
+
+// -----------------------------------------------------------------------------
// error handling functions
void errorf(const byte* fmt, ...);
diff --git a/rules.mk b/rules.mk
index 7fc6a1f..ce6f3e4 100644
--- a/rules.mk
+++ b/rules.mk
@@ -8,7 +8,7 @@ all: targets
debug: CFLAGS += -DDEBUG
debug: targets
-release: CFLAGS += -DNDEBUG -O3
+release: CFLAGS += -DNDEBUG -O3 -flto
release: targets
# Targets & array of sources & intermediates
diff --git a/sys/libmath/basic.c b/sys/libmath/basic.c
new file mode 100644
index 0000000..50a4ec2
--- /dev/null
+++ b/sys/libmath/basic.c
@@ -0,0 +1,531 @@
+#include <u.h>
+#include <libn.h>
+#include <libmath.h>
+
+#include <math.h>
+
+// TODO(nnoll): Replace implementations with your own.
+
+double
+math·acos(double x)
+{
+ return acos(x);
+}
+
+float
+math·acosf(float x)
+{
+ return acosf(x);
+}
+
+
+double
+math·acosh(double x)
+{
+ return acosh(x);
+}
+
+float
+math·acoshf(float x)
+{
+ return acoshf(x);
+}
+
+
+double
+math·asin(double x)
+{
+ return asin(x);
+}
+
+float
+math·asinf(float x)
+{
+ return asinf(x);
+}
+
+
+double
+math·asinh(double x)
+{
+ return asinh(x);
+}
+
+float
+math·asinhf(float x)
+{
+ return asinhf(x);
+}
+
+
+double
+math·atan(double x)
+{
+ return atan(x);
+}
+
+float
+math·atanf(float x)
+{
+ return atanf(x);
+}
+
+
+double
+math·atan2(double x, double y)
+{
+ return atan2(x, y);
+}
+
+float
+math·atan2f(float x, float y)
+{
+ return atan2f(x, y);
+}
+
+double
+math·atanh(double x)
+{
+ return atanh(x);
+}
+
+float
+math·atanhf(float x)
+{
+ return atanhf(x);
+}
+
+
+double
+math·cbrt(double x)
+{
+ return cbrt(x);
+}
+
+float
+math·cbrtf(float x)
+{
+ return cbrtf(x);
+}
+
+
+double
+math·ceil(double x)
+{
+ return ceil(x);
+}
+
+float
+math·ceilf(float x)
+{
+ return ceilf(x);
+}
+
+double
+math·cos(double x)
+{
+ return cos(x);
+}
+
+float
+math·cosf(float x)
+{
+ return cosf(x);
+}
+
+
+double
+math·cosh(double x)
+{
+ return cosh(x);
+}
+
+float
+math·coshf(float x)
+{
+ return coshf(x);
+}
+
+
+double
+math·erf(double x)
+{
+ return erf(x);
+}
+
+float
+math·erff(float x)
+{
+ return erff(x);
+}
+
+
+double
+math·erfc(double x)
+{
+ return erfc(x);
+}
+
+float
+math·erfcf(float x)
+{
+ return erfcf(x);
+}
+
+
+double
+math·exp(double x)
+{
+ return exp(x);
+}
+
+float
+math·expf(float x)
+{
+ return expf(x);
+}
+
+
+double
+math·exp2(double x)
+{
+ return exp2(x);
+}
+
+float
+math·exp2f(float x)
+{
+ return exp2f(x);
+}
+
+
+double
+math·expm1(double x)
+{
+ return expm1(x);
+}
+
+float
+math·expm1f(float x)
+{
+ return expm1f(x);
+}
+
+
+double
+math·floor(double x)
+{
+ return floor(x);
+}
+
+float
+math·floorf(float x)
+{
+ return floorf(x);
+}
+
+
+int
+math·ilogb(double x)
+{
+ return ilogb(x);
+}
+
+int
+math·ilogbf(float x)
+{
+ return ilogbf(x);
+}
+
+double
+math·lgamma(double x)
+{
+ return lgamma(x);
+}
+
+float
+math·lgammaf(float x)
+{
+ return lgammaf(x);
+}
+
+
+vlong
+math·llrint(double x)
+{
+ return math·llrint(x);
+}
+
+vlong
+math·llrintf(float x)
+{
+ return math·llrintf(x);
+}
+
+
+vlong
+math·llround(double x)
+{
+ return llround(x);
+}
+
+vlong
+math·llroundf(float x)
+{
+ return llroundf(x);
+}
+
+
+double
+math·log(double x)
+{
+ return log(x);
+}
+
+float
+math·logf(float x)
+{
+ return logf(x);
+}
+
+
+double
+math·log10(double x)
+{
+ return log10(x);
+}
+
+float
+math·log10f(float x)
+{
+ return log10f(x);
+}
+
+
+double
+math·log1p(double x)
+{
+ return log1p(x);
+}
+
+float
+math·log1pf(float x)
+{
+ return log1pf(x);
+}
+
+
+double
+math·log2(double x)
+{
+ return log2(x);
+}
+
+float
+math·log2f(float x)
+{
+ return log2f(x);
+}
+
+
+double
+math·logb(double x)
+{
+ return logb(x);
+}
+
+float
+math·logbf(float x)
+{
+ return logbf(x);
+}
+
+
+long
+math·lrint(double x)
+{
+ return lrint(x);
+}
+
+long
+math·lrintf(float x)
+{
+ return lrintf(x);
+}
+
+
+long
+math·lround(double x)
+{
+ return lround(x);
+}
+
+long
+math·lroundf(float x)
+{
+ return lroundf(x);
+}
+
+
+double math·modf(double, double *);
+float math·modff(float, float *);
+
+double
+math·nan(const char * x)
+{
+ return nan(x);
+}
+
+float
+math·nanf(const char * x)
+{
+ return nanf(x);
+}
+
+
+double
+math·nearbyint(double x)
+{
+ return nearbyint(x);
+}
+
+float
+math·nearbyintf(float x)
+{
+ return nearbyintf(x);
+}
+
+
+double
+math·pow(double x, double exp)
+{
+ return pow(x, exp);
+}
+
+float
+math·powf(float x, float exp)
+{
+ return powf(x, exp);
+}
+
+double
+math·rint(double x)
+{
+ return rint(x);
+}
+
+float
+math·rintf(float x)
+{
+ return rintf(x);
+}
+
+
+double
+math·round(double x)
+{
+ return round(x);
+}
+
+float
+math·roundf(float x)
+{
+ return roundf(x);
+}
+
+
+double math·scalbln(double, long);
+float math·scalblnf(float, long);
+
+double math·scalbn(double, int);
+float math·scalbnf(float, int);
+
+double
+math·sin(double x)
+{
+ return sin(x);
+}
+
+float
+math·sinf(float x)
+{
+ return sinf(x);
+}
+
+
+double
+math·sinh(double x)
+{
+ return sinh(x);
+}
+
+float
+math·sinhf(float x)
+{
+ return sinhf(x);
+}
+
+
+double
+math·sqrt(double x)
+{
+ return sqrt(x);
+}
+
+float
+math·sqrtf(float x)
+{
+ return sqrtf(x);
+}
+
+
+double
+math·tan(double x)
+{
+ return tan(x);
+}
+
+float
+math·tanf(float x)
+{
+ return tanf(x);
+}
+
+
+double
+math·tanh(double x)
+{
+ return tanh(x);
+}
+
+float
+math·tanhf(float x)
+{
+ return tanhf(x);
+}
+
+
+double
+math·tgamma(double x)
+{
+ return tgamma(x);
+}
+
+float
+math·tgammaf(float x)
+{
+ return tgammaf(x);
+}
+
+
+double
+math·trunc(double x)
+{
+ return trunc(x);
+}
+
+float
+math·truncf(float x)
+{
+ return truncf(x);
+}
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index bbbe1c8..8343a42 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -1,5 +1,6 @@
#include <u.h>
#include <libn.h>
+#include <libmath.h>
#include <vendor/blas/cblas.h>
#include <x86intrin.h>
@@ -9,6 +10,176 @@
// level one
/*
+ * Rotate vector
+ * x = cos*x + sin*y
+ * y = cos*x - sin*y
+ */
+
+static
+void
+rot_kernel8_avx2(int n, double *x, double *y, double cos, double sin)
+{
+ register int i;
+ __m256d x256, y256;
+ __m128d cos128, sin128;
+ __m256d cos256, sin256;
+
+ cos128 = _mm_load_sd(&cos);
+ cos256 = _mm256_broadcastsd_pd(cos128);
+
+ sin128 = _mm_load_sd(&sin);
+ sin256 = _mm256_broadcastsd_pd(sin128);
+
+ for (i = 0; i < n; i+=8) {
+ x256 = _mm256_loadu_pd(x+i+0);
+ y256 = _mm256_loadu_pd(y+i+0);
+ _mm256_storeu_pd(x+i+0, cos256 * x256 + sin256 * y256);
+ _mm256_storeu_pd(y+i+0, cos256 * y256 - sin256 * x256);
+
+ x256 = _mm256_loadu_pd(x+i+4);
+ y256 = _mm256_loadu_pd(y+i+4);
+ _mm256_storeu_pd(x+i+4, cos256 * x256 + sin256 * y256);
+ _mm256_storeu_pd(y+i+4, cos256 * y256 - sin256 * x256);
+ }
+}
+
+static
+void
+rot_kernel8(int n, double *x, double *y, double cos, double sin)
+{
+ register int i;
+ register double tmp;
+
+ for (i = 0; i < n; i+=8) {
+ tmp = x[i+0], x[i+0] = cos*x[i+0] + sin*y[i+0], y[i+0] = cos*y[i+0] - sin*tmp;
+ tmp = x[i+1], x[i+1] = cos*x[i+1] + sin*y[i+1], y[i+1] = cos*y[i+1] - sin*tmp;
+ tmp = x[i+2], x[i+2] = cos*x[i+2] + sin*y[i+2], y[i+2] = cos*y[i+2] - sin*tmp;
+ tmp = x[i+3], x[i+3] = cos*x[i+3] + sin*y[i+3], y[i+3] = cos*y[i+3] - sin*tmp;
+ tmp = x[i+4], x[i+4] = cos*x[i+4] + sin*y[i+4], y[i+4] = cos*y[i+4] - sin*tmp;
+ tmp = x[i+5], x[i+5] = cos*x[i+5] + sin*y[i+5], y[i+5] = cos*y[i+5] - sin*tmp;
+ tmp = x[i+6], x[i+6] = cos*x[i+6] + sin*y[i+6], y[i+6] = cos*y[i+6] - sin*tmp;
+ tmp = x[i+7], x[i+7] = cos*x[i+7] + sin*y[i+7], y[i+7] = cos*y[i+7] - sin*tmp;
+ }
+}
+
+void
+blas·rot(int len, double *x, double *y, double cos, double sin)
+{
+ register int n;
+ register double tmp;
+
+ n = len & ~7;
+ rot_kernel8_avx2(n, x, y, cos, sin);
+
+ for (; n < len; n++) {
+ tmp = x[n], x[n] = cos*x[n] + sin*y[n], y[n] = cos*y[n]- sin*tmp;
+ }
+}
+
+/*
+ * compute givens rotate vector
+ * -- --
+ * |+cos -sin| | a | = | r |
+ * |-sin +cos| | b | = | 0 |
+ * -- --
+ */
+
+void
+blas·rotg(double *a, double *b, double *cos, double *sin)
+{
+ double abs_a, abs_b, r, rho, scale, z;
+
+ abs_a = math·abs(*a);
+ abs_b = math·abs(*b);
+ rho = abs_a > abs_b ? *a : *b;
+ scale = abs_a + abs_b;
+
+ if (scale == 0) {
+ *cos = 1, *sin = 0;
+ r = 0.;
+ z = 0.;
+ } else {
+ r = math·sgn(rho) * scale * math·sqrt(math·pow(abs_a/scale, 2) + math·pow(abs_b/scale, 2));
+ *cos = *a / r;
+ *sin = *b / r;
+ if (abs_a > abs_b)
+ z = *sin;
+ else if (abs_b >= abs_a && *cos != 0)
+ z = 1/(*cos);
+ else
+ z = 1.;
+ }
+ *a = r;
+ *b = z;
+}
+
+/*
+ * modified Givens rotation of points in plane
+ * operates on len points
+ *
+ * params = [flag, h11, h12, h21, h22]
+ * @flag = -1:
+ * H -> [ [h11, h12], [h21, h22] ]
+ * @flag = 0.0:
+ * H -> [ [1, h12], [h21, 1] ]
+ * @flag = +1:
+ * H -> [ [h11, 1], [-1, h22] ]
+ * @flag = -2:
+ * H -> [ [1, 0], [0, 1] ]
+ *
+ * Replaces:
+ * x -> H11 * x + H12 * y
+ * y -> H21 * x + H22 * y
+ */
+
+static
+void
+rotm_kernel8(int n, double *x, double *y, double H[4])
+{
+ register int i;
+ register double tmp;
+
+ for (i = 0; i < n; i+=8) {
+ tmp = x[i+0], x[i+0] = H[0]*x[i+0] + H[1]*y[i+0], y[i+0] = H[2]*y[i+0] + H[3]*tmp;
+ tmp = x[i+1], x[i+1] = H[0]*x[i+1] + H[1]*y[i+1], y[i+1] = H[2]*y[i+1] + H[3]*tmp;
+ tmp = x[i+2], x[i+2] = H[0]*x[i+2] + H[1]*y[i+2], y[i+2] = H[2]*y[i+2] + H[3]*tmp;
+ tmp = x[i+3], x[i+3] = H[0]*x[i+3] + H[1]*y[i+3], y[i+3] = H[2]*y[i+3] + H[3]*tmp;
+ tmp = x[i+4], x[i+4] = H[0]*x[i+4] + H[1]*y[i+4], y[i+4] = H[2]*y[i+4] + H[3]*tmp;
+ tmp = x[i+5], x[i+5] = H[0]*x[i+5] + H[1]*y[i+5], y[i+5] = H[2]*y[i+5] + H[3]*tmp;
+ tmp = x[i+6], x[i+6] = H[0]*x[i+6] + H[1]*y[i+6], y[i+6] = H[2]*y[i+6] + H[3]*tmp;
+ tmp = x[i+7], x[i+7] = H[0]*x[i+7] + H[1]*y[i+7], y[i+7] = H[2]*y[i+7] + H[3]*tmp;
+ }
+}
+
+error
+blas·rotm(int len, double *x, double *y, double p[5])
+{
+ int n, flag;
+ double tmp, H[4];
+
+ flag = math·round(p[0]);
+ switch (flag) {
+ case 0: H[0] = p[1], H[1] = p[2], H[2] = p[3], H[3] = p[4]; break;
+ case -1: H[0] = +1, H[1] = p[2], H[2] = p[3], H[3] = +1; break;
+ case +1: H[0] = p[1], H[1] = +1, H[2] = -1, H[3] = p[4]; break;
+ case -2: H[0] = +1, H[1] = 0, H[2] = 0, H[3] = +1; break;
+ default:
+ errorf("rotm: flag '%d' unrecognized", flag);
+ return 1;
+ }
+
+ n = len & ~7;
+ rotm_kernel8(n, x, y, H);
+
+ for (; n < len; n++) {
+ tmp = x[n], x[n] = H[0]*x[n] + H[1]*y[n], y[n] = H[2]*y[n] + H[3]*tmp;
+ }
+
+ return 0;
+}
+
+
+/*
* Scale vector
* x = ax
*/
@@ -47,7 +218,7 @@ scale_kernel8(int n, double *x, double a)
}
void
-blas·scalevec(int len, double *x, double a)
+blas·scale(int len, double *x, double a)
{
int n;
@@ -403,11 +574,11 @@ blas·gemm(int n1, int n2, int n3, double a, double *m1, double *m2, double b, d
}
#define NITER 5000
-#define NCOL 57
-#define NROW 57
+#define NCOL 10007
+#define NROW 10007
error
-main()
+test·gemm()
{
int i, n;
clock_t t;
@@ -468,3 +639,61 @@ main()
return 0;
}
+
+void
+print·array(int n, double *x)
+{
+ double *end;
+ printf("[");
+ for (end=x+n; x != end; ++x) {
+ printf("%f,", *x);
+ }
+ printf("]\n");
+}
+
+error
+main()
+{
+ int i, n;
+ double *x, *y;
+ double tprof[2];
+ clock_t t;
+
+ x = malloc(sizeof(*x)*NCOL);
+ y = malloc(sizeof(*x)*NCOL);
+
+ for (n = 0; n < NITER; n++) {
+ for (i = 0; i < NCOL; i++) {
+ x[i] = i*i+1;
+ y[i] = i+1;
+ }
+
+ t = clock();
+ blas·rot(NCOL, x, y, .707, .707);
+ t = clock() - t;
+ tprof[0] += 1000.*t/CLOCKS_PER_SEC;
+
+ for (i = 0; i < NCOL; i++) {
+ x[i] = i*i+1;
+ y[i] = i+1;
+ }
+
+ t = clock();
+ cblas_drot(NCOL, x, 1, y, 1, .707, .707);
+ t = clock() - t;
+ tprof[1] += 1000.*t/CLOCKS_PER_SEC;
+ }
+
+ printf("mean time/iteration (naive): %fms\n", tprof[0]/NITER);
+ printf("mean time/iteration (oblas): %fms\n", tprof[1]/NITER);
+
+ double a, b, c, s;
+
+ a = 10.234, b = 2.;
+ cblas_drotg(&a, &b, &c, &s);
+ printf("%f, %f, %f, %f\n", a, b, c, s);
+
+ a = 10.234, b = 2.;
+ blas·rotg(&a, &b, &c, &s);
+ printf("%f, %f, %f, %f\n", a, b, c, s);
+}
diff --git a/sys/libn/random.c b/sys/libn/random.c
index 1e2c2d5..558641e 100644
--- a/sys/libn/random.c
+++ b/sys/libn/random.c
@@ -69,7 +69,7 @@ double
rng·random()
{
uint64 r = xoshiro256ss(&RNG);
- return (double)r / UINT64_MAX;
+ return (double)r / (double)UINT64_MAX;
}
/* Returns true or false on success of trial */
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 4280e27..ca53bdc 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -180,6 +180,7 @@ str·makecap(const byte* s, vlong len, vlong cap)
cleanup:
free(h);
panicf("Attempted to create a string with less capacity than length");
+ return nil;
}
// New returns a new dynamic string object, initialized from the given C string.