From 36117f59ec77784c9ef77801d7c1cbf03a4c4a8b Mon Sep 17 00:00:00 2001
From: Nicholas Noll <nbnoll@eml.cc>
Date: Thu, 7 May 2020 15:35:06 -0700
Subject: wrap: elementary math functions for libmath

---
 compile_commands.json | 170 +++++++++-------
 include/libmath.h     | 147 ++++++++++++--
 include/libn.h        |   4 +
 rules.mk              |   2 +-
 sys/libmath/basic.c   | 531 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/libmath/blas.c    | 237 +++++++++++++++++++++-
 sys/libn/random.c     |   2 +-
 sys/libn/string.c     |   1 +
 8 files changed, 998 insertions(+), 96 deletions(-)
 create mode 100644 sys/libmath/basic.c

diff --git a/compile_commands.json b/compile_commands.json
index acb788e..cbb7712 100644
--- a/compile_commands.json
+++ b/compile_commands.json
@@ -11,19 +11,16 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
-            "-ffreestanding",
-            "-fno-builtin",
-            "-nostdlib",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libc/string.o",
-            "sys/libc/string.c"
+            "build/libn/coro.o",
+            "sys/libn/coro.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libc/string.c"
+        "file": "sys/libn/coro.c"
     },
     {
         "arguments": [
@@ -42,11 +39,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/bufio.o",
-            "sys/libn/bufio.c"
+            "build/libn/memory.o",
+            "sys/libn/memory.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/bufio.c"
+        "file": "sys/libn/memory.c"
     },
     {
         "arguments": [
@@ -60,16 +57,19 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
+            "-ffreestanding",
+            "-fno-builtin",
+            "-nostdlib",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libbio/phylo.o",
-            "sys/libbio/phylo.c"
+            "build/libc/string.o",
+            "sys/libc/string.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libbio/phylo.c"
+        "file": "sys/libc/string.c"
     },
     {
         "arguments": [
@@ -83,16 +83,17 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
+            "-D_GNU_SOURCE",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libn/flate.o",
-            "sys/libn/flate.c"
+            "build/libmath/lapack.o",
+            "sys/libmath/lapack.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/flate.c"
+        "file": "sys/libmath/lapack.c"
     },
     {
         "arguments": [
@@ -111,11 +112,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/sort.o",
-            "sys/libn/sort.c"
+            "build/libn/random.o",
+            "sys/libn/random.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/sort.c"
+        "file": "sys/libn/random.c"
     },
     {
         "arguments": [
@@ -134,11 +135,11 @@
             "-I",
             "include",
             "-o",
-            "build/libbio/test.o",
-            "sys/libbio/test.c"
+            "build/libn/test.o",
+            "sys/libn/test.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libbio/test.c"
+        "file": "sys/libn/test.c"
     },
     {
         "arguments": [
@@ -152,16 +153,17 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
+            "-D_GNU_SOURCE",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libbio/align.o",
-            "sys/libbio/align.c"
+            "build/libmath/blas.o",
+            "sys/libmath/blas.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libbio/align.c"
+        "file": "sys/libmath/blas.c"
     },
     {
         "arguments": [
@@ -181,11 +183,11 @@
             "-I",
             "include",
             "-o",
-            "build/libmath/blas.o",
-            "sys/libmath/blas.c"
+            "build/libmath/test.o",
+            "sys/libmath/test.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libmath/blas.c"
+        "file": "sys/libmath/test.c"
     },
     {
         "arguments": [
@@ -204,11 +206,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/memory.o",
-            "sys/libn/memory.c"
+            "build/libbio/test.o",
+            "sys/libbio/test.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/memory.c"
+        "file": "sys/libbio/test.c"
     },
     {
         "arguments": [
@@ -227,11 +229,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/mmap.o",
-            "sys/libn/mmap.c"
+            "build/libn/io.o",
+            "sys/libn/io.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/mmap.c"
+        "file": "sys/libn/io.c"
     },
     {
         "arguments": [
@@ -250,11 +252,11 @@
             "-I",
             "include",
             "-o",
-            "build/libbio/io/fasta.o",
-            "sys/libbio/io/fasta.c"
+            "build/libbio/phylo.o",
+            "sys/libbio/phylo.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libbio/io/fasta.c"
+        "file": "sys/libbio/phylo.c"
     },
     {
         "arguments": [
@@ -273,11 +275,11 @@
             "-I",
             "include",
             "-o",
-            "build/libbio/simulate.o",
-            "sys/libbio/simulate.c"
+            "build/libn/sort.o",
+            "sys/libn/sort.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libbio/simulate.c"
+        "file": "sys/libn/sort.c"
     },
     {
         "arguments": [
@@ -291,16 +293,19 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
+            "-ffreestanding",
+            "-fno-builtin",
+            "-nostdlib",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libn/random.o",
-            "sys/libn/random.c"
+            "build/libc/stdio.o",
+            "sys/libc/stdio.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/random.c"
+        "file": "sys/libc/stdio.c"
     },
     {
         "arguments": [
@@ -314,17 +319,16 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
-            "-D_GNU_SOURCE",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libmath/test.o",
-            "sys/libmath/test.c"
+            "build/libn/gz.o",
+            "sys/libn/gz.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libmath/test.c"
+        "file": "sys/libn/gz.c"
     },
     {
         "arguments": [
@@ -343,11 +347,11 @@
             "-I",
             "include",
             "-o",
-            "build/libbio/io/newick.o",
-            "sys/libbio/io/newick.c"
+            "build/libn/error.o",
+            "sys/libn/error.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libbio/io/newick.c"
+        "file": "sys/libn/error.c"
     },
     {
         "arguments": [
@@ -366,11 +370,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/string.o",
-            "sys/libn/string.c"
+            "build/libn/bufio.o",
+            "sys/libn/bufio.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/string.c"
+        "file": "sys/libn/bufio.c"
     },
     {
         "arguments": [
@@ -384,19 +388,16 @@
             "-fms-extensions",
             "-Wno-microsoft-anon-tag",
             "-Wno-incompatible-function-pointer-types",
-            "-ffreestanding",
-            "-fno-builtin",
-            "-nostdlib",
             "-isystem",
             "include/vendor/libc",
             "-I",
             "include",
             "-o",
-            "build/libc/stdio.o",
-            "sys/libc/stdio.c"
+            "build/libbio/align.o",
+            "sys/libbio/align.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libc/stdio.c"
+        "file": "sys/libbio/align.c"
     },
     {
         "arguments": [
@@ -415,11 +416,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/test.o",
-            "sys/libn/test.c"
+            "build/libbio/io/newick.o",
+            "sys/libbio/io/newick.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/test.c"
+        "file": "sys/libbio/io/newick.c"
     },
     {
         "arguments": [
@@ -438,11 +439,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/gz.o",
-            "sys/libn/gz.c"
+            "build/libn/mmap.o",
+            "sys/libn/mmap.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/gz.c"
+        "file": "sys/libn/mmap.c"
     },
     {
         "arguments": [
@@ -461,11 +462,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/coro.o",
-            "sys/libn/coro.c"
+            "build/libn/flate.o",
+            "sys/libn/flate.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/coro.c"
+        "file": "sys/libn/flate.c"
     },
     {
         "arguments": [
@@ -484,11 +485,11 @@
             "-I",
             "include",
             "-o",
-            "build/libn/io.o",
-            "sys/libn/io.c"
+            "build/libbio/io/fasta.o",
+            "sys/libbio/io/fasta.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/io.c"
+        "file": "sys/libbio/io/fasta.c"
     },
     {
         "arguments": [
@@ -507,10 +508,33 @@
             "-I",
             "include",
             "-o",
-            "build/libn/error.o",
-            "sys/libn/error.c"
+            "build/libbio/simulate.o",
+            "sys/libbio/simulate.c"
         ],
         "directory": "/home/nolln/root",
-        "file": "sys/libn/error.c"
+        "file": "sys/libbio/simulate.c"
+    },
+    {
+        "arguments": [
+            "clang",
+            "-c",
+            "-g",
+            "-march=native",
+            "-ffast-math",
+            "-fno-strict-aliasing",
+            "-fwrapv",
+            "-fms-extensions",
+            "-Wno-microsoft-anon-tag",
+            "-Wno-incompatible-function-pointer-types",
+            "-isystem",
+            "include/vendor/libc",
+            "-I",
+            "include",
+            "-o",
+            "build/libn/string.o",
+            "sys/libn/string.c"
+        ],
+        "directory": "/home/nolln/root",
+        "file": "sys/libn/string.c"
     }
 ]
\ No newline at end of file
diff --git a/include/libmath.h b/include/libmath.h
index 03e8e1a..ec15f6f 100644
--- a/include/libmath.h
+++ b/include/libmath.h
@@ -1,20 +1,133 @@
 #pragma once
 
 // -----------------------------------------------------------------------
-// Linear algebra
-
-/* 
- * Vector (double precision)
- * len : dimension of space
- * inc : stride to access in buffer
- * buf : pointer to start of data
- * NOTE: minimum buffer size MUST be len*inc
- */
-typedef struct math·Vecd
-{
-    int     len;
-    int     inc;
-    double *buf;
-} math·Vecd;
-
-double linalg·dotd(math·Vecd a, math·Vecd b);
+// basic macros
+
+#define math·abs(x) (((x) > 0) ? (x) : (-(x)))
+#define math·sgn(x) (((x) > 0) ? (+1) : (((x) == 0) ? (0) : (-1) ))
+
+// -----------------------------------------------------------------------
+// elementary functions 
+
+double      math·acos(double);
+float       math·acosf(float);
+
+double      math·acosh(double);
+float       math·acoshf(float);
+
+double      math·asin(double);
+float       math·asinf(float);
+
+double      math·asinh(double);
+float       math·asinhf(float);
+
+double      math·atan(double);
+float       math·atanf(float);
+
+double      math·atan2(double, double);
+float       math·atan2f(float, float);
+
+double      math·atanh(double);
+float       math·atanhf(float);
+
+double      math·cbrt(double);
+float       math·cbrtf(float);
+
+double      math·ceil(double);
+float       math·ceilf(float);
+
+double      math·cos(double);
+float       math·cosf(float);
+
+double      math·cosh(double);
+float       math·coshf(float);
+
+double      math·erf(double);
+float       math·erff(float);
+
+double      math·erfc(double);
+float       math·erfcf(float);
+
+double      math·exp(double);
+float       math·expf(float);
+
+double      math·exp2(double);
+float       math·exp2f(float);
+
+double      math·expm1(double);
+float       math·expm1f(float);
+
+double      math·floor(double);
+float       math·floorf(float);
+
+int         math·ilogb(double);
+int         math·ilogbf(float);
+
+double      math·lgamma(double);
+float       math·lgammaf(float);
+
+long long   math·llrint(double);
+long long   math·llrintf(float);
+
+long long   math·llround(double);
+long long   math·llroundf(float);
+
+double      math·log(double);
+float       math·logf(float);
+
+double      math·log10(double);
+float       math·log10f(float);
+
+double      math·log1p(double);
+float       math·log1pf(float);
+
+double      math·log2(double);
+float       math·log2f(float);
+
+double      math·logb(double);
+float       math·logbf(float);
+
+long        math·lrint(double);
+long        math·lrintf(float);
+
+long        math·lround(double);
+long        math·lroundf(float);
+
+double      math·nan(const char *);
+float       math·nanf(const char *);
+
+double      math·nearbyint(double);
+float       math·nearbyintf(float);
+
+double      math·pow(double, double);
+float       math·powf(float, float);
+
+double      math·rint(double);
+float       math·rintf(float);
+
+double      math·round(double);
+float       math·roundf(float);
+
+double      math·sin(double);
+float       math·sinf(float);
+
+double      math·sinh(double);
+float       math·sinhf(float);
+
+double      math·sqrt(double);
+float       math·sqrtf(float);
+
+double      math·tan(double);
+float       math·tanf(float);
+
+double      math·tanh(double);
+float       math·tanhf(float);
+
+double      math·tgamma(double);
+float       math·tgammaf(float);
+
+double      math·trunc(double);
+float       math·truncf(float);
+
+// -----------------------------------------------------------------------
+// linear algebra
diff --git a/include/libn.h b/include/libn.h
index 8e30eae..4efe3c1 100644
--- a/include/libn.h
+++ b/include/libn.h
@@ -285,6 +285,10 @@ int        gz·printf(gz·Stream *s, byte *fmt, ...);
 error      gz·flush(gz·Stream *s);
 vlong      gz·seek(gz·Stream *s, long off, enum SeekPos whence);
 
+// -----------------------------------------------------------------------------
+// libjson
+// NOTE: Experimental!
+
 // -----------------------------------------------------------------------------
 // error handling functions
 
diff --git a/rules.mk b/rules.mk
index 7fc6a1f..ce6f3e4 100644
--- a/rules.mk
+++ b/rules.mk
@@ -8,7 +8,7 @@ all: targets
 debug: CFLAGS += -DDEBUG
 debug: targets
 
-release: CFLAGS += -DNDEBUG -O3
+release: CFLAGS += -DNDEBUG -O3 -flto
 release: targets
 
 # Targets & array of sources & intermediates
diff --git a/sys/libmath/basic.c b/sys/libmath/basic.c
new file mode 100644
index 0000000..50a4ec2
--- /dev/null
+++ b/sys/libmath/basic.c
@@ -0,0 +1,531 @@
+#include <u.h>
+#include <libn.h>
+#include <libmath.h>
+
+#include <math.h>
+
+// TODO(nnoll): Replace implementations with your own.
+
+double
+math·acos(double x)
+{
+    return acos(x);
+}
+
+float
+math·acosf(float x)
+{
+    return acosf(x);
+}
+
+
+double
+math·acosh(double x)
+{
+    return acosh(x);
+}
+
+float
+math·acoshf(float x)
+{
+    return acoshf(x);
+}
+
+
+double
+math·asin(double x)
+{
+    return asin(x);
+}
+
+float
+math·asinf(float x)
+{
+    return asinf(x);
+}
+
+
+double
+math·asinh(double x)
+{
+    return asinh(x);
+}
+
+float
+math·asinhf(float x)
+{
+    return asinhf(x);
+}
+
+
+double
+math·atan(double x)
+{
+    return atan(x);
+}
+
+float
+math·atanf(float x)
+{
+    return atanf(x);
+}
+
+
+double
+math·atan2(double x, double y)
+{
+    return atan2(x, y);
+}
+
+float
+math·atan2f(float x, float y)
+{
+    return atan2f(x, y);
+}
+
+double
+math·atanh(double x)
+{
+    return atanh(x);
+}
+
+float
+math·atanhf(float x)
+{
+    return atanhf(x);
+}
+
+
+double
+math·cbrt(double x)
+{
+    return cbrt(x);
+}
+
+float
+math·cbrtf(float x)
+{
+    return cbrtf(x);
+}
+
+
+double
+math·ceil(double x)
+{
+    return ceil(x);
+}
+
+float
+math·ceilf(float x)
+{
+    return ceilf(x);
+}
+
+double
+math·cos(double x)
+{
+    return cos(x);
+}
+
+float
+math·cosf(float x)
+{
+    return cosf(x);
+}
+
+
+double
+math·cosh(double x)
+{
+    return cosh(x);
+}
+
+float
+math·coshf(float x)
+{
+    return coshf(x);
+}
+
+
+double
+math·erf(double x)
+{
+    return erf(x);
+}
+
+float
+math·erff(float x)
+{
+    return erff(x);
+}
+
+
+double
+math·erfc(double x)
+{
+    return erfc(x);
+}
+
+float
+math·erfcf(float x)
+{
+    return erfcf(x);
+}
+
+
+double
+math·exp(double x)
+{
+    return exp(x);
+}
+
+float
+math·expf(float x)
+{
+    return expf(x);
+}
+
+
+double
+math·exp2(double x)
+{
+    return exp2(x);
+}
+
+float
+math·exp2f(float x)
+{
+    return exp2f(x);
+}
+
+
+double
+math·expm1(double x)
+{
+    return expm1(x);
+}
+
+float
+math·expm1f(float x)
+{
+    return expm1f(x);
+}
+
+
+double
+math·floor(double x)
+{
+    return floor(x);
+}
+
+float
+math·floorf(float x)
+{
+    return floorf(x);
+}
+
+
+int
+math·ilogb(double x)
+{
+    return ilogb(x);
+}
+
+int
+math·ilogbf(float x)
+{
+    return ilogbf(x);
+}
+
+double
+math·lgamma(double x)
+{
+    return lgamma(x);
+}
+
+float
+math·lgammaf(float x)
+{
+    return lgammaf(x);
+}
+
+
+vlong   
+math·llrint(double x)
+{
+      return math·llrint(x);
+}
+
+vlong
+math·llrintf(float x)
+{
+      return math·llrintf(x);
+}
+
+
+vlong
+math·llround(double x)
+{
+    return llround(x);
+}
+
+vlong
+math·llroundf(float x)
+{
+    return llroundf(x);
+}
+
+
+double
+math·log(double x)
+{
+    return log(x);
+}
+
+float
+math·logf(float x)
+{
+    return logf(x);
+}
+
+
+double
+math·log10(double x)
+{
+    return log10(x);
+}
+
+float
+math·log10f(float x)
+{
+    return log10f(x);
+}
+
+
+double
+math·log1p(double x)
+{
+    return log1p(x);
+}
+
+float
+math·log1pf(float x)
+{
+    return log1pf(x);
+}
+
+
+double
+math·log2(double x)
+{
+    return log2(x);
+}
+
+float
+math·log2f(float x)
+{
+    return log2f(x);
+}
+
+
+double
+math·logb(double x)
+{
+    return logb(x);
+}
+
+float
+math·logbf(float x)
+{
+    return logbf(x);
+}
+
+
+long
+math·lrint(double x)
+{
+    return lrint(x);
+}
+
+long
+math·lrintf(float x)
+{
+    return lrintf(x);
+}
+
+
+long
+math·lround(double x)
+{
+    return lround(x);
+}
+
+long
+math·lroundf(float x)
+{
+    return lroundf(x);
+}
+
+
+double      math·modf(double, double *);
+float       math·modff(float, float *);
+
+double
+math·nan(const char * x)
+{
+    return nan(x);
+}
+
+float
+math·nanf(const char * x)
+{
+    return nanf(x);
+}
+
+
+double
+math·nearbyint(double x)
+{
+    return nearbyint(x);
+}
+
+float
+math·nearbyintf(float x)
+{
+    return nearbyintf(x);
+}
+
+
+double
+math·pow(double x, double exp)
+{
+    return pow(x, exp);
+}
+
+float       
+math·powf(float x, float exp)
+{
+    return powf(x, exp);
+}
+
+double
+math·rint(double x)
+{
+    return rint(x);
+}
+
+float
+math·rintf(float x)
+{
+    return rintf(x);
+}
+
+
+double
+math·round(double x)
+{
+    return round(x);
+}
+
+float
+math·roundf(float x)
+{
+    return roundf(x);
+}
+
+
+double      math·scalbln(double, long);
+float       math·scalblnf(float, long);
+
+double      math·scalbn(double, int);
+float       math·scalbnf(float, int);
+
+double
+math·sin(double x)
+{
+    return sin(x);
+}
+
+float
+math·sinf(float x)
+{
+    return sinf(x);
+}
+
+
+double
+math·sinh(double x)
+{
+    return sinh(x);
+}
+
+float
+math·sinhf(float x)
+{
+    return sinhf(x);
+}
+
+
+double
+math·sqrt(double x)
+{
+    return sqrt(x);
+}
+
+float
+math·sqrtf(float x)
+{
+    return sqrtf(x);
+}
+
+
+double
+math·tan(double x)
+{
+    return tan(x);
+}
+
+float
+math·tanf(float x)
+{
+    return tanf(x);
+}
+
+
+double
+math·tanh(double x)
+{
+    return tanh(x);
+}
+
+float
+math·tanhf(float x)
+{
+    return tanhf(x);
+}
+
+
+double
+math·tgamma(double x)
+{
+    return tgamma(x);
+}
+
+float
+math·tgammaf(float x)
+{
+    return tgammaf(x);
+}
+
+
+double
+math·trunc(double x)
+{
+    return trunc(x);
+}
+
+float
+math·truncf(float x)
+{
+    return truncf(x);
+}
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index bbbe1c8..8343a42 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -1,5 +1,6 @@
 #include <u.h>
 #include <libn.h>
+#include <libmath.h>
 #include <vendor/blas/cblas.h>
 
 #include <x86intrin.h>
@@ -8,6 +9,176 @@
 // -----------------------------------------------------------------------
 // level one
 
+/*
+ * Rotate vector
+ * x = cos*x + sin*y
+ * y = cos*x - sin*y
+ */ 
+
+static
+void
+rot_kernel8_avx2(int n, double *x, double *y, double cos, double sin)
+{
+    register int i;
+    __m256d x256, y256;
+    __m128d cos128, sin128;
+    __m256d cos256, sin256;
+
+    cos128 = _mm_load_sd(&cos);
+    cos256 = _mm256_broadcastsd_pd(cos128);
+
+    sin128 = _mm_load_sd(&sin);
+    sin256 = _mm256_broadcastsd_pd(sin128);
+
+    for (i = 0; i < n; i+=8) {
+        x256 = _mm256_loadu_pd(x+i+0);
+        y256 = _mm256_loadu_pd(y+i+0);
+        _mm256_storeu_pd(x+i+0, cos256 * x256 + sin256 * y256);
+        _mm256_storeu_pd(y+i+0, cos256 * y256 - sin256 * x256);
+
+        x256 = _mm256_loadu_pd(x+i+4);
+        y256 = _mm256_loadu_pd(y+i+4);
+        _mm256_storeu_pd(x+i+4, cos256 * x256 + sin256 * y256);
+        _mm256_storeu_pd(y+i+4, cos256 * y256 - sin256 * x256);
+    }
+}
+
+static
+void
+rot_kernel8(int n, double *x, double *y, double cos, double sin)
+{
+    register int i;
+    register double tmp;
+
+    for (i = 0; i < n; i+=8) {
+        tmp = x[i+0], x[i+0] = cos*x[i+0] + sin*y[i+0], y[i+0] = cos*y[i+0] - sin*tmp;
+        tmp = x[i+1], x[i+1] = cos*x[i+1] + sin*y[i+1], y[i+1] = cos*y[i+1] - sin*tmp;
+        tmp = x[i+2], x[i+2] = cos*x[i+2] + sin*y[i+2], y[i+2] = cos*y[i+2] - sin*tmp;
+        tmp = x[i+3], x[i+3] = cos*x[i+3] + sin*y[i+3], y[i+3] = cos*y[i+3] - sin*tmp;
+        tmp = x[i+4], x[i+4] = cos*x[i+4] + sin*y[i+4], y[i+4] = cos*y[i+4] - sin*tmp;
+        tmp = x[i+5], x[i+5] = cos*x[i+5] + sin*y[i+5], y[i+5] = cos*y[i+5] - sin*tmp;
+        tmp = x[i+6], x[i+6] = cos*x[i+6] + sin*y[i+6], y[i+6] = cos*y[i+6] - sin*tmp;
+        tmp = x[i+7], x[i+7] = cos*x[i+7] + sin*y[i+7], y[i+7] = cos*y[i+7] - sin*tmp;
+    }
+}
+
+void
+blas·rot(int len, double *x, double *y, double cos, double sin)
+{
+    register int n;
+    register double tmp;
+
+    n = len & ~7;
+    rot_kernel8_avx2(n, x, y, cos, sin);
+
+    for (; n < len; n++) {
+        tmp = x[n], x[n] = cos*x[n] + sin*y[n], y[n] = cos*y[n]- sin*tmp;
+    }
+}
+
+/*
+ * compute givens rotate vector
+ * --      --
+ * |+cos -sin| | a | = | r |
+ * |-sin +cos| | b | = | 0 |
+ * --      --
+ */ 
+
+void
+blas·rotg(double *a, double *b, double *cos, double *sin)
+{
+    double abs_a, abs_b, r, rho, scale, z;
+
+    abs_a = math·abs(*a);
+    abs_b = math·abs(*b);
+    rho   = abs_a > abs_b ? *a : *b;
+    scale = abs_a + abs_b;
+
+    if (scale == 0) {
+        *cos = 1, *sin = 0;
+        r = 0.;
+        z = 0.;
+    } else {
+        r    = math·sgn(rho) * scale * math·sqrt(math·pow(abs_a/scale, 2) + math·pow(abs_b/scale, 2));
+        *cos = *a / r;
+        *sin = *b / r;
+        if (abs_a > abs_b)
+            z = *sin;
+        else if (abs_b >= abs_a && *cos != 0)
+            z = 1/(*cos);
+        else 
+            z = 1.;
+    }
+    *a = r;
+    *b = z;
+}
+
+/*
+ * modified Givens rotation of points in plane
+ * operates on len points
+ *
+ * params = [flag, h11, h12, h21, h22]
+ * @flag = -1:
+ *      H -> [ [h11, h12], [h21, h22] ]
+ * @flag = 0.0:
+ *      H -> [ [1, h12], [h21, 1] ]
+ * @flag = +1:
+ *      H -> [ [h11, 1], [-1, h22] ]
+ * @flag = -2:
+ *      H -> [ [1, 0], [0, 1] ]
+ *
+ *  Replaces:
+ *      x -> H11 * x + H12 * y
+ *      y -> H21 * x + H22 * y
+ */
+
+static
+void
+rotm_kernel8(int n, double *x, double *y, double H[4])
+{
+    register int i;
+    register double tmp;
+
+    for (i = 0; i < n; i+=8) {
+        tmp = x[i+0], x[i+0] = H[0]*x[i+0] + H[1]*y[i+0], y[i+0] = H[2]*y[i+0] + H[3]*tmp;
+        tmp = x[i+1], x[i+1] = H[0]*x[i+1] + H[1]*y[i+1], y[i+1] = H[2]*y[i+1] + H[3]*tmp;
+        tmp = x[i+2], x[i+2] = H[0]*x[i+2] + H[1]*y[i+2], y[i+2] = H[2]*y[i+2] + H[3]*tmp;
+        tmp = x[i+3], x[i+3] = H[0]*x[i+3] + H[1]*y[i+3], y[i+3] = H[2]*y[i+3] + H[3]*tmp;
+        tmp = x[i+4], x[i+4] = H[0]*x[i+4] + H[1]*y[i+4], y[i+4] = H[2]*y[i+4] + H[3]*tmp;
+        tmp = x[i+5], x[i+5] = H[0]*x[i+5] + H[1]*y[i+5], y[i+5] = H[2]*y[i+5] + H[3]*tmp;
+        tmp = x[i+6], x[i+6] = H[0]*x[i+6] + H[1]*y[i+6], y[i+6] = H[2]*y[i+6] + H[3]*tmp;
+        tmp = x[i+7], x[i+7] = H[0]*x[i+7] + H[1]*y[i+7], y[i+7] = H[2]*y[i+7] + H[3]*tmp;
+    }
+}
+
+error
+blas·rotm(int len, double *x, double *y, double p[5])
+{
+    int    n, flag;
+    double tmp, H[4];
+
+    flag = math·round(p[0]);
+    switch (flag) {
+        case  0: H[0] = p[1], H[1] = p[2], H[2] = p[3], H[3] = p[4]; break;
+        case -1: H[0] = +1,   H[1] = p[2], H[2] = p[3], H[3] = +1;   break;
+        case +1: H[0] = p[1], H[1] = +1,   H[2] = -1,   H[3] = p[4]; break;
+        case -2: H[0] = +1,   H[1] = 0,    H[2] = 0,    H[3] = +1;   break;
+        default:
+            errorf("rotm: flag '%d' unrecognized", flag);
+            return 1;
+    }
+
+    n = len & ~7;
+    rotm_kernel8(n, x, y, H);
+
+    for (; n < len; n++) {
+        tmp = x[n], x[n] = H[0]*x[n] + H[1]*y[n], y[n] = H[2]*y[n] + H[3]*tmp;
+    }
+
+    return 0;
+}
+
+
 /*
  * Scale vector
  * x = ax
@@ -47,7 +218,7 @@ scale_kernel8(int n, double *x, double a)
 }
 
 void
-blas·scalevec(int len, double *x, double a)
+blas·scale(int len, double *x, double a)
 {
     int n;
 
@@ -403,11 +574,11 @@ blas·gemm(int n1, int n2, int n3, double a, double *m1, double *m2, double b, d
 }
 
 #define NITER 5000
-#define NCOL  57
-#define NROW  57
+#define NCOL  10007
+#define NROW  10007
 
 error
-main()
+test·gemm()
 {
     int i, n;
     clock_t t;
@@ -468,3 +639,61 @@ main()
 
     return 0;
 }
+
+void
+print·array(int n, double *x)
+{
+    double *end;
+    printf("[");
+    for (end=x+n; x != end; ++x) {
+        printf("%f,", *x);
+    }
+    printf("]\n");
+}
+
+error
+main()
+{
+    int i, n;
+    double *x, *y;
+    double tprof[2];
+    clock_t t;
+
+    x = malloc(sizeof(*x)*NCOL);
+    y = malloc(sizeof(*x)*NCOL);
+
+    for (n = 0; n < NITER; n++) {
+        for (i = 0; i < NCOL; i++) {
+            x[i] = i*i+1;
+            y[i] = i+1;
+        }
+
+        t = clock();
+        blas·rot(NCOL, x, y, .707, .707); 
+        t = clock() - t;
+        tprof[0] += 1000.*t/CLOCKS_PER_SEC;
+
+        for (i = 0; i < NCOL; i++) {
+            x[i] = i*i+1;
+            y[i] = i+1;
+        }
+
+        t = clock();
+        cblas_drot(NCOL, x, 1, y, 1, .707, .707);
+        t = clock() - t;
+        tprof[1] += 1000.*t/CLOCKS_PER_SEC;
+    }
+
+    printf("mean time/iteration (naive): %fms\n", tprof[0]/NITER);
+    printf("mean time/iteration (oblas): %fms\n", tprof[1]/NITER);
+
+    double a, b, c, s;
+
+    a = 10.234, b = 2.;
+    cblas_drotg(&a, &b, &c, &s);
+    printf("%f, %f, %f, %f\n", a, b, c, s);
+
+    a = 10.234, b = 2.;
+    blas·rotg(&a, &b, &c, &s);
+    printf("%f, %f, %f, %f\n", a, b, c, s);
+}
diff --git a/sys/libn/random.c b/sys/libn/random.c
index 1e2c2d5..558641e 100644
--- a/sys/libn/random.c
+++ b/sys/libn/random.c
@@ -69,7 +69,7 @@ double
 rng·random()
 {
     uint64 r = xoshiro256ss(&RNG); 
-    return (double)r / UINT64_MAX;
+    return (double)r / (double)UINT64_MAX;
 }
 
 /* Returns true or false on success of trial */
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 4280e27..ca53bdc 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -180,6 +180,7 @@ str·makecap(const byte* s, vlong len, vlong cap)
 cleanup:
     free(h);
     panicf("Attempted to create a string with less capacity than length");
+    return nil;
 }
 
 // New returns a new dynamic string object, initialized from the given C string.
-- 
cgit v1.2.1