aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-25 15:33:54 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-25 15:33:54 -0700
commit5e53685221576ad6ec53bafffd14bc7d5e01fa30 (patch)
tree9780092401ccd8a116fbc33a50756cf2a90985b6 /sys
parent70148cca0463cc45408401a0f2d76ba805d0a157 (diff)
deprecated old python generation files
Diffstat (limited to 'sys')
-rw-r--r--sys/cmd/cc/ast.c167
-rw-r--r--sys/cmd/cc/cc.c3
-rw-r--r--sys/cmd/cc/cc.h42
-rw-r--r--sys/cmd/rules.mk4
-rw-r--r--sys/libmath/blas.c18
-rw-r--r--sys/libmath/blas1.c2135
-rwxr-xr-xsys/libmath/gen1.py360
-rwxr-xr-xsys/libmath/gen2.py410
-rw-r--r--sys/libmath/rules.mk4
9 files changed, 257 insertions, 2886 deletions
diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c
index 49e35fd..0547712 100644
--- a/sys/cmd/cc/ast.c
+++ b/sys/cmd/cc/ast.c
@@ -17,15 +17,20 @@
// -----------------------------------------------------------------------
// helper functions
+static
string
nameof(Name *n)
{
- if (n->kind & Nident)
+ switch (n->kind) {
+ case Nident:
return n->ident;
- if (n->kind & Nparen)
+ case Nparen:
return nameof(n->paren->name);
-
- panicf("ill-formed declarator name");
+ case Nindex:
+ case Ncall:
+ return nameof(n->sfx.name);
+ }
+ panicf("unreachable");
}
static
@@ -201,8 +206,12 @@ nomatch(Token t, vlong kind)
static error spec(Parser *, Lexer *, uint64 *);
static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab);
+static Type *typeofspec(Parser *, Lexer *, uint64 *);
+static Type *typeofdtor(Dtor *, Type *);
+
static Decl *decl(Parser *, Lexer *);
static Expr *expr(Parser *, Lexer *);
+
static error blkstmt(Parser *, Lexer *, Stmt **);
#define MAKEX(x, state) alloc((x)), (x)->kind = X##state
@@ -434,6 +443,7 @@ Bad:
return nil;
}
+#if 0
static
Type*
type(Parser *p, Lexer *lx)
@@ -462,6 +472,7 @@ Bad:
errorat(lx->pos, "failed to parse type expression");
return nil;
}
+#endif
static
Expr*
@@ -1006,12 +1017,115 @@ Bad:
// declarations
static
+Type *
+typeofname(Name *name, Type *type)
+{
+ switch (name->kind) {
+ case Nident:
+ return type;
+ case Nparen:
+ return typeofdtor(name->paren, type);
+ case Nindex:
+ return typeofname(name->sfx.name, array(type, name->sfx.idx.q, name->sfx.idx.x));
+ case Ncall:
+ return typeofname(name->sfx.name, func(type, name->sfx.call.n, name->sfx.call.arg, name->sfx.call.dots));
+ default:
+ panicf("unreachable");
+ }
+}
+
+static
+Type *
+typeofdtor(Dtor *decl, Type* type)
+{
+ int n;
+ Ptr *p;
+ uint64 b, tmp;
+
+ n = 0;
+ p = &decl->ptr;
+ b = p->kind;
+ while (b & 1) {
+ type = ptr(type, b >> 1);
+ if (++n >= 8) {
+ p = p->link;
+ b = p->kind;
+ } else {
+ b >>= 6;
+ }
+ }
+
+ return typeofname(decl->name, type);
+}
+
+static
+Type *
+aggr(Parser *p, Lexer *lx, string name, int kind)
+{
+ int n;
+ uint64 s;
+ Dtor *dt;
+ Token tk;
+ Type *t;
+ Type *fs[1024];
+ byte *nm[1024];
+ Expr *cx[1024];
+
+ n = -1;
+ for (;tk.kind != Arbrace && n < arrlen(fs); n++) {
+ t = typeofspec(p, lx, &s);
+
+ Field:
+ dt = getdtor(p);
+ dtor(p, lx, dt, 0);
+ fs[n] = typeofdtor(dt, t);
+ nm[n] = nameof(dt->name);
+ putdtor(p);
+
+ tk = peek(p, 0);
+ switch (tk.kind) {
+ case Acolon:
+ advance(p, lx);
+ cx[n] = expr(p, lx);
+ tk = peek(p, 0);
+ if (tk.kind == Asemi) {
+ tk = advance(p, lx);
+ continue;
+ }
+ if (tk.kind != Acomma) {
+ errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]);
+ goto Bad;
+ }
+ /* fallthrough */
+ case Acomma:
+ advance(p, lx);
+ n++;
+ goto Field;
+
+ case Asemi:
+ tk = advance(p, lx);
+ continue;
+
+ default:
+ errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]);
+ goto Bad;
+ }
+ }
+
+Bad:
+ errorat(tk.pos, "failed to parse aggregate declaration");
+ return nil;
+}
+
+static
error
spec(Parser *p, Lexer *lx, uint64 *spec)
{
Token t;
int n;
Sym *typ;
+ string name;
+ Type *tag;
uint64 s, sm;
s = 0;
@@ -1073,11 +1187,46 @@ spec(Parser *p, Lexer *lx, uint64 *spec)
errorat(lx->pos, "more than one base type specified");
goto Bad;
}
+ s += Bit(n);
break;
case Kstruct: case Kunion:
+ if (s & (Tstruct | Tunion)) {
+ errorat(lx->pos, "more than one aggregate type specified");
+ goto Bad;
+ }
+ s += Bit(n);
+ t = advance(p, lx);
+ if (t.kind != Aident && t.kind != Albrace) {
+ errorat(t.pos, "struct specifier missing valid declaration");
+ goto Bad;
+ }
+
+ name = nil;
+ tag = nil;
+ if (t.kind == Aident) {
+ name = t.val.s;
+ t = advance(p, lx);
+ }
+ if (t.kind == Albrace) {
+ t = advance(p, lx);
+ tag = aggr(p, lx, name, s & (Tstruct | Tunion));
+
+ if (nomatch(t, Arbrace)) {
+ errorat(t.pos, "invalid token %s in struct/union declaration", tokens[t.kind]);
+ goto Bad;
+ }
+ }
+ break;
+
case Kenum:
+ if (s & Tenum) {
+ errorat(lx->pos, "more than one enum type specifier found");
+ goto Bad;
+ }
+ s += Bit(n);
panicf("need to implement");
+ break;
default:
errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]);
@@ -1100,7 +1249,7 @@ Bad:
static
Type*
-typespec(Parser *p, Lexer *lx, uint64 *s)
+typeofspec(Parser *p, Lexer *lx, uint64 *s)
{
int n;
uint64 m;
@@ -1123,7 +1272,9 @@ typespec(Parser *p, Lexer *lx, uint64 *s)
}
return C.type.info + indextypespec[n];
}
- panicf("unreachable");
+
+ errorat(lx->pos, "invalid type specifier");
+ return nil;
}
@@ -1301,7 +1452,7 @@ name(Parser *p, Lexer *lx, Name **nmp, int ab)
} while (t = peek(p, 0), t.kind == Acomma);
if (t.kind == Aellip) {
- nm->sfx.call.vararg = 1;
+ nm->sfx.call.dots = 1;
t = advance(p, lx);
}
@@ -1362,7 +1513,7 @@ Key:
switch (k = t.kind) {
case Akeywd:
if (Kconst <= k && k <= Katomic)
- ptr->kind |= Bit(n + (t.val.i - Kconst + 1));
+ ptr->kind |= Bit(6*n + (t.val.i - Kconst + 1));
else {
errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]);
goto Bad;
diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c
index 24d8e45..d5cd995 100644
--- a/sys/cmd/cc/cc.c
+++ b/sys/cmd/cc/cc.c
@@ -92,6 +92,9 @@ END:
}
// -----------------------------------------------------------------------
+// type interning
+
+// -----------------------------------------------------------------------
// universal compiler builtins
#define KEYWORD(a, b) b,
diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h
index e8ab05e..ce7eca7 100644
--- a/sys/cmd/cc/cc.h
+++ b/sys/cmd/cc/cc.h
@@ -374,7 +374,7 @@ enum
Dvar,
Dvars,
- /* names (shouldn't interact with base AST node enumeration */
+ /* names */
Nident,
Nparen,
Nindex,
@@ -577,7 +577,7 @@ struct Name
} idx;
struct {
int n;
- int vararg : 1;
+ int dots : 1;
Obj *arg;
} call;
};
@@ -641,7 +641,7 @@ struct Type
uintptr size;
uintptr max;
uint16 align : 8;
- uint16 sign : 2;
+ uint8 sign : 2;
union {
struct {
uint32 qual;
@@ -655,12 +655,17 @@ struct Type
int len;
Obj *f;
Expr *x;
- } agr;
+ } aggr;
struct {
int len;
string *s;
Expr *x;
} enm;
+ struct {
+ Type *ret;
+ int n;
+ Type **arg;
+ } func;
};
};
@@ -701,7 +706,7 @@ struct Parser
error parse(Parser *, Lexer *);
// -----------------------------------------------------------------------
-// global compiler
+// global compiler data
struct StrTab
{
@@ -714,14 +719,19 @@ struct StrTab
int32 *vals;
};
-/* cc.c string functions */
-int32 intern(byte **str);
-string internview(byte* beg, byte *end);
+struct TypeSet
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ Type **keys;
+};
/* main data */
struct Compiler
{
- /* storage */
mem·Arena *heap;
StrTab strs;
string outfile;
@@ -739,15 +749,23 @@ struct Compiler
} inc;
struct {
+ struct TypeSet p; // pointer
+ struct TypeSet a; // array
+ struct TypeSet f; // funcs
+ struct TypeSet s; // structs
+ struct TypeSet u; // unions
+ struct TypeSet e; // enums
+
int cap;
int len;
Type *info;
} type;
-};
+};
extern Compiler C;
-/* cc.c compiler functions */
-void init();
+/* cc.c functions */
+void init();
+int32 intern(byte **str);
#undef iota
diff --git a/sys/cmd/rules.mk b/sys/cmd/rules.mk
index 4905a5f..81c1cbe 100644
--- a/sys/cmd/rules.mk
+++ b/sys/cmd/rules.mk
@@ -5,8 +5,8 @@ include share/push.mk
DIR := $(d)/cat
include $(DIR)/rules.mk
-DIR := $(d)/cc
-include $(DIR)/rules.mk
+# DIR := $(d)/cc
+# include $(DIR)/rules.mk
DIR := $(d)/rc
include $(DIR)/rules.mk
diff --git a/sys/libmath/blas.c b/sys/libmath/blas.c
index 43b0e70..224480b 100644
--- a/sys/libmath/blas.c
+++ b/sys/libmath/blas.c
@@ -6,15 +6,15 @@
#include <vendor/blas/cblas.h>
-#define NCOL 2000
-#define NROW 2000
+#define NCOL 20000
+#define NROW 20000
#define NIT 2000
-#define INC 2
+#define INC 1
error
main()
{
int i, j, nit;
- double *x, *y, *m, res[2];
+ double *x, *y, res[2];
clock_t t;
double tprof[2] = { 0 };
@@ -23,17 +23,14 @@ main()
x = malloc(sizeof(*x)*NCOL);
y = malloc(sizeof(*x)*NROW);
- m = malloc(sizeof(*x)*NROW*NCOL);
#define DO_0 t = clock(); \
- blas·gerd(NROW/INC, NCOL/INC, 1.2, x, INC, y, INC, m, NCOL); \
- res[0] += m[0]; \
+ res[0] += blas·dasum(NROW/INC, x, INC); \
t = clock() - t; \
tprof[0] += 1000.*t/CLOCKS_PER_SEC; \
#define DO_1 t = clock(); \
- cblas_dger(CblasRowMajor, NROW/INC, NCOL/INC, 1.2, x, INC, y, INC, m, NCOL); \
- res[1] += m[0]; \
+ res[1] += cblas_dasum(NROW/INC, x, INC); \
t = clock() - t; \
tprof[1] += 1000.*t/CLOCKS_PER_SEC;
@@ -41,9 +38,6 @@ main()
for (i = 0; i < NROW; i++) {
x[i] = rng·random();
y[i] = rng·random();
- for (j = 0; j < NCOL; j++) {
- m[j + NCOL*i] = rng·random();
- }
}
switch (nit % 2) {
diff --git a/sys/libmath/blas1.c b/sys/libmath/blas1.c
index 1907179..d9792f6 100644
--- a/sys/libmath/blas1.c
+++ b/sys/libmath/blas1.c
@@ -1,2082 +1,59 @@
#include <u.h>
-#include <libn.h>
#include <libmath.h>
-/*********************************************************/
-/* THIS CODE IS GENERATED BY GEN1.PY! DON'T EDIT BY HAND */
-/*********************************************************/
-
-
-static
-int
-argminf_kernel16(int *ip, float *x)
-{
- int ix[16];
- float min[16];
- int i;
- int len;
-
- for (i = 0; i < 16; i++) {
- ix[i] = 0;
- }
- for (i = 0; i < 16; i++) {
- min[i] = x[0];
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- if (x[i + 0] < min[0]) {
- ix[0] = (i + 0);
- min[0] = x[ix[0]];
- }
- if (x[i + 1] < min[1]) {
- ix[1] = (i + 1);
- min[1] = x[ix[1]];
- }
- if (x[i + 2] < min[2]) {
- ix[2] = (i + 2);
- min[2] = x[ix[2]];
- }
- if (x[i + 3] < min[3]) {
- ix[3] = (i + 3);
- min[3] = x[ix[3]];
- }
- if (x[i + 4] < min[4]) {
- ix[4] = (i + 4);
- min[4] = x[ix[4]];
- }
- if (x[i + 5] < min[5]) {
- ix[5] = (i + 5);
- min[5] = x[ix[5]];
- }
- if (x[i + 6] < min[6]) {
- ix[6] = (i + 6);
- min[6] = x[ix[6]];
- }
- if (x[i + 7] < min[7]) {
- ix[7] = (i + 7);
- min[7] = x[ix[7]];
- }
- if (x[i + 8] < min[8]) {
- ix[8] = (i + 8);
- min[8] = x[ix[8]];
- }
- if (x[i + 9] < min[9]) {
- ix[9] = (i + 9);
- min[9] = x[ix[9]];
- }
- if (x[i + 10] < min[10]) {
- ix[10] = (i + 10);
- min[10] = x[ix[10]];
- }
- if (x[i + 11] < min[11]) {
- ix[11] = (i + 11);
- min[11] = x[ix[11]];
- }
- if (x[i + 12] < min[12]) {
- ix[12] = (i + 12);
- min[12] = x[ix[12]];
- }
- if (x[i + 13] < min[13]) {
- ix[13] = (i + 13);
- min[13] = x[ix[13]];
- }
- if (x[i + 14] < min[14]) {
- ix[14] = (i + 14);
- min[14] = x[ix[14]];
- }
- if (x[i + 15] < min[15]) {
- ix[15] = (i + 15);
- min[15] = x[ix[15]];
- }
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-static
-int
-argminf_s_kernel8(int *ip, int incx, float *x)
-{
- int ix[8];
- float min[8];
- int i;
- int len;
-
- for (i = 0; i < 8; i++) {
- ix[i] = 0;
- }
- for (i = 0; i < 8; i++) {
- min[i] = x[0];
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- if (x[incx * (i + 0)] < min[0]) {
- ix[0] = (i + 0);
- min[0] = x[incx * ix[0]];
- }
- if (x[incx * (i + 1)] < min[1]) {
- ix[1] = (i + 1);
- min[1] = x[incx * ix[1]];
- }
- if (x[incx * (i + 2)] < min[2]) {
- ix[2] = (i + 2);
- min[2] = x[incx * ix[2]];
- }
- if (x[incx * (i + 3)] < min[3]) {
- ix[3] = (i + 3);
- min[3] = x[incx * ix[3]];
- }
- if (x[incx * (i + 4)] < min[4]) {
- ix[4] = (i + 4);
- min[4] = x[incx * ix[4]];
- }
- if (x[incx * (i + 5)] < min[5]) {
- ix[5] = (i + 5);
- min[5] = x[incx * ix[5]];
- }
- if (x[incx * (i + 6)] < min[6]) {
- ix[6] = (i + 6);
- min[6] = x[incx * ix[6]];
- }
- if (x[incx * (i + 7)] < min[7]) {
- ix[7] = (i + 7);
- min[7] = x[incx * ix[7]];
- }
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-int
-blas·argminf(int len, float *x, int incx)
-{
- int i;
- int ix;
- float min;
-
- if (incx == 1) {
- i = len;
- ix = argminf_kernel16(&i, x);
- } else {
- i = len;
- ix = argminf_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- if (x[incx * i] < min) {
- ix = i;
- min = x[incx * ix];
- }
- }
- for (; i < len; i++) {
- if (x[i] < min) {
- ix = i;
- min = x[ix];
- }
- }
-
- return ix;
-}
-
-static
-int
-argmaxf_kernel16(int *ip, float *x)
-{
- int ix[16];
- float max[16];
- int i;
- int len;
-
- for (i = 0; i < 16; i++) {
- ix[i] = 0;
- }
- for (i = 0; i < 16; i++) {
- max[i] = x[0];
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- if (x[i + 0] > max[0]) {
- ix[0] = (i + 0);
- max[0] = x[ix[0]];
- }
- if (x[i + 1] > max[1]) {
- ix[1] = (i + 1);
- max[1] = x[ix[1]];
- }
- if (x[i + 2] > max[2]) {
- ix[2] = (i + 2);
- max[2] = x[ix[2]];
- }
- if (x[i + 3] > max[3]) {
- ix[3] = (i + 3);
- max[3] = x[ix[3]];
- }
- if (x[i + 4] > max[4]) {
- ix[4] = (i + 4);
- max[4] = x[ix[4]];
- }
- if (x[i + 5] > max[5]) {
- ix[5] = (i + 5);
- max[5] = x[ix[5]];
- }
- if (x[i + 6] > max[6]) {
- ix[6] = (i + 6);
- max[6] = x[ix[6]];
- }
- if (x[i + 7] > max[7]) {
- ix[7] = (i + 7);
- max[7] = x[ix[7]];
- }
- if (x[i + 8] > max[8]) {
- ix[8] = (i + 8);
- max[8] = x[ix[8]];
- }
- if (x[i + 9] > max[9]) {
- ix[9] = (i + 9);
- max[9] = x[ix[9]];
- }
- if (x[i + 10] > max[10]) {
- ix[10] = (i + 10);
- max[10] = x[ix[10]];
- }
- if (x[i + 11] > max[11]) {
- ix[11] = (i + 11);
- max[11] = x[ix[11]];
- }
- if (x[i + 12] > max[12]) {
- ix[12] = (i + 12);
- max[12] = x[ix[12]];
- }
- if (x[i + 13] > max[13]) {
- ix[13] = (i + 13);
- max[13] = x[ix[13]];
- }
- if (x[i + 14] > max[14]) {
- ix[14] = (i + 14);
- max[14] = x[ix[14]];
- }
- if (x[i + 15] > max[15]) {
- ix[15] = (i + 15);
- max[15] = x[ix[15]];
- }
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-static
-int
-argmaxf_s_kernel8(int *ip, int incx, float *x)
-{
- int ix[8];
- float max[8];
- int i;
- int len;
-
- for (i = 0; i < 8; i++) {
- ix[i] = 0;
- }
- for (i = 0; i < 8; i++) {
- max[i] = x[0];
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- if (x[incx * (i + 0)] > max[0]) {
- ix[0] = (i + 0);
- max[0] = x[incx * ix[0]];
- }
- if (x[incx * (i + 1)] > max[1]) {
- ix[1] = (i + 1);
- max[1] = x[incx * ix[1]];
- }
- if (x[incx * (i + 2)] > max[2]) {
- ix[2] = (i + 2);
- max[2] = x[incx * ix[2]];
- }
- if (x[incx * (i + 3)] > max[3]) {
- ix[3] = (i + 3);
- max[3] = x[incx * ix[3]];
- }
- if (x[incx * (i + 4)] > max[4]) {
- ix[4] = (i + 4);
- max[4] = x[incx * ix[4]];
- }
- if (x[incx * (i + 5)] > max[5]) {
- ix[5] = (i + 5);
- max[5] = x[incx * ix[5]];
- }
- if (x[incx * (i + 6)] > max[6]) {
- ix[6] = (i + 6);
- max[6] = x[incx * ix[6]];
- }
- if (x[incx * (i + 7)] > max[7]) {
- ix[7] = (i + 7);
- max[7] = x[incx * ix[7]];
- }
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-int
-blas·argmaxf(int len, float *x, int incx)
-{
- int i;
- int ix;
- float max;
-
- if (incx == 1) {
- i = len;
- ix = argmaxf_kernel16(&i, x);
- } else {
- i = len;
- ix = argmaxf_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- if (x[incx * i] > max) {
- ix = i;
- max = x[incx * ix];
- }
- }
- for (; i < len; i++) {
- if (x[i] > max) {
- ix = i;
- max = x[ix];
- }
- }
-
- return ix;
-}
-
-static
-void
-copyf_kernel16(int *ip, float *x, float *y)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- y[i + 0] = x[i + 0];
- y[i + 1] = x[i + 1];
- y[i + 2] = x[i + 2];
- y[i + 3] = x[i + 3];
- y[i + 4] = x[i + 4];
- y[i + 5] = x[i + 5];
- y[i + 6] = x[i + 6];
- y[i + 7] = x[i + 7];
- y[i + 8] = x[i + 8];
- y[i + 9] = x[i + 9];
- y[i + 10] = x[i + 10];
- y[i + 11] = x[i + 11];
- y[i + 12] = x[i + 12];
- y[i + 13] = x[i + 13];
- y[i + 14] = x[i + 14];
- y[i + 15] = x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-copyf_s_kernel8(int *ip, int incx, int incy, float *x, float *y)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- y[incy * (i + 0)] = x[incx * (i + 0)];
- y[incy * (i + 1)] = x[incx * (i + 1)];
- y[incy * (i + 2)] = x[incx * (i + 2)];
- y[incy * (i + 3)] = x[incx * (i + 3)];
- y[incy * (i + 4)] = x[incx * (i + 4)];
- y[incy * (i + 5)] = x[incx * (i + 5)];
- y[incy * (i + 6)] = x[incx * (i + 6)];
- y[incy * (i + 7)] = x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·copyf(int len, float *x, int incx, float *y, int incy)
-{
- int i;
-
- if (incx == 1 && incy == 1) {
- i = len;
- copyf_kernel16(&i, x, y);
- } else {
- i = len;
- copyf_s_kernel8(&i, incx, incy, x, y);
- }
- for (; i < len; i++) {
- y[incy * i] = x[incx * i];
- }
-}
-
-static
-void
-axpyf_kernel16(int *ip, float *x, float a, float *y)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- y[i + 0] = y[i + 0] + a * x[i + 0];
- y[i + 1] = y[i + 1] + a * x[i + 1];
- y[i + 2] = y[i + 2] + a * x[i + 2];
- y[i + 3] = y[i + 3] + a * x[i + 3];
- y[i + 4] = y[i + 4] + a * x[i + 4];
- y[i + 5] = y[i + 5] + a * x[i + 5];
- y[i + 6] = y[i + 6] + a * x[i + 6];
- y[i + 7] = y[i + 7] + a * x[i + 7];
- y[i + 8] = y[i + 8] + a * x[i + 8];
- y[i + 9] = y[i + 9] + a * x[i + 9];
- y[i + 10] = y[i + 10] + a * x[i + 10];
- y[i + 11] = y[i + 11] + a * x[i + 11];
- y[i + 12] = y[i + 12] + a * x[i + 12];
- y[i + 13] = y[i + 13] + a * x[i + 13];
- y[i + 14] = y[i + 14] + a * x[i + 14];
- y[i + 15] = y[i + 15] + a * x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-axpyf_s_kernel8(int *ip, float *x, float a, int incx, float *y, int incy)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- y[incy * (i + 0)] = y[incy * (i + 0)] + a * x[incx * (i + 0)];
- y[incy * (i + 1)] = y[incy * (i + 1)] + a * x[incx * (i + 1)];
- y[incy * (i + 2)] = y[incy * (i + 2)] + a * x[incx * (i + 2)];
- y[incy * (i + 3)] = y[incy * (i + 3)] + a * x[incx * (i + 3)];
- y[incy * (i + 4)] = y[incy * (i + 4)] + a * x[incx * (i + 4)];
- y[incy * (i + 5)] = y[incy * (i + 5)] + a * x[incx * (i + 5)];
- y[incy * (i + 6)] = y[incy * (i + 6)] + a * x[incx * (i + 6)];
- y[incy * (i + 7)] = y[incy * (i + 7)] + a * x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·axpyf(int len, float a, float *x, int incx, float *y, int incy)
-{
- int i;
-
- if (incx == 1 && incy == 1) {
- i = len;
- axpyf_kernel16(&i, x, a, y);
- } else {
- i = len;
- axpyf_s_kernel8(&i, x, a, incx, y, incy);
- }
- for (; i < len; i++) {
- y[incy * i] = y[incy * i] + a * x[incx * i];
- }
-}
-
-static
-void
-axpbyf_kernel16(int *ip, float a, float *y, float b, float *x)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- y[i + 0] = b * y[i + 0] + a * x[i + 0];
- y[i + 1] = b * y[i + 1] + a * x[i + 1];
- y[i + 2] = b * y[i + 2] + a * x[i + 2];
- y[i + 3] = b * y[i + 3] + a * x[i + 3];
- y[i + 4] = b * y[i + 4] + a * x[i + 4];
- y[i + 5] = b * y[i + 5] + a * x[i + 5];
- y[i + 6] = b * y[i + 6] + a * x[i + 6];
- y[i + 7] = b * y[i + 7] + a * x[i + 7];
- y[i + 8] = b * y[i + 8] + a * x[i + 8];
- y[i + 9] = b * y[i + 9] + a * x[i + 9];
- y[i + 10] = b * y[i + 10] + a * x[i + 10];
- y[i + 11] = b * y[i + 11] + a * x[i + 11];
- y[i + 12] = b * y[i + 12] + a * x[i + 12];
- y[i + 13] = b * y[i + 13] + a * x[i + 13];
- y[i + 14] = b * y[i + 14] + a * x[i + 14];
- y[i + 15] = b * y[i + 15] + a * x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-axpbyf_s_kernel8(int *ip, float a, int incx, float *y, float b, int incy, float *x)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- y[incy * (i + 0)] = b * y[incy * (i + 0)] + a * x[incx * (i + 0)];
- y[incy * (i + 1)] = b * y[incy * (i + 1)] + a * x[incx * (i + 1)];
- y[incy * (i + 2)] = b * y[incy * (i + 2)] + a * x[incx * (i + 2)];
- y[incy * (i + 3)] = b * y[incy * (i + 3)] + a * x[incx * (i + 3)];
- y[incy * (i + 4)] = b * y[incy * (i + 4)] + a * x[incx * (i + 4)];
- y[incy * (i + 5)] = b * y[incy * (i + 5)] + a * x[incx * (i + 5)];
- y[incy * (i + 6)] = b * y[incy * (i + 6)] + a * x[incx * (i + 6)];
- y[incy * (i + 7)] = b * y[incy * (i + 7)] + a * x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·axpbyf(int len, float a, float *x, int incx, float b, float *y, int incy)
-{
- int i;
-
- if (incx == 1 && incy == 1) {
- i = len;
- axpbyf_kernel16(&i, a, y, b, x);
- } else {
- i = len;
- axpbyf_s_kernel8(&i, a, incx, y, b, incy, x);
- }
- for (; i < len; i++) {
- y[incy * i] = b * y[incy * i] + a * x[incx * i];
- }
-}
-
-static
-float
-dotf_kernel16(int *ip, float *y, float *x)
-{
- float sum[16];
- int i;
- int len;
-
- for (i = 0; i < 16; i++) {
- sum[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- sum[0] += x[i + 0] * y[i + 0];
- sum[1] += x[i + 1] * y[i + 1];
- sum[2] += x[i + 2] * y[i + 2];
- sum[3] += x[i + 3] * y[i + 3];
- sum[4] += x[i + 4] * y[i + 4];
- sum[5] += x[i + 5] * y[i + 5];
- sum[6] += x[i + 6] * y[i + 6];
- sum[7] += x[i + 7] * y[i + 7];
- sum[8] += x[i + 8] * y[i + 8];
- sum[9] += x[i + 9] * y[i + 9];
- sum[10] += x[i + 10] * y[i + 10];
- sum[11] += x[i + 11] * y[i + 11];
- sum[12] += x[i + 12] * y[i + 12];
- sum[13] += x[i + 13] * y[i + 13];
- sum[14] += x[i + 14] * y[i + 14];
- sum[15] += x[i + 15] * y[i + 15];
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-static
-float
-dotf_s_kernel8(int *ip, int incy, float *x, float *y, int incx)
-{
- float sum[8];
- int i;
- int len;
-
- for (i = 0; i < 8; i++) {
- sum[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- sum[0] += x[incx * (i + 0)] * y[incy * (i + 0)];
- sum[1] += x[incx * (i + 1)] * y[incy * (i + 1)];
- sum[2] += x[incx * (i + 2)] * y[incy * (i + 2)];
- sum[3] += x[incx * (i + 3)] * y[incy * (i + 3)];
- sum[4] += x[incx * (i + 4)] * y[incy * (i + 4)];
- sum[5] += x[incx * (i + 5)] * y[incy * (i + 5)];
- sum[6] += x[incx * (i + 6)] * y[incy * (i + 6)];
- sum[7] += x[incx * (i + 7)] * y[incy * (i + 7)];
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-float
-blas·dotf(int len, float *x, int incx, float *y, int incy)
-{
- int i;
- float sum;
-
- if (incx == 1 && incy == 1) {
- i = len;
- sum = dotf_kernel16(&i, y, x);
- } else {
- i = len;
- sum = dotf_s_kernel8(&i, incy, x, y, incx);
- }
- for (; i < len; i++) {
- sum += x[incx * i] * y[incy * i];
- }
-
- return sum;
-}
-
-static
-float
-sumf_kernel16(int *ip, float *x)
-{
- float sum[16];
- int i;
- int len;
-
- for (i = 0; i < 16; i++) {
- sum[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- sum[0] += x[i + 0];
- sum[1] += x[i + 1];
- sum[2] += x[i + 2];
- sum[3] += x[i + 3];
- sum[4] += x[i + 4];
- sum[5] += x[i + 5];
- sum[6] += x[i + 6];
- sum[7] += x[i + 7];
- sum[8] += x[i + 8];
- sum[9] += x[i + 9];
- sum[10] += x[i + 10];
- sum[11] += x[i + 11];
- sum[12] += x[i + 12];
- sum[13] += x[i + 13];
- sum[14] += x[i + 14];
- sum[15] += x[i + 15];
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-static
-float
-sumf_s_kernel8(int *ip, int incx, float *x)
-{
- float sum[8];
- int i;
- int len;
-
- for (i = 0; i < 8; i++) {
- sum[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- sum[0] += x[incx * (i + 0)];
- sum[1] += x[incx * (i + 1)];
- sum[2] += x[incx * (i + 2)];
- sum[3] += x[incx * (i + 3)];
- sum[4] += x[incx * (i + 4)];
- sum[5] += x[incx * (i + 5)];
- sum[6] += x[incx * (i + 6)];
- sum[7] += x[incx * (i + 7)];
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-float
-blas·sumf(int len, float *x, int incx)
-{
- int i;
- float sum;
-
- if (incx == 1) {
- i = len;
- sum = sumf_kernel16(&i, x);
- } else {
- i = len;
- sum = sumf_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- sum += x[incx * i];
- }
-
- return sum;
-}
-
-static
-float
-normf_kernel16(int *ip, float *x)
-{
- int i;
- float nrm[16];
- int len;
-
- for (i = 0; i < 16; i++) {
- nrm[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- nrm[0] += x[i + 0] * x[i + 0];
- nrm[1] += x[i + 1] * x[i + 1];
- nrm[2] += x[i + 2] * x[i + 2];
- nrm[3] += x[i + 3] * x[i + 3];
- nrm[4] += x[i + 4] * x[i + 4];
- nrm[5] += x[i + 5] * x[i + 5];
- nrm[6] += x[i + 6] * x[i + 6];
- nrm[7] += x[i + 7] * x[i + 7];
- nrm[8] += x[i + 8] * x[i + 8];
- nrm[9] += x[i + 9] * x[i + 9];
- nrm[10] += x[i + 10] * x[i + 10];
- nrm[11] += x[i + 11] * x[i + 11];
- nrm[12] += x[i + 12] * x[i + 12];
- nrm[13] += x[i + 13] * x[i + 13];
- nrm[14] += x[i + 14] * x[i + 14];
- nrm[15] += x[i + 15] * x[i + 15];
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- nrm[0] += nrm[i];
- }
-
- return nrm[0];
-}
-
-static
-float
-normf_s_kernel8(int *ip, int incx, float *x)
-{
- int i;
- float nrm[8];
- int len;
-
- for (i = 0; i < 8; i++) {
- nrm[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- nrm[0] += x[incx * (i + 0)] * x[incx * (i + 0)];
- nrm[1] += x[incx * (i + 1)] * x[incx * (i + 1)];
- nrm[2] += x[incx * (i + 2)] * x[incx * (i + 2)];
- nrm[3] += x[incx * (i + 3)] * x[incx * (i + 3)];
- nrm[4] += x[incx * (i + 4)] * x[incx * (i + 4)];
- nrm[5] += x[incx * (i + 5)] * x[incx * (i + 5)];
- nrm[6] += x[incx * (i + 6)] * x[incx * (i + 6)];
- nrm[7] += x[incx * (i + 7)] * x[incx * (i + 7)];
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- nrm[0] += nrm[i];
- }
-
- return nrm[0];
-}
-
-float
-blas·normf(int len, float *x, int incx)
-{
- int i;
- float nrm;
-
- if (incx == 1) {
- i = len;
- nrm = normf_kernel16(&i, x);
- } else {
- i = len;
- nrm = normf_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- nrm += x[incx * i] * x[incx * i];
- }
-
- return math·sqrtf(nrm);
-}
-
-static
-void
-scalef_kernel16(int *ip, float *x, float a)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- x[i + 0] = a * x[i + 0];
- x[i + 1] = a * x[i + 1];
- x[i + 2] = a * x[i + 2];
- x[i + 3] = a * x[i + 3];
- x[i + 4] = a * x[i + 4];
- x[i + 5] = a * x[i + 5];
- x[i + 6] = a * x[i + 6];
- x[i + 7] = a * x[i + 7];
- x[i + 8] = a * x[i + 8];
- x[i + 9] = a * x[i + 9];
- x[i + 10] = a * x[i + 10];
- x[i + 11] = a * x[i + 11];
- x[i + 12] = a * x[i + 12];
- x[i + 13] = a * x[i + 13];
- x[i + 14] = a * x[i + 14];
- x[i + 15] = a * x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-scalef_s_kernel8(int *ip, float *x, int incx, float a)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- x[incx * (i + 0)] = a * x[incx * (i + 0)];
- x[incx * (i + 1)] = a * x[incx * (i + 1)];
- x[incx * (i + 2)] = a * x[incx * (i + 2)];
- x[incx * (i + 3)] = a * x[incx * (i + 3)];
- x[incx * (i + 4)] = a * x[incx * (i + 4)];
- x[incx * (i + 5)] = a * x[incx * (i + 5)];
- x[incx * (i + 6)] = a * x[incx * (i + 6)];
- x[incx * (i + 7)] = a * x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·scalef(int len, float *x, int incx, float a)
-{
- int i;
-
- if (incx == 1) {
- i = len;
- scalef_kernel16(&i, x, a);
- } else {
- i = len;
- scalef_s_kernel8(&i, x, incx, a);
- }
- for (; i < len; i++) {
- x[incx * i] = a * x[incx * i];
- }
-}
-
-static
-void
-rotf_kernel16(int *ip, float sin, float *x, float cos, float *y)
-{
- int i;
- float tmp[16];
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- tmp[0] = x[i + 0], x[i + 0] = cos * x[i + 0] + sin * y[i + 0], y[i + 0] = cos * y[i + 0] - sin * tmp[0];
- tmp[1] = x[i + 1], x[i + 1] = cos * x[i + 1] + sin * y[i + 1], y[i + 1] = cos * y[i + 1] - sin * tmp[1];
- tmp[2] = x[i + 2], x[i + 2] = cos * x[i + 2] + sin * y[i + 2], y[i + 2] = cos * y[i + 2] - sin * tmp[2];
- tmp[3] = x[i + 3], x[i + 3] = cos * x[i + 3] + sin * y[i + 3], y[i + 3] = cos * y[i + 3] - sin * tmp[3];
- tmp[4] = x[i + 4], x[i + 4] = cos * x[i + 4] + sin * y[i + 4], y[i + 4] = cos * y[i + 4] - sin * tmp[4];
- tmp[5] = x[i + 5], x[i + 5] = cos * x[i + 5] + sin * y[i + 5], y[i + 5] = cos * y[i + 5] - sin * tmp[5];
- tmp[6] = x[i + 6], x[i + 6] = cos * x[i + 6] + sin * y[i + 6], y[i + 6] = cos * y[i + 6] - sin * tmp[6];
- tmp[7] = x[i + 7], x[i + 7] = cos * x[i + 7] + sin * y[i + 7], y[i + 7] = cos * y[i + 7] - sin * tmp[7];
- tmp[8] = x[i + 8], x[i + 8] = cos * x[i + 8] + sin * y[i + 8], y[i + 8] = cos * y[i + 8] - sin * tmp[8];
- tmp[9] = x[i + 9], x[i + 9] = cos * x[i + 9] + sin * y[i + 9], y[i + 9] = cos * y[i + 9] - sin * tmp[9];
- tmp[10] = x[i + 10], x[i + 10] = cos * x[i + 10] + sin * y[i + 10], y[i + 10] = cos * y[i + 10] - sin * tmp[10];
- tmp[11] = x[i + 11], x[i + 11] = cos * x[i + 11] + sin * y[i + 11], y[i + 11] = cos * y[i + 11] - sin * tmp[11];
- tmp[12] = x[i + 12], x[i + 12] = cos * x[i + 12] + sin * y[i + 12], y[i + 12] = cos * y[i + 12] - sin * tmp[12];
- tmp[13] = x[i + 13], x[i + 13] = cos * x[i + 13] + sin * y[i + 13], y[i + 13] = cos * y[i + 13] - sin * tmp[13];
- tmp[14] = x[i + 14], x[i + 14] = cos * x[i + 14] + sin * y[i + 14], y[i + 14] = cos * y[i + 14] - sin * tmp[14];
- tmp[15] = x[i + 15], x[i + 15] = cos * x[i + 15] + sin * y[i + 15], y[i + 15] = cos * y[i + 15] - sin * tmp[15];
- }
- *ip = i;
-}
-
-static
-void
-rotf_s_kernel8(int *ip, int incy, float cos, float sin, int incx, float *x, float *y)
-{
- int i;
- float tmp[8];
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- tmp[0] = x[incx * (i + 0)], x[incx * (i + 0)] = cos * x[incx * (i + 0)] + sin * y[incy * (i + 0)], y[incy * (i + 0)] = cos * y[incy * (i + 0)] - sin * tmp[0];
- tmp[1] = x[incx * (i + 1)], x[incx * (i + 1)] = cos * x[incx * (i + 1)] + sin * y[incy * (i + 1)], y[incy * (i + 1)] = cos * y[incy * (i + 1)] - sin * tmp[1];
- tmp[2] = x[incx * (i + 2)], x[incx * (i + 2)] = cos * x[incx * (i + 2)] + sin * y[incy * (i + 2)], y[incy * (i + 2)] = cos * y[incy * (i + 2)] - sin * tmp[2];
- tmp[3] = x[incx * (i + 3)], x[incx * (i + 3)] = cos * x[incx * (i + 3)] + sin * y[incy * (i + 3)], y[incy * (i + 3)] = cos * y[incy * (i + 3)] - sin * tmp[3];
- tmp[4] = x[incx * (i + 4)], x[incx * (i + 4)] = cos * x[incx * (i + 4)] + sin * y[incy * (i + 4)], y[incy * (i + 4)] = cos * y[incy * (i + 4)] - sin * tmp[4];
- tmp[5] = x[incx * (i + 5)], x[incx * (i + 5)] = cos * x[incx * (i + 5)] + sin * y[incy * (i + 5)], y[incy * (i + 5)] = cos * y[incy * (i + 5)] - sin * tmp[5];
- tmp[6] = x[incx * (i + 6)], x[incx * (i + 6)] = cos * x[incx * (i + 6)] + sin * y[incy * (i + 6)], y[incy * (i + 6)] = cos * y[incy * (i + 6)] - sin * tmp[6];
- tmp[7] = x[incx * (i + 7)], x[incx * (i + 7)] = cos * x[incx * (i + 7)] + sin * y[incy * (i + 7)], y[incy * (i + 7)] = cos * y[incy * (i + 7)] - sin * tmp[7];
- }
- *ip = i;
-}
-
-void
-blas·rotf(int len, float *x, int incx, float *y, int incy, float cos, float sin)
-{
- int i;
- float tmp;
-
- if (incx == 1 && incy == 1) {
- i = len;
- rotf_kernel16(&i, sin, x, cos, y);
- } else {
- i = len;
- rotf_s_kernel8(&i, incy, cos, sin, incx, x, y);
- }
- for (; i < len; i++) {
- tmp = x[incx * i], x[incx * i] = cos * x[incx * i] + sin * y[incy * i], y[incy * i] = cos * y[incy * i] - sin * tmp;
- }
-}
-
-static
-void
-rotgf_kernel16(int *ip, float *x, float H[5], float *y)
-{
- float tmp[16];
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- tmp[0] = x[i + 0], x[i + 0] = H[1] * x[i + 0] + H[2] * y[i + 0], y[i + 0] = H[3] * y[i + 0] + H[4] * tmp[0];
- tmp[1] = x[i + 1], x[i + 1] = H[1] * x[i + 1] + H[2] * y[i + 1], y[i + 1] = H[3] * y[i + 1] + H[4] * tmp[1];
- tmp[2] = x[i + 2], x[i + 2] = H[1] * x[i + 2] + H[2] * y[i + 2], y[i + 2] = H[3] * y[i + 2] + H[4] * tmp[2];
- tmp[3] = x[i + 3], x[i + 3] = H[1] * x[i + 3] + H[2] * y[i + 3], y[i + 3] = H[3] * y[i + 3] + H[4] * tmp[3];
- tmp[4] = x[i + 4], x[i + 4] = H[1] * x[i + 4] + H[2] * y[i + 4], y[i + 4] = H[3] * y[i + 4] + H[4] * tmp[4];
- tmp[5] = x[i + 5], x[i + 5] = H[1] * x[i + 5] + H[2] * y[i + 5], y[i + 5] = H[3] * y[i + 5] + H[4] * tmp[5];
- tmp[6] = x[i + 6], x[i + 6] = H[1] * x[i + 6] + H[2] * y[i + 6], y[i + 6] = H[3] * y[i + 6] + H[4] * tmp[6];
- tmp[7] = x[i + 7], x[i + 7] = H[1] * x[i + 7] + H[2] * y[i + 7], y[i + 7] = H[3] * y[i + 7] + H[4] * tmp[7];
- tmp[8] = x[i + 8], x[i + 8] = H[1] * x[i + 8] + H[2] * y[i + 8], y[i + 8] = H[3] * y[i + 8] + H[4] * tmp[8];
- tmp[9] = x[i + 9], x[i + 9] = H[1] * x[i + 9] + H[2] * y[i + 9], y[i + 9] = H[3] * y[i + 9] + H[4] * tmp[9];
- tmp[10] = x[i + 10], x[i + 10] = H[1] * x[i + 10] + H[2] * y[i + 10], y[i + 10] = H[3] * y[i + 10] + H[4] * tmp[10];
- tmp[11] = x[i + 11], x[i + 11] = H[1] * x[i + 11] + H[2] * y[i + 11], y[i + 11] = H[3] * y[i + 11] + H[4] * tmp[11];
- tmp[12] = x[i + 12], x[i + 12] = H[1] * x[i + 12] + H[2] * y[i + 12], y[i + 12] = H[3] * y[i + 12] + H[4] * tmp[12];
- tmp[13] = x[i + 13], x[i + 13] = H[1] * x[i + 13] + H[2] * y[i + 13], y[i + 13] = H[3] * y[i + 13] + H[4] * tmp[13];
- tmp[14] = x[i + 14], x[i + 14] = H[1] * x[i + 14] + H[2] * y[i + 14], y[i + 14] = H[3] * y[i + 14] + H[4] * tmp[14];
- tmp[15] = x[i + 15], x[i + 15] = H[1] * x[i + 15] + H[2] * y[i + 15], y[i + 15] = H[3] * y[i + 15] + H[4] * tmp[15];
- }
- *ip = i;
-}
-
-static
-void
-rotgf_s_kernel8(int *ip, int incx, int incy, float *x, float H[5], float *y)
-{
- float tmp[8];
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- tmp[0] = x[incx * (i + 0)], x[incx * (i + 0)] = H[1] * x[incx * (i + 0)] + H[2] * y[incy * (i + 0)], y[incy * (i + 0)] = H[3] * y[incy * (i + 0)] + H[4] * tmp[0];
- tmp[1] = x[incx * (i + 1)], x[incx * (i + 1)] = H[1] * x[incx * (i + 1)] + H[2] * y[incy * (i + 1)], y[incy * (i + 1)] = H[3] * y[incy * (i + 1)] + H[4] * tmp[1];
- tmp[2] = x[incx * (i + 2)], x[incx * (i + 2)] = H[1] * x[incx * (i + 2)] + H[2] * y[incy * (i + 2)], y[incy * (i + 2)] = H[3] * y[incy * (i + 2)] + H[4] * tmp[2];
- tmp[3] = x[incx * (i + 3)], x[incx * (i + 3)] = H[1] * x[incx * (i + 3)] + H[2] * y[incy * (i + 3)], y[incy * (i + 3)] = H[3] * y[incy * (i + 3)] + H[4] * tmp[3];
- tmp[4] = x[incx * (i + 4)], x[incx * (i + 4)] = H[1] * x[incx * (i + 4)] + H[2] * y[incy * (i + 4)], y[incy * (i + 4)] = H[3] * y[incy * (i + 4)] + H[4] * tmp[4];
- tmp[5] = x[incx * (i + 5)], x[incx * (i + 5)] = H[1] * x[incx * (i + 5)] + H[2] * y[incy * (i + 5)], y[incy * (i + 5)] = H[3] * y[incy * (i + 5)] + H[4] * tmp[5];
- tmp[6] = x[incx * (i + 6)], x[incx * (i + 6)] = H[1] * x[incx * (i + 6)] + H[2] * y[incy * (i + 6)], y[incy * (i + 6)] = H[3] * y[incy * (i + 6)] + H[4] * tmp[6];
- tmp[7] = x[incx * (i + 7)], x[incx * (i + 7)] = H[1] * x[incx * (i + 7)] + H[2] * y[incy * (i + 7)], y[incy * (i + 7)] = H[3] * y[incy * (i + 7)] + H[4] * tmp[7];
- }
- *ip = i;
-}
-
-void
-blas·rotgf(int len, float *x, int incx, float *y, int incy, float H[5])
-{
- int i;
- float tmp;
-
- if (incx == 1 && incy == 1) {
- i = len;
- rotgf_kernel16(&i, x, H, y);
- } else {
- i = len;
- rotgf_s_kernel8(&i, incx, incy, x, H, y);
- }
- for (; i < len; i++) {
- tmp = x[incx * i], x[incx * i] = H[1] * x[incx * i] + H[2] * y[incy * i], y[incy * i] = H[3] * y[incy * i] + H[4] * tmp;
- }
-}
-
-static
-int
-argmind_kernel16(int *ip, double *x)
-{
- double min[16];
- int i;
- int ix[16];
- int len;
-
- for (i = 0; i < 16; i++) {
- min[i] = x[0];
- }
- for (i = 0; i < 16; i++) {
- ix[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- if (x[i + 0] < min[0]) {
- ix[0] = (i + 0);
- min[0] = x[ix[0]];
- }
- if (x[i + 1] < min[1]) {
- ix[1] = (i + 1);
- min[1] = x[ix[1]];
- }
- if (x[i + 2] < min[2]) {
- ix[2] = (i + 2);
- min[2] = x[ix[2]];
- }
- if (x[i + 3] < min[3]) {
- ix[3] = (i + 3);
- min[3] = x[ix[3]];
- }
- if (x[i + 4] < min[4]) {
- ix[4] = (i + 4);
- min[4] = x[ix[4]];
- }
- if (x[i + 5] < min[5]) {
- ix[5] = (i + 5);
- min[5] = x[ix[5]];
- }
- if (x[i + 6] < min[6]) {
- ix[6] = (i + 6);
- min[6] = x[ix[6]];
- }
- if (x[i + 7] < min[7]) {
- ix[7] = (i + 7);
- min[7] = x[ix[7]];
- }
- if (x[i + 8] < min[8]) {
- ix[8] = (i + 8);
- min[8] = x[ix[8]];
- }
- if (x[i + 9] < min[9]) {
- ix[9] = (i + 9);
- min[9] = x[ix[9]];
- }
- if (x[i + 10] < min[10]) {
- ix[10] = (i + 10);
- min[10] = x[ix[10]];
- }
- if (x[i + 11] < min[11]) {
- ix[11] = (i + 11);
- min[11] = x[ix[11]];
- }
- if (x[i + 12] < min[12]) {
- ix[12] = (i + 12);
- min[12] = x[ix[12]];
- }
- if (x[i + 13] < min[13]) {
- ix[13] = (i + 13);
- min[13] = x[ix[13]];
- }
- if (x[i + 14] < min[14]) {
- ix[14] = (i + 14);
- min[14] = x[ix[14]];
- }
- if (x[i + 15] < min[15]) {
- ix[15] = (i + 15);
- min[15] = x[ix[15]];
- }
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-static
-int
-argmind_s_kernel8(int *ip, double *x, int incx)
-{
- double min[8];
- int i;
- int ix[8];
- int len;
-
- for (i = 0; i < 8; i++) {
- min[i] = x[0];
- }
- for (i = 0; i < 8; i++) {
- ix[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- if (x[incx * (i + 0)] < min[0]) {
- ix[0] = (i + 0);
- min[0] = x[incx * ix[0]];
- }
- if (x[incx * (i + 1)] < min[1]) {
- ix[1] = (i + 1);
- min[1] = x[incx * ix[1]];
- }
- if (x[incx * (i + 2)] < min[2]) {
- ix[2] = (i + 2);
- min[2] = x[incx * ix[2]];
- }
- if (x[incx * (i + 3)] < min[3]) {
- ix[3] = (i + 3);
- min[3] = x[incx * ix[3]];
- }
- if (x[incx * (i + 4)] < min[4]) {
- ix[4] = (i + 4);
- min[4] = x[incx * ix[4]];
- }
- if (x[incx * (i + 5)] < min[5]) {
- ix[5] = (i + 5);
- min[5] = x[incx * ix[5]];
- }
- if (x[incx * (i + 6)] < min[6]) {
- ix[6] = (i + 6);
- min[6] = x[incx * ix[6]];
- }
- if (x[incx * (i + 7)] < min[7]) {
- ix[7] = (i + 7);
- min[7] = x[incx * ix[7]];
- }
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-int
-blas·argmind(int len, double *x, int incx)
-{
- int i;
- int ix;
- double min;
-
- if (incx == 1) {
- i = len;
- ix = argmind_kernel16(&i, x);
- } else {
- i = len;
- ix = argmind_s_kernel8(&i, x, incx);
- }
- for (; i < len; i++) {
- if (x[incx * i] < min) {
- ix = i;
- min = x[incx * ix];
- }
- }
- for (; i < len; i++) {
- if (x[i] < min) {
- ix = i;
- min = x[ix];
- }
- }
-
- return ix;
-}
-
-static
-int
-argmaxd_kernel16(int *ip, double *x)
-{
- int ix[16];
- int i;
- double max[16];
- int len;
-
- for (i = 0; i < 16; i++) {
- ix[i] = 0;
- }
- for (i = 0; i < 16; i++) {
- max[i] = x[0];
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- if (x[i + 0] > max[0]) {
- ix[0] = (i + 0);
- max[0] = x[ix[0]];
- }
- if (x[i + 1] > max[1]) {
- ix[1] = (i + 1);
- max[1] = x[ix[1]];
- }
- if (x[i + 2] > max[2]) {
- ix[2] = (i + 2);
- max[2] = x[ix[2]];
- }
- if (x[i + 3] > max[3]) {
- ix[3] = (i + 3);
- max[3] = x[ix[3]];
- }
- if (x[i + 4] > max[4]) {
- ix[4] = (i + 4);
- max[4] = x[ix[4]];
- }
- if (x[i + 5] > max[5]) {
- ix[5] = (i + 5);
- max[5] = x[ix[5]];
- }
- if (x[i + 6] > max[6]) {
- ix[6] = (i + 6);
- max[6] = x[ix[6]];
- }
- if (x[i + 7] > max[7]) {
- ix[7] = (i + 7);
- max[7] = x[ix[7]];
- }
- if (x[i + 8] > max[8]) {
- ix[8] = (i + 8);
- max[8] = x[ix[8]];
- }
- if (x[i + 9] > max[9]) {
- ix[9] = (i + 9);
- max[9] = x[ix[9]];
- }
- if (x[i + 10] > max[10]) {
- ix[10] = (i + 10);
- max[10] = x[ix[10]];
- }
- if (x[i + 11] > max[11]) {
- ix[11] = (i + 11);
- max[11] = x[ix[11]];
- }
- if (x[i + 12] > max[12]) {
- ix[12] = (i + 12);
- max[12] = x[ix[12]];
- }
- if (x[i + 13] > max[13]) {
- ix[13] = (i + 13);
- max[13] = x[ix[13]];
- }
- if (x[i + 14] > max[14]) {
- ix[14] = (i + 14);
- max[14] = x[ix[14]];
- }
- if (x[i + 15] > max[15]) {
- ix[15] = (i + 15);
- max[15] = x[ix[15]];
- }
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-static
-int
-argmaxd_s_kernel8(int *ip, int incx, double *x)
-{
- int ix[8];
- int i;
- double max[8];
- int len;
-
- for (i = 0; i < 8; i++) {
- ix[i] = 0;
- }
- for (i = 0; i < 8; i++) {
- max[i] = x[0];
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- if (x[incx * (i + 0)] > max[0]) {
- ix[0] = (i + 0);
- max[0] = x[incx * ix[0]];
- }
- if (x[incx * (i + 1)] > max[1]) {
- ix[1] = (i + 1);
- max[1] = x[incx * ix[1]];
- }
- if (x[incx * (i + 2)] > max[2]) {
- ix[2] = (i + 2);
- max[2] = x[incx * ix[2]];
- }
- if (x[incx * (i + 3)] > max[3]) {
- ix[3] = (i + 3);
- max[3] = x[incx * ix[3]];
- }
- if (x[incx * (i + 4)] > max[4]) {
- ix[4] = (i + 4);
- max[4] = x[incx * ix[4]];
- }
- if (x[incx * (i + 5)] > max[5]) {
- ix[5] = (i + 5);
- max[5] = x[incx * ix[5]];
- }
- if (x[incx * (i + 6)] > max[6]) {
- ix[6] = (i + 6);
- max[6] = x[incx * ix[6]];
- }
- if (x[incx * (i + 7)] > max[7]) {
- ix[7] = (i + 7);
- max[7] = x[incx * ix[7]];
- }
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- if (x[ix[i]] > x[ix[0]]) {
- ix[0] = ix[i];
- }
- }
-
- return ix[0];
-}
-
-int
-blas·argmaxd(int len, double *x, int incx)
-{
- int i;
- int ix;
- double max;
-
- if (incx == 1) {
- i = len;
- ix = argmaxd_kernel16(&i, x);
- } else {
- i = len;
- ix = argmaxd_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- if (x[incx * i] > max) {
- ix = i;
- max = x[incx * ix];
- }
- }
- for (; i < len; i++) {
- if (x[i] > max) {
- ix = i;
- max = x[ix];
- }
- }
-
- return ix;
-}
-
-static
-void
-copyd_kernel16(int *ip, double *y, double *x)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- y[i + 0] = x[i + 0];
- y[i + 1] = x[i + 1];
- y[i + 2] = x[i + 2];
- y[i + 3] = x[i + 3];
- y[i + 4] = x[i + 4];
- y[i + 5] = x[i + 5];
- y[i + 6] = x[i + 6];
- y[i + 7] = x[i + 7];
- y[i + 8] = x[i + 8];
- y[i + 9] = x[i + 9];
- y[i + 10] = x[i + 10];
- y[i + 11] = x[i + 11];
- y[i + 12] = x[i + 12];
- y[i + 13] = x[i + 13];
- y[i + 14] = x[i + 14];
- y[i + 15] = x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-copyd_s_kernel8(int *ip, double *y, int incx, int incy, double *x)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- y[incy * (i + 0)] = x[incx * (i + 0)];
- y[incy * (i + 1)] = x[incx * (i + 1)];
- y[incy * (i + 2)] = x[incx * (i + 2)];
- y[incy * (i + 3)] = x[incx * (i + 3)];
- y[incy * (i + 4)] = x[incx * (i + 4)];
- y[incy * (i + 5)] = x[incx * (i + 5)];
- y[incy * (i + 6)] = x[incx * (i + 6)];
- y[incy * (i + 7)] = x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·copyd(int len, double *x, int incx, double *y, int incy)
-{
- int i;
-
- if (incx == 1 && incy == 1) {
- i = len;
- copyd_kernel16(&i, y, x);
- } else {
- i = len;
- copyd_s_kernel8(&i, y, incx, incy, x);
- }
- for (; i < len; i++) {
- y[incy * i] = x[incx * i];
- }
-}
-
-static
-void
-axpyd_kernel16(int *ip, double *x, double *y, double a)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- y[i + 0] = y[i + 0] + a * x[i + 0];
- y[i + 1] = y[i + 1] + a * x[i + 1];
- y[i + 2] = y[i + 2] + a * x[i + 2];
- y[i + 3] = y[i + 3] + a * x[i + 3];
- y[i + 4] = y[i + 4] + a * x[i + 4];
- y[i + 5] = y[i + 5] + a * x[i + 5];
- y[i + 6] = y[i + 6] + a * x[i + 6];
- y[i + 7] = y[i + 7] + a * x[i + 7];
- y[i + 8] = y[i + 8] + a * x[i + 8];
- y[i + 9] = y[i + 9] + a * x[i + 9];
- y[i + 10] = y[i + 10] + a * x[i + 10];
- y[i + 11] = y[i + 11] + a * x[i + 11];
- y[i + 12] = y[i + 12] + a * x[i + 12];
- y[i + 13] = y[i + 13] + a * x[i + 13];
- y[i + 14] = y[i + 14] + a * x[i + 14];
- y[i + 15] = y[i + 15] + a * x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-axpyd_s_kernel8(int *ip, double *x, int incy, double *y, double a, int incx)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- y[incy * (i + 0)] = y[incy * (i + 0)] + a * x[incx * (i + 0)];
- y[incy * (i + 1)] = y[incy * (i + 1)] + a * x[incx * (i + 1)];
- y[incy * (i + 2)] = y[incy * (i + 2)] + a * x[incx * (i + 2)];
- y[incy * (i + 3)] = y[incy * (i + 3)] + a * x[incx * (i + 3)];
- y[incy * (i + 4)] = y[incy * (i + 4)] + a * x[incx * (i + 4)];
- y[incy * (i + 5)] = y[incy * (i + 5)] + a * x[incx * (i + 5)];
- y[incy * (i + 6)] = y[incy * (i + 6)] + a * x[incx * (i + 6)];
- y[incy * (i + 7)] = y[incy * (i + 7)] + a * x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·axpyd(int len, double a, double *x, int incx, double *y, int incy)
-{
- int i;
-
- if (incx == 1 && incy == 1) {
- i = len;
- axpyd_kernel16(&i, x, y, a);
- } else {
- i = len;
- axpyd_s_kernel8(&i, x, incy, y, a, incx);
- }
- for (; i < len; i++) {
- y[incy * i] = y[incy * i] + a * x[incx * i];
- }
-}
-
-static
-void
-axpbyd_kernel16(int *ip, double a, double b, double *x, double *y)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- y[i + 0] = b * y[i + 0] + a * x[i + 0];
- y[i + 1] = b * y[i + 1] + a * x[i + 1];
- y[i + 2] = b * y[i + 2] + a * x[i + 2];
- y[i + 3] = b * y[i + 3] + a * x[i + 3];
- y[i + 4] = b * y[i + 4] + a * x[i + 4];
- y[i + 5] = b * y[i + 5] + a * x[i + 5];
- y[i + 6] = b * y[i + 6] + a * x[i + 6];
- y[i + 7] = b * y[i + 7] + a * x[i + 7];
- y[i + 8] = b * y[i + 8] + a * x[i + 8];
- y[i + 9] = b * y[i + 9] + a * x[i + 9];
- y[i + 10] = b * y[i + 10] + a * x[i + 10];
- y[i + 11] = b * y[i + 11] + a * x[i + 11];
- y[i + 12] = b * y[i + 12] + a * x[i + 12];
- y[i + 13] = b * y[i + 13] + a * x[i + 13];
- y[i + 14] = b * y[i + 14] + a * x[i + 14];
- y[i + 15] = b * y[i + 15] + a * x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-axpbyd_s_kernel8(int *ip, double a, int incy, double b, double *x, int incx, double *y)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- y[incy * (i + 0)] = b * y[incy * (i + 0)] + a * x[incx * (i + 0)];
- y[incy * (i + 1)] = b * y[incy * (i + 1)] + a * x[incx * (i + 1)];
- y[incy * (i + 2)] = b * y[incy * (i + 2)] + a * x[incx * (i + 2)];
- y[incy * (i + 3)] = b * y[incy * (i + 3)] + a * x[incx * (i + 3)];
- y[incy * (i + 4)] = b * y[incy * (i + 4)] + a * x[incx * (i + 4)];
- y[incy * (i + 5)] = b * y[incy * (i + 5)] + a * x[incx * (i + 5)];
- y[incy * (i + 6)] = b * y[incy * (i + 6)] + a * x[incx * (i + 6)];
- y[incy * (i + 7)] = b * y[incy * (i + 7)] + a * x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·axpbyd(int len, double a, double *x, int incx, double b, double *y, int incy)
-{
- int i;
-
- if (incx == 1 && incy == 1) {
- i = len;
- axpbyd_kernel16(&i, a, b, x, y);
- } else {
- i = len;
- axpbyd_s_kernel8(&i, a, incy, b, x, incx, y);
- }
- for (; i < len; i++) {
- y[incy * i] = b * y[incy * i] + a * x[incx * i];
- }
-}
-
-static
-double
-dotd_kernel16(int *ip, double *y, double *x)
-{
- double sum[16];
- int i;
- int len;
-
- for (i = 0; i < 16; i++) {
- sum[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- sum[0] += x[i + 0] * y[i + 0];
- sum[1] += x[i + 1] * y[i + 1];
- sum[2] += x[i + 2] * y[i + 2];
- sum[3] += x[i + 3] * y[i + 3];
- sum[4] += x[i + 4] * y[i + 4];
- sum[5] += x[i + 5] * y[i + 5];
- sum[6] += x[i + 6] * y[i + 6];
- sum[7] += x[i + 7] * y[i + 7];
- sum[8] += x[i + 8] * y[i + 8];
- sum[9] += x[i + 9] * y[i + 9];
- sum[10] += x[i + 10] * y[i + 10];
- sum[11] += x[i + 11] * y[i + 11];
- sum[12] += x[i + 12] * y[i + 12];
- sum[13] += x[i + 13] * y[i + 13];
- sum[14] += x[i + 14] * y[i + 14];
- sum[15] += x[i + 15] * y[i + 15];
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-static
-double
-dotd_s_kernel8(int *ip, double *y, int incx, int incy, double *x)
-{
- double sum[8];
- int i;
- int len;
-
- for (i = 0; i < 8; i++) {
- sum[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- sum[0] += x[incx * (i + 0)] * y[incy * (i + 0)];
- sum[1] += x[incx * (i + 1)] * y[incy * (i + 1)];
- sum[2] += x[incx * (i + 2)] * y[incy * (i + 2)];
- sum[3] += x[incx * (i + 3)] * y[incy * (i + 3)];
- sum[4] += x[incx * (i + 4)] * y[incy * (i + 4)];
- sum[5] += x[incx * (i + 5)] * y[incy * (i + 5)];
- sum[6] += x[incx * (i + 6)] * y[incy * (i + 6)];
- sum[7] += x[incx * (i + 7)] * y[incy * (i + 7)];
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-double
-blas·dotd(int len, double *x, int incx, double *y, int incy)
-{
- int i;
- double sum;
-
- if (incx == 1 && incy == 1) {
- i = len;
- sum = dotd_kernel16(&i, y, x);
- } else {
- i = len;
- sum = dotd_s_kernel8(&i, y, incx, incy, x);
- }
- for (; i < len; i++) {
- sum += x[incx * i] * y[incy * i];
- }
-
- return sum;
-}
-
-static
-double
-sumd_kernel16(int *ip, double *x)
-{
- int i;
- double sum[16];
- int len;
-
- for (i = 0; i < 16; i++) {
- sum[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- sum[0] += x[i + 0];
- sum[1] += x[i + 1];
- sum[2] += x[i + 2];
- sum[3] += x[i + 3];
- sum[4] += x[i + 4];
- sum[5] += x[i + 5];
- sum[6] += x[i + 6];
- sum[7] += x[i + 7];
- sum[8] += x[i + 8];
- sum[9] += x[i + 9];
- sum[10] += x[i + 10];
- sum[11] += x[i + 11];
- sum[12] += x[i + 12];
- sum[13] += x[i + 13];
- sum[14] += x[i + 14];
- sum[15] += x[i + 15];
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-static
-double
-sumd_s_kernel8(int *ip, int incx, double *x)
-{
- int i;
- double sum[8];
- int len;
-
- for (i = 0; i < 8; i++) {
- sum[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- sum[0] += x[incx * (i + 0)];
- sum[1] += x[incx * (i + 1)];
- sum[2] += x[incx * (i + 2)];
- sum[3] += x[incx * (i + 3)];
- sum[4] += x[incx * (i + 4)];
- sum[5] += x[incx * (i + 5)];
- sum[6] += x[incx * (i + 6)];
- sum[7] += x[incx * (i + 7)];
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- sum[0] += sum[i];
- }
-
- return sum[0];
-}
-
-double
-blas·sumd(int len, double *x, int incx)
-{
- int i;
- double sum;
-
- if (incx == 1) {
- i = len;
- sum = sumd_kernel16(&i, x);
- } else {
- i = len;
- sum = sumd_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- sum += x[incx * i];
- }
-
- return sum;
-}
-
-static
-double
-normd_kernel16(int *ip, double *x)
-{
- int i;
- double nrm[16];
- int len;
-
- for (i = 0; i < 16; i++) {
- nrm[i] = 0;
- }
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- nrm[0] += x[i + 0] * x[i + 0];
- nrm[1] += x[i + 1] * x[i + 1];
- nrm[2] += x[i + 2] * x[i + 2];
- nrm[3] += x[i + 3] * x[i + 3];
- nrm[4] += x[i + 4] * x[i + 4];
- nrm[5] += x[i + 5] * x[i + 5];
- nrm[6] += x[i + 6] * x[i + 6];
- nrm[7] += x[i + 7] * x[i + 7];
- nrm[8] += x[i + 8] * x[i + 8];
- nrm[9] += x[i + 9] * x[i + 9];
- nrm[10] += x[i + 10] * x[i + 10];
- nrm[11] += x[i + 11] * x[i + 11];
- nrm[12] += x[i + 12] * x[i + 12];
- nrm[13] += x[i + 13] * x[i + 13];
- nrm[14] += x[i + 14] * x[i + 14];
- nrm[15] += x[i + 15] * x[i + 15];
- }
- *ip = i;
- for (i = 1; i < 16; i++) {
- nrm[0] += nrm[i];
- }
-
- return nrm[0];
-}
-
-static
-double
-normd_s_kernel8(int *ip, int incx, double *x)
-{
- double nrm[8];
- int i;
- int len;
-
- for (i = 0; i < 8; i++) {
- nrm[i] = 0;
- }
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- nrm[0] += x[incx * (i + 0)] * x[incx * (i + 0)];
- nrm[1] += x[incx * (i + 1)] * x[incx * (i + 1)];
- nrm[2] += x[incx * (i + 2)] * x[incx * (i + 2)];
- nrm[3] += x[incx * (i + 3)] * x[incx * (i + 3)];
- nrm[4] += x[incx * (i + 4)] * x[incx * (i + 4)];
- nrm[5] += x[incx * (i + 5)] * x[incx * (i + 5)];
- nrm[6] += x[incx * (i + 6)] * x[incx * (i + 6)];
- nrm[7] += x[incx * (i + 7)] * x[incx * (i + 7)];
- }
- *ip = i;
- for (i = 1; i < 8; i++) {
- nrm[0] += nrm[i];
- }
-
- return nrm[0];
-}
-
-double
-blas·normd(int len, double *x, int incx)
-{
- int i;
- double nrm;
-
- if (incx == 1) {
- i = len;
- nrm = normd_kernel16(&i, x);
- } else {
- i = len;
- nrm = normd_s_kernel8(&i, incx, x);
- }
- for (; i < len; i++) {
- nrm += x[incx * i] * x[incx * i];
- }
-
- return math·sqrt(nrm);
-}
-
-static
-void
-scaled_kernel16(int *ip, double a, double *x)
-{
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- x[i + 0] = a * x[i + 0];
- x[i + 1] = a * x[i + 1];
- x[i + 2] = a * x[i + 2];
- x[i + 3] = a * x[i + 3];
- x[i + 4] = a * x[i + 4];
- x[i + 5] = a * x[i + 5];
- x[i + 6] = a * x[i + 6];
- x[i + 7] = a * x[i + 7];
- x[i + 8] = a * x[i + 8];
- x[i + 9] = a * x[i + 9];
- x[i + 10] = a * x[i + 10];
- x[i + 11] = a * x[i + 11];
- x[i + 12] = a * x[i + 12];
- x[i + 13] = a * x[i + 13];
- x[i + 14] = a * x[i + 14];
- x[i + 15] = a * x[i + 15];
- }
- *ip = i;
-}
-
-static
-void
-scaled_s_kernel8(int *ip, double *x, int incx, double a)
-{
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- x[incx * (i + 0)] = a * x[incx * (i + 0)];
- x[incx * (i + 1)] = a * x[incx * (i + 1)];
- x[incx * (i + 2)] = a * x[incx * (i + 2)];
- x[incx * (i + 3)] = a * x[incx * (i + 3)];
- x[incx * (i + 4)] = a * x[incx * (i + 4)];
- x[incx * (i + 5)] = a * x[incx * (i + 5)];
- x[incx * (i + 6)] = a * x[incx * (i + 6)];
- x[incx * (i + 7)] = a * x[incx * (i + 7)];
- }
- *ip = i;
-}
-
-void
-blas·scaled(int len, double *x, int incx, double a)
-{
- int i;
-
- if (incx == 1) {
- i = len;
- scaled_kernel16(&i, a, x);
- } else {
- i = len;
- scaled_s_kernel8(&i, x, incx, a);
- }
- for (; i < len; i++) {
- x[incx * i] = a * x[incx * i];
- }
-}
-
-static
-void
-rotd_kernel16(int *ip, double sin, double cos, double *x, double *y)
-{
- double tmp[16];
- int i;
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- tmp[0] = x[i + 0], x[i + 0] = cos * x[i + 0] + sin * y[i + 0], y[i + 0] = cos * y[i + 0] - sin * tmp[0];
- tmp[1] = x[i + 1], x[i + 1] = cos * x[i + 1] + sin * y[i + 1], y[i + 1] = cos * y[i + 1] - sin * tmp[1];
- tmp[2] = x[i + 2], x[i + 2] = cos * x[i + 2] + sin * y[i + 2], y[i + 2] = cos * y[i + 2] - sin * tmp[2];
- tmp[3] = x[i + 3], x[i + 3] = cos * x[i + 3] + sin * y[i + 3], y[i + 3] = cos * y[i + 3] - sin * tmp[3];
- tmp[4] = x[i + 4], x[i + 4] = cos * x[i + 4] + sin * y[i + 4], y[i + 4] = cos * y[i + 4] - sin * tmp[4];
- tmp[5] = x[i + 5], x[i + 5] = cos * x[i + 5] + sin * y[i + 5], y[i + 5] = cos * y[i + 5] - sin * tmp[5];
- tmp[6] = x[i + 6], x[i + 6] = cos * x[i + 6] + sin * y[i + 6], y[i + 6] = cos * y[i + 6] - sin * tmp[6];
- tmp[7] = x[i + 7], x[i + 7] = cos * x[i + 7] + sin * y[i + 7], y[i + 7] = cos * y[i + 7] - sin * tmp[7];
- tmp[8] = x[i + 8], x[i + 8] = cos * x[i + 8] + sin * y[i + 8], y[i + 8] = cos * y[i + 8] - sin * tmp[8];
- tmp[9] = x[i + 9], x[i + 9] = cos * x[i + 9] + sin * y[i + 9], y[i + 9] = cos * y[i + 9] - sin * tmp[9];
- tmp[10] = x[i + 10], x[i + 10] = cos * x[i + 10] + sin * y[i + 10], y[i + 10] = cos * y[i + 10] - sin * tmp[10];
- tmp[11] = x[i + 11], x[i + 11] = cos * x[i + 11] + sin * y[i + 11], y[i + 11] = cos * y[i + 11] - sin * tmp[11];
- tmp[12] = x[i + 12], x[i + 12] = cos * x[i + 12] + sin * y[i + 12], y[i + 12] = cos * y[i + 12] - sin * tmp[12];
- tmp[13] = x[i + 13], x[i + 13] = cos * x[i + 13] + sin * y[i + 13], y[i + 13] = cos * y[i + 13] - sin * tmp[13];
- tmp[14] = x[i + 14], x[i + 14] = cos * x[i + 14] + sin * y[i + 14], y[i + 14] = cos * y[i + 14] - sin * tmp[14];
- tmp[15] = x[i + 15], x[i + 15] = cos * x[i + 15] + sin * y[i + 15], y[i + 15] = cos * y[i + 15] - sin * tmp[15];
- }
- *ip = i;
-}
-
-static
-void
-rotd_s_kernel8(int *ip, double sin, double *x, double *y, double cos, int incy, int incx)
-{
- double tmp[8];
- int i;
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- tmp[0] = x[incx * (i + 0)], x[incx * (i + 0)] = cos * x[incx * (i + 0)] + sin * y[incy * (i + 0)], y[incy * (i + 0)] = cos * y[incy * (i + 0)] - sin * tmp[0];
- tmp[1] = x[incx * (i + 1)], x[incx * (i + 1)] = cos * x[incx * (i + 1)] + sin * y[incy * (i + 1)], y[incy * (i + 1)] = cos * y[incy * (i + 1)] - sin * tmp[1];
- tmp[2] = x[incx * (i + 2)], x[incx * (i + 2)] = cos * x[incx * (i + 2)] + sin * y[incy * (i + 2)], y[incy * (i + 2)] = cos * y[incy * (i + 2)] - sin * tmp[2];
- tmp[3] = x[incx * (i + 3)], x[incx * (i + 3)] = cos * x[incx * (i + 3)] + sin * y[incy * (i + 3)], y[incy * (i + 3)] = cos * y[incy * (i + 3)] - sin * tmp[3];
- tmp[4] = x[incx * (i + 4)], x[incx * (i + 4)] = cos * x[incx * (i + 4)] + sin * y[incy * (i + 4)], y[incy * (i + 4)] = cos * y[incy * (i + 4)] - sin * tmp[4];
- tmp[5] = x[incx * (i + 5)], x[incx * (i + 5)] = cos * x[incx * (i + 5)] + sin * y[incy * (i + 5)], y[incy * (i + 5)] = cos * y[incy * (i + 5)] - sin * tmp[5];
- tmp[6] = x[incx * (i + 6)], x[incx * (i + 6)] = cos * x[incx * (i + 6)] + sin * y[incy * (i + 6)], y[incy * (i + 6)] = cos * y[incy * (i + 6)] - sin * tmp[6];
- tmp[7] = x[incx * (i + 7)], x[incx * (i + 7)] = cos * x[incx * (i + 7)] + sin * y[incy * (i + 7)], y[incy * (i + 7)] = cos * y[incy * (i + 7)] - sin * tmp[7];
- }
- *ip = i;
-}
-
-void
-blas·rotd(int len, double *x, int incx, double *y, int incy, double cos, double sin)
-{
- int i;
- double tmp;
-
- if (incx == 1 && incy == 1) {
- i = len;
- rotd_kernel16(&i, sin, cos, x, y);
- } else {
- i = len;
- rotd_s_kernel8(&i, sin, x, y, cos, incy, incx);
- }
- for (; i < len; i++) {
- tmp = x[incx * i], x[incx * i] = cos * x[incx * i] + sin * y[incy * i], y[incy * i] = cos * y[incy * i] - sin * tmp;
- }
-}
-
-static
-void
-rotgd_kernel16(int *ip, double H[5], double *y, double *x)
-{
- int i;
- double tmp[16];
- int len;
-
- len = *ip & ~15;
- for (i = 0; i < len; i += 16) {
- tmp[0] = x[i + 0], x[i + 0] = H[1] * x[i + 0] + H[2] * y[i + 0], y[i + 0] = H[3] * y[i + 0] + H[4] * tmp[0];
- tmp[1] = x[i + 1], x[i + 1] = H[1] * x[i + 1] + H[2] * y[i + 1], y[i + 1] = H[3] * y[i + 1] + H[4] * tmp[1];
- tmp[2] = x[i + 2], x[i + 2] = H[1] * x[i + 2] + H[2] * y[i + 2], y[i + 2] = H[3] * y[i + 2] + H[4] * tmp[2];
- tmp[3] = x[i + 3], x[i + 3] = H[1] * x[i + 3] + H[2] * y[i + 3], y[i + 3] = H[3] * y[i + 3] + H[4] * tmp[3];
- tmp[4] = x[i + 4], x[i + 4] = H[1] * x[i + 4] + H[2] * y[i + 4], y[i + 4] = H[3] * y[i + 4] + H[4] * tmp[4];
- tmp[5] = x[i + 5], x[i + 5] = H[1] * x[i + 5] + H[2] * y[i + 5], y[i + 5] = H[3] * y[i + 5] + H[4] * tmp[5];
- tmp[6] = x[i + 6], x[i + 6] = H[1] * x[i + 6] + H[2] * y[i + 6], y[i + 6] = H[3] * y[i + 6] + H[4] * tmp[6];
- tmp[7] = x[i + 7], x[i + 7] = H[1] * x[i + 7] + H[2] * y[i + 7], y[i + 7] = H[3] * y[i + 7] + H[4] * tmp[7];
- tmp[8] = x[i + 8], x[i + 8] = H[1] * x[i + 8] + H[2] * y[i + 8], y[i + 8] = H[3] * y[i + 8] + H[4] * tmp[8];
- tmp[9] = x[i + 9], x[i + 9] = H[1] * x[i + 9] + H[2] * y[i + 9], y[i + 9] = H[3] * y[i + 9] + H[4] * tmp[9];
- tmp[10] = x[i + 10], x[i + 10] = H[1] * x[i + 10] + H[2] * y[i + 10], y[i + 10] = H[3] * y[i + 10] + H[4] * tmp[10];
- tmp[11] = x[i + 11], x[i + 11] = H[1] * x[i + 11] + H[2] * y[i + 11], y[i + 11] = H[3] * y[i + 11] + H[4] * tmp[11];
- tmp[12] = x[i + 12], x[i + 12] = H[1] * x[i + 12] + H[2] * y[i + 12], y[i + 12] = H[3] * y[i + 12] + H[4] * tmp[12];
- tmp[13] = x[i + 13], x[i + 13] = H[1] * x[i + 13] + H[2] * y[i + 13], y[i + 13] = H[3] * y[i + 13] + H[4] * tmp[13];
- tmp[14] = x[i + 14], x[i + 14] = H[1] * x[i + 14] + H[2] * y[i + 14], y[i + 14] = H[3] * y[i + 14] + H[4] * tmp[14];
- tmp[15] = x[i + 15], x[i + 15] = H[1] * x[i + 15] + H[2] * y[i + 15], y[i + 15] = H[3] * y[i + 15] + H[4] * tmp[15];
- }
- *ip = i;
-}
-
-static
-void
-rotgd_s_kernel8(int *ip, double H[5], int incx, double *y, double *x, int incy)
-{
- int i;
- double tmp[8];
- int len;
-
- len = *ip & ~7;
- for (i = 0; i < len; i += 8) {
- tmp[0] = x[incx * (i + 0)], x[incx * (i + 0)] = H[1] * x[incx * (i + 0)] + H[2] * y[incy * (i + 0)], y[incy * (i + 0)] = H[3] * y[incy * (i + 0)] + H[4] * tmp[0];
- tmp[1] = x[incx * (i + 1)], x[incx * (i + 1)] = H[1] * x[incx * (i + 1)] + H[2] * y[incy * (i + 1)], y[incy * (i + 1)] = H[3] * y[incy * (i + 1)] + H[4] * tmp[1];
- tmp[2] = x[incx * (i + 2)], x[incx * (i + 2)] = H[1] * x[incx * (i + 2)] + H[2] * y[incy * (i + 2)], y[incy * (i + 2)] = H[3] * y[incy * (i + 2)] + H[4] * tmp[2];
- tmp[3] = x[incx * (i + 3)], x[incx * (i + 3)] = H[1] * x[incx * (i + 3)] + H[2] * y[incy * (i + 3)], y[incy * (i + 3)] = H[3] * y[incy * (i + 3)] + H[4] * tmp[3];
- tmp[4] = x[incx * (i + 4)], x[incx * (i + 4)] = H[1] * x[incx * (i + 4)] + H[2] * y[incy * (i + 4)], y[incy * (i + 4)] = H[3] * y[incy * (i + 4)] + H[4] * tmp[4];
- tmp[5] = x[incx * (i + 5)], x[incx * (i + 5)] = H[1] * x[incx * (i + 5)] + H[2] * y[incy * (i + 5)], y[incy * (i + 5)] = H[3] * y[incy * (i + 5)] + H[4] * tmp[5];
- tmp[6] = x[incx * (i + 6)], x[incx * (i + 6)] = H[1] * x[incx * (i + 6)] + H[2] * y[incy * (i + 6)], y[incy * (i + 6)] = H[3] * y[incy * (i + 6)] + H[4] * tmp[6];
- tmp[7] = x[incx * (i + 7)], x[incx * (i + 7)] = H[1] * x[incx * (i + 7)] + H[2] * y[incy * (i + 7)], y[incy * (i + 7)] = H[3] * y[incy * (i + 7)] + H[4] * tmp[7];
- }
- *ip = i;
-}
-
-void
-blas·rotgd(int len, double *x, int incx, double *y, int incy, double H[5])
-{
- int i;
- double tmp;
-
- if (incx == 1 && incy == 1) {
- i = len;
- rotgd_kernel16(&i, H, y, x);
- } else {
- i = len;
- rotgd_s_kernel8(&i, H, incx, y, x, incy);
- }
- for (; i < len; i++) {
- tmp = x[incx * i], x[incx * i] = H[1] * x[incx * i] + H[2] * y[incy * i], y[incy * i] = H[3] * y[incy * i] + H[4] * tmp;
- }
-}
-
-
+#define UNROLL 8
+#define INT uintptr
+
+// -----------------------------------------------------------------------
+// Templates
+
+#include "loop.h"
+#define BODY_XY() \
+ LOOP(UNROLL, 0, INIT); \
+ n = ROUNDBY(len, UNROLL); \
+ if (incx == 1 && incy == 1) { \
+ for (i = 0; i < n; i+=UNROLL) { \
+ LOOP(UNROLL,0,KERNEL,1,1); \
+ } \
+ } else { \
+ for (i = 0; i < n; i+=UNROLL) { \
+ LOOP(UNROLL,0,KERNEL,incx,incy);\
+ } \
+ } \
+ \
+ for (; i < len; i++) { \
+ LOOP(1,0,KERNEL,incx,incy); \
+ }
+
+#define BODY_X() \
+ LOOP(UNROLL, 0, INIT); \
+ n = ROUNDBY(len, UNROLL); \
+ if (incx == 1) { \
+ for (i = 0; i < n; i+=UNROLL) { \
+ LOOP(UNROLL,0,KERNEL,1); \
+ } \
+ } else { \
+ for (i = 0; i < n; i+=UNROLL) { \
+ LOOP(UNROLL,0,KERNEL,incx); \
+ } \
+ } \
+ \
+ for (; i < len; i++) { \
+ LOOP(1,0,KERNEL,incx); \
+ }
+
+// -----------------------------------------------------------------------
+// Implementation
+
+#define FLOAT double
+#define func(name) blas·d##name
+#include "blas1body"
+#undef FLOAT
+
+#undef FLOAT
+#undef func
+
+#define FLOAT float
+#define func(name) blas·f##name
+#include "blas1body"
+#undef FLOAT
diff --git a/sys/libmath/gen1.py b/sys/libmath/gen1.py
deleted file mode 100755
index 936bc50..0000000
--- a/sys/libmath/gen1.py
+++ /dev/null
@@ -1,360 +0,0 @@
-#!/bin/python
-
-from C import *
-
-NUNROLL = 16
-def pkg(name):
- return f"blas·{name}"
-
-def typeify(string, kind):
- if (kind == Float32):
- return f"{string}f"
- if (kind == Float64):
- return f"{string}d"
-
-def fini(func, loop, strided, calls, ret=[]):
- func.execute(*calls[:2])
- func, scall = Strided(func, loop, NUNROLL//2, strided, *ret)
- calls[2] = scall[0]
- func.execute(*calls[2:])
- func.emit()
-
-# ------------------------------------------------------------------------
-# Blas level 1 functions
-
-def copy(kind):
- name = typeify("copy", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (Ptr(kind), "x"), (Ptr(kind), "y")
- ),
- Vars(
- (Int, "i")
- )
- )
-
- len, x, y, i = F.variables("len", "x", "y", "i")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- Set(Index(y, i), Index(x, i))
- )
-
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
-
- # F.execute(*calls)
- # F.emit()
- fini(F, loop, F.params[1:3], calls)
-
-def swap(kind):
- name = typeify("swap", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (Ptr(kind), "x"), (Ptr(kind), "y")
- ),
- Vars(
- (Int, "i"), (kind, "tmp")
- )
- )
-
- len, x, y, i, tmp = F.variables("len", "x", "y", "i", "tmp")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- Swap(Index(x, i), Index(y, i), tmp)
- )
-
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
-
- # F.execute(*calls)
- # F.emit()
- fini(F, loop, F.params[1:3], calls)
-
-def axpby(kind):
- name = typeify("axpby", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (kind, "a"), (Ptr(kind), "x"), (kind, "b"), (Ptr(kind), "y")
- ),
- Vars(
- (Int, "i"),
- )
- )
-
- len, x, y, i, a, b = F.variables("len", "x", "y", "i", "a", "b")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- Set(Index(y, i),
- Mul(b,
- Add(Index(y, i),
- Mul(a, Index(x, i)),
- )
- )
- )
- )
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
-
- # F.execute(*calls)
- # F.emit()
- fini(F, loop, [F.params[2], F.params[4]], calls)
-
-def axpy(kind):
- name = typeify("axpy", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (kind, "a"), (Ptr(kind), "x"), (Ptr(kind), "y")
- ),
- Vars(
- (Int, "i"),
- )
- )
-
- len, x, y, i, a = F.variables("len", "x", "y", "i", "a")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- Set(Index(y, i),
- Add(Index(y, i),
- Mul(a, Index(x, i)),
- )
- )
- )
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
-
- fini(F, loop, F.params[2:4], calls)
- # F.execute(*calls)
- # F.emit()
-
-def argminmax(kind):
- for operation in [("min", LT), ("max", GT)]:
- name = typeify(f"arg{operation[0]}", kind)
- F = Func(pkg(name), Int,
- Params(
- (Int, "len"), (Ptr(kind), "x"),
- ),
- Vars(
- (Int, "i"), (Int, "ix"), (kind, operation[0])
- )
- )
- len, x, i, ix, op = F.variables("len", "x", "i", "ix", operation[0])
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- If(operation[1](Index(x, i), op),
- Block(
- Set(ix, i),
- Set(op, Index(x, ix)),
- )
- )
- )
-
- ret = (ix, lambda ires, icur, node:
- If(GT(Index(x, Index(node, icur)), Index(x, Index(node, ires))),
- Block(Set(Index(node, ires), Index(node, icur)))
- )
- )
-
- kernel, calls = Unroll(name, loop, NUNROLL, *ret)
- kernel.emit()
-
- # F.execute(*calls, Return(ix))
- # F.emit()
- fini(F, loop, [F.params[1]], calls[:2] + [Set(op, Index(x, ix))] + calls[2:] + [Return(ix)], ret)
-
-def dot(kind):
- nm = typeify("dot", kind)
- F = Func(pkg(nm), kind,
- Params(
- (Int, "len"), (Ptr(kind), "x"), (Ptr(kind), "y")
- ),
- Vars(
- (Int, "i"), (kind, "sum"),
- )
- )
- len, x, i, y, sum = F.variables("len", "x", "i", "y", "sum")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- AddSet(sum, Mul(Index(x, i), Index(y, i)))
- )
- ret = (sum, lambda ires, icur, node: StmtExpr(AddSet(Index(node, ires), Index(node, icur))))
-
- kernel, calls = Unroll(nm, loop, NUNROLL, *ret)
- kernel.emit()
-
- # F.execute(*calls, Return(sum))
- fini(F, loop, F.params[1:3], calls + [Return(sum)], ret)
-
-def norm(kind):
- nm = typeify("norm", kind)
- F = Func(pkg(nm), kind,
- Params(
- (Int, "len"), (Ptr(kind), "x")
- ),
- Vars(
- (Int, "i"), (kind, "nrm"),
- )
- )
- len, x, i, nrm = F.variables("len", "x", "i", "nrm")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- AddSet(nrm, Mul(Index(x, i), Index(x, i)))
- )
-
- ret = (nrm, lambda ires, icur, node:
- StmtExpr(AddSet(Index(node, ires), Index(node, icur)))
- )
-
- kernel, calls = Unroll(nm, loop, NUNROLL, *ret)
- kernel.emit()
-
- if kind == Float64:
- sqrt = Func("math·sqrt", kind)
- elif kind == Float32:
- sqrt = Func("math·sqrtf", kind)
- else:
- raise ValueError(f"no sqrt for type {kind}")
-
- # F.execute(*calls, Return(Call(sqrt, [nrm])))
- # F.emit()
- fini(F, loop, [F.params[1]], calls + [Return(Call(sqrt, [nrm]))], ret)
-
-def sum(kind):
- name = typeify("sum", kind)
- F = Func(pkg(name), kind,
- Params(
- (Int, "len"), (Ptr(kind), "x"),
- ),
- Vars(
- (Int, "i"), (kind, "sum")
- )
- )
-
- len, x, i, sum = F.variables("len", "x", "i", "sum")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- AddSet(sum, Index(x, i))
- )
-
- ret = (sum, lambda ires, icur, node:
- StmtExpr(AddSet(Index(node, ires), Index(node, icur)))
- )
-
- kernel, calls = Unroll(name, loop, NUNROLL, *ret)
- kernel.emit()
-
- fini(F, loop, [F.params[1]], calls + [Return(sum)], ret)
- # F.execute(*calls, Return(sum))
- # F.emit()
-
-def scale(kind):
- name = typeify("scale", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (Ptr(kind), "x"), (kind, "a")
- ),
- Vars(
- (Int, "i"),
- )
- )
-
- len, a, x, i = F.variables("len", "a", "x", "i")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(Set(Index(x, i), Mul(a, Index(x, i))))
-
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
-
- fini(F, loop, [F.params[1]], calls)
- # F.execute(*calls)
- # F.emit()
-
-def rot(kind):
- name = typeify("rot", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (Ptr(kind), "x"), (Ptr(kind), "y"), (kind, "cos"), (kind, "sin")
- ),
- Vars(
- (Int, "i"), (kind, "tmp")
- )
- )
-
- len, x, y, i, tmp, cos, sin = F.variables("len", "x", "y", "i", "tmp", "cos", "sin")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- Comma(Set(tmp, Index(x, i)),
- Comma(
- Set(Index(x, i), Add(Mul(cos, Index(x, i)), Mul(sin, Index(y, i)))),
- Set(Index(y, i), Sub(Mul(cos, Index(y, i)), Mul(sin, tmp)))
- )
- )
- )
-
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
-
- fini(F, loop, [F.params[1], F.params[2]], calls)
- # F.execute(*calls)
- # F.emit()
-
-def rotm(kind):
- name = typeify("rotg", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "len"), (Ptr(kind), "x"), (Ptr(kind), "y"), (Array(kind, 5), "H")
- ),
- Vars(
- (Int, "i"), (kind, "tmp")
- )
- )
-
- len, x, y, i, tmp, H = F.variables("len", "x", "y", "i", "tmp", "H")
-
- loop = For(Set(i, I(0)), LT(i, len), [Inc(i)])
- loop.execute(
- Comma(Set(tmp, Index(x, i)),
- Comma(
- Set(Index(x, i), Add(Mul(Index(H, I(1)), Index(x, i)), Mul(Index(H, I(2)), Index(y, i)))),
- Set(Index(y, i), Add(Mul(Index(H, I(3)), Index(y, i)), Mul(Index(H, I(4)), tmp)))
- )
- )
- )
-
- kernel, calls = Unroll(name, loop, NUNROLL)
- kernel.emit()
- fini(F, loop, F.params[1:3], calls)
-
-if __name__ == "__main__":
- emit("#include <u.h>\n")
- emit("#include <libn.h>\n")
- emit("#include <libmath.h>\n")
- emitln()
- emit("/*********************************************************/\n")
- emit("/* THIS CODE IS GENERATED BY GEN1.PY! DON'T EDIT BY HAND */\n")
- emit("/*********************************************************/\n")
- emitln(2)
-
- # TODO: Implement rotg/rotmg
- for kind in [Float32, Float64]:
- argminmax(kind)
- copy(kind)
- axpy(kind)
- axpby(kind)
- dot(kind)
- sum(kind)
- norm(kind)
- scale(kind)
- rot(kind)
- rotm(kind)
-
- flush()
diff --git a/sys/libmath/gen2.py b/sys/libmath/gen2.py
deleted file mode 100755
index 2afbe1d..0000000
--- a/sys/libmath/gen2.py
+++ /dev/null
@@ -1,410 +0,0 @@
-from C import *
-import copy
-
-ROW = 4
-COL = 4
-
-def pkg(name):
- return f"blas·{name}"
-
-def typeify(string, kind):
- if (kind == Float32):
- return f"{string}f"
- if (kind == Float64):
- return f"{string}d"
-
-# ------------------------------------------------------------------------
-# Helpers (abandoning the automatic unroll from level 1)
-
-def toarray(len: int, *args):
- return [Param(Array(arg.type, len), arg.name) for arg in args]
-
-def TryIndex(x, i):
- if IsArrayType(x.var.type):
- return Index(x, i)
- return x
-
-def AddElts(root, *vars):
- for var in vars:
- root = Add(root ,var)
- return root
-
-def Round(store, number, by):
- return Set(store, And(number, Negate(I(by-1))))
-
-def UnitIncs(root, *incs):
- root = EQ(root, I(1))
- for inc in incs:
- root = AndAnd(root, EQ(inc, I(1)))
- return root
-
-def IsInc(p):
- return p.name != "incx" and p.name != "incy"
-
-def Identity(p):
- return True
-
-def FilterParams(params, func):
- return [p for p in params if func(p)]
-
-def StrideAllIndexedTerms(stmts, var, itor, inc):
- def is_hit(x):
- if isinstance(x, Index):
- if isinstance(x.i, BinaryOp) and x.x == var:
- return x.i.l == itor
-
- return False
-
- terms = []
- for stmt in stmts:
- Visit(stmt, lambda node: Filter(node, is_hit, terms))
-
- for term in terms:
- term.i = Mul(Paren(term.i), inc)
-
-def AsStrided(stmts, var, itor, inc):
- def increment(x):
- if isinstance(x, Index):
- if isinstance(x.i, BinaryOp) and x.x == var:
- return Index(x.x, Mul(Paren(x.i), inc))
-
- return copy.copy(x)
-
- if isinstance(stmts, Block):
- return Block(*[Make(stmt, lambda node: Transform(node, increment)) for stmt in stmts.stmts])
- elif isinstance(stmts, list):
- return [Make(stmt, lambda node: Transform(node, increment(node))) for stmt in stmts]
- else:
- raise TypeError("unrecognized stmts type")
-
-class Iter(object):
- def __init__(self, it, end, len, inc):
- self.it = it
- self.end = end
- self.len = len
- self.inc = inc
-
-def DoubleLoop(top, bot, Kernel, Preamble=[], Postamble=[]):
- def Step(it, inc):
- if inc == 1:
- return Inc(it)
- else:
- return AddSet(it, I(inc))
-
- return For(Set(top.it, I(0)), LT(top.it, top.end), Step(top.it, top.inc),
- Block(*[
- *[func(i) for func in Preamble for i in range(top.inc)],
- For(Set(bot.it, I(0)), LT(bot.it, bot.end), Step(bot.it, bot.inc),
- Block(*[func for i in range(top.inc) for func in Kernel(top.it, bot.it, i, bot.inc)])
- ),
- For(None, LT(bot.it, bot.len), Inc(bot.it),
- Block(*[func for i in range(top.inc) for func in Kernel(top.it, bot.it, i, 1)])
- ),
- *[func(i) for func in Postamble for i in range(bot.inc)]
- ])
- )
-
-def TriangularLoop(top, bot, Kernel, Preamble=[], Postamble=lambda i: None, upper=True):
- # ---------------------------------------------------------------
- # Helper functions
-
- def Finish(j, upper=False):
- if upper:
- if j == 0:
- return Block(For(None, GT(bot.it, top.it), Dec(bot.it),
- Block(*[func for i in range(j, top.inc) for func in Kernel(top.it, bot.it, i, 1)])
- ), Postamble(j)
- )
- return Block(*[func for i in range(j, top.inc) for func in Kernel(top.it, bot.it, i, 1)], Dec(bot.it), Postamble(j))
- if not upper:
- if j == 0:
- return Block(For(None, LT(bot.it, top.it), Inc(bot.it),
- Block(*[func for i in range(j, top.inc) for func in Kernel(top.it, bot.it, i, 1)])
- ), Postamble(j))
- return Block(*[func for i in range(j, top.inc) for func in Kernel(top.it, bot.it, i, 1)], Inc(bot.it), Postamble(j))
-
- def Step(it, inc, down=False):
- if not down:
- if inc == 1:
- return Inc(it)
- else:
- return AddSet(it, I(inc))
- else:
- if inc == 1:
- return Dec(it)
- else:
- return SubSet(it, I(inc))
-
- # ---------------------------------------------------------------
- # Main body
-
- if upper:
- return For(Set(top.it, Sub(top.end, I(1))), GE(top.it, I(0)), Step(top.it, top.inc, down=True),
- Block(*[
- *[func(i) for func in Preamble for i in range(top.inc)],
- Set(bot.end, Sub(Paren(EvenTo(Paren(Sub(top.it, Paren(Sub(bot.len, I(1))))), bot.inc)), Paren(Sub(bot.len, I(1))))),
- For(Set(bot.it, Sub(bot.len, I(1))), GT(bot.it, bot.end), Step(bot.it, bot.inc, down=True),
- Block(*[func for i in range(top.inc) for func in Kernel(top.it, bot.it, i, bot.inc)])
- ),
- *[ Finish(j, upper) if bot.inc > 1 else Postamble(j) for j in range(top.inc) ],
- # *[func(i) for func in Postamble for i in range(bot.inc)]
- ])
- )
- else:
- return For(Set(top.it, I(0)), LT(top.it, top.end), Step(top.it, top.inc),
- Block(*[
- *[func(i) for func in Preamble for i in range(top.inc)],
- Set(bot.end, EvenTo(top.it, bot.inc)),
- For(Set(bot.it, I(0)), LT(bot.it, bot.end), Step(bot.it, bot.inc),
- Block(*[func for i in range(top.inc) for func in Kernel(top.it, bot.it, i, bot.inc)])
- ),
- *[ Finish(j) if bot.inc > 1 else Postamble(j) for j in range(top.inc) ],
- # *[func(i) for func in Postamble for i in range(bot.inc)]
- ])
- )
-
-def Shift(x, i, uplo):
- if uplo == "upper":
- return Sub(x, I(i))
- if uplo == "lower":
- return Add(x, I(i))
- raise ValueError("unrecognized value")
-
-
-def ToKernel(name, loop):
- vars = VarsUsed(StmtExpr(loop.init)) | VarsUsed(StmtExpr(loop.cond)) | \
- VarsUsed(StmtExpr(loop.step)) | VarsUsed(loop.body)
-
-def ExpandAdd(row, x, i: int, c: Emitter, inc: int, uplo):
- offset = Add(c, I(0))
- root = Mul(Index(Index(row, I(i)), offset), Index(x, offset))
- for n in range(1, inc):
- offset = Shift(c, I(n), uplo)
- root = Add(root, Mul(Index(Index(row, I(i)), offset), Index(x, offset)))
- return root
-
-# ------------------------------------------------------------------------
-# Blas level 2 functions
-
-def trsv(kind):
- name = typeify("trsv", kind)
- F = Func(pkg(name), Void,
- Params(
- (UInt32, "flag"), (Int, "len"), (Ptr(kind), "m"), (Int, "incm"), (Ptr(kind), "x"), (Int, "incx")
- ),
- Vars(
- (Int, "r"), (Int, "c"), (Int, "nr"), (Int, "nc"), (Array(Ptr(kind), ROW), "row"), (Array(kind, COL), "res")
- )
- )
-
- r, c, nr, nc, row, res = F.variables("r", "c", "nr", "nc", "row", "res")
- flag, _len, m, incm, x = F.variables("flag", "len", "m", "incm", "x")
- incx = F.variables("incx")
-
- rows, cols = lambda inc_r: Iter(r, nr, _len, inc_r), lambda inc_c: Iter(c, nc, _len, inc_c)
-
- template = lambda inc_r, inc_c, upper: TriangularLoop(rows(inc_r), cols(inc_c),
- Kernel = lambda r, c, i, inc: [AddSet(Index(res, I(i)), ExpandAdd(row, x, i, c, inc, upper))],
- Preamble = [lambda i: Set(Index(row, I(i)), Add(m, Mul(Paren(Shift(r, i, upper)), incm))),
- lambda i: Set(Index(res, I(i)), I(0))],
- Postamble = lambda i: Set(Index(x, Shift(c, I(0), upper)), Div(Index(res, I(i)), Index(Index(row, I(i)), Shift(c, I(0), upper)))),
- upper = upper == "upper"
- )
-
- loop = template(1, 1, "upper")
- loop.emit()
-
-def syr(kind):
- name = typeify("syr", kind)
- F = Func(pkg(name), Void,
- Params(
- (UInt32, "flag"), (Int, "len"), (kind, "a"),
- (Ptr(kind), "x"), (Int, "incx"), (Ptr(kind), "m"), (Int, "incm"),
- ),
- Vars(
- (Int, "r"), (Int, "c"), (Int, "nr"), (Int, "nc"), (Array(Ptr(kind), ROW), "row"), (Array(kind, COL), "res")
- )
- )
-
- r, c, nr, nc, row, res = F.variables("r", "c", "nr", "nc", "row", "res")
- flag, _len, a, m, incm, x = F.variables("flag", "len", "a", "m", "incm", "x")
- incx = F.variables("incx")
-
- rows, cols = lambda inc_r: Iter(r, nr, _len, inc_r), lambda inc_c: Iter(c, nc, _len, inc_c)
-
- template = lambda inc_r, inc_c, upper: TriangularLoop(rows(inc_r), cols(inc_c),
- Kernel = lambda r, c, i, inc: [AddSet(Index(Index(row, I(i)), Shift(c, j, upper)), Mul(Index(res, I(i)), Index(x, Shift(c, j, upper)))) for j in range(inc)],
- Preamble = [lambda i: Set(Index(row, I(i)), Add(m, Mul(Paren(Shift(r, i, upper)), incm))),
- lambda i: Set(Index(res, I(i)), Mul(a, Index(x, Shift(r, i, upper))))],
- upper = upper == "upper"
- )
-
- blocks = []
- for layout in ["lower", "upper"]:
- floop = template(2, 2, layout)
- sloop = template(2, 2, layout)
- sloop.body = AsStrided(sloop.body, x, c, incx)
-
- fini = template(1, 1, layout)
- fini.init = None
- fini.body = AsStrided(fini.body, x, c, incx)
- fini.cond = LT(r, _len)
-
- blocks.append(
- Block(
- If(UnitIncs(incx), Block(floop), Block(sloop)),
- fini,
- Return(),
- )
- )
- F.execute(
- Set(nr, EvenTo(_len, ROW)),
- If(flag, blocks[0], blocks[1])
- )
- F.emit()
-
-def ger(kind):
- name = typeify("ger", kind)
- F = Func(pkg(name), Void,
- Params(
- (Int, "nrow"), (Int, "ncol"), (kind, "a"),
- (Ptr(kind), "x"), (Int, "incx"), (Ptr(kind), "y"), (Int, "incy"), (Ptr(kind), "m"), (Int, "incm"),
- ),
- Vars(
- (Int, "r"), (Int, "c"), (Int, "nr"), (Int, "nc"), (Array(Ptr(kind), ROW), "row"), (Array(kind, COL), "res")
- )
- )
-
- r, c, nr, nc, row, res = F.variables("r", "c", "nr", "nc", "row", "res")
- nrow, ncol, a, m, incm, x, y = F.variables("nrow", "ncol", "a", "m", "incm", "x", "y")
- incx, incy = F.variables("incx", "incy")
-
- rows, cols = lambda incr: Iter(r, nr, nrow, incr), lambda incc: Iter(c, nc, ncol, incc)
-
- template = lambda incr, incc: DoubleLoop(rows(incr), cols(incc),
- Kernel = lambda r, c, i, inc: [AddSet(Index(Index(row, I(i)), Add(c, I(j))), Mul(Index(res, I(i)), Index(y, Add(c, I(j))))) for j in range(inc)],
- Preamble = [lambda i: Set(Index(row, I(i)), Add(m, Mul(Paren(Add(r, I(i))), incm))),
- lambda i: Set(Index(res, I(i)), Mul(a, Index(x, Add(r, I(i)))))],
- )
-
- # loop = template(1, 1)
- # F.execute(loop)
- # F.emit()
- floop = template(ROW, COL)
- sloop = template(ROW, COL)
- sloop.body = AsStrided(AsStrided(sloop.body, x, c, incx), y, r, incy)
-
- fini = template(1, 2*COL)
- fini.init = None
- fini.body = AsStrided(AsStrided(fini.body, x, c, incx), y, r, incy)
- fini.cond = LT(r, nrow)
-
- F.execute(
- Set(nr, EvenTo(nrow, ROW)),
- Set(nc, EvenTo(ncol, COL)),
- If(UnitIncs(incx, incy), Block(floop), Block(sloop))
- )
- F.execute(fini)
- F.emit()
-
-def gemv(kind):
- name = typeify("gemv", kind)
- params = Params(
- (Int, "nrow"), (Int, "ncol"), (kind, "a"), (Ptr(kind), "m"), (Int, "incm"),
- (Ptr(kind), "x"), (Int, "incx"), (kind, "b"), (Ptr(kind), "y"), (Int, "incy")
- )
- stack = Vars((Int, "r"), (Int, "c"), (Ptr(kind), "row"), (kind, "res"))
- F = Func(pkg(name), Void, params, stack)
-
- # ---------------------
- # Kernel
-
- def innerloop(rinc, cit, cend, cinc):
- return For(Set(cit, I(0)), LT(cit, cend), AddSet(cit, I(cinc)),
- Block(*[AddSet(TryIndex(res, I(i)),
- AddElts(*(Mul(
- Index(TryIndex(row, I(i)), Add(cit, I(j))),
- Index(x, Add(cit, I(j)))) for j in range(cinc)
- )
- )
- ) for i in range(rinc)])
- )
-
- def tryloop(rinc, cit, cend, cinc):
- if cinc > 1:
- loop = innerloop(rinc, cit, cend, 1)
- loop.init = None
- return loop
-
- def outerloop(rit, rlen, rinc, cit, cend, clen, cinc, row, res):
- return For(Set(rit, I(0)), LT(rit, rlen), AddSet(r, I(rinc)),
- Block(
- *[Set(TryIndex(row, I(i)), Add(m, Mul(Paren(Add(rit, I(i))), incm))) for i in range(rinc)],
- *[Set(TryIndex(res, I(i)), I(0)) for i in range(rinc)],
- innerloop(rinc, cit, cend, cinc),
- tryloop(rinc, cit, clen, cinc),
- *[Set(Index(y, Add(rit, I(i))), Add(Mul(a, TryIndex(res, I(i))), Mul(b, Index(y, Add(rit, I(i)))))) for i in range(rinc)]
- )
- )
-
- kerns = []
- for func, sfx in [(IsInc, ""), (Identity, "_s")]:
- kern = Func(f"{name}{sfx}_{ROW}x{COL}kernel", Void, FilterParams(params, func), stack[0:2] + toarray(ROW, *stack[2:]), static=True)
- r, c, row, res = kern.variables("r", "c", "row", "res")
- nrow, ncol, a, m, incm, x, b, y = kern.variables("nrow", "ncol", "a", "m", "incm", "x", "b", "y")
-
- ncolr = kern.declare(Var(Int, "ncolr"))
- loop = outerloop(r, nrow, ROW, c, ncolr, ncol, COL, row, res)
-
- kern.execute(Round(ncolr, ncol, COL))
- kern.execute(loop)
- if "_s" in sfx:
- incx, incy = kern.variables("incx", "incy")
- StrideAllIndexedTerms(kern.stmts, x, c, incx)
- StrideAllIndexedTerms(kern.stmts, y, r, incy)
-
- kern.emit()
-
- kerns.append(kern)
-
- r, c, row, res = F.variables("r", "c", "row", "res")
- nrow, ncol, a, m, incm, x, b, y = F.variables("nrow", "ncol", "a", "m", "incm", "x", "b", "y")
- incx, incy = F.variables("incx", "incy")
- F.execute(Round(r, nrow, ROW))
- F.execute(
- If(UnitIncs(incx, incy),
- Block(Call(kerns[0], [r, ncol, a, m, incm, x, b, y])),
- Block(Call(kerns[1], [r, ncol, a, m, incm, x, incx, b, y, incy])),
- )
- )
-
- F.params = Params((UInt32, "flag")) + F.params
-
- remainder = outerloop(r, nrow, 1, c, ncol, ncol, COL, row, res)
- remainder.init = None
- F.execute(remainder)
- StrideAllIndexedTerms(F, x, c, incx)
- StrideAllIndexedTerms(F, y, r, incy)
-
- F.emit()
-
-# ------------------------------------------------------------------------
-# Code Generation
-
-if __name__ == "__main__":
- emit("#include <u.h>\n")
- emit("#include <libn.h>\n")
- emit("#include <libmath.h>\n")
- emitln()
- emit("/*********************************************************/\n")
- emit("/* THIS CODE IS GENERATED BY GEN2.PY! DON'T EDIT BY HAND */\n")
- emit("/*********************************************************/\n")
- emitln(2)
-
- for kind in [Float64]: #[Float32, Float64]:
- trsv(kind)
- # syr(kind)
- # ger(kind)
- # gemv(kind)
-
- flush()
diff --git a/sys/libmath/rules.mk b/sys/libmath/rules.mk
index c7d8d61..fddd74a 100644
--- a/sys/libmath/rules.mk
+++ b/sys/libmath/rules.mk
@@ -5,9 +5,7 @@ include share/push.mk
# Local sources
SRCS_$(d) := \
$(d)/basic.c \
- $(d)/blas1.c \
- $(d)/blas2.c \
- $(d)/linalg.c
+ $(d)/blas1.c
LIBS_$(d) := $(d)/libmath.a
BINS_$(d) :=
TSTS_$(d) := \