aboutsummaryrefslogtreecommitdiff
path: root/sys/libmath/test.c
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-04-30 13:00:53 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-04-30 13:00:53 -0700
commitf9873bfabc066f05ece6510d5c016f5b960d255a (patch)
tree3071a1ed8a86bf2ddda45d097eddaecf8a94c0b3 /sys/libmath/test.c
parent3c32ffdc8e3552aa58bbb8cdf7757ae808ec7eb6 (diff)
chore: broke out blas-like interface into its own file
Diffstat (limited to 'sys/libmath/test.c')
-rw-r--r--sys/libmath/test.c46
1 files changed, 37 insertions, 9 deletions
diff --git a/sys/libmath/test.c b/sys/libmath/test.c
index 4978123..3dfaa31 100644
--- a/sys/libmath/test.c
+++ b/sys/libmath/test.c
@@ -302,9 +302,9 @@ math·freemtx(math·Vec *m)
return 0;
}
-/*
+/************************************************
* multiply matrix to vector
- */
+ ***********************************************/
/*
* Notation: (number of rows) x (number of columns) _ unroll factor
@@ -312,15 +312,37 @@ math·freemtx(math·Vec *m)
*/
static
void
-mtxvec_kernel4xN_4(int ncol, double **a, double *x, double *y)
+mtxvec_kernel4xN_4_avx2(int ncol, double **row, double *x, double *y)
{
int c;
- double *row[4], res[4];
+ __m128d hr;
+ __m256d x256, r256[4];
+
+ for (c = 0; c < 4; c++) {
+ r256[c] = _mm256_setzero_pd();
+ }
+
+ for (c = 0; c < ncol; c += 4) {
+ x256 = _mm256_loadu_pd(x+c);
+ r256[0] += x256 * _mm256_loadu_pd(row[0] + c);
+ r256[1] += x256 * _mm256_loadu_pd(row[1] + c);
+ r256[2] += x256 * _mm256_loadu_pd(row[2] + c);
+ r256[3] += x256 * _mm256_loadu_pd(row[3] + c);
+ }
- row[0] = a[0];
- row[1] = a[1];
- row[2] = a[2];
- row[3] = a[3];
+ for (c = 0; c < 4; c++) {
+ hr = _mm_add_pd(_mm256_extractf128_pd(r256[c], 0), _mm256_extractf128_pd(r256[c], 1));
+ hr = _mm_hadd_pd(hr, hr);
+ y[c] = hr[0];
+ }
+}
+
+static
+void
+mtxvec_kernel4xN_4(int ncol, double **row, double *x, double *y)
+{
+ int c;
+ double res[4];
res[0] = 0.;
res[1] = 0.;
@@ -370,7 +392,7 @@ math·mtxvec(math·Mtx m, double a, math·Vec x, double b, math·Vec y)
row[2] = m.d + (r * (m.dim[1]+2));
row[3] = m.d + (r * (m.dim[1]+3));
- mtxvec_kernel4xN_4(ncol, row, x.d + r, res);
+ mtxvec_kernel4xN_4_avx2(ncol, row, x.d + r, res);
for (c = ncol; c < m.dim[1]; c++) {
res[0] += row[0][c];
@@ -393,8 +415,13 @@ math·mtxvec(math·Mtx m, double a, math·Vec x, double b, math·Vec y)
return 0;
}
+/************************************************
+ * add matrix to vector outerproduct
+ ***********************************************/
+
#define NITER 50
+#if 0
error
main()
{
@@ -441,3 +468,4 @@ main()
return 0;
}
+#endif