From f9873bfabc066f05ece6510d5c016f5b960d255a Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 30 Apr 2020 13:00:53 -0700 Subject: chore: broke out blas-like interface into its own file --- sys/libmath/test.c | 46 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) (limited to 'sys/libmath/test.c') diff --git a/sys/libmath/test.c b/sys/libmath/test.c index 4978123..3dfaa31 100644 --- a/sys/libmath/test.c +++ b/sys/libmath/test.c @@ -302,9 +302,9 @@ math·freemtx(math·Vec *m) return 0; } -/* +/************************************************ * multiply matrix to vector - */ + ***********************************************/ /* * Notation: (number of rows) x (number of columns) _ unroll factor @@ -312,15 +312,37 @@ math·freemtx(math·Vec *m) */ static void -mtxvec_kernel4xN_4(int ncol, double **a, double *x, double *y) +mtxvec_kernel4xN_4_avx2(int ncol, double **row, double *x, double *y) { int c; - double *row[4], res[4]; + __m128d hr; + __m256d x256, r256[4]; + + for (c = 0; c < 4; c++) { + r256[c] = _mm256_setzero_pd(); + } + + for (c = 0; c < ncol; c += 4) { + x256 = _mm256_loadu_pd(x+c); + r256[0] += x256 * _mm256_loadu_pd(row[0] + c); + r256[1] += x256 * _mm256_loadu_pd(row[1] + c); + r256[2] += x256 * _mm256_loadu_pd(row[2] + c); + r256[3] += x256 * _mm256_loadu_pd(row[3] + c); + } - row[0] = a[0]; - row[1] = a[1]; - row[2] = a[2]; - row[3] = a[3]; + for (c = 0; c < 4; c++) { + hr = _mm_add_pd(_mm256_extractf128_pd(r256[c], 0), _mm256_extractf128_pd(r256[c], 1)); + hr = _mm_hadd_pd(hr, hr); + y[c] = hr[0]; + } +} + +static +void +mtxvec_kernel4xN_4(int ncol, double **row, double *x, double *y) +{ + int c; + double res[4]; res[0] = 0.; res[1] = 0.; @@ -370,7 +392,7 @@ math·mtxvec(math·Mtx m, double a, math·Vec x, double b, math·Vec y) row[2] = m.d + (r * (m.dim[1]+2)); row[3] = m.d + (r * (m.dim[1]+3)); - mtxvec_kernel4xN_4(ncol, row, x.d + r, res); + mtxvec_kernel4xN_4_avx2(ncol, row, x.d + r, res); for (c = ncol; c < m.dim[1]; c++) { res[0] += row[0][c]; @@ -393,8 +415,13 @@ math·mtxvec(math·Mtx m, double a, math·Vec x, double b, math·Vec y) return 0; } +/************************************************ + * add matrix to vector outerproduct + ***********************************************/ + #define NITER 50 +#if 0 error main() { @@ -441,3 +468,4 @@ main() return 0; } +#endif -- cgit v1.2.1