#include #include #include #include int argmax2(int len, double *x) { int i, ix[8]; double *end; double max[8]; max[0] = x[0]; max[1] = x[1]; max[2] = x[2]; max[3] = x[3]; max[4] = x[4]; max[5] = x[5]; max[6] = x[6]; max[7] = x[7]; for (i = 0; i < len; i+=8) { if (x[i+0] > max[0]) { max[0] = *x; ix[0] = i; } if (x[i+1] > max[1]) { max[1] = *x; ix[1] = i; } if (x[i+2] > max[2]) { max[2] = *x; ix[2] = i; } if (x[i+3] > max[3]) { max[3] = *x; ix[3] = i; } if (x[i+4] > max[4]) { max[4] = *x; ix[4] = i; } if (x[i+5] > max[5]) { max[5] = *x; ix[5] = i; } if (x[i+6] > max[6]) { max[6] = *x; ix[6] = i; } if (x[i+7] > max[7]) { max[7] = *x; ix[7] = i; } } for (i = 1; i < 8; i++) { if (max[i] > max[0]) { max[0] = max[i]; ix[0] = ix[i] + i; } } return ix[0]; } int argmax(int len, double *x) { int i, ix; double *end; double max; max = *x; for (i = 0; i < len; i++) { if (x[i] > max) { max = *x; ix = i; } } return ix; } #define LEN 1000000 #define NIT 2000 error main() { int i, nit; double *x, *y, res[3]; clock_t t; double tprof[3] = { 0 }; rng·init(0); x = malloc(sizeof(*x)*LEN); // y = malloc(sizeof(*x)*LEN); #define DO_0 t = clock(); \ res[0] += argmax(LEN, x); \ t = clock() - t; \ tprof[0] += 1000.*t/CLOCKS_PER_SEC; \ #define DO_1 t = clock(); \ res[1] += argmax2(LEN, x); \ t = clock() - t; \ tprof[1] += 1000.*t/CLOCKS_PER_SEC; \ #define DO_2 t = clock(); \ res[2] += cblas_idamax(LEN, x, 1); \ t = clock() - t; \ tprof[2] += 1000.*t/CLOCKS_PER_SEC; for (nit = 0; nit < NIT; nit++) { for (i = 0; i < LEN; i++) { x[i] = rng·random(); // y[i] = rng·random(); } switch (nit % 6) { case 0: DO_0; DO_1; DO_2; break; case 1: DO_0; DO_2; DO_1; break; case 2: DO_1; DO_0; DO_2; break; case 3: DO_1; DO_2; DO_0; break; case 4: DO_2; DO_0; DO_1; break; case 5: DO_2; DO_1; DO_0; break; } } printf("mean time/iteration (naive): %fms\n", tprof[0]/NIT); printf("--> result (naive): %f\n", res[0]); printf("mean time/iteration (unrolled): %fms\n", tprof[1]/NIT); printf("--> result (unrolled): %f\n", res[1]); printf("mean time/iteration (openblas): %fms\n", tprof[2]/NIT); printf("--> result (openblas): %f\n", res[2]); return 0; }