aboutsummaryrefslogtreecommitdiff
path: root/sys/libmath/blas2.c
blob: 7e4b08ef0ae560c2d9348a6fc6704df1a2c9285b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#include <u.h>
#include <libmath/blas.h>
#include "loop.h"

// -----------------------------------------------------------------------
// Templates

// BODY_RECT: loop skeleton that walks an nrow x ncol rectangle, advancing
// rows UNROW at a time and columns UNCOL at a time.
//
// The macro defines nothing itself; it relies on names supplied at the
// expansion site: the variables r, c, nr, nc, nrow, ncol, incx, incy and
// the macros ROUNDBY and LOOP. INIT, KERN and FINI are forwarded to LOOP as
// selector tokens; their meaning is not defined in this file.
//
// Shape of the expansion:
//   1. nr/nc bound the part of the rectangle covered by full UNROW/UNCOL
//      steps (presumably ROUNDBY rounds down to a multiple -- TODO confirm).
//   2. Rows [0, nr): per step run INIT, the UNCOL-wide KERN steps for
//      columns [0, nc), width-1 KERN steps for columns [nc, ncol), then
//      FINI. When incx == 1 && incy == 1 the literal 1 is passed instead of
//      incx/incy (presumably so unit strides fold to constants).
//   3. Rows [nr, nrow): the same sequence one row at a time, always with
//      incx/incy (there is no unit-stride variant for these leftover rows).
#define BODY_RECT()                                      \
    nr = ROUNDBY(nrow, UNROW);                           \
    nc = ROUNDBY(ncol, UNCOL);                           \
    if (incx == 1 && incy == 1) {                        \
        for (r = 0; r < nr; r += UNROW) {                \
            LOOP(UNROW,0,INIT,1,1);                      \
            for (c = 0; c < nc; c += UNCOL) {            \
                LOOP(UNROW,0,KERN,1,1,UNCOL);            \
            }                                            \
            for (; c < ncol; c++) {                      \
                LOOP(UNROW,0,KERN,1,1,1);                \
            }                                            \
            LOOP(UNROW,0,FINI,1,1);                      \
        }                                                \
    } else {                                             \
        for (r = 0; r < nr; r += UNROW) {                \
            LOOP(UNROW,0,INIT,incx,incy);                \
            for (c = 0; c < nc; c += UNCOL) {            \
                LOOP(UNROW,0,KERN,incx,incy,UNCOL);      \
            }                                            \
            for (; c < ncol; c++) {                      \
                LOOP(UNROW,0,KERN,incx,incy,1);          \
            }                                            \
            LOOP(UNROW,0,FINI,incx,incy);                \
        }                                                \
    }                                                    \
                                                         \
    for (; r < nrow; r++) {                              \
        LOOP(1,0,INIT,incx,incy);                        \
        for (c = 0; c < nc; c += UNCOL) {                \
            LOOP(1,0,KERN,incx,incy,UNCOL);              \
        }                                                \
        for (; c < ncol; c++) {                          \
            LOOP(1,0,KERN,incx,incy,1);                  \
        }                                                \
        LOOP(1,0,FINI,incx,incy);                        \
    }

// BODY_LOTRI: loop skeleton for the lower part of an n x n triangle with a
// single stride (incx), walked from row 0 upwards.
//
// Relies on names supplied at the expansion site: the variables r, c, nr,
// nc, n, incx and the macros ROUNDBY and LOOP. INIT, KERN and FINI are
// forwarded to LOOP as selector tokens; their meaning is not defined in
// this file.
//
// For each row step starting at r, the KERN steps only visit columns
// [0, r): UNCOL-wide steps up to nc = ROUNDBY(r, UNCOL), then width-1 steps
// up to r. Columns at or beyond r are never passed to KERN here; presumably
// INIT/FINI deal with the diagonal part -- TODO confirm against the body
// file.
//
// Rows [0, nr) advance UNROW at a time, with a unit-stride variant that
// passes the literal 1 when incx == 1. Rows [nr, n) are handled one at a
// time and always pass incx.
#define BODY_LOTRI()                                     \
    nr = ROUNDBY(n, UNROW);                              \
    if (incx == 1) {                                     \
        for (r = 0; r < nr; r += UNROW) {                \
            LOOP(UNROW,0,INIT,1);                        \
            nc = ROUNDBY(r, UNCOL);                      \
            for (c = 0; c < nc; c += UNCOL) {            \
                LOOP(UNROW,0,KERN,1,UNCOL);              \
            }                                            \
            for (; c < r; c++) {                         \
                LOOP(UNROW,0,KERN,1,1);                  \
            }                                            \
            LOOP(UNROW,0,FINI,1);                        \
        }                                                \
    } else {                                             \
        for (r = 0; r < nr; r += UNROW) {                \
            LOOP(UNROW,0,INIT,incx);                     \
            nc = ROUNDBY(r, UNCOL);                      \
            for (c = 0; c < nc; c += UNCOL) {            \
                LOOP(UNROW,0,KERN,incx,UNCOL);           \
            }                                            \
            for (; c < r; c++) {                         \
                LOOP(UNROW,0,KERN,incx,1);               \
            }                                            \
            LOOP(UNROW,0,FINI,incx);                     \
        }                                                \
    }                                                    \
                                                         \
    for (; r < n; r++) {                                 \
        LOOP(1,0,INIT,incx);                             \
        nc = ROUNDBY(r, UNCOL);                          \
        for (c = 0; c < nc; c += UNCOL) {                \
            LOOP(1,0,KERN,incx,UNCOL);                   \
        }                                                \
        for (; c < r; c++) {                             \
            LOOP(1,0,KERN,incx,1);                       \
        }                                                \
        LOOP(1,0,FINI,incx);                             \
    }

// BODY_UPTRI: loop skeleton for the upper part of an n x n triangle with a
// single stride (incx), walked backwards from row n-1 and column n-1.
//
// Relies on names supplied at the expansion site: the variables r, c, nr,
// nc, n, incx and the macros ROUNDBY and LOOP. INIT, KERN and FINI are
// forwarded to LOOP as selector tokens; their meaning is not defined in
// this file. r and c must be signed, since both loops count down past 0.
//
// Rows: r starts at n-1 and drops by UNROW while r >= nr, which gives
// ROUNDBY(n, UNROW)/UNROW full steps; the remaining rows down to 0 are
// handled one at a time and always pass incx.
//
// Columns: for a row step at r, KERN may only see columns c with
// r < c <= n-1. There are n-1-r of them, so the UNCOL-wide loop is bounded
// by nc = n - ROUNDBY((n-1-r), UNCOL) and the width-1 tail loop picks up
// the rest while c > r. Bounding by ROUNDBY(r, UNCOL) instead would let the
// wide loop step onto columns <= r (e.g. at r == n-1, where no column lies
// above r at all), contradicting the tail loop's own `c > r` guard.
//
// Assumes ROUNDBY(x, k) rounds x down to a multiple of k and that a KERN
// step of width w at column c covers c, c-1, ..., c-w+1 -- TODO confirm
// against the body file.
#define BODY_UPTRI()                                     \
    nr = n - ROUNDBY(n, UNROW);                          \
    if (incx == 1) {                                     \
        for (r = n-1; r >= nr; r -= UNROW) {             \
            LOOP(UNROW,0,INIT,1);                        \
            nc = n - ROUNDBY((n-1-r), UNCOL);            \
            for (c = n-1; c >= nc; c -= UNCOL) {         \
                LOOP(UNROW,0,KERN,1,UNCOL);              \
            }                                            \
            for (; c > r; c--) {                         \
                LOOP(UNROW,0,KERN,1,1);                  \
            }                                            \
            LOOP(UNROW,0,FINI,1);                        \
        }                                                \
    } else {                                             \
        for (r = n-1; r >= nr; r -= UNROW) {             \
            LOOP(UNROW,0,INIT,incx);                     \
            nc = n - ROUNDBY((n-1-r), UNCOL);            \
            for (c = n-1; c >= nc; c -= UNCOL) {         \
                LOOP(UNROW,0,KERN,incx,UNCOL);           \
            }                                            \
            for (; c > r; c--) {                         \
                LOOP(UNROW,0,KERN,incx,1);               \
            }                                            \
            LOOP(UNROW,0,FINI,incx);                     \
        }                                                \
    }                                                    \
                                                         \
    for (; r >= 0; r--) {                                \
        LOOP(1,0,INIT,incx);                             \
        nc = n - ROUNDBY((n-1-r), UNCOL);                \
        for (c = n-1; c >= nc; c -= UNCOL) {             \
            LOOP(1,0,KERN,incx,UNCOL);                   \
        }                                                \
        for (; c > r; c--) {                             \
            LOOP(1,0,KERN,incx,1);                       \
        }                                                \
        LOOP(1,0,FINI,incx);                             \
    }

// BODY_LOTRI_XY: two-stride (incx, incy) variant of the lower-triangular
// loop skeleton for an n x n triangle, walked from row 0 upwards.
//
// Relies on names supplied at the expansion site: the variables r, c, nr,
// nc, n, incx, incy and the macros ROUNDBY and LOOP. INIT, KERN and FINI
// are forwarded to LOOP as selector tokens; their meaning is not defined in
// this file.
//
// For each row step starting at r, the KERN steps only visit columns
// [0, r): UNCOL-wide steps up to nc = ROUNDBY(r, UNCOL), then width-1 steps
// up to r. Columns at or beyond r are never passed to KERN here; presumably
// INIT/FINI deal with the diagonal part -- TODO confirm against the body
// file.
//
// Rows [0, nr) advance UNROW at a time, with a unit-stride variant that
// passes literal 1s when incx == 1 && incy == 1. Rows [nr, n) are handled
// one at a time and always pass incx/incy.
#define BODY_LOTRI_XY()                                  \
    nr = ROUNDBY(n, UNROW);                              \
    if (incx == 1 && incy == 1) {                        \
        for (r = 0; r < nr; r += UNROW) {                \
            LOOP(UNROW,0,INIT,1,1);                      \
            nc = ROUNDBY(r, UNCOL);                      \
            for (c = 0; c < nc; c += UNCOL) {            \
                LOOP(UNROW,0,KERN,1,1,UNCOL);            \
            }                                            \
            for (; c < r; c++) {                         \
                LOOP(UNROW,0,KERN,1,1,1);                \
            }                                            \
            LOOP(UNROW,0,FINI,1,1);                      \
        }                                                \
    } else {                                             \
        for (r = 0; r < nr; r += UNROW) {                \
            LOOP(UNROW,0,INIT,incx,incy);                \
            nc = ROUNDBY(r, UNCOL);                      \
            for (c = 0; c < nc; c += UNCOL) {            \
                LOOP(UNROW,0,KERN,incx,incy,UNCOL);      \
            }                                            \
            for (; c < r; c++) {                         \
                LOOP(UNROW,0,KERN,incx,incy,1);          \
            }                                            \
            LOOP(UNROW,0,FINI,incx, incy);               \
        }                                                \
    }                                                    \
                                                         \
    for (; r < n; r++) {                                 \
        LOOP(1,0,INIT,incx,incy);                        \
        nc = ROUNDBY(r, UNCOL);                          \
        for (c = 0; c < nc; c += UNCOL) {                \
            LOOP(1,0,KERN,incx,incy,UNCOL);              \
        }                                                \
        for (; c < r; c++) {                             \
            LOOP(1,0,KERN,incx,incy,1);                  \
        }                                                \
        LOOP(1,0,FINI,incx,incy);                        \
    }

// BODY_UPTRI_XY: two-stride (incx, incy) variant of the upper-triangular
// loop skeleton for an n x n triangle, walked backwards from row n-1 and
// column n-1.
//
// Relies on names supplied at the expansion site: the variables r, c, nr,
// nc, n, incx, incy and the macros ROUNDBY and LOOP. INIT, KERN and FINI
// are forwarded to LOOP as selector tokens; their meaning is not defined in
// this file. r and c must be signed, since both loops count down past 0.
//
// Rows: r starts at n-1 and drops by UNROW while r >= nr, which gives
// ROUNDBY(n, UNROW)/UNROW full steps; the remaining rows down to 0 are
// handled one at a time and always pass incx/incy.
//
// Columns: for a row step at r, KERN may only see columns c with
// r < c <= n-1. There are n-1-r of them, so the UNCOL-wide loop is bounded
// by nc = n - ROUNDBY((n-1-r), UNCOL) and the width-1 tail loop picks up
// the rest while c > r. Bounding by ROUNDBY(r, UNCOL) instead would let the
// wide loop step onto columns <= r (e.g. at r == n-1, where no column lies
// above r at all), contradicting the tail loop's own `c > r` guard.
//
// Assumes ROUNDBY(x, k) rounds x down to a multiple of k and that a KERN
// step of width w at column c covers c, c-1, ..., c-w+1 -- TODO confirm
// against the body file.
#define BODY_UPTRI_XY()                                  \
    nr = n - ROUNDBY(n, UNROW);                          \
    if (incx == 1 && incy == 1) {                        \
        for (r = n-1; r >= nr; r -= UNROW) {             \
            LOOP(UNROW,0,INIT,1,1);                      \
            nc = n - ROUNDBY((n-1-r), UNCOL);            \
            for (c = n-1; c >= nc; c -= UNCOL) {         \
                LOOP(UNROW,0,KERN,1,1,UNCOL);            \
            }                                            \
            for (; c > r; c--) {                         \
                LOOP(UNROW,0,KERN,1,1,1);                \
            }                                            \
            LOOP(UNROW,0,FINI,1,1);                      \
        }                                                \
    } else {                                             \
        for (r = n-1; r >= nr; r -= UNROW) {             \
            LOOP(UNROW,0,INIT,incx,incy);                \
            nc = n - ROUNDBY((n-1-r), UNCOL);            \
            for (c = n-1; c >= nc; c -= UNCOL) {         \
                LOOP(UNROW,0,KERN,incx,incy,UNCOL);      \
            }                                            \
            for (; c > r; c--) {                         \
                LOOP(UNROW,0,KERN,incx,incy,1);          \
            }                                            \
            LOOP(UNROW,0,FINI,incx,incy);                \
        }                                                \
    }                                                    \
                                                         \
    for (; r >= 0; r--) {                                \
        LOOP(1,0,INIT,incx,incy);                        \
        nc = n - ROUNDBY((n-1-r), UNCOL);                \
        for (c = n-1; c >= nc; c -= UNCOL) {             \
            LOOP(1,0,KERN,incx,incy,UNCOL);              \
        }                                                \
        for (; c > r; c--) {                             \
            LOOP(1,0,KERN,incx,incy,1);                  \
        }                                                \
        LOOP(1,0,FINI,incx,incy);                        \
    }

// -----------------------------------------------------------------------
// implementation

// Step sizes used by the BODY_* templates above: rows advance UNROW at a
// time and columns UNCOL at a time.
#define UNROW 4
#define UNCOL 4

// First inclusion of "blas2body": INT is int, FLOAT is double, and
// func(name) pastes the prefix blas·d onto name.
// NOTE(review): "blas2body" is not visible from here; presumably it uses
// INT, FLOAT, func and the BODY_* templates to define the routines --
// confirm against that file.
#define INT   int
#define FLOAT double
#define func(name) blas·d##name
#include "blas2body"

// Drop the double-precision settings before redefining them. INT, UNROW
// and UNCOL are left defined and are shared by both inclusions.
#undef FLOAT
#undef func

// Second inclusion of "blas2body": FLOAT is float and func(name) pastes
// the prefix blas·f onto name.
#define FLOAT float
#define func(name) blas·f##name
#include "blas2body"