@@ -53,8 +53,8 @@ PROLOGUE
53
53
#endif
54
54
55
55
/* init $f8 and $f9 to zero */
56
- SUB s1 , s1 , s1
57
- SUB s2 , s2 , s2
56
+ xvxor.v $xr8 , $xr8 , $xr8
57
+ xvxor.v $xr9 , $xr9 , $xr9
58
58
slli.d INCX, INCX, BASE_SHIFT
59
59
li.d TEMP, SIZE
60
60
slli.d INCY, INCY, BASE_SHIFT
@@ -64,20 +64,6 @@ PROLOGUE
64
64
65
65
/* !((inc_x == 1) && (inc_y == 1)) */
66
66
67
- /* init $xr8 and $xr9 to zero */
68
- #ifdef DOUBLE
69
- xvldrepl.d $xr0, X, 0
70
- #else
71
- xvldrepl.w $xr0, X, 0
72
- #endif
73
- #ifdef DSDOT
74
- xvfcvtl.d.s $xr0, $xr0
75
- xvfsub.d $xr8, $xr0, $xr0
76
- xvfsub.d $xr9, $xr0, $xr0
77
- #else
78
- XVFSUB $xr8, $xr0, $xr0
79
- XVFSUB $xr9, $xr0, $xr0
80
- #endif
81
67
82
68
#ifdef DOUBLE
83
69
srai.d I, N, 4
@@ -99,31 +85,31 @@ PROLOGUE
99
85
addi.w I, I, -1
100
86
addi.d X, X, 128
101
87
addi.d Y, Y, 128
102
- #ifdef DSDOT
88
+ #ifndef DOUBLE
103
89
xvfcvtl.d.s $xr10, $xr0
104
90
xvfcvtl.d.s $xr11, $xr4
105
91
xvfcvth.d.s $xr12, $xr0
106
92
xvfcvth.d.s $xr13, $xr4
107
- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
108
- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
93
+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
94
+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
109
95
xvfcvtl.d.s $xr10, $xr1
110
96
xvfcvtl.d.s $xr11, $xr5
111
97
xvfcvth.d.s $xr12, $xr1
112
98
xvfcvth.d.s $xr13, $xr5
113
- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
114
- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
99
+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
100
+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
115
101
xvfcvtl.d.s $xr10, $xr2
116
102
xvfcvtl.d.s $xr11, $xr6
117
103
xvfcvth.d.s $xr12, $xr2
118
104
xvfcvth.d.s $xr13, $xr6
119
- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
120
- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
105
+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
106
+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
121
107
xvfcvtl.d.s $xr10, $xr3
122
108
xvfcvtl.d.s $xr11, $xr7
123
109
xvfcvth.d.s $xr12, $xr3
124
110
xvfcvth.d.s $xr13, $xr7
125
- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
126
- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
111
+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
112
+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
127
113
#else
128
114
XVFMADD $xr8, $xr0, $xr4, $xr8
129
115
XVFMADD $xr9, $xr1, $xr5, $xr9
@@ -149,41 +135,26 @@ PROLOGUE
149
135
addi.w I, I, -1
150
136
addi.d X, X, 32
151
137
addi.d Y, Y, 32
152
- #ifdef DSDOT
138
+ #ifndef DOUBLE
153
139
xvfcvtl.d.s $xr10, $xr0
154
140
xvfcvtl.d.s $xr11, $xr4
155
141
xvfcvth.d.s $xr12, $xr0
156
142
xvfcvth.d.s $xr13, $xr4
157
- xvfmadd.d $xr8, $xr10, $xr12 , $xr8
158
- xvfmadd.d $xr9, $xr11 , $xr13, $xr9
143
+ xvfmadd.d $xr8, $xr10, $xr11 , $xr8
144
+ xvfmadd.d $xr9, $xr12 , $xr13, $xr9
159
145
#else
160
146
XVFMADD $xr8, $xr0, $xr4, $xr8
161
147
#endif
162
148
bnez I, .L13
163
149
.align 3
164
150
.L14:
165
151
/* store dot in s1 $f8 */
166
- #ifdef DSDOT
167
152
xvfadd.d $xr8, $xr8, $xr9
168
- fsub.s s2, s2, s2 /* set s2 to 0.0 */
153
+ fsub.d s2, s2, s2 /* set s2 to 0.0 */
169
154
xvpermi.q $xr0, $xr8, 0x1
170
155
vfadd.d $vr8, $vr8, $vr0
171
156
vpackod.d $vr0, $vr8, $vr8
172
157
vfadd.d $vr8, $vr8, $vr0
173
- #else
174
- XVFADD $xr8, $xr8, $xr9
175
- SUB s2, s2, s2 /* set s2 to 0.0 */
176
- xvpermi.q $xr0, $xr8, 0x1
177
- VFADD $vr8, $vr8, $vr0
178
- vpackod.d $vr0, $vr8, $vr8
179
- #ifdef DOUBLE
180
- VFADD $vr8, $vr8, $vr0
181
- #else
182
- VFADD $vr8, $vr8, $vr0
183
- vpackod.w $vr0, $vr8, $vr8
184
- VFADD $vr8, $vr8, $vr0
185
- #endif /* defined DOUBLE */
186
- #endif /* defined DSDOT */
187
158
.align 3
188
159
.L15:
189
160
#ifdef DOUBLE
@@ -197,7 +168,7 @@ PROLOGUE
197
168
/* FLOAT: 1~7 ; DOUBLE: 1~3 */
198
169
LD a1, X, 0
199
170
LD b1, Y, 0
200
- #ifdef DSDOT
171
+ #ifndef DOUBLE
201
172
fcvt.d.s a1, a1
202
173
fcvt.d.s b1, b1
203
174
fmadd.d s1, b1, a1, s1
@@ -240,7 +211,7 @@ PROLOGUE
240
211
add.d X, X, INCX
241
212
LD b1, Y, 0 * SIZE
242
213
add.d Y, Y, INCY
243
- #ifdef DSDOT
214
+ #ifndef DOUBLE
244
215
fcvt.d.s a1, a1
245
216
fcvt.d.s b1, b1
246
217
fmadd.d s1, b1, a1, s1
@@ -252,7 +223,7 @@ PROLOGUE
252
223
add.d X, X, INCX
253
224
LD b1, Y, 0 * SIZE
254
225
add.d Y, Y, INCY
255
- #ifdef DSDOT
226
+ #ifndef DOUBLE
256
227
fcvt.d.s a1, a1
257
228
fcvt.d.s b1, b1
258
229
fmadd.d s2, b1, a1, s2
@@ -264,7 +235,7 @@ PROLOGUE
264
235
add.d X, X, INCX
265
236
LD b1, Y, 0 * SIZE
266
237
add.d Y, Y, INCY
267
- #ifdef DSDOT
238
+ #ifndef DOUBLE
268
239
fcvt.d.s a1, a1
269
240
fcvt.d.s b1, b1
270
241
fmadd.d s1, b1, a1, s1
@@ -276,7 +247,7 @@ PROLOGUE
276
247
add.d X, X, INCX
277
248
LD b1, Y, 0 * SIZE
278
249
add.d Y, Y, INCY
279
- #ifdef DSDOT
250
+ #ifndef DOUBLE
280
251
fcvt.d.s a1, a1
281
252
fcvt.d.s b1, b1
282
253
fmadd.d s2, b1, a1, s2
@@ -288,7 +259,7 @@ PROLOGUE
288
259
add.d X, X, INCX
289
260
LD b1, Y, 0 * SIZE
290
261
add.d Y, Y, INCY
291
- #ifdef DSDOT
262
+ #ifndef DOUBLE
292
263
fcvt.d.s a1, a1
293
264
fcvt.d.s b1, b1
294
265
fmadd.d s1, b1, a1, s1
@@ -300,7 +271,7 @@ PROLOGUE
300
271
add.d X, X, INCX
301
272
LD b1, Y, 0 * SIZE
302
273
add.d Y, Y, INCY
303
- #ifdef DSDOT
274
+ #ifndef DOUBLE
304
275
fcvt.d.s a1, a1
305
276
fcvt.d.s b1, b1
306
277
fmadd.d s2, b1, a1, s2
@@ -312,7 +283,7 @@ PROLOGUE
312
283
add.d X, X, INCX
313
284
LD b1, Y, 0 * SIZE
314
285
add.d Y, Y, INCY
315
- #ifdef DSDOT
286
+ #ifndef DOUBLE
316
287
fcvt.d.s a1, a1
317
288
fcvt.d.s b1, b1
318
289
fmadd.d s1, b1, a1, s1
@@ -325,7 +296,7 @@ PROLOGUE
325
296
LD b1, Y, 0 * SIZE
326
297
add.d Y, Y, INCY
327
298
addi.d I, I, -1
328
- #ifdef DSDOT
299
+ #ifndef DOUBLE
329
300
fcvt.d.s a1, a1
330
301
fcvt.d.s b1, b1
331
302
fmadd.d s2, b1, a1, s2
@@ -346,7 +317,7 @@ PROLOGUE
346
317
LD b1, Y, 0 * SIZE
347
318
add.d Y, Y, INCY
348
319
addi.d I, I, -1
349
- #ifdef DSDOT
320
+ #ifndef DOUBLE
350
321
fcvt.d.s a1, a1
351
322
fcvt.d.s b1, b1
352
323
fmadd.d s1, b1, a1, s1
@@ -357,12 +328,13 @@ PROLOGUE
357
328
.align 3
358
329
359
330
.L999:
360
- #ifdef DSDOT
361
331
fadd.d $f0, s1, s2
332
+ move $r4, $r17
333
+ #if defined(DOUBLE)
334
+ #elif defined(DSDOT)
362
335
#else
363
- ADD $f0, s1, s2
336
+ fcvt.s.d $f0, $f0
364
337
#endif
365
- move $r4, $r17
366
338
jirl $r0, $r1, 0x0
367
339
368
340
EPILOGUE
0 commit comments