Skip to content

Commit ba9569e

Browse files
author
pengxu
committed
LoongArch64: fixed dot_lasx
1 parent dc5fa29 commit ba9569e

File tree

1 file changed

+29
-57
lines changed

1 file changed

+29
-57
lines changed

kernel/loongarch64/dot_lasx.S

Lines changed: 29 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,8 @@ PROLOGUE
5353
#endif
5454

5555
/* init $f8 and $f9 to zero */
56-
SUB s1, s1, s1
57-
SUB s2, s2, s2
56+
xvxor.v $xr8, $xr8, $xr8
57+
xvxor.v $xr9, $xr9, $xr9
5858
slli.d INCX, INCX, BASE_SHIFT
5959
li.d TEMP, SIZE
6060
slli.d INCY, INCY, BASE_SHIFT
@@ -64,20 +64,6 @@ PROLOGUE
6464

6565
/* !((inc_x == 1) && (inc_y == 1)) */
6666

67-
/* init $xr8 and $xr9 to zero */
68-
#ifdef DOUBLE
69-
xvldrepl.d $xr0, X, 0
70-
#else
71-
xvldrepl.w $xr0, X, 0
72-
#endif
73-
#ifdef DSDOT
74-
xvfcvtl.d.s $xr0, $xr0
75-
xvfsub.d $xr8, $xr0, $xr0
76-
xvfsub.d $xr9, $xr0, $xr0
77-
#else
78-
XVFSUB $xr8, $xr0, $xr0
79-
XVFSUB $xr9, $xr0, $xr0
80-
#endif
8167

8268
#ifdef DOUBLE
8369
srai.d I, N, 4
@@ -99,31 +85,31 @@ PROLOGUE
9985
addi.w I, I, -1
10086
addi.d X, X, 128
10187
addi.d Y, Y, 128
102-
#ifdef DSDOT
88+
#ifndef DOUBLE
10389
xvfcvtl.d.s $xr10, $xr0
10490
xvfcvtl.d.s $xr11, $xr4
10591
xvfcvth.d.s $xr12, $xr0
10692
xvfcvth.d.s $xr13, $xr4
107-
xvfmadd.d $xr8, $xr10, $xr12, $xr8
108-
xvfmadd.d $xr9, $xr11, $xr13, $xr9
93+
xvfmadd.d $xr8, $xr10, $xr11, $xr8
94+
xvfmadd.d $xr9, $xr12, $xr13, $xr9
10995
xvfcvtl.d.s $xr10, $xr1
11096
xvfcvtl.d.s $xr11, $xr5
11197
xvfcvth.d.s $xr12, $xr1
11298
xvfcvth.d.s $xr13, $xr5
113-
xvfmadd.d $xr8, $xr10, $xr12, $xr8
114-
xvfmadd.d $xr9, $xr11, $xr13, $xr9
99+
xvfmadd.d $xr8, $xr10, $xr11, $xr8
100+
xvfmadd.d $xr9, $xr12, $xr13, $xr9
115101
xvfcvtl.d.s $xr10, $xr2
116102
xvfcvtl.d.s $xr11, $xr6
117103
xvfcvth.d.s $xr12, $xr2
118104
xvfcvth.d.s $xr13, $xr6
119-
xvfmadd.d $xr8, $xr10, $xr12, $xr8
120-
xvfmadd.d $xr9, $xr11, $xr13, $xr9
105+
xvfmadd.d $xr8, $xr10, $xr11, $xr8
106+
xvfmadd.d $xr9, $xr12, $xr13, $xr9
121107
xvfcvtl.d.s $xr10, $xr3
122108
xvfcvtl.d.s $xr11, $xr7
123109
xvfcvth.d.s $xr12, $xr3
124110
xvfcvth.d.s $xr13, $xr7
125-
xvfmadd.d $xr8, $xr10, $xr12, $xr8
126-
xvfmadd.d $xr9, $xr11, $xr13, $xr9
111+
xvfmadd.d $xr8, $xr10, $xr11, $xr8
112+
xvfmadd.d $xr9, $xr12, $xr13, $xr9
127113
#else
128114
XVFMADD $xr8, $xr0, $xr4, $xr8
129115
XVFMADD $xr9, $xr1, $xr5, $xr9
@@ -149,41 +135,26 @@ PROLOGUE
149135
addi.w I, I, -1
150136
addi.d X, X, 32
151137
addi.d Y, Y, 32
152-
#ifdef DSDOT
138+
#ifndef DOUBLE
153139
xvfcvtl.d.s $xr10, $xr0
154140
xvfcvtl.d.s $xr11, $xr4
155141
xvfcvth.d.s $xr12, $xr0
156142
xvfcvth.d.s $xr13, $xr4
157-
xvfmadd.d $xr8, $xr10, $xr12, $xr8
158-
xvfmadd.d $xr9, $xr11, $xr13, $xr9
143+
xvfmadd.d $xr8, $xr10, $xr11, $xr8
144+
xvfmadd.d $xr9, $xr12, $xr13, $xr9
159145
#else
160146
XVFMADD $xr8, $xr0, $xr4, $xr8
161147
#endif
162148
bnez I, .L13
163149
.align 3
164150
.L14:
165151
/* store dot in s1 $f8 */
166-
#ifdef DSDOT
167152
xvfadd.d $xr8, $xr8, $xr9
168-
fsub.s s2, s2, s2 /* set s2 to 0.0 */
153+
fsub.d s2, s2, s2 /* set s2 to 0.0 */
169154
xvpermi.q $xr0, $xr8, 0x1
170155
vfadd.d $vr8, $vr8, $vr0
171156
vpackod.d $vr0, $vr8, $vr8
172157
vfadd.d $vr8, $vr8, $vr0
173-
#else
174-
XVFADD $xr8, $xr8, $xr9
175-
SUB s2, s2, s2 /* set s2 to 0.0 */
176-
xvpermi.q $xr0, $xr8, 0x1
177-
VFADD $vr8, $vr8, $vr0
178-
vpackod.d $vr0, $vr8, $vr8
179-
#ifdef DOUBLE
180-
VFADD $vr8, $vr8, $vr0
181-
#else
182-
VFADD $vr8, $vr8, $vr0
183-
vpackod.w $vr0, $vr8, $vr8
184-
VFADD $vr8, $vr8, $vr0
185-
#endif /* defined DOUBLE */
186-
#endif /* defined DSDOT */
187158
.align 3
188159
.L15:
189160
#ifdef DOUBLE
@@ -197,7 +168,7 @@ PROLOGUE
197168
/* FLOAT: 1~7 ; DOUBLE: 1~3 */
198169
LD a1, X, 0
199170
LD b1, Y, 0
200-
#ifdef DSDOT
171+
#ifndef DOUBLE
201172
fcvt.d.s a1, a1
202173
fcvt.d.s b1, b1
203174
fmadd.d s1, b1, a1, s1
@@ -240,7 +211,7 @@ PROLOGUE
240211
add.d X, X, INCX
241212
LD b1, Y, 0 * SIZE
242213
add.d Y, Y, INCY
243-
#ifdef DSDOT
214+
#ifndef DOUBLE
244215
fcvt.d.s a1, a1
245216
fcvt.d.s b1, b1
246217
fmadd.d s1, b1, a1, s1
@@ -252,7 +223,7 @@ PROLOGUE
252223
add.d X, X, INCX
253224
LD b1, Y, 0 * SIZE
254225
add.d Y, Y, INCY
255-
#ifdef DSDOT
226+
#ifndef DOUBLE
256227
fcvt.d.s a1, a1
257228
fcvt.d.s b1, b1
258229
fmadd.d s2, b1, a1, s2
@@ -264,7 +235,7 @@ PROLOGUE
264235
add.d X, X, INCX
265236
LD b1, Y, 0 * SIZE
266237
add.d Y, Y, INCY
267-
#ifdef DSDOT
238+
#ifndef DOUBLE
268239
fcvt.d.s a1, a1
269240
fcvt.d.s b1, b1
270241
fmadd.d s1, b1, a1, s1
@@ -276,7 +247,7 @@ PROLOGUE
276247
add.d X, X, INCX
277248
LD b1, Y, 0 * SIZE
278249
add.d Y, Y, INCY
279-
#ifdef DSDOT
250+
#ifndef DOUBLE
280251
fcvt.d.s a1, a1
281252
fcvt.d.s b1, b1
282253
fmadd.d s2, b1, a1, s2
@@ -288,7 +259,7 @@ PROLOGUE
288259
add.d X, X, INCX
289260
LD b1, Y, 0 * SIZE
290261
add.d Y, Y, INCY
291-
#ifdef DSDOT
262+
#ifndef DOUBLE
292263
fcvt.d.s a1, a1
293264
fcvt.d.s b1, b1
294265
fmadd.d s1, b1, a1, s1
@@ -300,7 +271,7 @@ PROLOGUE
300271
add.d X, X, INCX
301272
LD b1, Y, 0 * SIZE
302273
add.d Y, Y, INCY
303-
#ifdef DSDOT
274+
#ifndef DOUBLE
304275
fcvt.d.s a1, a1
305276
fcvt.d.s b1, b1
306277
fmadd.d s2, b1, a1, s2
@@ -312,7 +283,7 @@ PROLOGUE
312283
add.d X, X, INCX
313284
LD b1, Y, 0 * SIZE
314285
add.d Y, Y, INCY
315-
#ifdef DSDOT
286+
#ifndef DOUBLE
316287
fcvt.d.s a1, a1
317288
fcvt.d.s b1, b1
318289
fmadd.d s1, b1, a1, s1
@@ -325,7 +296,7 @@ PROLOGUE
325296
LD b1, Y, 0 * SIZE
326297
add.d Y, Y, INCY
327298
addi.d I, I, -1
328-
#ifdef DSDOT
299+
#ifndef DOUBLE
329300
fcvt.d.s a1, a1
330301
fcvt.d.s b1, b1
331302
fmadd.d s2, b1, a1, s2
@@ -346,7 +317,7 @@ PROLOGUE
346317
LD b1, Y, 0 * SIZE
347318
add.d Y, Y, INCY
348319
addi.d I, I, -1
349-
#ifdef DSDOT
320+
#ifndef DOUBLE
350321
fcvt.d.s a1, a1
351322
fcvt.d.s b1, b1
352323
fmadd.d s1, b1, a1, s1
@@ -357,12 +328,13 @@ PROLOGUE
357328
.align 3
358329

359330
.L999:
360-
#ifdef DSDOT
361331
fadd.d $f0, s1, s2
332+
move $r4, $r17
333+
#if defined(DOUBLE)
334+
#elif defined(DSDOT)
362335
#else
363-
ADD $f0, s1, s2
336+
fcvt.s.d $f0, $f0
364337
#endif
365-
move $r4, $r17
366338
jirl $r0, $r1, 0x0
367339

368340
EPILOGUE

0 commit comments

Comments
 (0)