Skip to content

Commit 52367ea

Browse files
authored
Merge pull request #5248 from ErnstPeng/fix-lasx
Loongarch64: fixed some functions of LASX for the LAPACK test
2 parents 70dff3b + f19e72c commit 52367ea

File tree

12 files changed

+832
-2087
lines changed

12 files changed

+832
-2087
lines changed

kernel/loongarch64/amax_lasx.S

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -56,17 +56,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5656
LDINT INCX, 0(INCX)
5757
#endif
5858

59+
xvxor.v VM0, VM0, VM0
5960
bge $r0, N, .L999
6061
bge $r0, INCX, .L999
6162
li.d TEMP, 1
6263
slli.d TEMP, TEMP, BASE_SHIFT
6364
slli.d INCX, INCX, BASE_SHIFT
64-
#ifdef DOUBLE
65-
xvldrepl.d VM0, X, 0
66-
#else
67-
xvldrepl.w VM0, X, 0
68-
#endif
69-
XVFSUB VM0, VM0, VM0
7065
bne INCX, TEMP, .L20
7166

7267
srai.d I, N, 4

kernel/loongarch64/asum_lasx.S

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -103,21 +103,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
103103
xvfadd.d res1, VX2, res1
104104
xvfadd.d res1, VX3, res1
105105
#else
106-
xvfadd.s res2, res1, res2
107106
xvpickve.w VX1, res1, 1
108107
xvpickve.w VX2, res1, 2
109108
xvpickve.w VX3, res1, 3
110109
xvfadd.s res1, VX1, res1
111110
xvfadd.s res1, VX2, res1
112111
xvfadd.s res1, VX3, res1
113-
xvpickve.w VX0, res2, 4
114-
xvpickve.w VX1, res2, 5
115-
xvpickve.w VX2, res2, 6
116-
xvpickve.w VX3, res2, 7
112+
xvpickve.w VX0, res1, 4
113+
xvpickve.w VX1, res1, 5
114+
xvpickve.w VX2, res1, 6
115+
xvpickve.w VX3, res1, 7
117116
xvfadd.s res1, VX0, res1
118117
xvfadd.s res1, VX1, res1
119118
xvfadd.s res1, VX2, res1
120-
xvfadd.s res1, VX2, res1
119+
xvfadd.s res1, VX3, res1
121120
#endif
122121
.align 3
123122

@@ -217,21 +216,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
217216
xvfadd.d res1, VX2, res1
218217
xvfadd.d res1, VX3, res1
219218
#else
220-
xvfadd.s res2, res1, res2
221219
xvpickve.w VX1, res1, 1
222220
xvpickve.w VX2, res1, 2
223221
xvpickve.w VX3, res1, 3
224222
xvfadd.s res1, VX1, res1
225223
xvfadd.s res1, VX2, res1
226224
xvfadd.s res1, VX3, res1
227-
xvpickve.w VX0, res2, 4
228-
xvpickve.w VX1, res2, 5
229-
xvpickve.w VX2, res2, 6
230-
xvpickve.w VX3, res2, 7
225+
xvpickve.w VX0, res1, 4
226+
xvpickve.w VX1, res1, 5
227+
xvpickve.w VX2, res1, 6
228+
xvpickve.w VX3, res1, 7
231229
xvfadd.s res1, VX0, res1
232230
xvfadd.s res1, VX1, res1
233231
xvfadd.s res1, VX2, res1
234-
xvfadd.s res1, VX2, res1
232+
xvfadd.s res1, VX3, res1
235233
#endif
236234
.align 3
237235

kernel/loongarch64/cdot_lasx.S

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
288288
xvinsgr2vr.w x2, t2, 6
289289
xvinsgr2vr.w x1, t3, 7
290290
xvinsgr2vr.w x2, t4, 7
291-
addi.d Y, Y, 8 * SIZE
291+
addi.d Y, Y, 16 * SIZE
292292
xvpickev.w x3, VX3, VX2
293293
xvpickod.w x4, VX3, VX2
294294
xvfmadd.s res1, x1, x3, res1

kernel/loongarch64/cnrm2_lasx.S

Lines changed: 53 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4747
#define VX4 $xr21
4848
#define res1 $xr19
4949
#define res2 $xr20
50+
#define RCP $f2
51+
#define VALPHA $xr3
5052

5153
PROLOGUE
5254

@@ -55,10 +57,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5557
LDINT INCX, 0(INCX)
5658
#endif
5759

58-
xvxor.v res1, res1, res1
59-
xvxor.v res2, res2, res2
6060
bge $r0, N, .L999
6161
beq $r0, INCX, .L999
62+
63+
addi.d $sp, $sp, -32
64+
st.d $ra, $sp, 0
65+
st.d N, $sp, 8
66+
st.d X, $sp, 16
67+
st.d INCX, $sp, 24
68+
#ifdef DYNAMIC_ARCH
69+
bl camax_k_LA264
70+
#else
71+
bl camax_k
72+
#endif
73+
ld.d $ra, $sp, 0
74+
ld.d N, $sp, 8
75+
ld.d X, $sp, 16
76+
ld.d INCX, $sp, 24
77+
addi.d $sp, $sp, 32
78+
79+
frecip.s RCP, $f0
80+
vreplvei.w $vr3, $vr2, 0
81+
xvpermi.d VALPHA, $xr3,0x00
82+
xvxor.v res1, res1, res1
83+
xvxor.v res2, res2, res2
84+
fcmp.ceq.s $fcc0, $f0, $f19
85+
bcnez $fcc0, .L999
86+
6287
li.d TEMP, SIZE
6388
slli.d INCX, INCX, ZBASE_SHIFT
6489
srai.d I, N, 2
@@ -67,13 +92,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6792
.align 3
6893

6994
.L10:
70-
xvld VX0, X, 0 * SIZE
71-
xvfcvtl.d.s VX1, VX0
72-
xvfcvth.d.s VX2, VX0
73-
xvfmadd.d res1, VX1, VX1, res1
74-
xvfmadd.d res2, VX2, VX2, res2
7595
addi.d I, I, -1
76-
addi.d X, X, 8 * SIZE
96+
97+
xvld VX0, X, 0 * SIZE
98+
xvld VX1, X, 8 * SIZE
99+
xvfmul.s VX0, VX0, VALPHA
100+
xvfmul.s VX1, VX1, VALPHA
101+
xvfmadd.s res1, VX0, VX0, res1
102+
xvfmadd.s res2, VX1, VX1, res2
103+
104+
addi.d X, X, 16 * SIZE
77105
blt $r0, I, .L10
78106
.align 3
79107
b .L996
@@ -103,22 +131,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
103131
xvinsgr2vr.w VX0, t3, 6
104132
xvinsgr2vr.w VX0, t4, 7
105133
add.d X, X, INCX
106-
xvfcvtl.d.s VX1, VX0
107-
xvfcvth.d.s VX2, VX0
108-
xvfmadd.d res1, VX1, VX1, res1
109-
xvfmadd.d res2, VX2, VX2, res2
134+
xvfmul.s VX0, VX0, VALPHA
135+
xvfmadd.s res2, VX0, VX0, res2
110136
addi.d I, I, -1
111137
blt $r0, I, .L21
112138
b .L996
113139

114140
.L996:
115-
xvfadd.d res1, res1, res2
116-
xvpickve.d VX1, res1, 1
117-
xvpickve.d VX2, res1, 2
118-
xvpickve.d VX3, res1, 3
119-
xvfadd.d res1, VX1, res1
120-
xvfadd.d res1, VX2, res1
121-
xvfadd.d res1, VX3, res1
141+
xvfadd.s res1, res1, res2
142+
xvpermi.d VX1, res1, 0x4e
143+
xvfadd.s res1, res1, VX1
144+
vreplvei.w $vr17, $vr19, 1
145+
vreplvei.w $vr18, $vr19, 2
146+
vreplvei.w $vr21, $vr19, 3
147+
xvfadd.s res1, VX2, res1
148+
xvfadd.s res1, VX3, res1
149+
xvfadd.s res1, VX4, res1
122150
.align 3
123151

124152
.L997:
@@ -130,18 +158,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
130158
fld.s a1, X, 0 * SIZE
131159
fld.s a2, X, 1 * SIZE
132160
addi.d I, I, -1
133-
fcvt.d.s a1, a1
134-
fcvt.d.s a2, a2
135-
fmadd.d res, a1, a1, res
136-
fmadd.d res, a2, a2, res
161+
fmul.s a1, a1, RCP
162+
fmul.s a2, a2, RCP
163+
fmadd.s res, a1, a1, res
164+
fmadd.s res, a2, a2, res
137165
add.d X, X, INCX
138166
blt $r0, I, .L998
139167
.align 3
140168

141169
.L999:
142-
fsqrt.d res, res
170+
fsqrt.s res, res
171+
fmul.s $f0, res, $f0
143172
move $r4, $r17
144-
fcvt.s.d $f0, res
145173
jirl $r0, $r1, 0x0
146174

147175
EPILOGUE

kernel/loongarch64/copy_lasx.S

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -260,9 +260,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
260260
add.d Y, Y, INCY
261261
ST a2, Y, 0
262262
add.d Y, Y, INCY
263-
ST a3, X, 0
263+
ST a3, Y, 0
264264
add.d Y, Y, INCY
265-
ST a4, X, 0
265+
ST a4, Y, 0
266266
add.d Y, Y, INCY
267267
LD a1, X, 0
268268
add.d X, X, INCX
@@ -276,9 +276,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
276276
add.d Y, Y, INCY
277277
ST a2, Y, 0
278278
add.d Y, Y, INCY
279-
ST a3, X, 0
279+
ST a3, Y, 0
280280
add.d Y, Y, INCY
281-
ST a4, X, 0
281+
ST a4, Y, 0
282282
add.d Y, Y, INCY
283283
addi.d I, I, -1
284284
blt $r0, I, .L222

0 commit comments

Comments
 (0)