Skip to content

Commit 2e2691b

Browse files
authored
Merge pull request #5078 from XiWeiGu/la64_fixed_cscal_zscal
LoongArch64: fixed cscal and zscal
2 parents fe220a0 + f4194fc commit 2e2691b

File tree

3 files changed

+66
-164
lines changed

3 files changed

+66
-164
lines changed

kernel/loongarch64/cscal_lasx.S

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9494
CMPEQ $fcc1, ALPHAI, a1
9595
bge $r0, I, .L19
9696
/////// INCX == 1 && N >= 4 ////////
97-
bnez DUMMY2, .L17 // if DUMMPY2 == 1, called from c/zscal.
97+
bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.
9898

9999
bceqz $fcc0, .L17
100100

@@ -146,6 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
146146
addi.d I, I, -1
147147
blt $r0, I, .L17
148148
b .L19
149+
149150
.align 3
150151

151152
/////// INCX == 1 && N < 8 ///////
@@ -156,7 +157,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
156157
andi I, N, 7
157158
#endif
158159
beqz I, .L999
159-
bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
160+
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
160161

161162
bceqz $fcc0, .L998
162163

@@ -171,7 +172,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
171172
CMPEQ $fcc1, ALPHAI, a1
172173
move XX, X
173174
bge $r0, I, .L29
174-
bnez DUMMY2, .L25 // if DUMMPY2 == 1, called from c/zscal.
175+
bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.
175176
bceqz $fcc0, .L25
176177

177178
bceqz $fcc1, .L25
@@ -341,7 +342,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
341342
andi I, N, 7
342343
#endif
343344
beqz I, .L999
344-
bnez DUMMY2, .L998 // if DUMMPY2 == 1, called from c/zscal.
345+
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
345346

346347
bceqz $fcc0, .L998
347348

kernel/loongarch64/cscal_lsx.S

Lines changed: 58 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3333
#define ALPHAI $f1
3434
#define X $r7
3535
#define INCX $r8
36+
#define DUMMY2 $r9
3637

3738
#define I $r12
3839
#define TEMP $r13
@@ -65,6 +66,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6566

6667
bge $r0, N, .L999
6768
bge $r0, INCX, .L999
69+
ld.d DUMMY2, $sp, 0
6870
li.d TEMP, 1
6971
movgr2fr.d a1, $r0
7072
FFINT a1, a1
@@ -84,24 +86,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
8486
srai.d I, N, 2
8587
bne INCX, TEMP, .L22
8688

89+
/////// INCX == 1 ////////
8790
.L11:
88-
bge $r0, I, .L997
8991
CMPEQ $fcc0, ALPHAR, a1
9092
CMPEQ $fcc1, ALPHAI, a1
91-
bceqz $fcc0, .L13
92-
b .L14
93-
.align 3
93+
bge $r0, I, .L19
9494

95-
.L13:
96-
bceqz $fcc1, .L114 //alpha_r != 0.0 && alpha_i != 0.0
97-
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
95+
/////// INCX == 1 && N >= 4 ////////
96+
bnez DUMMY2, .L17 // if DUMMY2 == 1, called from c/zscal.
9897

99-
.L14:
100-
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
101-
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
102-
.align 3
98+
bceqz $fcc0, .L17
10399

104-
.L111: //alpha_r == 0.0 && alpha_i == 0.0
100+
bceqz $fcc1, .L17
101+
102+
.L15: //alpha_r == 0.0 && alpha_i == 0.0
105103
vst VXZ, X, 0 * SIZE
106104
#ifdef DOUBLE
107105
vst VXZ, X, 2 * SIZE
@@ -112,50 +110,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
112110
#endif
113111
addi.d X, X, 8 * SIZE
114112
addi.d I, I, -1
115-
blt $r0, I, .L111
116-
b .L997
117-
.align 3
118-
119-
.L113: //alpha_r != 0.0 && alpha_i == 0.0
120-
vld VX0, X, 0 * SIZE
121-
#ifdef DOUBLE
122-
vld VX1, X, 2 * SIZE
123-
vpickev.d x1, VX1, VX0
124-
vpickod.d x2, VX1, VX0
125-
vfmul.d x3, VXAR, x1
126-
vfmul.d x4, VXAR, x2
127-
vilvl.d VX2, x4 ,x3
128-
vilvh.d VX3, x4, x3
129-
vst VX2, X, 0 * SIZE
130-
vst VX3, X, 2 * SIZE
131-
vld VX0, X, 4 * SIZE
132-
vld VX1, X, 6 * SIZE
133-
vpickev.d x1, VX1, VX0
134-
vpickod.d x2, VX1, VX0
135-
vfmul.d x3, VXAR, x1
136-
vfmul.d x4, VXAR, x2
137-
vilvl.d VX2, x4 ,x3
138-
vilvh.d VX3, x4, x3
139-
vst VX2, X, 4 * SIZE
140-
vst VX3, X, 6 * SIZE
141-
#else
142-
vld VX1, X, 4 * SIZE
143-
vpickev.w x1, VX1, VX0
144-
vpickod.w x2, VX1, VX0
145-
vfmul.s x3, VXAR, x1
146-
vfmul.s x4, VXAR, x2
147-
vilvl.w VX2, x4 ,x3
148-
vilvh.w VX3, x4, x3
149-
vst VX2, X, 0 * SIZE
150-
vst VX3, X, 4 * SIZE
151-
#endif
152-
addi.d X, X, 8 * SIZE
153-
addi.d I, I, -1
154-
blt $r0, I, .L113
155-
b .L997
113+
blt $r0, I, .L15
114+
b .L19
156115
.align 3
157116

158-
.L114: //alpha_r != 0.0 && alpha_i != 0.0
117+
.L17:
159118
vld VX0, X, 0 * SIZE
160119
#ifdef DOUBLE
161120
vld VX1, X, 2 * SIZE
@@ -196,29 +155,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
196155
#endif
197156
addi.d X, X, 8 * SIZE
198157
addi.d I, I, -1
199-
blt $r0, I, .L114
200-
b .L997
158+
blt $r0, I, .L17
159+
b .L19
201160
.align 3
202161

162+
/////// INCX == 1 && N < 4 ///////
163+
.L19:
164+
andi I, N, 3
165+
beqz I, .L999
166+
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
167+
168+
bceqz $fcc0, .L998
169+
170+
bceqz $fcc1, .L998
171+
172+
b .L995 // alpha_r == 0.0 && alpha_i == 0.0
173+
174+
/////// INCX != 1 ////////
203175
.L22:
204-
bge $r0, I, .L997
205-
move XX, X
206176
CMPEQ $fcc0, ALPHAR, a1
207177
CMPEQ $fcc1, ALPHAI, a1
208-
bceqz $fcc0, .L23
209-
b .L24
210-
.align 3
178+
move XX, X
179+
bge $r0, I, .L29
180+
bnez DUMMY2, .L25 // if DUMMY2 == 1, called from c/zscal.
211181

212-
.L23:
213-
bceqz $fcc1, .L224 //alpha_r != 0.0 && alpha_i != 0.0
214-
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
182+
bceqz $fcc0, .L25
215183

216-
.L24:
217-
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
218-
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
219-
.align 3
184+
bceqz $fcc1, .L25
220185

221-
.L221: //alpha_r == 0.0 && alpha_i == 0.0
186+
.L27: //alpha_r == 0.0 && alpha_i == 0.0
222187
#ifdef DOUBLE
223188
vstelm.d VXZ, X, 0, 0
224189
vstelm.d VXZ, X, 1 * SIZE, 0
@@ -246,92 +211,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
246211
#endif
247212
add.d X, X, INCX
248213
addi.d I, I, -1
249-
blt $r0, I, .L221
250-
b .L997
214+
blt $r0, I, .L27
215+
b .L29
251216
.align 3
252217

253-
.L223: //alpha_r != 0.0 && alpha_i == 0.0
254-
#ifdef DOUBLE
255-
ld.d t1, X, 0 * SIZE
256-
ld.d t2, X, 1 * SIZE
257-
add.d X, X, INCX
258-
ld.d t3, X, 0 * SIZE
259-
ld.d t4, X, 1 * SIZE
260-
add.d X, X, INCX
261-
vinsgr2vr.d x1, t1, 0
262-
vinsgr2vr.d x2, t2, 0
263-
vinsgr2vr.d x1, t3, 1
264-
vinsgr2vr.d x2, t4, 1
265-
vfmul.d x3, VXAR, x1
266-
vfmul.d x4, VXAR, x2
267-
vstelm.d x3, XX, 0 * SIZE, 0
268-
vstelm.d x4, XX, 1 * SIZE, 0
269-
add.d XX, XX, INCX
270-
vstelm.d x3, XX, 0 * SIZE, 1
271-
vstelm.d x4, XX, 1 * SIZE, 1
272-
add.d XX, XX, INCX
273-
274-
ld.d t1, X, 0 * SIZE
275-
ld.d t2, X, 1 * SIZE
276-
add.d X, X, INCX
277-
ld.d t3, X, 0 * SIZE
278-
ld.d t4, X, 1 * SIZE
279-
vinsgr2vr.d x1, t1, 0
280-
vinsgr2vr.d x2, t2, 0
281-
vinsgr2vr.d x1, t3, 1
282-
vinsgr2vr.d x2, t4, 1
283-
add.d X, X, INCX
284-
vfmul.d x3, VXAR, x1
285-
vfmul.d x4, VXAR, x2
286-
addi.d I, I, -1
287-
vstelm.d x3, XX, 0 * SIZE, 0
288-
vstelm.d x4, XX, 1 * SIZE, 0
289-
add.d XX, XX, INCX
290-
vstelm.d x3, XX, 0 * SIZE, 1
291-
vstelm.d x4, XX, 1 * SIZE, 1
292-
#else
293-
ld.w t1, X, 0 * SIZE
294-
ld.w t2, X, 1 * SIZE
295-
add.d X, X, INCX
296-
ld.w t3, X, 0 * SIZE
297-
ld.w t4, X, 1 * SIZE
298-
add.d X, X, INCX
299-
vinsgr2vr.w x1, t1, 0
300-
vinsgr2vr.w x2, t2, 0
301-
vinsgr2vr.w x1, t3, 1
302-
vinsgr2vr.w x2, t4, 1
303-
ld.w t1, X, 0 * SIZE
304-
ld.w t2, X, 1 * SIZE
305-
add.d X, X, INCX
306-
ld.w t3, X, 0 * SIZE
307-
ld.w t4, X, 1 * SIZE
308-
vinsgr2vr.w x1, t1, 2
309-
vinsgr2vr.w x2, t2, 2
310-
vinsgr2vr.w x1, t3, 3
311-
vinsgr2vr.w x2, t4, 3
312-
add.d X, X, INCX
313-
314-
vfmul.s x3, VXAR, x1
315-
vfmul.s x4, VXAR, x2
316-
addi.d I, I, -1
317-
vstelm.w x3, XX, 0 * SIZE, 0
318-
vstelm.w x4, XX, 1 * SIZE, 0
319-
add.d XX, XX, INCX
320-
vstelm.w x3, XX, 0 * SIZE, 1
321-
vstelm.w x4, XX, 1 * SIZE, 1
322-
add.d XX, XX, INCX
323-
vstelm.w x3, XX, 0 * SIZE, 2
324-
vstelm.w x4, XX, 1 * SIZE, 2
325-
add.d XX, XX, INCX
326-
vstelm.w x3, XX, 0 * SIZE, 3
327-
vstelm.w x4, XX, 1 * SIZE, 3
328-
#endif
329-
add.d XX, XX, INCX
330-
blt $r0, I, .L223
331-
b .L997
332-
.align 3
333-
334-
.L224: //alpha_r != 0.0 && alpha_i != 0.0
218+
.L25:
335219
#ifdef DOUBLE
336220
ld.d t1, X, 0 * SIZE
337221
ld.d t2, X, 1 * SIZE
@@ -414,15 +298,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
414298
vstelm.w x4, XX, 1 * SIZE, 3
415299
#endif
416300
add.d XX, XX, INCX
417-
blt $r0, I, .L224
418-
b .L997
301+
blt $r0, I, .L25
302+
b .L29
419303
.align 3
420304

421-
.L997:
422-
andi I, N, 3
423-
bge $r0, I, .L999
424-
.align 3
305+
/////// INCX != 1 && N < 4 ///////
306+
.L29:
307+
andi I, N, 3
308+
beqz I, .L999
309+
bnez DUMMY2, .L998 // if DUMMY2 == 1, called from c/zscal.
310+
311+
bceqz $fcc0, .L998
312+
313+
bceqz $fcc1, .L998
425314

315+
b .L995 // alpha_r == 0.0 && alpha_i == 0.0
316+
317+
.L995: // alpha_r == 0.0 && alpha_i == 0.0
318+
ST a1, X, 0 * SIZE
319+
ST a1, X, 1 * SIZE
320+
addi.d I, I, -1
321+
add.d X, X, INCX
322+
blt $r0, I, .L995
323+
b .L999
426324
.L998:
427325
LD a1, X, 0 * SIZE
428326
LD a2, X, 1 * SIZE
@@ -435,7 +333,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
435333
ST s2, X, 1 * SIZE
436334
add.d X, X, INCX
437335
blt $r0, I, .L998
438-
.align 3
336+
b .L999
439337

440338
.L999:
441339
move $r4, $r12

kernel/loongarch64/zscal.S

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5353
PROLOGUE
5454

5555
li.d TEMP, 2 * SIZE
56+
ld.d XX, $sp, 0 // Load dummy2
57+
slli.d XX, XX, ZBASE_SHIFT
5658
MTC a1, $r0
5759
slli.d INCX, INCX, ZBASE_SHIFT
5860
bge $r0, N, .L999
5961
CMPEQ $fcc0, ALPHA_R, a1
6062
CMPEQ $fcc1, ALPHA_I, a1
6163
bceqz $fcc0, .L50
6264
bceqz $fcc1, .L50
65+
beq XX, TEMP, .L50 // if dummy2 == 1, do not directly copy 0
6366
srai.d I, N, 2
6467
bne INCX, TEMP, .L20
6568
bge $r0, I, .L15

0 commit comments

Comments
 (0)