Commit 98c9ff3

Merge pull request #4464 from XiWeiGu/loongarch64-zscal

LoongArch64: Handle NAN and INF

2 parents 9f06301 + 83ce97a
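Summary: this merge deletes the dedicated vector paths for the case alpha_r == 0.0 && alpha_i != 0.0 (labels .L112 and .L222) from both the LASX and LSX cscal kernels, and retargets the branches in .L14/.L24 to the general-case labels .L114/.L224. Per the commit title, the intent is that inputs containing NaN or INF are now scaled by the full complex multiply rather than by a shortcut that is only valid for finite values.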

File tree

2 files changed: +4, -275 lines
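Why the shortcut was wrong: for x := alpha * x with alpha_r == 0, the deleted path computed only (-alpha_i * x_i, alpha_i * x_r), silently dropping the alpha_r * x products. Under IEEE 754, those dropped products are exactly what turns Inf/NaN inputs into NaN outputs (0.0 * Inf is NaN), so the shortcut disagreed with the full complex multiply on non-finite data. A minimal scalar sketch in C, assuming the usual cscal/zscal definition x := alpha * x (the helper names here are illustrative, not taken from the kernels):

    #include <math.h>
    #include <stdio.h>

    /* Full complex scale, y = alpha * x, keeping every product per
     * component so IEEE 754 NaN/Inf propagate (0.0 * INFINITY == NaN). */
    static void scale_full(double ar, double ai, double xr, double xi,
                           double *yr, double *yi) {
        *yr = ar * xr - ai * xi;
        *yi = ar * xi + ai * xr;
    }

    /* The deleted shortcut for alpha_r == 0.0: the ar * x products are
     * dropped, so an Inf/NaN in x can no longer reach the real part. */
    static void scale_imag_only(double ai, double xr, double xi,
                                double *yr, double *yi) {
        *yr = -ai * xi;
        *yi =  ai * xr;
    }

    int main(void) {
        double yr, yi;
        /* alpha = (0, 2), x = (Inf, 1) */
        scale_full(0.0, 2.0, INFINITY, 1.0, &yr, &yi);
        printf("full path: (%g, %g)\n", yr, yi);  /* (nan, inf) */
        scale_imag_only(2.0, INFINITY, 1.0, &yr, &yi);
        printf("shortcut:  (%g, %g)\n", yr, yi);  /* (-2, inf)  */
        return 0;
    }

The full path yields (NaN, Inf) because 0.0 * Inf is NaN; the shortcut yields the finite real part -2, which is the discrepancy this commit removes.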

kernel/loongarch64/cscal_lasx.S

Lines changed: 2 additions & 147 deletions
@@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L113 //alpha_r != 0.0 && alpha_i == 0.0

 .L14:
-    bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
+    bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
     b .L111 //alpha_r == 0.0 && alpha_i == 0.0
     .align 3

@@ -117,38 +117,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L997
     .align 3

-.L112: //alpha_r == 0.0 && alpha_i != 0.0
-    xvld VX0, X, 0 * SIZE
-#ifdef DOUBLE
-    xvld VX1, X, 4 * SIZE
-    xvpickev.d x1, VX1, VX0
-    xvpickod.d x2, VX1, VX0
-    xvfmul.d x3, VXAI, x2
-    xvfsub.d x3, VXZ, x3
-    xvfmul.d x4, VXAI, x1
-    xvilvl.d VX2, x4, x3
-    xvilvh.d VX3, x4, x3
-    xvst VX2, X, 0 * SIZE
-    xvst VX3, X, 4 * SIZE
-    addi.d X, X, 8 * SIZE
-#else
-    xvld VX1, X, 8 * SIZE
-    xvpickev.w x1, VX1, VX0
-    xvpickod.w x2, VX1, VX0
-    xvfmul.s x3, VXAI, x2
-    xvfsub.s x3, VXZ, x3
-    xvfmul.s x4, VXAI, x1
-    xvilvl.w VX2, x4, x3
-    xvilvh.w VX3, x4, x3
-    xvst VX2, X, 0 * SIZE
-    xvst VX3, X, 8 * SIZE
-    addi.d X, X, 16 * SIZE
-#endif
-    addi.d I, I, -1
-    blt $r0, I, .L112
-    b .L997
-    .align 3
-
 .L113: //alpha_r != 0.0 && alpha_i == 0.0
     xvld VX0, X, 0 * SIZE
 #ifdef DOUBLE

@@ -227,7 +195,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L223 //alpha_r != 0.0 && alpha_i == 0.0

 .L24:
-    bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
+    bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
     b .L221 //alpha_r == 0.0 && alpha_i == 0.0
     .align 3

@@ -275,119 +243,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L997
     .align 3

-.L222: //alpha_r == 0.0 && alpha_i != 0.0
-#ifdef DOUBLE
-    ld.d t1, X, 0 * SIZE
-    ld.d t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.d t3, X, 0 * SIZE
-    ld.d t4, X, 1 * SIZE
-    add.d X, X, INCX
-    xvinsgr2vr.d x1, t1, 0
-    xvinsgr2vr.d x2, t2, 0
-    xvinsgr2vr.d x1, t3, 1
-    xvinsgr2vr.d x2, t4, 1
-    ld.d t1, X, 0 * SIZE
-    ld.d t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.d t3, X, 0 * SIZE
-    ld.d t4, X, 1 * SIZE
-    xvinsgr2vr.d x1, t1, 2
-    xvinsgr2vr.d x2, t2, 2
-    xvinsgr2vr.d x1, t3, 3
-    xvinsgr2vr.d x2, t4, 3
-    add.d X, X, INCX
-
-    xvfmul.d x3, VXAI, x2
-    xvfsub.d x3, VXZ, x3
-    xvfmul.d x4, VXAI, x1
-    addi.d I, I, -1
-    xvstelm.d x3, XX, 0 * SIZE, 0
-    xvstelm.d x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    xvstelm.d x3, XX, 0 * SIZE, 1
-    xvstelm.d x4, XX, 1 * SIZE, 1
-    add.d XX, XX, INCX
-    xvstelm.d x3, XX, 0 * SIZE, 2
-    xvstelm.d x4, XX, 1 * SIZE, 2
-    add.d XX, XX, INCX
-    xvstelm.d x3, XX, 0 * SIZE, 3
-    xvstelm.d x4, XX, 1 * SIZE, 3
-#else
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    add.d X, X, INCX
-    xvinsgr2vr.w x1, t1, 0
-    xvinsgr2vr.w x2, t2, 0
-    xvinsgr2vr.w x1, t3, 1
-    xvinsgr2vr.w x2, t4, 1
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    xvinsgr2vr.w x1, t1, 2
-    xvinsgr2vr.w x2, t2, 2
-    xvinsgr2vr.w x1, t3, 3
-    xvinsgr2vr.w x2, t4, 3
-    add.d X, X, INCX
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    add.d X, X, INCX
-    xvinsgr2vr.w x1, t1, 4
-    xvinsgr2vr.w x2, t2, 4
-    xvinsgr2vr.w x1, t3, 5
-    xvinsgr2vr.w x2, t4, 5
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    xvinsgr2vr.w x1, t1, 6
-    xvinsgr2vr.w x2, t2, 6
-    xvinsgr2vr.w x1, t3, 7
-    xvinsgr2vr.w x2, t4, 7
-    add.d X, X, INCX
-
-    xvfmul.s x3, VXAI, x2
-    xvfsub.s x3, VXZ, x3
-    xvfmul.s x4, VXAI, x1
-    addi.d I, I, -1
-    xvstelm.w x3, XX, 0 * SIZE, 0
-    xvstelm.w x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 1
-    xvstelm.w x4, XX, 1 * SIZE, 1
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 2
-    xvstelm.w x4, XX, 1 * SIZE, 2
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 3
-    xvstelm.w x4, XX, 1 * SIZE, 3
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 4
-    xvstelm.w x4, XX, 1 * SIZE, 4
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 5
-    xvstelm.w x4, XX, 1 * SIZE, 5
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 6
-    xvstelm.w x4, XX, 1 * SIZE, 6
-    add.d XX, XX, INCX
-    xvstelm.w x3, XX, 0 * SIZE, 7
-    xvstelm.w x4, XX, 1 * SIZE, 7
-#endif
-    add.d XX, XX, INCX
-    blt $r0, I, .L222
-    b .L997
-    .align 3
-
 .L223: //alpha_r != 0.0 && alpha_i == 0.0
 #ifdef DOUBLE
     ld.d t1, X, 0 * SIZE
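With .L112 gone, .L14 now branches to .L114 for alpha_r == 0.0 && alpha_i != 0.0. The diff does not show .L114 itself, so treating it as the general full-multiply path is an inference from the branch comments. Rendered as C for readability, the surviving dispatch looks roughly like this (a sketch; the enum and function names are illustrative, not from the kernel):

    /* Dispatch in cscal_lasx.S after this commit, reconstructed from the
     * branch comments; path names mirror the assembly labels. Treating
     * L114 as the full complex multiply is an assumption, since its body
     * is not part of this diff. */
    typedef enum { L111_ZERO, L113_REAL_ONLY, L114_GENERAL } scal_path;

    scal_path choose_path(double alpha_r, double alpha_i) {
        if (alpha_r != 0.0)
            return (alpha_i == 0.0) ? L113_REAL_ONLY : L114_GENERAL;
        if (alpha_i != 0.0)
            return L114_GENERAL;   /* formerly the deleted .L112 shortcut */
        return L111_ZERO;          /* alpha == 0: fast path, unchanged here */
    }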

kernel/loongarch64/cscal_lsx.S

Lines changed: 2 additions & 128 deletions
@@ -97,7 +97,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L113 //alpha_r != 0.0 && alpha_i == 0.0

 .L14:
-    bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
+    bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
     b .L111 //alpha_r == 0.0 && alpha_i == 0.0
     .align 3

@@ -116,48 +116,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L997
     .align 3

-.L112: //alpha_r == 0.0 && alpha_i != 0.0
-    vld VX0, X, 0 * SIZE
-#ifdef DOUBLE
-    vld VX1, X, 2 * SIZE
-    vpickev.d x1, VX1, VX0
-    vpickod.d x2, VX1, VX0
-    vfmul.d x3, VXAI, x2
-    vfsub.d x3, VXZ, x3
-    vfmul.d x4, VXAI, x1
-    vilvl.d VX2, x4, x3
-    vilvh.d VX3, x4, x3
-    vst VX2, X, 0 * SIZE
-    vst VX3, X, 2 * SIZE
-    vld VX0, X, 4 * SIZE
-    vld VX1, X, 6 * SIZE
-    vpickev.d x1, VX1, VX0
-    vpickod.d x2, VX1, VX0
-    vfmul.d x3, VXAI, x2
-    vfsub.d x3, VXZ, x3
-    vfmul.d x4, VXAI, x1
-    vilvl.d VX2, x4, x3
-    vilvh.d VX3, x4, x3
-    vst VX2, X, 4 * SIZE
-    vst VX3, X, 6 * SIZE
-#else
-    vld VX1, X, 4 * SIZE
-    vpickev.w x1, VX1, VX0
-    vpickod.w x2, VX1, VX0
-    vfmul.s x3, VXAI, x2
-    vfsub.s x3, VXZ, x3
-    vfmul.s x4, VXAI, x1
-    vilvl.w VX2, x4, x3
-    vilvh.w VX3, x4, x3
-    vst VX2, X, 0 * SIZE
-    vst VX3, X, 4 * SIZE
-#endif
-    addi.d X, X, 8 * SIZE
-    addi.d I, I, -1
-    blt $r0, I, .L112
-    b .L997
-    .align 3
-
 .L113: //alpha_r != 0.0 && alpha_i == 0.0
     vld VX0, X, 0 * SIZE
 #ifdef DOUBLE

@@ -256,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L223 //alpha_r != 0.0 && alpha_i == 0.0

 .L24:
-    bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
+    bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
     b .L221 //alpha_r == 0.0 && alpha_i == 0.0
     .align 3

@@ -292,90 +250,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     b .L997
     .align 3

-.L222: //alpha_r == 0.0 && alpha_i != 0.0
-#ifdef DOUBLE
-    ld.d t1, X, 0 * SIZE
-    ld.d t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.d t3, X, 0 * SIZE
-    ld.d t4, X, 1 * SIZE
-    add.d X, X, INCX
-    vinsgr2vr.d x1, t1, 0
-    vinsgr2vr.d x2, t2, 0
-    vinsgr2vr.d x1, t3, 1
-    vinsgr2vr.d x2, t4, 1
-    vfmul.d x3, VXAI, x2
-    vfsub.d x3, VXZ, x3
-    vfmul.d x4, VXAI, x1
-    vstelm.d x3, XX, 0 * SIZE, 0
-    vstelm.d x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    vstelm.d x3, XX, 0 * SIZE, 1
-    vstelm.d x4, XX, 1 * SIZE, 1
-    add.d XX, XX, INCX
-
-    ld.d t1, X, 0 * SIZE
-    ld.d t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.d t3, X, 0 * SIZE
-    ld.d t4, X, 1 * SIZE
-    vinsgr2vr.d x1, t1, 0
-    vinsgr2vr.d x2, t2, 0
-    vinsgr2vr.d x1, t3, 1
-    vinsgr2vr.d x2, t4, 1
-    add.d X, X, INCX
-    vfmul.d x3, VXAI, x2
-    vfsub.d x3, VXZ, x3
-    vfmul.d x4, VXAI, x1
-    addi.d I, I, -1
-    vstelm.d x3, XX, 0 * SIZE, 0
-    vstelm.d x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    vstelm.d x3, XX, 0 * SIZE, 1
-    vstelm.d x4, XX, 1 * SIZE, 1
-#else
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    add.d X, X, INCX
-    vinsgr2vr.w x1, t1, 0
-    vinsgr2vr.w x2, t2, 0
-    vinsgr2vr.w x1, t3, 1
-    vinsgr2vr.w x2, t4, 1
-    ld.w t1, X, 0 * SIZE
-    ld.w t2, X, 1 * SIZE
-    add.d X, X, INCX
-    ld.w t3, X, 0 * SIZE
-    ld.w t4, X, 1 * SIZE
-    vinsgr2vr.w x1, t1, 2
-    vinsgr2vr.w x2, t2, 2
-    vinsgr2vr.w x1, t3, 3
-    vinsgr2vr.w x2, t4, 3
-    add.d X, X, INCX
-
-    vfmul.s x3, VXAI, x2
-    vfsub.s x3, VXZ, x3
-    vfmul.s x4, VXAI, x1
-    addi.d I, I, -1
-    vstelm.w x3, XX, 0 * SIZE, 0
-    vstelm.w x4, XX, 1 * SIZE, 0
-    add.d XX, XX, INCX
-    vstelm.w x3, XX, 0 * SIZE, 1
-    vstelm.w x4, XX, 1 * SIZE, 1
-    add.d XX, XX, INCX
-    vstelm.w x3, XX, 0 * SIZE, 2
-    vstelm.w x4, XX, 1 * SIZE, 2
-    add.d XX, XX, INCX
-    vstelm.w x3, XX, 0 * SIZE, 3
-    vstelm.w x4, XX, 1 * SIZE, 3
-#endif
-    add.d XX, XX, INCX
-    blt $r0, I, .L222
-    b .L997
-    .align 3
-
 .L223: //alpha_r != 0.0 && alpha_i == 0.0
 #ifdef DOUBLE
     ld.d t1, X, 0 * SIZE
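The LSX kernel mirrors the LASX change: .L112/.L222 are deleted and the branches retargeted to .L114/.L224, covering both the contiguous loop (.L112, unit stride) and the strided loop (.L222, INCX != 1). If the inference above holds, cscal/zscal on LoongArch64 should now propagate NaN and INF for alpha_r == 0 the same way the generic C kernels do.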
