Skip to content

Commit 9f06301

Browse files
authored
Merge pull request #4463 from XiWeiGu/loongarch64-zamax-zamin
Loongarch64: amax and amin
2 parents 8892121 + 3d4dfd0 commit 9f06301

File tree

13 files changed, with 555 additions and 325 deletions.

benchmark/Makefile

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -265,9 +265,9 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
265265
ismax.goto idmax.goto \
266266
isamin.goto idamin.goto icamin.goto izamin.goto \
267267
ismin.goto idmin.goto \
268-
samax.goto damax.goto camax.goto zamax.goto \
268+
samax.goto damax.goto scamax.goto dzamax.goto \
269269
smax.goto dmax.goto \
270-
samin.goto damin.goto camin.goto zamin.goto \
270+
samin.goto damin.goto scamin.goto dzamin.goto \
271271
smin.goto dmin.goto \
272272
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
273273
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_HALF_TARGETS)
@@ -2832,12 +2832,12 @@ samax.goto : samax.$(SUFFIX) ../$(LIBNAME)
28322832
damax.goto : damax.$(SUFFIX) ../$(LIBNAME)
28332833
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28342834

2835-
############################################## CAMAX ##############################################
2836-
camax.goto : camax.$(SUFFIX) ../$(LIBNAME)
2835+
############################################## SCAMAX ##############################################
2836+
scamax.goto : scamax.$(SUFFIX) ../$(LIBNAME)
28372837
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28382838

2839-
############################################## ZAMAX ##############################################
2840-
zamax.goto : zamax.$(SUFFIX) ../$(LIBNAME)
2839+
############################################## DZAMAX ##############################################
2840+
dzamax.goto : dzamax.$(SUFFIX) ../$(LIBNAME)
28412841
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28422842

28432843
############################################## SMAX ##############################################
@@ -2856,12 +2856,12 @@ samin.goto : samin.$(SUFFIX) ../$(LIBNAME)
28562856
damin.goto : damin.$(SUFFIX) ../$(LIBNAME)
28572857
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28582858

2859-
############################################## CAMIN ##############################################
2860-
camin.goto : camin.$(SUFFIX) ../$(LIBNAME)
2859+
############################################## SCAMIN ##############################################
2860+
scamin.goto : scamin.$(SUFFIX) ../$(LIBNAME)
28612861
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28622862

2863-
############################################## ZAMIN ##############################################
2864-
zamin.goto : zamin.$(SUFFIX) ../$(LIBNAME)
2863+
############################################## DZAMIN ##############################################
2864+
dzamin.goto : dzamin.$(SUFFIX) ../$(LIBNAME)
28652865
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
28662866

28672867
############################################## SMIN ##############################################
@@ -3383,10 +3383,10 @@ samax.$(SUFFIX) : amax.c
33833383
damax.$(SUFFIX) : amax.c
33843384
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
33853385

3386-
camax.$(SUFFIX) : amax.c
3386+
scamax.$(SUFFIX) : amax.c
33873387
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
33883388

3389-
zamax.$(SUFFIX) : amax.c
3389+
dzamax.$(SUFFIX) : amax.c
33903390
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
33913391

33923392

@@ -3403,10 +3403,10 @@ samin.$(SUFFIX) : amin.c
34033403
damin.$(SUFFIX) : amin.c
34043404
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
34053405

3406-
camin.$(SUFFIX) : amin.c
3406+
scamin.$(SUFFIX) : amin.c
34073407
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
34083408

3409-
zamin.$(SUFFIX) : amin.c
3409+
dzamin.$(SUFFIX) : amin.c
34103410
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
34113411

34123412

kernel/loongarch64/KERNEL.LOONGSON2K1000

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lsx.S
1414
SAMAXKERNEL = amax_lsx.S
1515
DAMAXKERNEL = amax_lsx.S
1616
CAMAXKERNEL = camax_lsx.S
17+
ZAMAXKERNEL = camax_lsx.S
1718

1819
SAMINKERNEL = amin_lsx.S
1920
DAMINKERNEL = amin_lsx.S
2021
CAMINKERNEL = camin_lsx.S
22+
ZAMINKERNEL = camin_lsx.S
2123

2224
SMAXKERNEL = max_lsx.S
2325
DMAXKERNEL = max_lsx.S

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lasx.S
1414
SAMAXKERNEL = amax_lasx.S
1515
DAMAXKERNEL = amax_lasx.S
1616
CAMAXKERNEL = camax_lasx.S
17+
ZAMAXKERNEL = camax_lasx.S
1718

1819
SAMINKERNEL = amin_lasx.S
1920
DAMINKERNEL = amin_lasx.S
2021
CAMINKERNEL = camin_lasx.S
22+
ZAMINKERNEL = camin_lasx.S
2123

2224
SMAXKERNEL = max_lsx.S
2325
DMAXKERNEL = max_lsx.S

kernel/loongarch64/amin_lasx.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6666
#else
6767
xvldrepl.w VM0, X, 0
6868
#endif
69-
XVFSUB VM0, VM0, VM0
7069
bne INCX, TEMP, .L20
7170

7271
srai.d I, N, 4

kernel/loongarch64/amin_lsx.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6666
#else
6767
vldrepl.w VM0, X, 0
6868
#endif
69-
VFSUB VM0, VM0, VM0
7069
bne INCX, TEMP, .L20
7170

7271
srai.d I, N, 3

kernel/loongarch64/camax_lasx.S

Lines changed: 84 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -63,42 +63,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6363
bge $r0, N, .L999
6464
bge $r0, INCX, .L999
6565
li.d TEMP, 1
66-
li.w I, -1
6766
slli.d TEMP, TEMP, ZBASE_SHIFT
6867
slli.d INCX, INCX, ZBASE_SHIFT
69-
xvreplgr2vr.w neg1, I
70-
xvffint.s.w neg1, neg1
7168
srai.d I, N, 3
7269
bne INCX, TEMP, .L20
7370
bge $r0, I, .L23
7471
.align 3
7572

7673
.L10:
77-
xvld VX0, X, 0 * SIZE
78-
xvld VX1, X, 8 * SIZE
79-
addi.d I, I, -1
74+
xvld VX0, X, 0
75+
xvld VX1, X, 32
76+
#ifdef DOUBLE
77+
xvpickev.d x1, VX1, VX0
78+
xvpickod.d x2, VX1, VX0
79+
#else
8080
xvpickev.w x1, VX1, VX0
8181
xvpickod.w x2, VX1, VX0
82-
xvfmul.s x3, neg1, x1
83-
xvfmul.s x4, neg1, x2
84-
xvfcmp.clt.s VT0, x1, res0
85-
xvfcmp.clt.s VT1, x2, res0
86-
xvbitsel.v x1, x1, x3, VT0
87-
xvbitsel.v x2, x2, x4, VT1
82+
#endif
83+
XVFSUB x3, res0, x1
84+
XVFSUB x4, res0, x2
85+
XVFMAX x1, x1, x3
86+
XVFMAX x2, x2, x4
87+
XVFADD VM1, x1, x2
88+
XVFMAX VM0, VM0, VM1
89+
#ifdef DOUBLE
90+
xvld VX0, X, 64
91+
xvld VX1, X, 96
92+
xvpickev.d x1, VX1, VX0
93+
xvpickod.d x2, VX1, VX0
94+
XVFSUB x3, res0, x1
95+
XVFSUB x4, res0, x2
96+
XVFMAX x1, x1, x3
97+
XVFMAX x2, x2, x4
98+
XVFADD VM1, x1, x2
99+
XVFMAX VM0, VM0, VM1
100+
#endif
101+
addi.d I, I, -1
88102
addi.d X, X, 16 * SIZE
89-
xvfadd.s VM1, x1, x2
90-
xvfmax.s VM0, VM0, VM1
91103
blt $r0, I, .L10
92104
.align 3
93105

94106
.L11:
107+
#ifdef DOUBLE
108+
xvpickve.d x1, VM0, 0
109+
xvpickve.d x2, VM0, 1
110+
XVFMAX VM0, x1, x2
111+
#else
95112
xvpickve.w x1, VM0, 0
96113
xvpickve.w x2, VM0, 1
97114
xvpickve.w x3, VM0, 2
98115
xvpickve.w x4, VM0, 3
99-
xvfmax.s VM1, x1, x2
100-
xvfmax.s VM0, x3, x4
101-
xvfmax.s VM0, VM0, VM1
116+
XVFMAX VM0, x1, x2
117+
XVFMAX VM1, x3, x4
118+
XVFMAX VM0, VM0, VM1
119+
#endif
102120
b .L23
103121
.align 3
104122

@@ -107,66 +125,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107125
.align 3
108126

109127
.L21:
110-
fld.s t1, X, 0 * SIZE
111-
fld.s t2, X, 1 * SIZE
128+
LD t1, X, 0 * SIZE
129+
LD t2, X, 1 * SIZE
112130
add.d X, X, INCX
113-
fld.s t3, X, 0 * SIZE
114-
fld.s t4, X, 1 * SIZE
131+
LD t3, X, 0 * SIZE
132+
LD t4, X, 1 * SIZE
115133
add.d X, X, INCX
116-
fabs.s t1, t1
117-
fabs.s t2, t2
118-
fabs.s t3, t3
119-
fabs.s t4, t4
120-
fadd.s t1, t1, t2
121-
fadd.s t3, t3, t4
122-
fmax.s s1, t1, t3
123-
fld.s t1, X, 0 * SIZE
124-
fld.s t2, X, 1 * SIZE
134+
FABS t1, t1
135+
FABS t2, t2
136+
FABS t3, t3
137+
FABS t4, t4
138+
ADD t1, t1, t2
139+
ADD t3, t3, t4
140+
FMAX s1, t1, t3
141+
LD t1, X, 0 * SIZE
142+
LD t2, X, 1 * SIZE
125143
add.d X, X, INCX
126-
fld.s t3, X, 0 * SIZE
127-
fld.s t4, X, 1 * SIZE
144+
LD t3, X, 0 * SIZE
145+
LD t4, X, 1 * SIZE
128146
add.d X, X, INCX
129-
fabs.s t1, t1
130-
fabs.s t2, t2
131-
fabs.s t3, t3
132-
fabs.s t4, t4
133-
fadd.s t1, t1, t2
134-
fadd.s t3, t3, t4
135-
fmax.s s1, t1, t3
136-
fld.s t1, X, 0 * SIZE
137-
fld.s t2, X, 1 * SIZE
147+
FABS t1, t1
148+
FABS t2, t2
149+
FABS t3, t3
150+
FABS t4, t4
151+
ADD t1, t1, t2
152+
ADD t3, t3, t4
153+
FMAX s1, t1, t3
154+
LD t1, X, 0 * SIZE
155+
LD t2, X, 1 * SIZE
138156
add.d X, X, INCX
139-
fld.s t3, X, 0 * SIZE
140-
fld.s t4, X, 1 * SIZE
157+
LD t3, X, 0 * SIZE
158+
LD t4, X, 1 * SIZE
141159
add.d X, X, INCX
142-
fabs.s t1, t1
143-
fabs.s t2, t2
144-
fabs.s t3, t3
145-
fabs.s t4, t4
160+
FABS t1, t1
161+
FABS t2, t2
162+
FABS t3, t3
163+
FABS t4, t4
146164
addi.d I, I, -1
147-
fadd.s t1, t1, t2
148-
fadd.s t3, t3, t4
149-
fmax.s s3, t1, t3
150-
fld.s t1, X, 0 * SIZE
151-
fld.s t2, X, 1 * SIZE
165+
ADD t1, t1, t2
166+
ADD t3, t3, t4
167+
FMAX s3, t1, t3
168+
LD t1, X, 0 * SIZE
169+
LD t2, X, 1 * SIZE
152170
add.d X, X, INCX
153-
fld.s t3, X, 0 * SIZE
154-
fld.s t4, X, 1 * SIZE
171+
LD t3, X, 0 * SIZE
172+
LD t4, X, 1 * SIZE
155173
add.d X, X, INCX
156-
fabs.s t1, t1
157-
fabs.s t2, t2
158-
fabs.s t3, t3
159-
fabs.s t4, t4
160-
fadd.s t1, t1, t2
161-
fadd.s t3, t3, t4
162-
fmax.s s4, t1, t3
174+
FABS t1, t1
175+
FABS t2, t2
176+
FABS t3, t3
177+
FABS t4, t4
178+
ADD t1, t1, t2
179+
ADD t3, t3, t4
180+
FMAX s4, t1, t3
163181
blt $r0, I, .L21
164182
.align 3
165183

166184
.L22:
167-
fmax.s s1, s1, s2
168-
fmax.s s3, s3, s4
169-
fmax.s s1, s1, s3
185+
FMAX s1, s1, s2
186+
FMAX s3, s3, s4
187+
FMAX s1, s1, s3
170188
.align 3
171189

172190
.L23: //N<8
@@ -182,12 +200,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
182200
FABS a1, a1
183201
ADD a0, a0, a1
184202
add.d X, X, INCX
185-
fmax.s s1, a0, s1
203+
FMAX s1, a0, s1
186204
blt $r0, I, .L24
187205
.align 3
188206

189207
.L999:
190-
fmov.s $f0, $f22
208+
MOV $f0, $f22
191209
jirl $r0, $r1, 0x0
192210
.align 3
193211

0 commit comments

Comments
 (0)