Skip to content

Commit 276e3eb

Browse files
committed
LoongArch64: Add dzamax and dzamin opt
1 parent d6a5174 commit 276e3eb

File tree

6 files changed

+418
-304
lines changed

6 files changed

+418
-304
lines changed

kernel/loongarch64/KERNEL.LOONGSON2K1000

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lsx.S
1414
SAMAXKERNEL = amax_lsx.S
1515
DAMAXKERNEL = amax_lsx.S
1616
CAMAXKERNEL = camax_lsx.S
17+
ZAMAXKERNEL = camax_lsx.S
1718

1819
SAMINKERNEL = amin_lsx.S
1920
DAMINKERNEL = amin_lsx.S
2021
CAMINKERNEL = camin_lsx.S
22+
ZAMINKERNEL = camin_lsx.S
2123

2224
SMAXKERNEL = max_lsx.S
2325
DMAXKERNEL = max_lsx.S

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lasx.S
1414
SAMAXKERNEL = amax_lasx.S
1515
DAMAXKERNEL = amax_lasx.S
1616
CAMAXKERNEL = camax_lasx.S
17+
ZAMAXKERNEL = camax_lasx.S
1718

1819
SAMINKERNEL = amin_lasx.S
1920
DAMINKERNEL = amin_lasx.S
2021
CAMINKERNEL = camin_lasx.S
22+
ZAMINKERNEL = camin_lasx.S
2123

2224
SMAXKERNEL = max_lsx.S
2325
DMAXKERNEL = max_lsx.S

kernel/loongarch64/camax_lasx.S

Lines changed: 84 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -63,42 +63,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6363
bge $r0, N, .L999
6464
bge $r0, INCX, .L999
6565
li.d TEMP, 1
66-
li.w I, -1
6766
slli.d TEMP, TEMP, ZBASE_SHIFT
6867
slli.d INCX, INCX, ZBASE_SHIFT
69-
xvreplgr2vr.w neg1, I
70-
xvffint.s.w neg1, neg1
7168
srai.d I, N, 3
7269
bne INCX, TEMP, .L20
7370
bge $r0, I, .L23
7471
.align 3
7572

7673
.L10:
77-
xvld VX0, X, 0 * SIZE
78-
xvld VX1, X, 8 * SIZE
79-
addi.d I, I, -1
74+
xvld VX0, X, 0
75+
xvld VX1, X, 32
76+
#ifdef DOUBLE
77+
xvpickev.d x1, VX1, VX0
78+
xvpickod.d x2, VX1, VX0
79+
#else
8080
xvpickev.w x1, VX1, VX0
8181
xvpickod.w x2, VX1, VX0
82-
xvfmul.s x3, neg1, x1
83-
xvfmul.s x4, neg1, x2
84-
xvfcmp.clt.s VT0, x1, res0
85-
xvfcmp.clt.s VT1, x2, res0
86-
xvbitsel.v x1, x1, x3, VT0
87-
xvbitsel.v x2, x2, x4, VT1
82+
#endif
83+
XVFSUB x3, res0, x1
84+
XVFSUB x4, res0, x2
85+
XVFMAX x1, x1, x3
86+
XVFMAX x2, x2, x4
87+
XVFADD VM1, x1, x2
88+
XVFMAX VM0, VM0, VM1
89+
#ifdef DOUBLE
90+
xvld VX0, X, 64
91+
xvld VX1, X, 96
92+
xvpickev.d x1, VX1, VX0
93+
xvpickod.d x2, VX1, VX0
94+
XVFSUB x3, res0, x1
95+
XVFSUB x4, res0, x2
96+
XVFMAX x1, x1, x3
97+
XVFMAX x2, x2, x4
98+
XVFADD VM1, x1, x2
99+
XVFMAX VM0, VM0, VM1
100+
#endif
101+
addi.d I, I, -1
88102
addi.d X, X, 16 * SIZE
89-
xvfadd.s VM1, x1, x2
90-
xvfmax.s VM0, VM0, VM1
91103
blt $r0, I, .L10
92104
.align 3
93105

94106
.L11:
107+
#ifdef DOUBLE
108+
xvpickve.d x1, VM0, 0
109+
xvpickve.d x2, VM0, 1
110+
XVFMAX VM0, x1, x2
111+
#else
95112
xvpickve.w x1, VM0, 0
96113
xvpickve.w x2, VM0, 1
97114
xvpickve.w x3, VM0, 2
98115
xvpickve.w x4, VM0, 3
99-
xvfmax.s VM1, x1, x2
100-
xvfmax.s VM0, x3, x4
101-
xvfmax.s VM0, VM0, VM1
116+
XVFMAX VM0, x1, x2
117+
XVFMAX VM1, x3, x4
118+
XVFMAX VM0, VM0, VM1
119+
#endif
102120
b .L23
103121
.align 3
104122

@@ -107,66 +125,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107125
.align 3
108126

109127
.L21:
110-
fld.s t1, X, 0 * SIZE
111-
fld.s t2, X, 1 * SIZE
128+
LD t1, X, 0 * SIZE
129+
LD t2, X, 1 * SIZE
112130
add.d X, X, INCX
113-
fld.s t3, X, 0 * SIZE
114-
fld.s t4, X, 1 * SIZE
131+
LD t3, X, 0 * SIZE
132+
LD t4, X, 1 * SIZE
115133
add.d X, X, INCX
116-
fabs.s t1, t1
117-
fabs.s t2, t2
118-
fabs.s t3, t3
119-
fabs.s t4, t4
120-
fadd.s t1, t1, t2
121-
fadd.s t3, t3, t4
122-
fmax.s s1, t1, t3
123-
fld.s t1, X, 0 * SIZE
124-
fld.s t2, X, 1 * SIZE
134+
FABS t1, t1
135+
FABS t2, t2
136+
FABS t3, t3
137+
FABS t4, t4
138+
ADD t1, t1, t2
139+
ADD t3, t3, t4
140+
FMAX s1, t1, t3
141+
LD t1, X, 0 * SIZE
142+
LD t2, X, 1 * SIZE
125143
add.d X, X, INCX
126-
fld.s t3, X, 0 * SIZE
127-
fld.s t4, X, 1 * SIZE
144+
LD t3, X, 0 * SIZE
145+
LD t4, X, 1 * SIZE
128146
add.d X, X, INCX
129-
fabs.s t1, t1
130-
fabs.s t2, t2
131-
fabs.s t3, t3
132-
fabs.s t4, t4
133-
fadd.s t1, t1, t2
134-
fadd.s t3, t3, t4
135-
fmax.s s1, t1, t3
136-
fld.s t1, X, 0 * SIZE
137-
fld.s t2, X, 1 * SIZE
147+
FABS t1, t1
148+
FABS t2, t2
149+
FABS t3, t3
150+
FABS t4, t4
151+
ADD t1, t1, t2
152+
ADD t3, t3, t4
153+
FMAX s1, t1, t3
154+
LD t1, X, 0 * SIZE
155+
LD t2, X, 1 * SIZE
138156
add.d X, X, INCX
139-
fld.s t3, X, 0 * SIZE
140-
fld.s t4, X, 1 * SIZE
157+
LD t3, X, 0 * SIZE
158+
LD t4, X, 1 * SIZE
141159
add.d X, X, INCX
142-
fabs.s t1, t1
143-
fabs.s t2, t2
144-
fabs.s t3, t3
145-
fabs.s t4, t4
160+
FABS t1, t1
161+
FABS t2, t2
162+
FABS t3, t3
163+
FABS t4, t4
146164
addi.d I, I, -1
147-
fadd.s t1, t1, t2
148-
fadd.s t3, t3, t4
149-
fmax.s s3, t1, t3
150-
fld.s t1, X, 0 * SIZE
151-
fld.s t2, X, 1 * SIZE
165+
ADD t1, t1, t2
166+
ADD t3, t3, t4
167+
FMAX s3, t1, t3
168+
LD t1, X, 0 * SIZE
169+
LD t2, X, 1 * SIZE
152170
add.d X, X, INCX
153-
fld.s t3, X, 0 * SIZE
154-
fld.s t4, X, 1 * SIZE
171+
LD t3, X, 0 * SIZE
172+
LD t4, X, 1 * SIZE
155173
add.d X, X, INCX
156-
fabs.s t1, t1
157-
fabs.s t2, t2
158-
fabs.s t3, t3
159-
fabs.s t4, t4
160-
fadd.s t1, t1, t2
161-
fadd.s t3, t3, t4
162-
fmax.s s4, t1, t3
174+
FABS t1, t1
175+
FABS t2, t2
176+
FABS t3, t3
177+
FABS t4, t4
178+
ADD t1, t1, t2
179+
ADD t3, t3, t4
180+
FMAX s4, t1, t3
163181
blt $r0, I, .L21
164182
.align 3
165183

166184
.L22:
167-
fmax.s s1, s1, s2
168-
fmax.s s3, s3, s4
169-
fmax.s s1, s1, s3
185+
FMAX s1, s1, s2
186+
FMAX s3, s3, s4
187+
FMAX s1, s1, s3
170188
.align 3
171189

172190
.L23: //N<8
@@ -182,12 +200,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
182200
FABS a1, a1
183201
ADD a0, a0, a1
184202
add.d X, X, INCX
185-
fmax.s s1, a0, s1
203+
FMAX s1, a0, s1
186204
blt $r0, I, .L24
187205
.align 3
188206

189207
.L999:
190-
fmov.s $f0, $f22
208+
MOV $f0, $f22
191209
jirl $r0, $r1, 0x0
192210
.align 3
193211

0 commit comments

Comments
 (0)