Skip to content

Commit 3ffd686

Browse files
committed
Merge branch 'develop' into dev/slewis/merge-from-riscv
2 parents a3b0ef6 + ec74dcd commit 3ffd686

21 files changed

+4154
-4079
lines changed

benchmark/Makefile

Lines changed: 3444 additions & 3444 deletions
Large diffs are not rendered by default.

cblas.h

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,16 @@ CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPE
101101
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
102102
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
103103

104+
float cblas_samax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
105+
double cblas_damax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
106+
float cblas_scamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
107+
double cblas_dzamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
108+
109+
float cblas_samin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
110+
double cblas_damin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
111+
float cblas_scamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
112+
double cblas_dzamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
113+
104114
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
105115
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
106116
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
@@ -116,6 +126,9 @@ void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS
116126
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
117127
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
118128

129+
void cblas_caxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
130+
void cblas_zaxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
131+
119132
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
120133
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
121134
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);

interface/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,8 @@ endif ()
130130
foreach (float_type ${FLOAT_TYPES})
131131

132132
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
133+
GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type})
134+
133135
GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type})
134136
GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type})
135137
GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type})

interface/Makefile

Lines changed: 44 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,8 @@ CSBLAS1OBJS = \
270270
cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \
271271
cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \
272272
cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \
273-
cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX)
273+
cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) cblas_samax.$(SUFFIX) \
274+
cblas_samin.$(SUFFIX)
274275

275276
CSBLAS2OBJS = \
276277
cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \
@@ -295,7 +296,8 @@ CDBLAS1OBJS = \
295296
cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \
296297
cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \
297298
cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \
298-
cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX)
299+
cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) cblas_damax.$(SUFFIX) \
300+
cblas_damin.$(SUFFIX)
299301

300302
CDBLAS2OBJS = \
301303
cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \
@@ -315,7 +317,7 @@ CCBLAS1OBJS = \
315317
cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \
316318
cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \
317319
cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \
318-
cblas_caxpby.$(SUFFIX) \
320+
cblas_caxpby.$(SUFFIX) cblas_scamax.$(SUFFIX) cblas_caxpyc.$(SUFFIX) cblas_scamin.$(SUFFIX) \
319321
cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX)
320322

321323
CCBLAS2OBJS = \
@@ -340,12 +342,12 @@ CXERBLAOBJ = \
340342

341343
CZBLAS1OBJS = \
342344
cblas_izamax.$(SUFFIX) cblas_izamin.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \
343-
cblas_zcopy.$(SUFFIX) \
345+
cblas_zcopy.$(SUFFIX) cblas_dzamax.$(SUFFIX) cblas_dzamin.$(SUFFIX) \
344346
cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \
345347
cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \
346348
cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \
347349
cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \
348-
cblas_zaxpby.$(SUFFIX) \
350+
cblas_zaxpby.$(SUFFIX) cblas_zaxpyc.$(SUFFIX) \
349351
cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX)
350352

351353

@@ -1533,6 +1535,30 @@ cblas_icmin.$(SUFFIX) cblas_icmin.$(PSUFFIX) : imax.c
15331535
cblas_izmin.$(SUFFIX) cblas_izmin.$(PSUFFIX) : imax.c
15341536
$(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F)
15351537

1538+
cblas_samax.$(SUFFIX) cblas_samax.$(PSUFFIX) : max.c
1539+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
1540+
1541+
cblas_damax.$(SUFFIX) cblas_damax.$(PSUFFIX) : max.c
1542+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
1543+
1544+
cblas_scamax.$(SUFFIX) cblas_scamax.$(PSUFFIX) : max.c
1545+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
1546+
1547+
cblas_dzamax.$(SUFFIX) cblas_dzamax.$(PSUFFIX) : max.c
1548+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
1549+
1550+
cblas_samin.$(SUFFIX) cblas_samin.$(PSUFFIX) : max.c
1551+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
1552+
1553+
cblas_damin.$(SUFFIX) cblas_damin.$(PSUFFIX) : max.c
1554+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
1555+
1556+
cblas_scamin.$(SUFFIX) cblas_scamin.$(PSUFFIX) : max.c
1557+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
1558+
1559+
cblas_dzamin.$(SUFFIX) cblas_dzamin.$(PSUFFIX) : max.c
1560+
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
1561+
15361562
cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c
15371563
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
15381564

@@ -1627,6 +1653,19 @@ cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
16271653
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
16281654
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
16291655

1656+
cblas_caxpyc.$(SUFFIX) cblas_caxpyc.$(PSUFFIX) : zaxpy.c
1657+
$(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F)
1658+
1659+
cblas_zaxpyc.$(SUFFIX) cblas_zaxpyc.$(PSUFFIX) : zaxpy.c
1660+
$(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F)
1661+
1662+
cblas_xaxpyc.$(SUFFIX) cblas_xaxpyc.$(PSUFFIX) : zaxpy.c
1663+
$(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F)
1664+
1665+
sscal.$(SUFFIX) sscal.$(PSUFFIX) : scal.c
1666+
$(CC) $(CFLAGS) -c $< -o $(@F)
1667+
1668+
dscal.$(SUFFIX) dscal.$(PSUFFIX) : scal.c
16301669
cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c
16311670
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
16321671

interface/max.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -145,8 +145,13 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){
145145

146146
#else
147147

148+
#ifdef COMPLEX
149+
FLOAT CNAME(blasint n, void *vx, blasint incx){
150+
FLOAT *x = (FLOAT*) vx;
151+
#else
148152
FLOAT CNAME(blasint n, FLOAT *x, blasint incx){
149-
153+
#endif
154+
150155
FLOAT ret;
151156

152157
PRINT_DEBUG_CNAME;

kernel/loongarch64/KERNEL.LOONGSON2K1000

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lsx.S
1414
SAMAXKERNEL = amax_lsx.S
1515
DAMAXKERNEL = amax_lsx.S
1616
CAMAXKERNEL = camax_lsx.S
17+
ZAMAXKERNEL = camax_lsx.S
1718

1819
SAMINKERNEL = amin_lsx.S
1920
DAMINKERNEL = amin_lsx.S
2021
CAMINKERNEL = camin_lsx.S
22+
ZAMINKERNEL = camin_lsx.S
2123

2224
SMAXKERNEL = max_lsx.S
2325
DMAXKERNEL = max_lsx.S

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lasx.S
1414
SAMAXKERNEL = amax_lasx.S
1515
DAMAXKERNEL = amax_lasx.S
1616
CAMAXKERNEL = camax_lasx.S
17+
ZAMAXKERNEL = camax_lasx.S
1718

1819
SAMINKERNEL = amin_lasx.S
1920
DAMINKERNEL = amin_lasx.S
2021
CAMINKERNEL = camin_lasx.S
22+
ZAMINKERNEL = camin_lasx.S
2123

2224
SMAXKERNEL = max_lsx.S
2325
DMAXKERNEL = max_lsx.S

kernel/loongarch64/amin_lasx.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6666
#else
6767
xvldrepl.w VM0, X, 0
6868
#endif
69-
XVFSUB VM0, VM0, VM0
7069
bne INCX, TEMP, .L20
7170

7271
srai.d I, N, 4

kernel/loongarch64/amin_lsx.S

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6666
#else
6767
vldrepl.w VM0, X, 0
6868
#endif
69-
VFSUB VM0, VM0, VM0
7069
bne INCX, TEMP, .L20
7170

7271
srai.d I, N, 3

kernel/loongarch64/camax_lasx.S

Lines changed: 84 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -63,42 +63,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
6363
bge $r0, N, .L999
6464
bge $r0, INCX, .L999
6565
li.d TEMP, 1
66-
li.w I, -1
6766
slli.d TEMP, TEMP, ZBASE_SHIFT
6867
slli.d INCX, INCX, ZBASE_SHIFT
69-
xvreplgr2vr.w neg1, I
70-
xvffint.s.w neg1, neg1
7168
srai.d I, N, 3
7269
bne INCX, TEMP, .L20
7370
bge $r0, I, .L23
7471
.align 3
7572

7673
.L10:
77-
xvld VX0, X, 0 * SIZE
78-
xvld VX1, X, 8 * SIZE
79-
addi.d I, I, -1
74+
xvld VX0, X, 0
75+
xvld VX1, X, 32
76+
#ifdef DOUBLE
77+
xvpickev.d x1, VX1, VX0
78+
xvpickod.d x2, VX1, VX0
79+
#else
8080
xvpickev.w x1, VX1, VX0
8181
xvpickod.w x2, VX1, VX0
82-
xvfmul.s x3, neg1, x1
83-
xvfmul.s x4, neg1, x2
84-
xvfcmp.clt.s VT0, x1, res0
85-
xvfcmp.clt.s VT1, x2, res0
86-
xvbitsel.v x1, x1, x3, VT0
87-
xvbitsel.v x2, x2, x4, VT1
82+
#endif
83+
XVFSUB x3, res0, x1
84+
XVFSUB x4, res0, x2
85+
XVFMAX x1, x1, x3
86+
XVFMAX x2, x2, x4
87+
XVFADD VM1, x1, x2
88+
XVFMAX VM0, VM0, VM1
89+
#ifdef DOUBLE
90+
xvld VX0, X, 64
91+
xvld VX1, X, 96
92+
xvpickev.d x1, VX1, VX0
93+
xvpickod.d x2, VX1, VX0
94+
XVFSUB x3, res0, x1
95+
XVFSUB x4, res0, x2
96+
XVFMAX x1, x1, x3
97+
XVFMAX x2, x2, x4
98+
XVFADD VM1, x1, x2
99+
XVFMAX VM0, VM0, VM1
100+
#endif
101+
addi.d I, I, -1
88102
addi.d X, X, 16 * SIZE
89-
xvfadd.s VM1, x1, x2
90-
xvfmax.s VM0, VM0, VM1
91103
blt $r0, I, .L10
92104
.align 3
93105

94106
.L11:
107+
#ifdef DOUBLE
108+
xvpickve.d x1, VM0, 0
109+
xvpickve.d x2, VM0, 1
110+
XVFMAX VM0, x1, x2
111+
#else
95112
xvpickve.w x1, VM0, 0
96113
xvpickve.w x2, VM0, 1
97114
xvpickve.w x3, VM0, 2
98115
xvpickve.w x4, VM0, 3
99-
xvfmax.s VM1, x1, x2
100-
xvfmax.s VM0, x3, x4
101-
xvfmax.s VM0, VM0, VM1
116+
XVFMAX VM0, x1, x2
117+
XVFMAX VM1, x3, x4
118+
XVFMAX VM0, VM0, VM1
119+
#endif
102120
b .L23
103121
.align 3
104122

@@ -107,66 +125,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
107125
.align 3
108126

109127
.L21:
110-
fld.s t1, X, 0 * SIZE
111-
fld.s t2, X, 1 * SIZE
128+
LD t1, X, 0 * SIZE
129+
LD t2, X, 1 * SIZE
112130
add.d X, X, INCX
113-
fld.s t3, X, 0 * SIZE
114-
fld.s t4, X, 1 * SIZE
131+
LD t3, X, 0 * SIZE
132+
LD t4, X, 1 * SIZE
115133
add.d X, X, INCX
116-
fabs.s t1, t1
117-
fabs.s t2, t2
118-
fabs.s t3, t3
119-
fabs.s t4, t4
120-
fadd.s t1, t1, t2
121-
fadd.s t3, t3, t4
122-
fmax.s s1, t1, t3
123-
fld.s t1, X, 0 * SIZE
124-
fld.s t2, X, 1 * SIZE
134+
FABS t1, t1
135+
FABS t2, t2
136+
FABS t3, t3
137+
FABS t4, t4
138+
ADD t1, t1, t2
139+
ADD t3, t3, t4
140+
FMAX s1, t1, t3
141+
LD t1, X, 0 * SIZE
142+
LD t2, X, 1 * SIZE
125143
add.d X, X, INCX
126-
fld.s t3, X, 0 * SIZE
127-
fld.s t4, X, 1 * SIZE
144+
LD t3, X, 0 * SIZE
145+
LD t4, X, 1 * SIZE
128146
add.d X, X, INCX
129-
fabs.s t1, t1
130-
fabs.s t2, t2
131-
fabs.s t3, t3
132-
fabs.s t4, t4
133-
fadd.s t1, t1, t2
134-
fadd.s t3, t3, t4
135-
fmax.s s1, t1, t3
136-
fld.s t1, X, 0 * SIZE
137-
fld.s t2, X, 1 * SIZE
147+
FABS t1, t1
148+
FABS t2, t2
149+
FABS t3, t3
150+
FABS t4, t4
151+
ADD t1, t1, t2
152+
ADD t3, t3, t4
153+
FMAX s1, t1, t3
154+
LD t1, X, 0 * SIZE
155+
LD t2, X, 1 * SIZE
138156
add.d X, X, INCX
139-
fld.s t3, X, 0 * SIZE
140-
fld.s t4, X, 1 * SIZE
157+
LD t3, X, 0 * SIZE
158+
LD t4, X, 1 * SIZE
141159
add.d X, X, INCX
142-
fabs.s t1, t1
143-
fabs.s t2, t2
144-
fabs.s t3, t3
145-
fabs.s t4, t4
160+
FABS t1, t1
161+
FABS t2, t2
162+
FABS t3, t3
163+
FABS t4, t4
146164
addi.d I, I, -1
147-
fadd.s t1, t1, t2
148-
fadd.s t3, t3, t4
149-
fmax.s s3, t1, t3
150-
fld.s t1, X, 0 * SIZE
151-
fld.s t2, X, 1 * SIZE
165+
ADD t1, t1, t2
166+
ADD t3, t3, t4
167+
FMAX s3, t1, t3
168+
LD t1, X, 0 * SIZE
169+
LD t2, X, 1 * SIZE
152170
add.d X, X, INCX
153-
fld.s t3, X, 0 * SIZE
154-
fld.s t4, X, 1 * SIZE
171+
LD t3, X, 0 * SIZE
172+
LD t4, X, 1 * SIZE
155173
add.d X, X, INCX
156-
fabs.s t1, t1
157-
fabs.s t2, t2
158-
fabs.s t3, t3
159-
fabs.s t4, t4
160-
fadd.s t1, t1, t2
161-
fadd.s t3, t3, t4
162-
fmax.s s4, t1, t3
174+
FABS t1, t1
175+
FABS t2, t2
176+
FABS t3, t3
177+
FABS t4, t4
178+
ADD t1, t1, t2
179+
ADD t3, t3, t4
180+
FMAX s4, t1, t3
163181
blt $r0, I, .L21
164182
.align 3
165183

166184
.L22:
167-
fmax.s s1, s1, s2
168-
fmax.s s3, s3, s4
169-
fmax.s s1, s1, s3
185+
FMAX s1, s1, s2
186+
FMAX s3, s3, s4
187+
FMAX s1, s1, s3
170188
.align 3
171189

172190
.L23: //N<8
@@ -182,12 +200,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
182200
FABS a1, a1
183201
ADD a0, a0, a1
184202
add.d X, X, INCX
185-
fmax.s s1, a0, s1
203+
FMAX s1, a0, s1
186204
blt $r0, I, .L24
187205
.align 3
188206

189207
.L999:
190-
fmov.s $f0, $f22
208+
MOV $f0, $f22
191209
jirl $r0, $r1, 0x0
192210
.align 3
193211

0 commit comments

Comments
 (0)