Skip to content

Commit d5ed695

Browse files
authored
Merge pull request #3588 from martin-frbg/fix3586
Fix mistaken declaration of CortexX1 as ArmV9 in PR#3586
2 parents 1d4e703 + a55a06c commit d5ed695

File tree

5 files changed

+12
-227
lines changed

5 files changed

+12
-227
lines changed

Makefile.arm64

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -238,18 +238,18 @@ endif
238238

239239
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
240240
ifeq ($(CORE), CORTEXX1)
241-
CCOMMON_OPT += -march=armv9 -mtune=cortexx1
241+
CCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72
242242
ifneq ($(F_COMPILER), NAG)
243-
FCOMMON_OPT += -march=armv9 -mtune=cortexx1
243+
FCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72
244244
endif
245245
endif
246246
endif
247247

248248
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
249249
ifeq ($(CORE), CORTEXX2)
250-
CCOMMON_OPT += -march=armv9 -mtune=cortexx2
250+
CCOMMON_OPT += -march=armv8.4-a+sve
251251
ifneq ($(F_COMPILER), NAG)
252-
FCOMMON_OPT += -march=armv9 -mtune=cortexx2
252+
FCOMMON_OPT += -march=armv8.4-a+sve
253253
endif
254254
endif
255255
endif
@@ -266,9 +266,9 @@ endif
266266

267267
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
268268
ifeq ($(CORE), CORTEXA710)
269-
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710
269+
CCOMMON_OPT += -march=armv8.4-a+sve
270270
ifneq ($(F_COMPILER), NAG)
271-
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710
271+
FCOMMON_OPT += -march=armv8.4-a+sve
272272
endif
273273
endif
274274
endif

cmake/cc.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ endif ()
175175

176176
if (${CORE} STREQUAL CORTEXX1)
177177
if (NOT DYNAMIC_ARCH)
178-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
178+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
179179
endif ()
180180
endif ()
181181

getarch.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1288,7 +1288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12881288
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
12891289
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
12901290
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
1291-
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
1291+
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
12921292
#define LIBNAME "cortexx1"
12931293
#define CORENAME "CORTEXX1"
12941294
#endif

kernel/arm64/KERNEL.CORTEXX1

Lines changed: 1 addition & 216 deletions
Original file line numberDiff line numberDiff line change
@@ -1,216 +1 @@
1-
SAMINKERNEL = ../arm/amin.c
2-
DAMINKERNEL = ../arm/amin.c
3-
CAMINKERNEL = ../arm/zamin.c
4-
ZAMINKERNEL = ../arm/zamin.c
5-
6-
SMAXKERNEL = ../arm/max.c
7-
DMAXKERNEL = ../arm/max.c
8-
9-
SMINKERNEL = ../arm/min.c
10-
DMINKERNEL = ../arm/min.c
11-
12-
ISAMINKERNEL = ../arm/iamin.c
13-
IDAMINKERNEL = ../arm/iamin.c
14-
ICAMINKERNEL = ../arm/izamin.c
15-
IZAMINKERNEL = ../arm/izamin.c
16-
17-
ISMAXKERNEL = ../arm/imax.c
18-
IDMAXKERNEL = ../arm/imax.c
19-
20-
ISMINKERNEL = ../arm/imin.c
21-
IDMINKERNEL = ../arm/imin.c
22-
23-
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
24-
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
25-
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
26-
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
27-
28-
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
29-
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
30-
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
31-
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
32-
33-
TRSMCOPYLN_M = trsm_lncopy_sve.c
34-
TRSMCOPYLT_M = trsm_ltcopy_sve.c
35-
TRSMCOPYUN_M = trsm_uncopy_sve.c
36-
TRSMCOPYUT_M = trsm_utcopy_sve.c
37-
38-
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
39-
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
40-
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
41-
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
42-
43-
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
44-
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
45-
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
46-
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
47-
48-
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
49-
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
50-
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
51-
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
52-
53-
54-
SAMAXKERNEL = amax.S
55-
DAMAXKERNEL = amax.S
56-
CAMAXKERNEL = zamax.S
57-
ZAMAXKERNEL = zamax.S
58-
59-
SAXPYKERNEL = axpy.S
60-
DAXPYKERNEL = axpy.S
61-
CAXPYKERNEL = zaxpy.S
62-
ZAXPYKERNEL = zaxpy.S
63-
64-
SROTKERNEL = rot.S
65-
DROTKERNEL = rot.S
66-
CROTKERNEL = zrot.S
67-
ZROTKERNEL = zrot.S
68-
69-
SSCALKERNEL = scal.S
70-
DSCALKERNEL = scal.S
71-
CSCALKERNEL = zscal.S
72-
ZSCALKERNEL = zscal.S
73-
74-
SGEMVNKERNEL = gemv_n.S
75-
DGEMVNKERNEL = gemv_n.S
76-
CGEMVNKERNEL = zgemv_n.S
77-
ZGEMVNKERNEL = zgemv_n.S
78-
79-
SGEMVTKERNEL = gemv_t.S
80-
DGEMVTKERNEL = gemv_t.S
81-
CGEMVTKERNEL = zgemv_t.S
82-
ZGEMVTKERNEL = zgemv_t.S
83-
84-
85-
SASUMKERNEL = asum.S
86-
DASUMKERNEL = asum.S
87-
CASUMKERNEL = casum.S
88-
ZASUMKERNEL = zasum.S
89-
90-
SCOPYKERNEL = copy.S
91-
DCOPYKERNEL = copy.S
92-
CCOPYKERNEL = copy.S
93-
ZCOPYKERNEL = copy.S
94-
95-
SSWAPKERNEL = swap.S
96-
DSWAPKERNEL = swap.S
97-
CSWAPKERNEL = swap.S
98-
ZSWAPKERNEL = swap.S
99-
100-
ISAMAXKERNEL = iamax.S
101-
IDAMAXKERNEL = iamax.S
102-
ICAMAXKERNEL = izamax.S
103-
IZAMAXKERNEL = izamax.S
104-
105-
SNRM2KERNEL = nrm2.S
106-
DNRM2KERNEL = nrm2.S
107-
CNRM2KERNEL = znrm2.S
108-
ZNRM2KERNEL = znrm2.S
109-
110-
DDOTKERNEL = dot.S
111-
ifneq ($(C_COMPILER), PGI)
112-
SDOTKERNEL = ../generic/dot.c
113-
else
114-
SDOTKERNEL = dot.S
115-
endif
116-
ifneq ($(C_COMPILER), PGI)
117-
CDOTKERNEL = zdot.S
118-
ZDOTKERNEL = zdot.S
119-
else
120-
CDOTKERNEL = ../arm/zdot.c
121-
ZDOTKERNEL = ../arm/zdot.c
122-
endif
123-
DSDOTKERNEL = dot.S
124-
125-
DGEMM_BETA = dgemm_beta.S
126-
SGEMM_BETA = sgemm_beta.S
127-
128-
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
129-
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
130-
131-
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
132-
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
133-
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
134-
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
135-
136-
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
137-
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
138-
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
139-
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
140-
141-
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
142-
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
143-
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
144-
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
145-
146-
SSYMMUCOPY_M = symm_ucopy_sve.c
147-
SSYMMLCOPY_M = symm_lcopy_sve.c
148-
149-
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
150-
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
151-
152-
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
153-
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
154-
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
155-
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
156-
157-
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
158-
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
159-
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
160-
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
161-
162-
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
163-
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
164-
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
165-
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
166-
167-
DSYMMUCOPY_M = symm_ucopy_sve.c
168-
DSYMMLCOPY_M = symm_lcopy_sve.c
169-
170-
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
171-
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
172-
173-
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
174-
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
175-
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
176-
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
177-
178-
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
179-
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
180-
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
181-
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
182-
183-
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
184-
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
185-
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
186-
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
187-
188-
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
189-
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
190-
191-
CSYMMUCOPY_M = zsymm_ucopy_sve.c
192-
CSYMMLCOPY_M = zsymm_lcopy_sve.c
193-
194-
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
195-
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
196-
197-
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
198-
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
199-
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
200-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
201-
202-
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
203-
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
204-
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
205-
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
206-
207-
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
208-
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
209-
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
210-
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
211-
212-
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
213-
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
214-
215-
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
216-
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
1+
include $(KERNELDIR)/KERNEL.CORTEXA57

param.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3128,7 +3128,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31283128

31293129
#define SYMV_P 16
31303130

3131-
#if defined(CORTEXA57) || \
3131+
#if defined(CORTEXA57) || defined(CORTEXX1) || \
31323132
defined(CORTEXA72) || defined(CORTEXA73) || \
31333133
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
31343134

@@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31473147
/*FIXME: this should be using the cache size, but there is currently no easy way to
31483148
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
31493149
is a big desktop or server with abundant cache rather than a phone or embedded device */
3150-
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
3150+
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
31513151
#define SGEMM_DEFAULT_P 512
31523152
#define DGEMM_DEFAULT_P 256
31533153
#define CGEMM_DEFAULT_P 256
@@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
33773377
#define CGEMM_DEFAULT_R 4096
33783378
#define ZGEMM_DEFAULT_R 4096
33793379

3380-
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)
3380+
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2)
33813381

33823382
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
33833383
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */

0 commit comments

Comments
 (0)