Skip to content

Commit be68ef0

Browse files
davidz-ampere
authored and committed
Add support for Ampere processors
1 parent f1097d1 commit be68ef0

File tree

7 files changed

+499
-3
lines changed

7 files changed

+499
-3
lines changed

Makefile.arm64

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,26 @@ endif
191191
endif
192192
endif
193193

194+
# Compiler tuning flags for Ampere AmpereOne (ampere1) processors.
195+
ifeq ($(CORE), AMPERE1)
196+
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
197+
CCOMMON_OPT += -march=armv8.6-a -mtune=ampere1
198+
ifneq ($(F_COMPILER), NAG)
199+
FCOMMON_OPT += -march=armv8.6-a -mtune=ampere1
200+
endif
201+
endif
202+
endif
203+
204+
# Compiler tuning flags for Ampere AmpereOne (ampere1a) processors.
205+
ifeq ($(CORE), AMPERE1A)
206+
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ13) $(ISCLANG)))
207+
CCOMMON_OPT += -march=armv8.6-a -mtune=ampere1a
208+
ifneq ($(F_COMPILER), NAG)
209+
FCOMMON_OPT += -march=armv8.6-a -mtune=ampere1a
210+
endif
211+
endif
212+
endif
213+
194214
# Use a53 tunings because a55 is only available in GCC>=8.1
195215
ifeq ($(CORE), CORTEXA55)
196216
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
393393
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
394394
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
395395
GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
396+
GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13)
397+
GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14)
396398
# Note that the behavior of -dumpversion is compile-time-configurable for
397399
# gcc-7.x and newer. Use -dumpfullversion there
398400
ifeq ($(GCCVERSIONGTEQ7),1)

cpuid_arm64.c

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ size_t length64=sizeof(value64);
7979
#define CPU_TSV110 9
8080
// Ampere
8181
#define CPU_EMAG8180 10
82+
#define CPU_AMPERE1 25
83+
#define CPU_AMPERE1A 26
8284
// Apple
8385
#define CPU_VORTEX 13
8486
// Fujitsu
@@ -111,7 +113,9 @@ static char *cpuname[] = {
111113
"CORTEXA710",
112114
"FT2000",
113115
"CORTEXA76",
114-
"NEOVERSEV2"
116+
"NEOVERSEV2",
117+
"AMPERE1",
118+
"AMPERE1A"
115119
};
116120

117121
static char *cpuname_lower[] = {
@@ -139,7 +143,9 @@ static char *cpuname_lower[] = {
139143
"cortexa710",
140144
"ft2000",
141145
"cortexa76",
142-
"neoversev2"
146+
"neoversev2",
147+
"ampere1",
148+
"ampere1a"
143149
};
144150

145151
static int cpulowperf=0;
@@ -334,6 +340,12 @@ int detect(void)
334340
// Ampere
335341
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
336342
return CPU_EMAG8180;
343+
else if (strstr(cpu_implementer, "0xc0")) {
344+
if (strstr(cpu_part, "0xac3"))
345+
return CPU_AMPERE1;
346+
else if (strstr(cpu_part, "0xac4"))
347+
return CPU_AMPERE1A;
348+
}
337349
// Fujitsu
338350
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
339351
return CPU_A64FX;
@@ -684,6 +696,22 @@ void get_cpuconfig(void)
684696
printf("#define DTB_SIZE 4096\n");
685697
break;
686698

699+
case CPU_AMPERE1:
700+
case CPU_AMPERE1A:
701+
printf("#define %s\n", cpuname[d]);
702+
printf("#define L1_CODE_SIZE 16384\n");
703+
printf("#define L1_CODE_LINESIZE 64\n");
704+
printf("#define L1_CODE_ASSOCIATIVE 4\n");
705+
printf("#define L1_DATA_SIZE 65536\n");
706+
printf("#define L1_DATA_LINESIZE 64\n");
707+
printf("#define L1_DATA_ASSOCIATIVE 4\n");
708+
printf("#define L2_SIZE 2097152\n");
709+
printf("#define L2_LINESIZE 64\n");
710+
printf("#define L2_ASSOCIATIVE 8\n");
711+
printf("#define DTB_DEFAULT_ENTRIES 64\n");
712+
printf("#define DTB_SIZE 4096\n");
713+
break;
714+
687715
case CPU_THUNDERX3T110:
688716
printf("#define THUNDERX3T110 \n");
689717
printf("#define L1_CODE_SIZE 65536 \n");

getarch.c

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
158158
/* #define FORCE_CSKY */
159159
/* #define FORCE_CK860FV */
160160
/* #define FORCE_GENERIC */
161+
/* #define FORCE_AMPERE1 */
162+
/* #define FORCE_AMPERE1A */
161163

162164
#ifdef FORCE_P2
163165
#define FORCE
@@ -1590,6 +1592,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15901592
#define CORENAME "EMAG8180"
15911593
#endif
15921594

1595+
#ifdef FORCE_AMPERE1
1596+
#define FORCE
1597+
#define ARCHITECTURE "ARM64"
1598+
#define SUBARCHITECTURE "AMPERE1"
1599+
#define SUBDIRNAME "arm64"
1600+
/*
 * Configuration string for a forced AMPERE1 target: the core define,
 * L1/L2 cache and DTB parameters, feature defines, and the -march/-mtune
 * flags. L2_ASSOCIATIVE is 8 so that a forced build agrees with the value
 * get_cpuconfig() in cpuid_arm64.c prints for CPU_AMPERE1.
 */
#define ARCHCONFIG "-DAMPERE1 " \
       "-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
       "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
       "-march=armv8.6-a -mtune=ampere1"
1607+
#define LIBNAME "ampere1"
1608+
#define CORENAME "AMPERE1"
1609+
#endif
1610+
1611+
#ifdef FORCE_AMPERE1A
1612+
#define FORCE
1613+
#define ARCHITECTURE "ARM64"
1614+
#define SUBARCHITECTURE "AMPERE1A"
1615+
#define SUBDIRNAME "arm64"
1616+
/*
 * Configuration string for a forced AMPERE1A target: the core define,
 * L1/L2 cache and DTB parameters, feature defines, and the -march/-mtune
 * flags. L2_ASSOCIATIVE is 8 so that a forced build agrees with the value
 * get_cpuconfig() in cpuid_arm64.c prints for CPU_AMPERE1A.
 */
#define ARCHCONFIG "-DAMPERE1A " \
       "-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
       "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
       "-march=armv8.6-a -mtune=ampere1a"
1623+
#define LIBNAME "ampere1a"
1624+
#define CORENAME "AMPERE1A"
1625+
#endif
1626+
15931627
#ifdef FORCE_THUNDERX3T110
15941628
#define ARMV8
15951629
#define FORCE
@@ -1820,7 +1854,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18201854
#define CORENAME "CK860FV"
18211855
#endif
18221856

1823-
18241857
#ifndef FORCE
18251858

18261859
#ifdef USER_TARGET

kernel/arm64/KERNEL.AMPERE1

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
SAMINKERNEL = ../arm/amin.c
2+
DAMINKERNEL = ../arm/amin.c
3+
CAMINKERNEL = ../arm/zamin.c
4+
ZAMINKERNEL = ../arm/zamin.c
5+
6+
SMAXKERNEL = ../arm/max.c
7+
DMAXKERNEL = ../arm/max.c
8+
9+
SMINKERNEL = ../arm/min.c
10+
DMINKERNEL = ../arm/min.c
11+
12+
ISAMINKERNEL = ../arm/iamin.c
13+
IDAMINKERNEL = ../arm/iamin.c
14+
ICAMINKERNEL = ../arm/izamin.c
15+
IZAMINKERNEL = ../arm/izamin.c
16+
17+
ISMAXKERNEL = ../arm/imax.c
18+
IDMAXKERNEL = ../arm/imax.c
19+
20+
ISMINKERNEL = ../arm/imin.c
21+
IDMINKERNEL = ../arm/imin.c
22+
23+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
24+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
25+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
26+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
27+
28+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
29+
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
30+
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
31+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
32+
33+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
34+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
35+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
36+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
37+
38+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
39+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
40+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
41+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
42+
43+
SAMAXKERNEL = amax.S
44+
DAMAXKERNEL = amax.S
45+
CAMAXKERNEL = zamax.S
46+
ZAMAXKERNEL = zamax.S
47+
48+
SAXPYKERNEL = axpy.S
49+
DAXPYKERNEL = daxpy_thunderx2t99.S
50+
CAXPYKERNEL = zaxpy.S
51+
ZAXPYKERNEL = zaxpy.S
52+
53+
SROTKERNEL = rot.S
54+
DROTKERNEL = rot.S
55+
CROTKERNEL = zrot.S
56+
ZROTKERNEL = zrot.S
57+
58+
SSCALKERNEL = scal.S
59+
DSCALKERNEL = scal.S
60+
CSCALKERNEL = zscal.S
61+
ZSCALKERNEL = zscal.S
62+
63+
SGEMVNKERNEL = gemv_n.S
64+
DGEMVNKERNEL = gemv_n.S
65+
CGEMVNKERNEL = zgemv_n.S
66+
ZGEMVNKERNEL = zgemv_n.S
67+
68+
SGEMVTKERNEL = gemv_t.S
69+
DGEMVTKERNEL = gemv_t.S
70+
CGEMVTKERNEL = zgemv_t.S
71+
ZGEMVTKERNEL = zgemv_t.S
72+
73+
74+
SASUMKERNEL = sasum_thunderx2t99.c
75+
DASUMKERNEL = dasum_thunderx2t99.c
76+
CASUMKERNEL = casum_thunderx2t99.c
77+
ZASUMKERNEL = zasum_thunderx2t99.c
78+
79+
SCOPYKERNEL = copy_thunderx2t99.c
80+
DCOPYKERNEL = copy_thunderx2t99.c
81+
CCOPYKERNEL = copy_thunderx2t99.c
82+
ZCOPYKERNEL = copy_thunderx2t99.c
83+
84+
SSWAPKERNEL = swap_thunderx2t99.S
85+
DSWAPKERNEL = swap_thunderx2t99.S
86+
CSWAPKERNEL = swap_thunderx2t99.S
87+
ZSWAPKERNEL = swap_thunderx2t99.S
88+
89+
ISAMAXKERNEL = iamax_thunderx2t99.c
90+
IDAMAXKERNEL = iamax_thunderx2t99.c
91+
ICAMAXKERNEL = izamax_thunderx2t99.c
92+
IZAMAXKERNEL = izamax_thunderx2t99.c
93+
94+
SNRM2KERNEL = nrm2.S
95+
DNRM2KERNEL = nrm2.S
96+
CNRM2KERNEL = znrm2.S
97+
ZNRM2KERNEL = znrm2.S
98+
99+
DDOTKERNEL = dot.c
100+
SDOTKERNEL = dot.c
101+
CDOTKERNEL = zdot_thunderx2t99.c
102+
ZDOTKERNEL = zdot_thunderx2t99.c
103+
DSDOTKERNEL = dot.S
104+
105+
DGEMM_BETA = dgemm_beta.S
106+
SGEMM_BETA = sgemm_beta.S
107+
108+
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
109+
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
110+
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
111+
ifeq ($(SGEMM_UNROLL_M), 16)
112+
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
113+
else
114+
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
115+
endif
116+
ifeq ($(SGEMM_UNROLL_M), 4)
117+
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
118+
else
119+
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
120+
endif
121+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
122+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
123+
endif
124+
ifeq ($(SGEMM_UNROLL_N), 16)
125+
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
126+
else
127+
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
128+
endif
129+
ifeq ($(SGEMM_UNROLL_N), 4)
130+
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
131+
else
132+
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
133+
endif
134+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
135+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
136+
137+
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
138+
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
139+
140+
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
141+
142+
ifeq ($(DGEMM_UNROLL_M), 8)
143+
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
144+
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
145+
else
146+
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
147+
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
148+
endif
149+
150+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
151+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
152+
endif
153+
154+
ifeq ($(DGEMM_UNROLL_N), 4)
155+
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
156+
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
157+
else
158+
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
159+
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
160+
endif
161+
162+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
163+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
164+
165+
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
166+
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
167+
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
168+
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
169+
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
170+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
171+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
172+
endif
173+
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
174+
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
175+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
176+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
177+
178+
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
179+
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
180+
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
181+
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
182+
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
183+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
184+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
185+
endif
186+
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
187+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
188+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
189+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)

0 commit comments

Comments
 (0)