Skip to content

Commit ee26caf

Browse files
authored
Merge pull request #5309 from davidz-ampere/dev-ampereone
Add support for Ampere AmpereOne processors
2 parents 8b08df5 + aa90ab4 commit ee26caf

File tree

6 files changed

+90
-3
lines changed

6 files changed

+90
-3
lines changed

Makefile.arm64

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,16 @@ endif
191191
endif
192192
endif
193193

194+
# Ampere AmpereOne (ampere1, ampere1a) processors: add -march flags when building with GCC >= 12 or Clang.
195+
ifeq ($(CORE), AMPERE1)
196+
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
197+
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
198+
ifneq ($(F_COMPILER), NAG)
199+
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
200+
endif
201+
endif
202+
endif
203+
194204
# Use a53 tunings because a55 is only available in GCC>=8.1
195205
ifeq ($(CORE), CORTEXA55)
196206
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,8 @@ GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
393393
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
394394
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
395395
GCCVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
396+
GCCVERSIONGTEQ13 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 13)
397+
GCCVERSIONGTEQ14 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 14)
396398
# Note that the behavior of -dumpversion is compile-time-configurable for
397399
# gcc-7.x and newer. Use -dumpfullversion there
398400
ifeq ($(GCCVERSIONGTEQ7),1)

cpuid_arm64.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ size_t length64=sizeof(value64);
7979
#define CPU_TSV110 9
8080
// Ampere
8181
#define CPU_EMAG8180 10
82+
#define CPU_AMPERE1 25
8283
// Apple
8384
#define CPU_VORTEX 13
8485
// Fujitsu
@@ -111,7 +112,8 @@ static char *cpuname[] = {
111112
"CORTEXA710",
112113
"FT2000",
113114
"CORTEXA76",
114-
"NEOVERSEV2"
115+
"NEOVERSEV2",
116+
"AMPERE1"
115117
};
116118

117119
static char *cpuname_lower[] = {
@@ -139,7 +141,9 @@ static char *cpuname_lower[] = {
139141
"cortexa710",
140142
"ft2000",
141143
"cortexa76",
142-
"neoversev2"
144+
"neoversev2",
145+
"ampere1",
146+
"ampere1a"
143147
};
144148

145149
static int cpulowperf=0;
@@ -334,6 +338,10 @@ int detect(void)
334338
// Ampere
335339
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
336340
return CPU_EMAG8180;
341+
else if (strstr(cpu_implementer, "0xc0")) {
342+
if (strstr(cpu_part, "0xac3") || strstr(cpu_part, "0xac4"))
343+
return CPU_AMPERE1;
344+
}
337345
// Fujitsu
338346
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
339347
return CPU_A64FX;
@@ -684,6 +692,21 @@ void get_cpuconfig(void)
684692
printf("#define DTB_SIZE 4096\n");
685693
break;
686694

695+
case CPU_AMPERE1:
696+
printf("#define %s\n", cpuname[d]);
697+
printf("#define L1_CODE_SIZE 16384\n");
698+
printf("#define L1_CODE_LINESIZE 64\n");
699+
printf("#define L1_CODE_ASSOCIATIVE 4\n");
700+
printf("#define L1_DATA_SIZE 65536\n");
701+
printf("#define L1_DATA_LINESIZE 64\n");
702+
printf("#define L1_DATA_ASSOCIATIVE 4\n");
703+
printf("#define L2_SIZE 2097152\n");
704+
printf("#define L2_LINESIZE 64\n");
705+
printf("#define L2_ASSOCIATIVE 8\n");
706+
printf("#define DTB_DEFAULT_ENTRIES 64\n");
707+
printf("#define DTB_SIZE 4096\n");
708+
break;
709+
687710
case CPU_THUNDERX3T110:
688711
printf("#define THUNDERX3T110 \n");
689712
printf("#define L1_CODE_SIZE 65536 \n");

getarch.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
158158
/* #define FORCE_CSKY */
159159
/* #define FORCE_CK860FV */
160160
/* #define FORCE_GENERIC */
161+
/* #define FORCE_AMPERE1 */
161162

162163
#ifdef FORCE_P2
163164
#define FORCE
@@ -1590,6 +1591,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
15901591
#define CORENAME "EMAG8180"
15911592
#endif
15921593

1594+
/*
 * Build configuration selected when FORCE_AMPERE1 is defined
 * (Ampere AmpereOne, ARM64).
 *
 * The cache and TLB parameters passed through ARCHCONFIG are kept identical
 * to the values printed by the CPU_AMPERE1 case in cpuid_arm64.c, so that a
 * forced build and an auto-detected build see the same configuration.
 * In particular L2_ASSOCIATIVE is 8 here, matching that case.
 */
#ifdef FORCE_AMPERE1
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "AMPERE1"
#define SUBDIRNAME      "arm64"
#define ARCHCONFIG   "-DAMPERE1 " \
       "-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \
       "-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
       "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8 " \
       "-march=armv8.6-a+crypto+crc+fp16+sha3+rng"
#define LIBNAME   "ampere1"
#define CORENAME  "AMPERE1"
#endif
1609+
15931610
#ifdef FORCE_THUNDERX3T110
15941611
#define ARMV8
15951612
#define FORCE
@@ -1820,7 +1837,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
18201837
#define CORENAME "CK860FV"
18211838
#endif
18221839

1823-
18241840
#ifndef FORCE
18251841

18261842
#ifdef USER_TARGET

kernel/arm64/KERNEL.AMPERE1

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
include $(KERNELDIR)/KERNEL.NEOVERSEN1

param.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3635,6 +3635,41 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
36353635
#define CGEMM_DEFAULT_R 4096
36363636
#define ZGEMM_DEFAULT_R 4096
36373637

3638+
#elif defined(AMPERE1)
3639+
3640+
#if defined(XDOUBLE) || defined(DOUBLE)
3641+
#define SWITCH_RATIO 8
3642+
#else
3643+
#define SWITCH_RATIO 16
3644+
#endif
3645+
3646+
#define SGEMM_DEFAULT_UNROLL_M 16
3647+
#define SGEMM_DEFAULT_UNROLL_N 4
3648+
3649+
#define DGEMM_DEFAULT_UNROLL_M 8
3650+
#define DGEMM_DEFAULT_UNROLL_N 4
3651+
3652+
#define CGEMM_DEFAULT_UNROLL_M 8
3653+
#define CGEMM_DEFAULT_UNROLL_N 4
3654+
3655+
#define ZGEMM_DEFAULT_UNROLL_M 4
3656+
#define ZGEMM_DEFAULT_UNROLL_N 4
3657+
3658+
#define SGEMM_DEFAULT_P 240
3659+
#define DGEMM_DEFAULT_P 240
3660+
#define CGEMM_DEFAULT_P 128
3661+
#define ZGEMM_DEFAULT_P 128
3662+
3663+
#define SGEMM_DEFAULT_Q 640
3664+
#define DGEMM_DEFAULT_Q 320
3665+
#define CGEMM_DEFAULT_Q 224
3666+
#define ZGEMM_DEFAULT_Q 112
3667+
3668+
#define SGEMM_DEFAULT_R 4096
3669+
#define DGEMM_DEFAULT_R 4096
3670+
#define CGEMM_DEFAULT_R 4096
3671+
#define ZGEMM_DEFAULT_R 4096
3672+
36383673
#elif defined(A64FX) // 512-bit SVE
36393674

36403675
/* When all BLAS3 routines are implemeted with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".

0 commit comments

Comments
 (0)