Skip to content

Commit a4896b5

Browse files
authored
Update DYNAMIC_ARCH support for ARM64 and PPC (#2332)
* Update DYNAMIC_ARCH list of ARM64 targets for gmake
* Update arm64 CPU list for runtime detection
* Update DYNAMIC_ARCH list of ARM64 targets for cmake and add POWERPC targets
1 parent 9d50790 commit a4896b5

File tree

5 files changed

+138
-10
lines changed

5 files changed

+138
-10
lines changed

Makefile.arm64

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
3939
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
4040
endif
4141

42+
ifeq ($(GCCVERSIONGTEQ9), 1)
4243
ifeq ($(CORE), TSV110)
4344
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
4445
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
4546
endif
47+
endif
48+

Makefile.system

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ ifeq ($(C_COMPILER), GCC)
326326
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
327327
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
328328
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
329+
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
329330
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
330331
ifeq ($(GCCVERSIONGT4), 1)
331332
# GCC Major version > 4
@@ -547,9 +548,14 @@ endif
547548

548549
ifeq ($(ARCH), arm64)
549550
DYNAMIC_CORE = ARMV8
551+
DYNAMIC_CORE += CORTEXA53
550552
DYNAMIC_CORE += CORTEXA57
553+
DYNAMIC_CORE += CORTEXA72
554+
DYNAMIC_CORE += CORTEXA73
555+
DYNAMIC_CORE += FALKOR
551556
DYNAMIC_CORE += THUNDERX
552557
DYNAMIC_CORE += THUNDERX2T99
558+
DYNAMIC_CORE += TSV110
553559
endif
554560

555561
ifeq ($(ARCH), power)

cmake/arch.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ endif ()
4545

4646
if (DYNAMIC_ARCH)
4747
if (ARM64)
48-
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99)
48+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110)
49+
endif ()
50+
51+
if (POWER)
52+
set(DYNAMIC_CORE POWER6 POWER8 POWER9)
4953
endif ()
5054

5155
if (X86)

cmake/prebuild.cmake

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,83 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
309309
set(ZGEMM_UNROLL_M 4)
310310
set(ZGEMM_UNROLL_N 4)
311311
set(SYMV_P 16)
312+
elseif ("${TCORE}" STREQUAL "TSV110")
313+
file(APPEND ${TARGET_CONF_TEMP}
314+
"#define ARMV8\n"
315+
"#define L1_CODE_SIZE\t65536\n"
316+
"#define L1_CODE_LINESIZE\t64\n"
317+
"#define L1_CODE_ASSOCIATIVE\t4\n"
318+
"#define L1_DATA_SIZE\t65536\n"
319+
"#define L1_DATA_LINESIZE\t64\n"
320+
"#define L1_DATA_ASSOCIATIVE\t4\n"
321+
"#define L2_SIZE\t524288\n"
322+
"#define L2_LINESIZE\t64\n"
323+
"#define L2_ASSOCIATIVE\t8\n"
324+
"#define DTB_DEFAULT_ENTRIES\t64\n"
325+
"#define DTB_SIZE\t4096\n")
326+
set(SGEMM_UNROLL_M 16)
327+
set(SGEMM_UNROLL_N 4)
328+
set(DGEMM_UNROLL_M 8)
329+
set(DGEMM_UNROLL_N 4)
330+
set(CGEMM_UNROLL_M 8)
331+
set(CGEMM_UNROLL_N 4)
332+
set(ZGEMM_UNROLL_M 4)
333+
set(ZGEMM_UNROLL_N 4)
334+
set(SYMV_P 16)
335+
elseif ("${TCORE}" STREQUAL "POWER6")
336+
file(APPEND ${TARGET_CONF_TEMP}
337+
"#define L1_DATA_SIZE 32768\n"
338+
"#define L1_DATA_LINESIZE 128\n"
339+
"#define L2_SIZE 524288\n"
340+
"#define L2_LINESIZE 128 \n"
341+
"#define DTB_DEFAULT_ENTRIES 128\n"
342+
"#define DTB_SIZE 4096\n"
343+
"#define L2_ASSOCIATIVE 8\n")
344+
set(SGEMM_UNROLL_M 4)
345+
set(SGEMM_UNROLL_N 4)
346+
set(DGEMM_UNROLL_M 4)
347+
set(DGEMM_UNROLL_N 4)
348+
set(CGEMM_UNROLL_M 2)
349+
set(CGEMM_UNROLL_N 4)
350+
set(ZGEMM_UNROLL_M 2)
351+
set(ZGEMM_UNROLL_N 4)
352+
set(SYMV_P 8)
353+
elseif ("${TCORE}" STREQUAL "POWER8")
354+
file(APPEND ${TARGET_CONF_TEMP}
355+
"#define L1_DATA_SIZE 32768\n"
356+
"#define L1_DATA_LINESIZE 128\n"
357+
"#define L2_SIZE 524288\n"
358+
"#define L2_LINESIZE 128 \n"
359+
"#define DTB_DEFAULT_ENTRIES 128\n"
360+
"#define DTB_SIZE 4096\n"
361+
"#define L2_ASSOCIATIVE 8\n")
362+
set(SGEMM_UNROLL_M 16)
363+
set(SGEMM_UNROLL_N 8)
364+
set(DGEMM_UNROLL_M 16)
365+
set(DGEMM_UNROLL_N 4)
366+
set(CGEMM_UNROLL_M 8)
367+
set(CGEMM_UNROLL_N 4)
368+
set(ZGEMM_UNROLL_M 8)
369+
set(ZGEMM_UNROLL_N 2)
370+
set(SYMV_P 8)
371+
elseif ("${TCORE}" STREQUAL "POWER9")
372+
file(APPEND ${TARGET_CONF_TEMP}
373+
"#define L1_DATA_SIZE 32768\n"
374+
"#define L1_DATA_LINESIZE 128\n"
375+
"#define L2_SIZE 524288\n"
376+
"#define L2_LINESIZE 128 \n"
377+
"#define DTB_DEFAULT_ENTRIES 128\n"
378+
"#define DTB_SIZE 4096\n"
379+
"#define L2_ASSOCIATIVE 8\n")
380+
set(SGEMM_UNROLL_M 16)
381+
set(SGEMM_UNROLL_N 8)
382+
set(DGEMM_UNROLL_M 16)
383+
set(DGEMM_UNROLL_N 4)
384+
set(CGEMM_UNROLL_M 8)
385+
set(CGEMM_UNROLL_N 4)
386+
set(ZGEMM_UNROLL_M 8)
387+
set(ZGEMM_UNROLL_N 2)
388+
set(SYMV_P 8)
312389
endif()
313390

314391
# Or should this actually be NUM_CORES?

driver/others/dynamic_arm64.c

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,18 @@
4343
#endif
4444

4545
extern gotoblas_t gotoblas_ARMV8;
46+
extern gotoblas_t gotoblas_CORTEXA53;
4647
extern gotoblas_t gotoblas_CORTEXA57;
48+
extern gotoblas_t gotoblas_CORTEXA72;
49+
extern gotoblas_t gotoblas_CORTEXA73;
50+
extern gotoblas_t gotoblas_FALKOR;
4751
extern gotoblas_t gotoblas_THUNDERX;
4852
extern gotoblas_t gotoblas_THUNDERX2T99;
53+
extern gotoblas_t gotoblas_TSV110;
4954

5055
extern void openblas_warning(int verbose, const char * msg);
5156

52-
#define NUM_CORETYPES 4
57+
#define NUM_CORETYPES 9
5358

5459
/*
5560
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -65,17 +70,27 @@ extern void openblas_warning(int verbose, const char * msg);
6570

6671
static char *corename[] = {
6772
"armv8",
73+
"cortexa53",
6874
"cortexa57",
75+
"cortexa72",
76+
"cortexa73",
77+
"falkor",
6978
"thunderx",
7079
"thunderx2t99",
80+
"tsv110",
7181
"unknown"
7282
};
7383

7484
char *gotoblas_corename(void) {
7585
if (gotoblas == &gotoblas_ARMV8) return corename[ 0];
76-
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 1];
77-
if (gotoblas == &gotoblas_THUNDERX) return corename[ 2];
78-
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 3];
86+
if (gotoblas == &gotoblas_CORTEXA53) return corename[ 1];
87+
if (gotoblas == &gotoblas_CORTEXA57) return corename[ 2];
88+
if (gotoblas == &gotoblas_CORTEXA72) return corename[ 3];
89+
if (gotoblas == &gotoblas_CORTEXA73) return corename[ 4];
90+
if (gotoblas == &gotoblas_FALKOR) return corename[ 5];
91+
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
92+
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
93+
if (gotoblas == &gotoblas_TSV110) return corename[ 8];
7994
return corename[NUM_CORETYPES];
8095
}
8196

@@ -96,9 +111,14 @@ static gotoblas_t *force_coretype(char *coretype) {
96111
switch (found)
97112
{
98113
case 0: return (&gotoblas_ARMV8);
99-
case 1: return (&gotoblas_CORTEXA57);
100-
case 2: return (&gotoblas_THUNDERX);
101-
case 3: return (&gotoblas_THUNDERX2T99);
114+
case 1: return (&gotoblas_CORTEXA53);
115+
case 2: return (&gotoblas_CORTEXA57);
116+
case 3: return (&gotoblas_CORTEXA72);
117+
case 4: return (&gotoblas_CORTEXA73);
118+
case 5: return (&gotoblas_FALKOR);
119+
case 6: return (&gotoblas_THUNDERX);
120+
case 7: return (&gotoblas_THUNDERX2T99);
121+
case 8: return (&gotoblas_TSV110);
102122
}
103123
snprintf(message, 128, "Core not found: %s\n", coretype);
104124
openblas_warning(1, message);
@@ -136,10 +156,14 @@ static gotoblas_t *get_coretype(void) {
136156
case 0x41: // ARM
137157
switch (part)
138158
{
139-
case 0xd07: // Cortex A57
140-
case 0xd08: // Cortex A72
141159
case 0xd03: // Cortex A53
160+
return &gotoblas_CORTEXA53;
161+
case 0xd07: // Cortex A57
142162
return &gotoblas_CORTEXA57;
163+
case 0xd08: // Cortex A72
164+
return &gotoblas_CORTEXA72;
165+
case 0xd09: // Cortex A73
166+
return &gotoblas_CORTEXA73;
143167
}
144168
break;
145169
case 0x42: // Broadcom
@@ -158,6 +182,20 @@ static gotoblas_t *get_coretype(void) {
158182
return &gotoblas_THUNDERX2T99;
159183
}
160184
break;
185+
case 0x48: // HiSilicon
186+
switch (part)
187+
{
188+
case 0xd01: // tsv110
189+
return &gotoblas_TSV110;
190+
}
191+
break;
192+
case 0x51: // Qualcomm
193+
switch (part)
194+
{
195+
case 0xc00: // Falkor
196+
return &gotoblas_FALKOR;
197+
}
198+
break;
161199
}
162200
return NULL;
163201
}

0 commit comments

Comments
 (0)