Skip to content

Commit 744779d

Browse files
authored
Merge pull request #2084 from RashmicaG/develop
Add in runtime CPU detection for POWER.
2 parents e06b843 + bcdf1d4 commit 744779d

File tree

6 files changed

+170
-32
lines changed

6 files changed

+170
-32
lines changed

Makefile.system

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,12 @@ DYNAMIC_CORE += THUNDERX
528528
DYNAMIC_CORE += THUNDERX2T99
529529
endif
530530

531+
ifeq ($(ARCH), power)
532+
DYNAMIC_CORE = POWER6
533+
DYNAMIC_CORE += POWER8
534+
DYNAMIC_CORE += POWER9
535+
endif
536+
531537
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
532538
ifndef DYNAMIC_CORE
533539
override DYNAMIC_ARCH=

driver/others/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,12 @@ ifeq ($(DYNAMIC_ARCH), 1)
1818
ifeq ($(ARCH),arm64)
1919
COMMONOBJS += dynamic_arm64.$(SUFFIX)
2020
else
21+
ifeq ($(ARCH),power)
22+
COMMONOBJS += dynamic_power.$(SUFFIX)
23+
else
2124
COMMONOBJS += dynamic.$(SUFFIX)
2225
endif
26+
endif
2327
else
2428
COMMONOBJS += parameter.$(SUFFIX)
2529
endif
@@ -78,8 +82,12 @@ ifeq ($(DYNAMIC_ARCH), 1)
7882
ifeq ($(ARCH),arm64)
7983
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_arm64.$(SUFFIX)
8084
else
85+
ifeq ($(ARCH),power)
86+
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX)
87+
else
8188
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
8289
endif
90+
endif
8391
else
8492
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
8593
endif

driver/others/dynamic_power.c

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
2+
#include "common.h"
3+
4+
extern gotoblas_t gotoblas_POWER6;
5+
extern gotoblas_t gotoblas_POWER8;
6+
extern gotoblas_t gotoblas_POWER9;
7+
8+
extern void openblas_warning(int verbose, const char *msg);
9+
10+
/*
 * Printable names for the selectable cores.  Slot 0 is the placeholder
 * returned when the active kernel table matches none of the known cores.
 */
static char *corename[] = {
  "unknown",  /* 0 */
  "POWER6",   /* 1 */
  "POWER8",   /* 2 */
  "POWER9"    /* 3 */
};

/* Number of entries in corename[], placeholder included. */
#define NUM_CORETYPES 4
18+
19+
char *gotoblas_corename(void) {
20+
if (gotoblas == &gotoblas_POWER6) return corename[1];
21+
if (gotoblas == &gotoblas_POWER8) return corename[2];
22+
if (gotoblas == &gotoblas_POWER9) return corename[3];
23+
return corename[0];
24+
}
25+
26+
static gotoblas_t *get_coretype(void) {
27+
28+
if (__builtin_cpu_is("power6") || __builtin_cpu_is("power6x"))
29+
return &gotoblas_POWER6;
30+
if (__builtin_cpu_is("power8"))
31+
return &gotoblas_POWER8;
32+
if (__builtin_cpu_is("power9"))
33+
return &gotoblas_POWER9;
34+
return NULL;
35+
}
36+
37+
static gotoblas_t *force_coretype(char * coretype) {
38+
39+
int i ;
40+
int found = -1;
41+
char message[128];
42+
43+
for ( i = 0 ; i < NUM_CORETYPES; i++)
44+
{
45+
if (!strncasecmp(coretype, corename[i], 20))
46+
{
47+
found = i;
48+
break;
49+
}
50+
}
51+
52+
switch (found)
53+
{
54+
case 1: return (&gotoblas_POWER6);
55+
case 2: return (&gotoblas_POWER8);
56+
case 3: return (&gotoblas_POWER9);
57+
default: return NULL;
58+
}
59+
snprintf(message, 128, "Core not found: %s\n", coretype);
60+
openblas_warning(1, message);
61+
}
62+
63+
void gotoblas_dynamic_init(void) {
64+
65+
char coremsg[128];
66+
char coren[22];
67+
char *p;
68+
69+
70+
if (gotoblas) return;
71+
72+
p = getenv("OPENBLAS_CORETYPE");
73+
if ( p )
74+
{
75+
gotoblas = force_coretype(p);
76+
}
77+
else
78+
{
79+
gotoblas = get_coretype();
80+
}
81+
82+
if (gotoblas == NULL)
83+
{
84+
snprintf(coremsg, 128, "Falling back to POWER8 core\n");
85+
openblas_warning(1, coremsg);
86+
gotoblas = &gotoblas_POWER8;
87+
}
88+
89+
if (gotoblas && gotoblas -> init) {
90+
strncpy(coren,gotoblas_corename(),20);
91+
sprintf(coremsg, "Core: %s\n",coren);
92+
openblas_warning(2, coremsg);
93+
gotoblas -> init();
94+
} else {
95+
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
96+
exit(1);
97+
}
98+
}
99+
100+
/*
 * Drop the installed kernel table by clearing `gotoblas`, so that a later
 * gotoblas_dynamic_init() call selects a table again.
 */
void gotoblas_dynamic_quit(void)
{
  gotoblas = NULL;
}

kernel/power/KERNEL.POWER8

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,40 @@ SGEMMINCOPY = ../generic/gemm_ncopy_16.c
1313
SGEMMITCOPY = sgemm_tcopy_16_power8.S
1414
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
1515
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
16-
SGEMMINCOPYOBJ = sgemm_incopy.o
17-
SGEMMITCOPYOBJ = sgemm_itcopy.o
18-
SGEMMONCOPYOBJ = sgemm_oncopy.o
19-
SGEMMOTCOPYOBJ = sgemm_otcopy.o
16+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
17+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
18+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
19+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
2020

2121
DGEMMKERNEL = dgemm_kernel_16x4_power8.S
2222
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
2323
DGEMMITCOPY = dgemm_tcopy_16_power8.S
2424
DGEMMONCOPY = dgemm_ncopy_4_power8.S
2525
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
26-
DGEMMINCOPYOBJ = dgemm_incopy.o
27-
DGEMMITCOPYOBJ = dgemm_itcopy.o
28-
DGEMMONCOPYOBJ = dgemm_oncopy.o
29-
DGEMMOTCOPYOBJ = dgemm_otcopy.o
26+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
27+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
28+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
29+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
3030

3131
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
3232
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
3333
CGEMMITCOPY = cgemm_tcopy_8_power8.S
3434
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
3535
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
36-
CGEMMONCOPYOBJ = cgemm_oncopy.o
37-
CGEMMOTCOPYOBJ = cgemm_otcopy.o
38-
CGEMMINCOPYOBJ = cgemm_incopy.o
39-
CGEMMITCOPYOBJ = cgemm_itcopy.o
36+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
37+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
38+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
39+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
4040

4141
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
4242
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
4343
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
4444
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
4545
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
46-
ZGEMMONCOPYOBJ = zgemm_oncopy.o
47-
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
48-
ZGEMMINCOPYOBJ = zgemm_incopy.o
49-
ZGEMMITCOPYOBJ = zgemm_itcopy.o
46+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
47+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
48+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
49+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
5050

5151
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
5252
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

kernel/power/KERNEL.POWER9

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,40 @@ SGEMMINCOPY = ../generic/gemm_ncopy_16.c
1313
SGEMMITCOPY = sgemm_tcopy_16_power8.S
1414
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
1515
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
16-
SGEMMINCOPYOBJ = sgemm_incopy.o
17-
SGEMMITCOPYOBJ = sgemm_itcopy.o
18-
SGEMMONCOPYOBJ = sgemm_oncopy.o
19-
SGEMMOTCOPYOBJ = sgemm_otcopy.o
16+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
17+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
18+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
19+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
2020

2121
DGEMMKERNEL = dgemm_kernel_power9.S
2222
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
2323
DGEMMITCOPY = dgemm_tcopy_16_power8.S
2424
DGEMMONCOPY = dgemm_ncopy_4_power8.S
2525
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
26-
DGEMMINCOPYOBJ = dgemm_incopy.o
27-
DGEMMITCOPYOBJ = dgemm_itcopy.o
28-
DGEMMONCOPYOBJ = dgemm_oncopy.o
29-
DGEMMOTCOPYOBJ = dgemm_otcopy.o
26+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
27+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
28+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
29+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
3030

3131
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
3232
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
3333
CGEMMITCOPY = cgemm_tcopy_8_power8.S
3434
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
3535
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
36-
CGEMMONCOPYOBJ = cgemm_oncopy.o
37-
CGEMMOTCOPYOBJ = cgemm_otcopy.o
38-
CGEMMINCOPYOBJ = cgemm_incopy.o
39-
CGEMMITCOPYOBJ = cgemm_itcopy.o
36+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
37+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
38+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
39+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
4040

4141
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
4242
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
4343
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
4444
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
4545
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
46-
ZGEMMONCOPYOBJ = zgemm_oncopy.o
47-
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
48-
ZGEMMINCOPYOBJ = zgemm_incopy.o
49-
ZGEMMITCOPYOBJ = zgemm_itcopy.o
46+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
47+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
48+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
49+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
5050

5151
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
5252
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

kernel/setparam-ref.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,27 @@ static void init_parameter(void) {
718718

719719
}
720720
#else // defined(ARCH_ARM64)
721+
#if defined(ARCH_POWER)
722+
/*
 * ARCH_POWER variant: copy the compile-time *GEMM_DEFAULT_P/Q/R constants
 * into the p/q/r fields of TABLE_NAME for the single, double, complex and
 * double-complex GEMM variants.  No runtime probing is done here.
 */
static void init_parameter(void) {

  /* single precision */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;

  /* double precision */
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;

  /* single-precision complex */
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;

  /* double-precision complex */
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
}
740+
#else //POWER
741+
721742
#ifdef ARCH_X86
722743
static int get_l2_size_old(void){
723744
int i, eax, ebx, ecx, edx, cpuid_level;
@@ -1303,4 +1324,5 @@ static void init_parameter(void) {
13031324

13041325

13051326
}
1327+
#endif //POWER
13061328
#endif //defined(ARCH_ARM64)

0 commit comments

Comments
 (0)