Skip to content

Commit 5944441

Browse files
authored
Merge pull request #4280 from ChipKerchner/dynamicDispatchAIXandClang
Add dynamic dispatch to AIX and clang for Power
2 parents 0de786c + 7dcb2d6 commit 5944441

File tree

1 file changed

+74
-27
lines changed

1 file changed

+74
-27
lines changed

driver/others/dynamic_power.c

Lines changed: 74 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
extern gotoblas_t gotoblas_POWER6;
55
extern gotoblas_t gotoblas_POWER8;
6-
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
6+
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
77
extern gotoblas_t gotoblas_POWER9;
88
#endif
99
#ifdef HAVE_P10_SUPPORT
@@ -20,14 +20,14 @@ static char *corename[] = {
2020
"POWER10"
2121
};
2222

23-
#define NUM_CORETYPES 4
23+
#define NUM_CORETYPES 5
2424

2525
char *gotoblas_corename(void) {
2626
#ifndef C_PGI
2727
if (gotoblas == &gotoblas_POWER6) return corename[1];
2828
#endif
2929
if (gotoblas == &gotoblas_POWER8) return corename[2];
30-
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
30+
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
3131
if (gotoblas == &gotoblas_POWER9) return corename[3];
3232
#endif
3333
#ifdef HAVE_P10_SUPPORT
@@ -36,13 +36,37 @@ char *gotoblas_corename(void) {
3636
return corename[0];
3737
}
3838

39-
#if defined(__clang__)
40-
static int __builtin_cpu_supports(char* arg)
39+
#define CPU_UNKNOWN 0
40+
#define CPU_POWER5 5
41+
#define CPU_POWER6 6
42+
#define CPU_POWER8 8
43+
#define CPU_POWER9 9
44+
#define CPU_POWER10 10
45+
46+
#ifdef _AIX
47+
#include <sys/systemcfg.h>
48+
49+
static int cpuid(void)
4150
{
42-
return 0;
43-
}
51+
int arch = _system_configuration.implementation;
52+
#ifdef POWER_6
53+
if (arch == POWER_6) return CPU_POWER6;
4454
#endif
45-
55+
#ifdef POWER_7
56+
else if (arch == POWER_7) return CPU_POWER6;
57+
#endif
58+
#ifdef POWER_8
59+
else if (arch == POWER_8) return CPU_POWER8;
60+
#endif
61+
#ifdef POWER_9
62+
else if (arch == POWER_9) return CPU_POWER9;
63+
#endif
64+
#ifdef POWER_10
65+
else if (arch == POWER_10) return CPU_POWER10;
66+
#endif
67+
return CPU_UNKNOWN;
68+
}
69+
#else
4670
#if defined(C_PGI) || defined(__clang__)
4771
/*
4872
* NV HPC compilers do not yet implement __builtin_cpu_is().
@@ -53,21 +77,12 @@ static int __builtin_cpu_supports(char* arg)
5377
* what was requested.
5478
*/
5579

56-
#include <string.h>
57-
5880
/*
5981
* Define POWER processor version table.
6082
*
6183
* NOTE NV HPC SDK compilers only support POWER8 and POWER9 at this time
6284
*/
6385

64-
#define CPU_UNKNOWN 0
65-
#define CPU_POWER5 5
66-
#define CPU_POWER6 6
67-
#define CPU_POWER8 8
68-
#define CPU_POWER9 9
69-
#define CPU_POWER10 10
70-
7186
static struct {
7287
uint32_t pvr_mask;
7388
uint32_t pvr_value;
@@ -160,7 +175,8 @@ static struct {
160175
},
161176
};
162177

163-
static int __builtin_cpu_is(const char *cpu) {
178+
static int cpuid(void)
179+
{
164180
int i;
165181
uint32_t pvr;
166182
uint32_t cpu_type;
@@ -178,15 +194,42 @@ static int __builtin_cpu_is(const char *cpu) {
178194
pvrPOWER[i].cpu_name, pvrPOWER[i].cpu_type);
179195
#endif
180196
cpu_type = pvrPOWER[i].cpu_type;
197+
return (int)(cpu_type);
198+
}
199+
#endif /* C_PGI */
200+
#endif /* _AIX */
201+
202+
#ifndef __BUILTIN_CPU_SUPPORTS__
203+
#include <string.h>
181204

182-
if (!strcmp(cpu, "power8"))
183-
return cpu_type == CPU_POWER8;
184-
if (!strcmp(cpu, "power9"))
185-
return cpu_type == CPU_POWER9;
186-
return 0;
205+
static int __builtin_cpu_is(const char *arg)
206+
{
207+
static int ipinfo = -1;
208+
if (ipinfo < 0) {
209+
ipinfo = cpuid();
210+
}
211+
#ifdef HAVE_P10_SUPPORT
212+
if (ipinfo == CPU_POWER10) {
213+
if (!strcmp(arg, "power10")) return 1;
214+
}
215+
#endif
216+
if (ipinfo == CPU_POWER9) {
217+
if (!strcmp(arg, "power9")) return 1;
218+
} else if (ipinfo == CPU_POWER8) {
219+
if (!strcmp(arg, "power8")) return 1;
220+
#ifndef C_PGI
221+
} else if (ipinfo == CPU_POWER6) {
222+
if (!strcmp(arg, "power6")) return 1;
223+
#endif
224+
}
225+
return 0;
187226
}
188227

189-
#endif /* C_PGI */
228+
/*
 * Stand-in for the compiler builtin of the same name, used when
 * __BUILTIN_CPU_SUPPORTS__ is not defined.
 *
 * arg: name of the CPU feature being queried; it is not examined.
 *
 * Always returns 0, i.e. every feature is reported as unsupported.
 */
static int __builtin_cpu_supports(const char *arg)
{
    (void)arg;  /* feature name is intentionally ignored */
    return 0;
}
232+
#endif
190233

191234
static gotoblas_t *get_coretype(void) {
192235

@@ -196,19 +239,23 @@ static gotoblas_t *get_coretype(void) {
196239
#endif
197240
if (__builtin_cpu_is("power8"))
198241
return &gotoblas_POWER8;
199-
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
242+
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
200243
if (__builtin_cpu_is("power9"))
201244
return &gotoblas_POWER9;
202245
#endif
203246
#ifdef HAVE_P10_SUPPORT
247+
#if defined(_AIX) || defined(__clang__)
248+
if (__builtin_cpu_is("power10"))
249+
#else
204250
if (__builtin_cpu_supports ("arch_3_1") && __builtin_cpu_supports ("mma"))
251+
#endif
205252
return &gotoblas_POWER10;
206253
#endif
207254
/* Fall back to the POWER9 implementation if the toolchain is too old or the MMA feature is not set */
208255
#if (!defined __GNUC__) || ( __GNUC__ >= 11) || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
209256
if (__builtin_cpu_is("power10"))
210257
return &gotoblas_POWER9;
211-
#endif
258+
#endif
212259
return NULL;
213260
}
214261

@@ -233,7 +280,7 @@ static gotoblas_t *force_coretype(char * coretype) {
233280
case 1: return (&gotoblas_POWER6);
234281
#endif
235282
case 2: return (&gotoblas_POWER8);
236-
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
283+
#if ((!defined __GNUC__) || ( __GNUC__ >= 6)) || defined(__clang__)
237284
case 3: return (&gotoblas_POWER9);
238285
#endif
239286
#ifdef HAVE_P10_SUPPORT

0 commit comments

Comments
 (0)