Skip to content

Commit 673e5a0

Browse files
authored
Replace several POWER8/9 C kernels with their gcc7-generated assembly versions (#2263)
* Add gcc7-generated assembly files for POWER8/9 isa/ica-min/max and POWER9 caxpy, to work around internal compiler errors encountered when compiling the original C source with gcc 4 and 5, and wrong code generated by gcc 8.3.0.
* Use gcc-generated assembly instead of the original C sources to work around internal compiler errors encountered with gcc 4.8/5.4 and wrong code generation by gcc 8.3.
* Use gcc-generated assembly instead of the original C source to work around internal compiler errors encountered with gcc 4.8 and 5.4, and wrong code generation by gcc 8.3.
* Add gcc7-generated assembler version of caxpy for POWER8 to work around wrong code generated by gcc 8.3.
* Handle CONJ define for caxpyc.
* Handle CONJ define for caxpyc.
* Add gcc7-generated assembly cdot for POWER9.
* Use prebuilt assembly for POWER9 cdot created with gcc 7.3.1 to work around an ICE in older gcc versions.
* Exclude POWER9 from DYNAMIC_ARCH when the gcc version is lower than 6.
* Update Makefile.system.
* Use PROLOGUE macro to ensure correct function name for DYNAMIC_ARCH.
* Disable POWER9 with old gcc versions.
1 parent bfa2cc7 commit 673e5a0

15 files changed

+4699
-13
lines changed

Makefile.system

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,12 +322,13 @@ CCOMMON_OPT += -DMS_ABI
322322
endif
323323

324324
ifeq ($(C_COMPILER), GCC)
325-
#Test for supporting MS_ABI
325+
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
326326
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
327327
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
328+
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
328329
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
329330
ifeq ($(GCCVERSIONGT4), 1)
330-
# GCC Majar version > 4
331+
# GCC Major version > 4
331332
# It is compatible with MSVC ABI.
332333
CCOMMON_OPT += -DMS_ABI
333334
endif
@@ -554,8 +555,17 @@ endif
554555
ifeq ($(ARCH), power)
555556
DYNAMIC_CORE = POWER6
556557
DYNAMIC_CORE += POWER8
558+
ifneq ($(C_COMPILER), GCC)
557559
DYNAMIC_CORE += POWER9
558560
endif
561+
ifeq ($(C_COMPILER), GCC)
562+
ifeq ($(GCCVERSIONGT5), 1)
563+
DYNAMIC_CORE += POWER9
564+
else
565+
# Tell the user why POWER9 is missing from DYNAMIC_CORE. The function name must
# be followed by whitespace, not a comma: "$(info, ...)" is not recognised as a
# call to the info function, so make expands it as an (undefined) variable
# reference and prints nothing.
$(info OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
566+
endif
567+
endif
568+
endif
559569

560570
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
561571
ifndef DYNAMIC_CORE

driver/others/dynamic_power.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44
extern gotoblas_t gotoblas_POWER6;
55
extern gotoblas_t gotoblas_POWER8;
6+
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
67
extern gotoblas_t gotoblas_POWER9;
8+
#endif
79

810
extern void openblas_warning(int verbose, const char *msg);
911

@@ -19,7 +21,9 @@ static char *corename[] = {
1921
char *gotoblas_corename(void) {
2022
if (gotoblas == &gotoblas_POWER6) return corename[1];
2123
if (gotoblas == &gotoblas_POWER8) return corename[2];
24+
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
2225
if (gotoblas == &gotoblas_POWER9) return corename[3];
26+
#endif
2327
return corename[0];
2428
}
2529

@@ -29,8 +33,10 @@ static gotoblas_t *get_coretype(void) {
2933
return &gotoblas_POWER6;
3034
if (__builtin_cpu_is("power8"))
3135
return &gotoblas_POWER8;
36+
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
3237
if (__builtin_cpu_is("power9"))
3338
return &gotoblas_POWER9;
39+
#endif
3440
return NULL;
3541
}
3642

@@ -53,7 +59,9 @@ static gotoblas_t *force_coretype(char * coretype) {
5359
{
5460
case 1: return (&gotoblas_POWER6);
5561
case 2: return (&gotoblas_POWER8);
62+
#if (!defined C_GCC) || (GCC_VERSION >= 60000)
5663
case 3: return (&gotoblas_POWER9);
64+
#endif
5765
default: return NULL;
5866
}
5967
snprintf(message, 128, "Core not found: %s\n", coretype);

kernel/power/KERNEL.POWER8

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,14 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
8989
#SMINKERNEL = ../arm/min.c
9090
#DMINKERNEL = ../arm/min.c
9191
#
92-
ISAMAXKERNEL = isamax.c
92+
ISAMAXKERNEL = isamax_power8.S
9393
IDAMAXKERNEL = idamax.c
94-
ICAMAXKERNEL = icamax.c
94+
ICAMAXKERNEL = icamax_power8.S
9595
IZAMAXKERNEL = izamax.c
9696
#
97-
ISAMINKERNEL = isamin.c
97+
ISAMINKERNEL = isamin_power8.S
9898
IDAMINKERNEL = idamin.c
99-
ICAMINKERNEL = icamin.c
99+
ICAMINKERNEL = icamin_power8.S
100100
IZAMINKERNEL = izamin.c
101101
#
102102
#ISMAXKERNEL = ../arm/imax.c
@@ -112,7 +112,7 @@ ZASUMKERNEL = zasum.c
112112
#
113113
SAXPYKERNEL = saxpy.c
114114
DAXPYKERNEL = daxpy.c
115-
CAXPYKERNEL = caxpy.c
115+
CAXPYKERNEL = caxpy_power8.S
116116
ZAXPYKERNEL = zaxpy.c
117117
#
118118
SCOPYKERNEL = scopy.c

kernel/power/KERNEL.POWER9

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,14 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
8989
#SMINKERNEL = ../arm/min.c
9090
#DMINKERNEL = ../arm/min.c
9191
#
92-
ISAMAXKERNEL = isamax.c
92+
ISAMAXKERNEL = isamax_power9.S
9393
IDAMAXKERNEL = idamax.c
94-
ICAMAXKERNEL = icamax.c
94+
ICAMAXKERNEL = icamax_power9.S
9595
IZAMAXKERNEL = izamax.c
9696
#
97-
ISAMINKERNEL = isamin.c
97+
ISAMINKERNEL = isamin_power9.S
9898
IDAMINKERNEL = idamin.c
99-
ICAMINKERNEL = icamin.c
99+
ICAMINKERNEL = icamin_power9.S
100100
IZAMINKERNEL = izamin.c
101101
#
102102
#ISMAXKERNEL = ../arm/imax.c
@@ -112,7 +112,7 @@ ZASUMKERNEL = zasum.c
112112
#
113113
SAXPYKERNEL = saxpy.c
114114
DAXPYKERNEL = daxpy.c
115-
CAXPYKERNEL = caxpy.c
115+
CAXPYKERNEL = caxpy_power9.S
116116
ZAXPYKERNEL = zaxpy.c
117117
#
118118
SCOPYKERNEL = scopy.c
@@ -123,7 +123,7 @@ ZCOPYKERNEL = zcopy.c
123123
SDOTKERNEL = sdot.c
124124
DDOTKERNEL = ddot.c
125125
DSDOTKERNEL = sdot.c
126-
CDOTKERNEL = cdot.c
126+
CDOTKERNEL = cdot_power9.S
127127
ZDOTKERNEL = zdot.c
128128
#
129129
SNRM2KERNEL = ../arm/nrm2.c

0 commit comments

Comments
 (0)