Skip to content

Commit 4dec151

Browse files
authored
Merge pull request #2070 from quickwritereader/develop
power9 makefile. dgemm based on power8 kernel with following changes …
2 parents 3ae122e + 7c51cc8 commit 4dec151

36 files changed

+6133
-36
lines changed

Makefile.power

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,15 @@ else
99
USE_OPENMP = 1
1010
endif
1111

12-
12+
# POWER9 build flags for the C (COMMON_OPT) and Fortran (FCOMMON_OPT) compilers.
# The USE_OPENMP = 1 branch differs from the else branch only by the added
# -DUSE_OPENMP and -fopenmp options.
# NOTE(review): in COMMON_OPT, -fno-fast-math follows -Ofast, presumably to keep
# the -Ofast optimization level without its fast-math part - confirm.
ifeq ($(CORE), POWER9)
13+
ifeq ($(USE_OPENMP), 1)
14+
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
15+
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
16+
else
17+
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
18+
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
19+
endif
20+
endif
1321

1422
ifeq ($(CORE), POWER8)
1523
ifeq ($(USE_OPENMP), 1)

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ POWER5
4848
POWER6
4949
POWER7
5050
POWER8
51+
POWER9
5152
PPCG4
5253
PPC970
5354
PPC970MP

common.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,11 @@ typedef int blasint;
348348
#endif
349349
#endif
350350

351+
/* On POWER9 builds, supply a default YIELDING (an inline-asm sequence of eight
 * nop instructions) unless YIELDING is already defined. The expansion itself
 * ends with a semicolon. */
#ifdef POWER9
352+
#ifndef YIELDING
353+
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
354+
#endif
355+
#endif
351356

352357
/*
353358
#ifdef PILEDRIVER

common_power.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
#ifndef COMMON_POWER
4040
#define COMMON_POWER
4141

42-
#if defined(POWER8)
42+
#if defined(POWER8) || defined(POWER9)
4343
#define MB __asm__ __volatile__ ("eieio":::"memory")
4444
#define WMB __asm__ __volatile__ ("eieio":::"memory")
4545
#else
@@ -241,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
241241
#define HAVE_PREFETCH
242242
#endif
243243

244-
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || ( defined(PPC970) && defined(OS_DARWIN) )
244+
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || ( defined(PPC970) && defined(OS_DARWIN) )
245245
#define DCBT_ARG 0
246246
#else
247247
#define DCBT_ARG 8
@@ -263,7 +263,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
263263
#define L1_PREFETCH dcbtst
264264
#endif
265265

266-
#if defined(POWER8)
266+
#if defined(POWER8) || defined(POWER9)
267267
#define L1_DUALFETCH
268268
#define L1_PREFETCHSIZE (16 + 128 * 100)
269269
#define L1_PREFETCH dcbtst
@@ -812,7 +812,7 @@ Lmcount$lazy_ptr:
812812
#define BUFFER_SIZE ( 2 << 20)
813813
#elif defined(PPC440FP2)
814814
#define BUFFER_SIZE ( 16 << 20)
815-
#elif defined(POWER8)
815+
#elif defined(POWER8) || defined(POWER9)
816816
#define BUFFER_SIZE ( 64 << 20)
817817
#else
818818
#define BUFFER_SIZE ( 16 << 20)

cpuid_power.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ char *corename[] = {
9494
"CELL",
9595
"PPCG4",
9696
"POWER8",
97-
"POWER8"
97+
"POWER9"
9898
};
9999

100100
int detect(void){
@@ -124,7 +124,7 @@ int detect(void){
124124
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
125125
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
126126
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
127-
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
127+
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
128128
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
129129
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
130130

@@ -156,7 +156,7 @@ int detect(void){
156156
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
157157
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
158158
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
159-
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
159+
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
160160
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
161161
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
162162
return CPUTYPE_POWER5;
@@ -180,7 +180,7 @@ int id;
180180
__asm __volatile("mfpvr %0" : "=r"(id));
181181
switch ( id >> 16 ) {
182182
case 0x4e: // POWER9
183-
return CPUTYPE_POWER8;
183+
return CPUTYPE_POWER9;
184184
break;
185185
case 0x4d:
186186
case 0x4b: // POWER8/8E

getarch.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
637637
#define CORENAME "POWER8"
638638
#endif
639639

640+
/* Configuration selected when FORCE_POWER9 is defined: defines FORCE plus the
 * architecture, sub-architecture, directory, library and core name strings,
 * and ARCHCONFIG, a string of -D options (POWER9 plus L1/L2 cache and DTB
 * parameters).
 * NOTE(review): the cache/DTB values are hard-coded here - confirm they match
 * the intended POWER9 hardware. */
#if defined(FORCE_POWER9)
641+
#define FORCE
642+
#define ARCHITECTURE "POWER"
643+
#define SUBARCHITECTURE "POWER9"
644+
#define SUBDIRNAME "power"
645+
#define ARCHCONFIG "-DPOWER9 " \
646+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
647+
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
648+
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
649+
#define LIBNAME "power9"
650+
#define CORENAME "POWER9"
651+
#endif
640652

641653
#ifdef FORCE_PPCG4
642654
#define FORCE

kernel/Makefile.L3

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ ifeq ($(CORE), POWER8)
4444
USE_TRMM = 1
4545
endif
4646

47+
# Set USE_TRMM = 1 when CORE is POWER9, the same setting the POWER8 block uses.
ifeq ($(CORE), POWER9)
48+
USE_TRMM = 1
49+
endif
50+
4751
ifeq ($(ARCH), zarch)
4852
USE_TRMM = 1
4953
endif

kernel/power/KERNEL.POWER9

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
#SGEMM_BETA = ../generic/gemm_beta.c
2+
#DGEMM_BETA = ../generic/gemm_beta.c
3+
#CGEMM_BETA = ../generic/zgemm_beta.c
4+
#ZGEMM_BETA = ../generic/zgemm_beta.c
5+
6+
# TRMM kernel sources: only DTRMMKERNEL points at the POWER9 file
# (dgemm_kernel_power9.S); the S, C and Z variants use POWER8 sources.
STRMMKERNEL = strmm_kernel_16x8_power8.S
7+
DTRMMKERNEL = dgemm_kernel_power9.S
8+
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
9+
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
10+
11+
SGEMMKERNEL = sgemm_kernel_16x8_power8.S
12+
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
13+
SGEMMITCOPY = sgemm_tcopy_16_power8.S
14+
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
15+
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
16+
SGEMMINCOPYOBJ = sgemm_incopy.o
17+
SGEMMITCOPYOBJ = sgemm_itcopy.o
18+
SGEMMONCOPYOBJ = sgemm_oncopy.o
19+
SGEMMOTCOPYOBJ = sgemm_otcopy.o
20+
21+
# DGEMM sources: the kernel is the POWER9 file; the *COPY sources are a mix of
# POWER8 assembly (ITCOPY, ONCOPY) and generic C (INCOPY, OTCOPY). The *OBJ
# variables name the object files for the four copy routines.
DGEMMKERNEL = dgemm_kernel_power9.S
22+
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
23+
DGEMMITCOPY = dgemm_tcopy_16_power8.S
24+
DGEMMONCOPY = dgemm_ncopy_4_power8.S
25+
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
26+
DGEMMINCOPYOBJ = dgemm_incopy.o
27+
DGEMMITCOPYOBJ = dgemm_itcopy.o
28+
DGEMMONCOPYOBJ = dgemm_oncopy.o
29+
DGEMMOTCOPYOBJ = dgemm_otcopy.o
30+
31+
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
32+
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
33+
CGEMMITCOPY = cgemm_tcopy_8_power8.S
34+
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
35+
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
36+
CGEMMONCOPYOBJ = cgemm_oncopy.o
37+
CGEMMOTCOPYOBJ = cgemm_otcopy.o
38+
CGEMMINCOPYOBJ = cgemm_incopy.o
39+
CGEMMITCOPYOBJ = cgemm_itcopy.o
40+
41+
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
42+
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
43+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
44+
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
45+
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
46+
ZGEMMONCOPYOBJ = zgemm_oncopy.o
47+
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
48+
ZGEMMINCOPYOBJ = zgemm_incopy.o
49+
ZGEMMITCOPYOBJ = zgemm_itcopy.o
50+
51+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
52+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
53+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
54+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
55+
56+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
57+
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
58+
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
59+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
60+
61+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
62+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
63+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
64+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
65+
66+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
67+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
68+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
69+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
70+
71+
#TODO: CGEMM3MKERNEL should use 4x4 block sizes.
72+
#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
73+
#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S
74+
75+
#Pure C for other kernels
76+
#SAMAXKERNEL = ../arm/amax.c
77+
#DAMAXKERNEL = ../arm/amax.c
78+
#CAMAXKERNEL = ../arm/zamax.c
79+
#ZAMAXKERNEL = ../arm/zamax.c
80+
#
81+
#SAMINKERNEL = ../arm/amin.c
82+
#DAMINKERNEL = ../arm/amin.c
83+
#CAMINKERNEL = ../arm/zamin.c
84+
#ZAMINKERNEL = ../arm/zamin.c
85+
#
86+
#SMAXKERNEL = ../arm/max.c
87+
#DMAXKERNEL = ../arm/max.c
88+
#
89+
#SMINKERNEL = ../arm/min.c
90+
#DMINKERNEL = ../arm/min.c
91+
#
92+
ISAMAXKERNEL = isamax.c
93+
IDAMAXKERNEL = idamax.c
94+
ICAMAXKERNEL = icamax.c
95+
IZAMAXKERNEL = izamax.c
96+
#
97+
ISAMINKERNEL = isamin.c
98+
IDAMINKERNEL = idamin.c
99+
ICAMINKERNEL = icamin.c
100+
IZAMINKERNEL = izamin.c
101+
#
102+
#ISMAXKERNEL = ../arm/imax.c
103+
#IDMAXKERNEL = ../arm/imax.c
104+
#
105+
#ISMINKERNEL = ../arm/imin.c
106+
#IDMINKERNEL = ../arm/imin.c
107+
#
108+
SASUMKERNEL = sasum.c
109+
DASUMKERNEL = dasum.c
110+
CASUMKERNEL = casum.c
111+
ZASUMKERNEL = zasum.c
112+
#
113+
SAXPYKERNEL = saxpy.c
114+
DAXPYKERNEL = daxpy.c
115+
CAXPYKERNEL = caxpy.c
116+
ZAXPYKERNEL = zaxpy.c
117+
#
118+
SCOPYKERNEL = scopy.c
119+
DCOPYKERNEL = dcopy.c
120+
CCOPYKERNEL = ccopy.c
121+
ZCOPYKERNEL = zcopy.c
122+
#
123+
SDOTKERNEL = sdot.c
124+
DDOTKERNEL = ddot.c
125+
DSDOTKERNEL = sdot.c
126+
CDOTKERNEL = cdot.c
127+
ZDOTKERNEL = zdot.c
128+
#
129+
SNRM2KERNEL = ../arm/nrm2.c
130+
DNRM2KERNEL = ../arm/nrm2.c
131+
CNRM2KERNEL = ../arm/znrm2.c
132+
ZNRM2KERNEL = ../arm/znrm2.c
133+
#
134+
SROTKERNEL = srot.c
135+
DROTKERNEL = drot.c
136+
CROTKERNEL = crot.c
137+
ZROTKERNEL = zrot.c
138+
#
139+
SSCALKERNEL = sscal.c
140+
DSCALKERNEL = dscal.c
141+
CSCALKERNEL = zscal.c
142+
ZSCALKERNEL = zscal.c
143+
#
144+
SSWAPKERNEL = sswap.c
145+
DSWAPKERNEL = dswap.c
146+
CSWAPKERNEL = cswap.c
147+
ZSWAPKERNEL = zswap.c
148+
#
149+
150+
SGEMVNKERNEL = sgemv_n.c
151+
DGEMVNKERNEL = dgemv_n.c
152+
CGEMVNKERNEL = cgemv_n.c
153+
ZGEMVNKERNEL = zgemv_n_4.c
154+
#
155+
SGEMVTKERNEL = sgemv_t.c
156+
DGEMVTKERNEL = dgemv_t.c
157+
CGEMVTKERNEL = cgemv_t.c
158+
ZGEMVTKERNEL = zgemv_t_4.c
159+
160+
161+
#SSYMV_U_KERNEL = ../generic/symv_k.c
162+
#SSYMV_L_KERNEL = ../generic/symv_k.c
163+
#DSYMV_U_KERNEL = ../generic/symv_k.c
164+
#DSYMV_L_KERNEL = ../generic/symv_k.c
165+
#QSYMV_U_KERNEL = ../generic/symv_k.c
166+
#QSYMV_L_KERNEL = ../generic/symv_k.c
167+
#CSYMV_U_KERNEL = ../generic/zsymv_k.c
168+
#CSYMV_L_KERNEL = ../generic/zsymv_k.c
169+
#ZSYMV_U_KERNEL = ../generic/zsymv_k.c
170+
#ZSYMV_L_KERNEL = ../generic/zsymv_k.c
171+
#XSYMV_U_KERNEL = ../generic/zsymv_k.c
172+
#XSYMV_L_KERNEL = ../generic/zsymv_k.c
173+
174+
#ZHEMV_U_KERNEL = ../generic/zhemv_k.c
175+
#ZHEMV_L_KERNEL = ../generic/zhemv_k.c
176+
177+
LSAME_KERNEL = ../generic/lsame.c
178+
SCABS_KERNEL = ../generic/cabs.c
179+
DCABS_KERNEL = ../generic/cabs.c
180+
QCABS_KERNEL = ../generic/cabs.c
181+
182+
#Dump kernel
183+
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
184+
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

kernel/power/casum.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4646

4747
#endif
4848

49-
#if defined(POWER8)
49+
/* POWER9 builds include the same casum_microk_power8.c file as POWER8 builds. */
#if defined(POWER8) || defined(POWER9)
5050
#include "casum_microk_power8.c"
5151
#endif
5252

kernel/power/ccopy.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3535

3636
#include "common.h"
3737

38-
#if defined(POWER8)
38+
/* POWER9 builds include the same ccopy_microk_power8.c file as POWER8 builds. */
#if defined(POWER8) || defined(POWER9)
3939
#include "ccopy_microk_power8.c"
4040
#endif
4141

0 commit comments

Comments
 (0)