Skip to content

Commit 82fc29a

Browse files
author
Rajalakshmi Srinivasaraghavan
committed
POWER10: Fall back to POWER8 functions
Because the cgemm and zgemm kernels are not optimized for big endian, fall back to the POWER8 versions. Tested on AIX using gcc and Open XL C.
1 parent bf3183d commit 82fc29a

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

kernel/power/KERNEL.POWER10

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,13 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
1919

2020
STRMMKERNEL = sgemm_kernel_power10.c
2121
DTRMMKERNEL = dgemm_kernel_power10.c
22+
ifeq ($(OSNAME), AIX)
23+
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
24+
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
25+
else
2226
CTRMMKERNEL = cgemm_kernel_power10.S
2327
ZTRMMKERNEL = zgemm_kernel_power10.S
28+
endif
2429

2530
SGEMMKERNEL = sgemm_kernel_power10.c
2631
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
@@ -62,18 +67,30 @@ DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c
6267
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c
6368
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c
6469

70+
ifeq ($(OSNAME), AIX)
71+
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
72+
else
6573
CGEMMKERNEL = cgemm_kernel_power10.S
74+
endif
6675
#CGEMMKERNEL = cgemm_kernel_8x4_power8.S
6776
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
77+
ifeq ($(OSNAME), AIX)
78+
CGEMMITCOPY = cgemm_tcopy_8_power8.S
79+
else
6880
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
81+
endif
6982
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
7083
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
7184
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
7285
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
7386
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
7487
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
7588

89+
ifeq ($(OSNAME), AIX)
90+
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
91+
else
7692
ZGEMMKERNEL = zgemm_kernel_power10.S
93+
endif
7794
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
7895
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
7996
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
@@ -124,6 +141,7 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
124141
#SMINKERNEL = ../arm/min.c
125142
#DMINKERNEL = ../arm/min.c
126143
#
144+
ifeq ($(C_COMPILER), GCC)
127145
ifneq ($(GCCVERSIONGTEQ9),1)
128146
ISAMAXKERNEL = isamax_power9.S
129147
else
@@ -148,6 +166,15 @@ ICAMINKERNEL = icamin_power9.S
148166
else
149167
ICAMINKERNEL = icamin.c
150168
endif
169+
else
170+
ISAMAXKERNEL = isamax.c
171+
IDAMAXKERNEL = idamax.c
172+
ICAMAXKERNEL = icamax.c
173+
IZAMAXKERNEL = izamax.c
174+
ISAMINKERNEL = isamin.c
175+
IDAMINKERNEL = idamin.c
176+
ICAMINKERNEL = icamin.c
177+
endif
151178
IZAMINKERNEL = izamin.c
152179
#
153180
#ISMAXKERNEL = ../arm/imax.c

0 commit comments

Comments
 (0)