Skip to content

Commit b3ac6ee

Browse files
authored
Define alternate kernels for big-endian PPC970
The altivec versions of SGEMM and CGEMM fail most tests in LAPACK-TESTING when compiled for big-endian, and STRSM/CTRSM even cause segfaults. The rot kernels either fail the corresponding utest or lead to failures in LAPACK-TESTING.
1 parent 6082e55 commit b3ac6ee

File tree

1 file changed

+45
-10
lines changed

1 file changed

+45
-10
lines changed

kernel/power/KERNEL.PPC970

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
2+
SGEMMKERNEL = gemm_kernel.S
3+
SGEMMINCOPY =
4+
SGEMMITCOPY =
5+
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
6+
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
7+
SGEMMINCOPYOBJ =
8+
SGEMMITCOPYOBJ =
9+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
10+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
11+
else
112
SGEMMKERNEL = gemm_kernel_altivec.S
213
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
314
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
@@ -7,6 +18,8 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
718
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
819
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
920
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
21+
endif
22+
1023
DGEMMKERNEL = gemm_kernel.S
1124
DGEMMINCOPY =
1225
DGEMMITCOPY =
@@ -16,6 +29,18 @@ DGEMMINCOPYOBJ =
1629
DGEMMITCOPYOBJ =
1730
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
1831
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
32+
33+
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
34+
CGEMMKERNEL = zgemm_kernel.S
35+
CGEMMINCOPY =
36+
CGEMMITCOPY =
37+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
38+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
39+
CGEMMINCOPYOBJ =
40+
CGEMMITCOPYOBJ =
41+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
42+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
43+
else
1944
CGEMMKERNEL = zgemm_kernel_altivec.S
2045
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
2146
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
@@ -25,6 +50,8 @@ CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
2550
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
2651
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
2752
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
53+
endif
54+
2855
ZGEMMKERNEL = zgemm_kernel.S
2956
ZGEMMINCOPY =
3057
ZGEMMITCOPY =
@@ -35,22 +62,30 @@ ZGEMMITCOPYOBJ =
3562
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
3663
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
3764

38-
#STRSMKERNEL_LN = trsm_kernel_LN.S
39-
#STRSMKERNEL_LT = trsm_kernel_LT.S
40-
#STRSMKERNEL_RN = trsm_kernel_LT.S
41-
#STRSMKERNEL_RT = trsm_kernel_RT.S
42-
4365
DTRSMKERNEL_LN = trsm_kernel_LN.S
4466
DTRSMKERNEL_LT = trsm_kernel_LT.S
4567
DTRSMKERNEL_RN = trsm_kernel_LT.S
4668
DTRSMKERNEL_RT = trsm_kernel_RT.S
4769

48-
#CTRSMKERNEL_LN = ztrsm_kernel_LN.S
49-
#CTRSMKERNEL_LT = ztrsm_kernel_LT.S
50-
#CTRSMKERNEL_RN = ztrsm_kernel_LT.S
51-
#CTRSMKERNEL_RT = ztrsm_kernel_RT.S
52-
5370
ZTRSMKERNEL_LN = ztrsm_kernel_LN.S
5471
ZTRSMKERNEL_LT = ztrsm_kernel_LT.S
5572
ZTRSMKERNEL_RN = ztrsm_kernel_LT.S
5673
ZTRSMKERNEL_RT = ztrsm_kernel_RT.S
74+
75+
ifeq ($(__BYTE_ORDER__),$(__ORDER_BIG_ENDIAN__))
76+
STRSMKERNEL_LN = trsm_kernel_LN.S
77+
STRSMKERNEL_LT = trsm_kernel_LT.S
78+
STRSMKERNEL_RN = trsm_kernel_LT.S
79+
STRSMKERNEL_RT = trsm_kernel_RT.S
80+
81+
CTRSMKERNEL_LN = ztrsm_kernel_LN.S
82+
CTRSMKERNEL_LT = ztrsm_kernel_LT.S
83+
CTRSMKERNEL_RN = ztrsm_kernel_LT.S
84+
CTRSMKERNEL_RT = ztrsm_kernel_RT.S
85+
86+
87+
SROTKERNEL = ../arm/rot.c
88+
DROTKERNEL = ../arm/rot.c
89+
CROTKERNEL = ../arm/zrot.c
90+
ZROTKERNEL = ../arm/zrot.c
91+
endif

0 commit comments

Comments
 (0)