Skip to content

Commit 09bb48d

Browse files
committed
Vectorize in-copy packing/copying for SGEMM - 4X faster.
1 parent 8892121 commit 09bb48d

File tree

4 files changed

+487
-6
lines changed

4 files changed

+487
-6
lines changed

kernel/power/KERNEL.POWER10

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ ZTRMMKERNEL = zgemm_kernel_power10.S
2525
endif
2626

2727
SGEMMKERNEL = sgemm_kernel_power10.c
28-
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
28+
SGEMMINCOPY = sgemm_ncopy_16_power.c
2929
SGEMMITCOPY = sgemm_tcopy_16_power8.S
3030
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
3131
SGEMMOTCOPY = sgemm_tcopy_8_power8.S

kernel/power/KERNEL.POWER8

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,9 @@
11
# Big-endian 32bit (AIX) is supported through the POWER6 GEMM kernels, no separate TRMM
22
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
33
SGEMMKERNEL = gemm_kernel_power6.S
4-
SGEMMINCOPY =
54
SGEMMITCOPY =
65
SGEMMONCOPY = gemm_ncopy_4.S
76
SGEMMOTCOPY = gemm_tcopy_4.S
8-
SGEMMINCOPYOBJ =
97
SGEMMITCOPYOBJ =
108
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
119
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
@@ -50,11 +48,9 @@ CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
5048
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
5149

5250
SGEMMKERNEL = sgemm_kernel_16x8_power8.S
53-
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
5451
SGEMMITCOPY = sgemm_tcopy_16_power8.S
5552
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
5653
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
57-
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
5854
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
5955
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
6056
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
@@ -90,6 +86,9 @@ ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
9086
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
9187
endif
9288

89+
SGEMMINCOPY = sgemm_ncopy_16_power.c
90+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
91+
9392
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
9493
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
9594
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c

kernel/power/KERNEL.POWER9

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ CTRMMKERNEL = cgemm_kernel_power9.S
1313
ZTRMMKERNEL = zgemm_kernel_power9.S
1414

1515
SGEMMKERNEL = sgemm_kernel_power9.S
16-
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
16+
SGEMMINCOPY = sgemm_ncopy_16_power.c
1717
SGEMMITCOPY = sgemm_tcopy_16_power8.S
1818
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
1919
SGEMMOTCOPY = sgemm_tcopy_8_power8.S

0 commit comments

Comments
 (0)