Skip to content

Commit 86ab939

Browse files
authored
Merge pull request #2354 from ZuoQ3/develop
[WIP] Use arm neon instructions to optimize tcopy operation
2 parents 375b187 + 50f7fc1 commit 86ab939

File tree

3 files changed

+840
-0
lines changed

3 files changed

+840
-0
lines changed

kernel/arm64/KERNEL.ARMV8

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,12 +108,20 @@ SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
108108
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
109109
# SGEMMINCOPY / SGEMMITCOPY (and their object names) are only set here when
# SGEMM_UNROLL_M differs from SGEMM_UNROLL_N; both source names are keyed on
# SGEMM_UNROLL_M.
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
110110
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
# When SGEMM_UNROLL_M is 16, take the tcopy source from the assembly file
# sgemm_tcopy_16.S (no ../generic/ prefix) instead of the generic C file.
# NOTE(review): per the commit title this is the NEON-optimized tcopy -- confirm.
111+
ifeq ($(SGEMM_UNROLL_M), 16)
112+
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
113+
else
# Any other unroll factor keeps using the generic C tcopy source.
111114
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
115+
endif
112116
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
113117
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
114118
endif
115119
# SGEMMONCOPY / SGEMMOTCOPY are always set; both source names are keyed on
# SGEMM_UNROLL_N. The ncopy source is always the generic C file.
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
# When SGEMM_UNROLL_N is 16, take the tcopy source from the assembly file
# sgemm_tcopy_16.S (no ../generic/ prefix) instead of the generic C file.
# NOTE(review): per the commit title this is the NEON-optimized tcopy -- confirm.
120+
ifeq ($(SGEMM_UNROLL_N), 16)
121+
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
122+
else
# Any other unroll factor keeps using the generic C tcopy source.
116123
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
124+
endif
117125
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
118126
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
119127

kernel/arm64/KERNEL.TSV110

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,12 +110,20 @@ SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
110110
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
111111
# SGEMMINCOPY / SGEMMITCOPY (and their object names) are only set here when
# SGEMM_UNROLL_M differs from SGEMM_UNROLL_N; both source names are keyed on
# SGEMM_UNROLL_M.
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
112112
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
# When SGEMM_UNROLL_M is 16, take the tcopy source from the assembly file
# sgemm_tcopy_16.S (no ../generic/ prefix) instead of the generic C file.
# NOTE(review): per the commit title this is the NEON-optimized tcopy -- confirm.
113+
ifeq ($(SGEMM_UNROLL_M), 16)
114+
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
115+
else
# Any other unroll factor keeps using the generic C tcopy source.
113116
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
117+
endif
114118
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
115119
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
116120
endif
117121
# SGEMMONCOPY / SGEMMOTCOPY are always set; both source names are keyed on
# SGEMM_UNROLL_N. The ncopy source is always the generic C file.
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
# When SGEMM_UNROLL_N is 16, take the tcopy source from the assembly file
# sgemm_tcopy_16.S (no ../generic/ prefix) instead of the generic C file.
# NOTE(review): per the commit title this is the NEON-optimized tcopy -- confirm.
122+
ifeq ($(SGEMM_UNROLL_N), 16)
123+
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
124+
else
# Any other unroll factor keeps using the generic C tcopy source.
118125
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
126+
endif
119127
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
120128
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
121129

0 commit comments

Comments
 (0)