Skip to content

Commit 08e479f

Browse files
authored
Merge pull request #4487 from ErnstPeng/feature-branch
Optimized zgemm kernel 8x4 LASX, 4x4 LSX and cgemm kernel 8x4 LSX for LoongArch
2 parents e5d2725 + fe3da43 commit 08e479f

16 files changed

+12248
-11
lines changed

kernel/loongarch64/KERNEL.LOONGSON2K1000

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -100,9 +100,13 @@ DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
100100
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
101101
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
102102

103-
CGEMMKERNEL = cgemm_kernel_2x2_lsx.S
104-
CGEMMONCOPY = cgemm_ncopy_2_lsx.S
105-
CGEMMOTCOPY = cgemm_tcopy_2_lsx.S
103+
CGEMMKERNEL = cgemm_kernel_8x4_lsx.S
104+
CGEMMINCOPY = cgemm_ncopy_8_lsx.S
105+
CGEMMITCOPY = cgemm_tcopy_8_lsx.S
106+
CGEMMONCOPY = cgemm_ncopy_4_lsx.S
107+
CGEMMOTCOPY = cgemm_tcopy_4_lsx.S
108+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
109+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
106110
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
107111
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
108112

@@ -111,4 +115,14 @@ CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
111115
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
112116
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
113117

118+
ZGEMMKERNEL = zgemm_kernel_4x4_lsx.S
119+
ZGEMMONCOPY = zgemm_ncopy_4_lsx.S
120+
ZGEMMOTCOPY = zgemm_tcopy_4_lsx.S
121+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
122+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
123+
124+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
125+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
126+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
127+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
114128
endif

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,9 +122,13 @@ CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
122122
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
123123
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
124124

125-
ZGEMMKERNEL = zgemm_kernel_2x2_lasx.S
126-
ZGEMMONCOPY = zgemm_ncopy_2_lasx.S
127-
ZGEMMOTCOPY = zgemm_tcopy_2_lasx.S
125+
ZGEMMKERNEL = zgemm_kernel_8x4_lasx.S
126+
ZGEMMINCOPY = zgemm_ncopy_8_lasx.S
127+
ZGEMMITCOPY = zgemm_tcopy_8_lasx.S
128+
ZGEMMONCOPY = zgemm_ncopy_4_lasx.S
129+
ZGEMMOTCOPY = zgemm_tcopy_4_lasx.S
130+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
131+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
128132
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
129133
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
130134

0 commit comments

Comments
 (0)