Skip to content

Commit 63cf4d0

Browse files
committed
Add RISC-V level-3 C and Z kernel functions.
1 parent 387e897 commit 63cf4d0

25 files changed

+3342
-2280
lines changed

kernel/riscv64/KERNEL.x280

Lines changed: 61 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -118,8 +118,8 @@ DGEMVTKERNEL = gemv_t_rvv.c
118118
CGEMVTKERNEL = zgemv_t_rvv.c
119119
ZGEMVTKERNEL = zgemv_t_rvv.c
120120

121-
CTRMMKERNEL = ztrmmkernel_2x2_rvv.c
122-
ZTRMMKERNEL = ztrmmkernel_2x2_rvv.c
121+
CTRMMKERNEL = ztrmmkernel_rvv_v1x4.c
122+
ZTRMMKERNEL = ztrmmkernel_rvv_v1x4.c
123123

124124
# SGEMM_UNROLL_N set in params.h
125125
ifeq ($(SGEMM_UNROLL_N), 8)
@@ -168,17 +168,28 @@ DSYMMUCOPY_M = symm_ucopy_rvv_v1.c
168168
DSYMMLCOPY_M = symm_lcopy_rvv_v1.c
169169
endif
170170

171-
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
172-
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
173-
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
174-
CGEMMONCOPYOBJ = cgemm_oncopy.o
175-
CGEMMOTCOPYOBJ = cgemm_otcopy.o
171+
CGEMMKERNEL = zgemmkernel_rvv_v1x4.c
172+
CGEMMINCOPY = zgemm_ncopy_rvv_v1.c
173+
CGEMMITCOPY = zgemm_tcopy_rvv_v1.c
174+
CGEMMONCOPY = zgemm_ncopy_4_rvv.c
175+
CGEMMOTCOPY = zgemm_tcopy_4_rvv.c
176176

177-
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
178-
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
179-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
180-
ZGEMMONCOPYOBJ = zgemm_oncopy.o
181-
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
177+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
178+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
179+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
180+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
181+
182+
ZGEMMKERNEL = zgemmkernel_rvv_v1x4.c
183+
184+
ZGEMMINCOPY = zgemm_ncopy_rvv_v1.c
185+
ZGEMMITCOPY = zgemm_tcopy_rvv_v1.c
186+
ZGEMMONCOPY = zgemm_ncopy_4_rvv.c
187+
ZGEMMOTCOPY = zgemm_tcopy_4_rvv.c
188+
189+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
190+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
191+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
192+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
182193

183194
STRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
184195
STRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
@@ -190,20 +201,25 @@ DTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
190201
DTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
191202
DTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
192203

193-
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
194-
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
195-
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
196-
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
204+
CTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
205+
CTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
206+
CTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
207+
CTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
197208

198-
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
199-
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
200-
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
201-
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
209+
ZTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
210+
ZTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
211+
ZTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
212+
ZTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
202213

203-
TRSMCOPYLN_M = trsm_lncopy_rvv_v1.c
204-
TRSMCOPYLT_M = trsm_ltcopy_rvv_v1.c
205-
TRSMCOPYUN_M = trsm_uncopy_rvv_v1.c
206-
TRSMCOPYUT_M = trsm_utcopy_rvv_v1.c
214+
TRSMCOPYLN_M = trsm_lncopy_rvv_v1.c
215+
TRSMCOPYLT_M = trsm_ltcopy_rvv_v1.c
216+
TRSMCOPYUN_M = trsm_uncopy_rvv_v1.c
217+
TRSMCOPYUT_M = trsm_utcopy_rvv_v1.c
218+
219+
ZTRSMCOPYLN_M = ztrsm_lncopy_rvv_v1.c
220+
ZTRSMCOPYLT_M = ztrsm_ltcopy_rvv_v1.c
221+
ZTRSMCOPYUN_M = ztrsm_uncopy_rvv_v1.c
222+
ZTRSMCOPYUT_M = ztrsm_utcopy_rvv_v1.c
207223

208224
SSYMV_U_KERNEL = symv_U_rvv.c
209225
SSYMV_L_KERNEL = symv_L_rvv.c
@@ -214,6 +230,27 @@ CSYMV_L_KERNEL = ../generic/zsymv_k.c
214230
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
215231
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
216232

233+
ZHEMMLTCOPY_M = zhemm_ltcopy_rvv_v1.c
234+
ZHEMMUTCOPY_M = zhemm_utcopy_rvv_v1.c
235+
236+
CHEMMLTCOPY_M = zhemm_ltcopy_rvv_v1.c
237+
CHEMMUTCOPY_M = zhemm_utcopy_rvv_v1.c
238+
239+
ZSYMMUCOPY_M = zsymm_ucopy_rvv_v1.c
240+
ZSYMMLCOPY_M = zsymm_lcopy_rvv_v1.c
241+
242+
CSYMMUCOPY_M = zsymm_ucopy_rvv_v1.c
243+
CSYMMLCOPY_M = zsymm_lcopy_rvv_v1.c
244+
245+
ZTRMMUNCOPY_M = ztrmm_uncopy_rvv_v1.c
246+
ZTRMMLNCOPY_M = ztrmm_lncopy_rvv_v1.c
247+
ZTRMMUTCOPY_M = ztrmm_utcopy_rvv_v1.c
248+
ZTRMMLTCOPY_M = ztrmm_ltcopy_rvv_v1.c
249+
250+
CTRMMUNCOPY_M = ztrmm_uncopy_rvv_v1.c
251+
CTRMMLNCOPY_M = ztrmm_lncopy_rvv_v1.c
252+
CTRMMUTCOPY_M = ztrmm_utcopy_rvv_v1.c
253+
CTRMMLTCOPY_M = ztrmm_ltcopy_rvv_v1.c
217254

218255
LSAME_KERNEL = ../generic/lsame.c
219256

kernel/riscv64/trmm_lncopy_rvv_v1.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,10 +36,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3636
#define VSEV_FLOAT vse32_v_f32m2
3737
#define VLSEV_FLOAT vlse32_v_f32m2
3838
#define VBOOL_T vbool16_t
39-
#define UINT_V_T vint32m2_t
40-
#define VID_V_UINT vid_v_i32m2
41-
#define VMSGTU_VX_UINT vmsgt_vx_i32m2_b16
42-
#define VMSEQ_VX_UINT vmseq_vx_i32m2_b16
39+
#define UINT_V_T vuint32m2_t
40+
#define VID_V_UINT vid_v_u32m2
41+
#define VMSGTU_VX_UINT vmsgtu_vx_u32m2_b16
42+
#define VMSEQ_VX_UINT vmseq_vx_u32m2_b16
4343
#define VFMERGE_VFM_FLOAT vfmerge_vfm_f32m2
4444
#else
4545
#define VSETVL(n) vsetvl_e64m2(n)

0 commit comments

Comments
 (0)