Skip to content

Commit a4e56e0

Browse files
authored
Merge pull request #4806 from Mousius/small-gemm
Small GEMM for AArch64 with SVE
2 parents 949a7f9 + ea4ab3b commit a4e56e0

11 files changed

+4983
-0
lines changed

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ SMALL_MATRIX_OPT = 1
268268
else ifeq ($(ARCH), power)
269269
SMALL_MATRIX_OPT = 1
270270
BUILD_BFLOAT16 = 1
271+
else ifeq ($(ARCH), arm64)
272+
SMALL_MATRIX_OPT = 1
271273
endif
272274
ifeq ($(ARCH), loongarch64)
273275
SMALL_MATRIX_OPT = 1

kernel/arm64/KERNEL.ARMV8SVE

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,16 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
131131
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
132132
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
133133

134+
SGEMM_SMALL_M_PERMIT = gemm_small_kernel_permit_sve.c
135+
SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_sve.c
136+
SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_sve.c
137+
SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_sve.c
138+
SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_sve.c
139+
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_sve.c
140+
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_sve.c
141+
SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_sve.c
142+
SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_sve.c
143+
134144
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
135145
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
136146
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
@@ -152,6 +162,16 @@ DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
152162
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
153163
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
154164

165+
DGEMM_SMALL_M_PERMIT = gemm_small_kernel_permit_sve.c
166+
DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_sve.c
167+
DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_sve.c
168+
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_sve.c
169+
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_sve.c
170+
DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_sve.c
171+
DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_sve.c
172+
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_sve.c
173+
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_sve.c
174+
155175
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
156176
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
157177
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c

kernel/arm64/dgemm_small_kernel_nn_sve.c

Lines changed: 742 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)