Skip to content

Commit 72caceb

Browse files
authored
Merge pull request #4009 from Mousius/sve-gemm
Use SVE kernel for SGEMM/DGEMM on Arm(R) Neoverse(TM) V1
2 parents d1b6318 + ec334e6 commit 72caceb

File tree

10 files changed

+306
-452
lines changed

10 files changed

+306
-452
lines changed

benchmark/syrk.c

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/***************************************************************************
2-
Copyright (c) 2014, The OpenBLAS Project
2+
Copyright (c) 2014, 2023 The OpenBLAS Project
33
All rights reserved.
44
Redistribution and use in source and binary forms, with or without
55
modification, are permitted provided that the following conditions are
@@ -67,7 +67,7 @@ int main(int argc, char *argv[]){
6767
int step = 1;
6868
int loops = 1;
6969

70-
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
70+
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
7171

7272
double time1,timeg;
7373

@@ -77,7 +77,7 @@ int main(int argc, char *argv[]){
7777
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
7878
if (argc > 0) { step = atol(*argv); argc--; argv++;}
7979

80-
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
80+
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Loops = %d\n", from, to, step,uplo,trans,loops);
8181

8282

8383
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){

kernel/arm64/KERNEL.ARMV8SVE

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -128,10 +128,10 @@ SGEMM_BETA = sgemm_beta.S
128128
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
129129
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
130130

131-
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
132-
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
133-
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
134-
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
131+
SGEMMINCOPY = gemm_ncopy_sve_v1x$(SGEMM_UNROLL_N).c
132+
SGEMMITCOPY = gemm_tcopy_sve_v1x$(SGEMM_UNROLL_N).c
133+
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
134+
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
135135

136136
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
137137
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
@@ -149,8 +149,8 @@ SSYMMLCOPY_M = symm_lcopy_sve.c
149149
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
150150
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
151151

152-
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
153-
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
152+
DGEMMINCOPY = gemm_ncopy_sve_v1x$(DGEMM_UNROLL_N).c
153+
DGEMMITCOPY = gemm_tcopy_sve_v1x$(DGEMM_UNROLL_N).c
154154
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
155155
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
156156

kernel/arm64/KERNEL.NEOVERSEV1

Lines changed: 38 additions & 129 deletions
Original file line number | Diff line number | Diff line change
@@ -1,75 +1,6 @@
1-
SAMINKERNEL = ../arm/amin.c
2-
DAMINKERNEL = ../arm/amin.c
3-
CAMINKERNEL = ../arm/zamin.c
4-
ZAMINKERNEL = ../arm/zamin.c
1+
include $(KERNELDIR)/KERNEL.ARMV8SVE
52

6-
SMAXKERNEL = ../arm/max.c
7-
DMAXKERNEL = ../arm/max.c
8-
9-
SMINKERNEL = ../arm/min.c
10-
DMINKERNEL = ../arm/min.c
11-
12-
ISAMINKERNEL = ../arm/iamin.c
13-
IDAMINKERNEL = ../arm/iamin.c
14-
ICAMINKERNEL = ../arm/izamin.c
15-
IZAMINKERNEL = ../arm/izamin.c
16-
17-
ISMAXKERNEL = ../arm/imax.c
18-
IDMAXKERNEL = ../arm/imax.c
19-
20-
ISMINKERNEL = ../arm/imin.c
21-
IDMINKERNEL = ../arm/imin.c
22-
23-
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
24-
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
25-
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
26-
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
27-
28-
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
29-
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
30-
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
31-
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
32-
33-
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
34-
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
35-
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
36-
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
37-
38-
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
39-
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
40-
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
41-
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
42-
43-
SAMAXKERNEL = amax.S
44-
DAMAXKERNEL = amax.S
45-
CAMAXKERNEL = zamax.S
46-
ZAMAXKERNEL = zamax.S
47-
48-
SAXPYKERNEL = axpy.S
493
DAXPYKERNEL = daxpy_thunderx2t99.S
50-
CAXPYKERNEL = zaxpy.S
51-
ZAXPYKERNEL = zaxpy.S
52-
53-
SROTKERNEL = rot.S
54-
DROTKERNEL = rot.S
55-
CROTKERNEL = zrot.S
56-
ZROTKERNEL = zrot.S
57-
58-
SSCALKERNEL = scal.S
59-
DSCALKERNEL = scal.S
60-
CSCALKERNEL = zscal.S
61-
ZSCALKERNEL = zscal.S
62-
63-
SGEMVNKERNEL = gemv_n.S
64-
DGEMVNKERNEL = gemv_n.S
65-
CGEMVNKERNEL = zgemv_n.S
66-
ZGEMVNKERNEL = zgemv_n.S
67-
68-
SGEMVTKERNEL = gemv_t.S
69-
DGEMVTKERNEL = gemv_t.S
70-
CGEMVTKERNEL = zgemv_t.S
71-
ZGEMVTKERNEL = zgemv_t.S
72-
734

745
SASUMKERNEL = sasum_thunderx2t99.c
756
DASUMKERNEL = dasum_thunderx2t99.c
@@ -100,67 +31,20 @@ DDOTKERNEL = dot.c
10031
SDOTKERNEL = dot.c
10132
CDOTKERNEL = zdot_thunderx2t99.c
10233
ZDOTKERNEL = zdot_thunderx2t99.c
103-
DSDOTKERNEL = dot.S
104-
105-
DGEMM_BETA = dgemm_beta.S
106-
SGEMM_BETA = sgemm_beta.S
107-
108-
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
109-
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
110-
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
111-
ifeq ($(SGEMM_UNROLL_M), 16)
112-
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
113-
else
114-
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
115-
endif
116-
ifeq ($(SGEMM_UNROLL_M), 4)
117-
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
118-
else
119-
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
120-
endif
121-
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
122-
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
123-
endif
124-
ifeq ($(SGEMM_UNROLL_N), 16)
125-
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
126-
else
127-
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
128-
endif
129-
ifeq ($(SGEMM_UNROLL_N), 4)
130-
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
131-
else
132-
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
133-
endif
134-
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
135-
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
136-
137-
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
138-
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
13934

140-
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
35+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
36+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
37+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
38+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
14139

142-
ifeq ($(DGEMM_UNROLL_M), 8)
143-
DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
144-
DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
145-
else
146-
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
147-
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
148-
endif
149-
150-
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
151-
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
152-
endif
153-
154-
ifeq ($(DGEMM_UNROLL_N), 4)
155-
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
156-
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
157-
else
158-
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
159-
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
160-
endif
161-
162-
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
163-
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
40+
CTRMMUNCOPY_M =
41+
CTRMMLNCOPY_M =
42+
CTRMMUTCOPY_M =
43+
CTRMMLTCOPY_M =
44+
CHEMMLTCOPY_M =
45+
CHEMMUTCOPY_M =
46+
CSYMMUCOPY_M =
47+
CSYMMLCOPY_M =
16448

16549
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
16650
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
@@ -169,19 +53,44 @@ CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
16953
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
17054
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
17155
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
56+
else
57+
CGEMMINCOPYOBJ =
58+
CGEMMITCOPYOBJ =
17259
endif
17360
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
17461
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
17562
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
17663
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
17764

65+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
66+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
67+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
68+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
69+
70+
ZTRSMCOPYLN_M =
71+
ZTRSMCOPYLT_M =
72+
ZTRSMCOPYUN_M =
73+
ZTRSMCOPYUT_M =
74+
75+
ZTRMMUNCOPY_M =
76+
ZTRMMLNCOPY_M =
77+
ZTRMMUTCOPY_M =
78+
ZTRMMLTCOPY_M =
79+
ZHEMMLTCOPY_M =
80+
ZHEMMUTCOPY_M =
81+
ZSYMMUCOPY_M =
82+
ZSYMMLCOPY_M =
83+
17884
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
17985
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
18086
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
18187
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
18288
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
18389
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
18490
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
91+
else
92+
ZGEMMINCOPYOBJ =
93+
ZGEMMITCOPYOBJ =
18594
endif
18695
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
18796
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c

kernel/arm64/dgemm_ncopy_sve_v1.c

Lines changed: 0 additions & 79 deletions
This file was deleted.

0 commit comments

Comments
 (0)