Skip to content

Commit 3df3d62

Browse files
authored
Merge pull request #3672 from imzhuhl/neoversen2_bf16
sbgemm support for ARM Neoverse N2
2 parents 407a1a2 + ec0d5c7 commit 3df3d62

8 files changed

+1021
-2
lines changed

Makefile.arm64

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,9 @@ ifeq ($(CORE), NEOVERSEN2)
121121
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
122122
ifeq ($(GCCVERSIONGTEQ9), 1)
123123
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ10)))
124-
CCOMMON_OPT += -march=armv8.5-a -mtune=neoverse-n2
124+
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
125125
ifneq ($(F_COMPILER), NAG)
126-
FCOMMON_OPT += -march=armv8.5-a -mtune=neoverse-n2
126+
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
127127
endif
128128
else
129129
CCOMMON_OPT += -march=armv8.5-a -mtune=native

kernel/arm64/KERNEL.NEOVERSEN2

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,14 @@ ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
187187
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
188188
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
189189
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
190+
191+
# SBGEMM source selection. The compute kernel's file name is built from
# SBGEMM_UNROLL_M and SBGEMM_UNROLL_N. The INCOPY and ONCOPY variables name
# the same ncopy source, and ITCOPY and OTCOPY name the same tcopy source,
# while the four *OBJ variables still name four distinct objects.
SBGEMM_BETA = sbgemm_beta_neoversen2.c
192+
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversen2.c
193+
SBGEMMINCOPY = sbgemm_ncopy_neoversen2.c
194+
SBGEMMITCOPY = sbgemm_tcopy_neoversen2.c
195+
SBGEMMONCOPY = sbgemm_ncopy_neoversen2.c
196+
SBGEMMOTCOPY = sbgemm_tcopy_neoversen2.c
197+
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
198+
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
199+
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
200+
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)

kernel/arm64/sbgemm_beta_neoversen2.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/***************************************************************************
2+
* Copyright (c) 2022, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include "common.h"
30+
31+
/*
 * Scale C in place by beta.
 *
 * C is visited as n strips of m contiguous elements; consecutive strips start
 * ldc elements apart.
 *
 *   m, n   number of elements per strip / number of strips
 *   beta   scaling factor
 *   c      first element of C
 *   ldc    distance, in elements, between the starts of consecutive strips
 *
 * dummy1..dummy5 are not read; they only keep the argument list in the shape
 * the caller expects.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, IFLOAT *dummy2,
          BLASLONG dummy3, IFLOAT *dummy4, BLASLONG dummy5, FLOAT *c,
          BLASLONG ldc) {
  BLASLONG i, j;
  BLASLONG chunk, remain;
  FLOAT *c_offset1, *c_offset;

  c_offset = c;
  chunk = m >> 3; /* number of full groups of 8 elements per strip */
  remain = m & 7; /* leftover elements after the groups of 8 */

  if (beta == ZERO) {
    /* Store zeros instead of multiplying, so whatever C held before
     * (including NaN or Inf) is overwritten rather than propagated. */
    for (j = n; j > 0; j--) {
      c_offset1 = c_offset;
      c_offset += ldc;

      for (i = chunk; i > 0; i--) {
        *(c_offset1 + 0) = ZERO;
        *(c_offset1 + 1) = ZERO;
        *(c_offset1 + 2) = ZERO;
        *(c_offset1 + 3) = ZERO;
        *(c_offset1 + 4) = ZERO;
        *(c_offset1 + 5) = ZERO;
        *(c_offset1 + 6) = ZERO;
        *(c_offset1 + 7) = ZERO;
        c_offset1 += 8;
      }

      for (i = remain; i > 0; i--) {
        *c_offset1 = ZERO;
        c_offset1++;
      }
    }
  } else {
    for (j = n; j > 0; j--) {
      c_offset1 = c_offset;
      c_offset += ldc;

      for (i = chunk; i > 0; i--) {
        *(c_offset1 + 0) *= beta;
        *(c_offset1 + 1) *= beta;
        *(c_offset1 + 2) *= beta;
        *(c_offset1 + 3) *= beta;
        *(c_offset1 + 4) *= beta;
        *(c_offset1 + 5) *= beta;
        *(c_offset1 + 6) *= beta;
        *(c_offset1 + 7) *= beta;
        c_offset1 += 8;
      }

      for (i = remain; i > 0; i--) {
        *c_offset1 *= beta;
        c_offset1++;
      }
    }
  }

  return 0;
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/***************************************************************************
2+
* Copyright (c) 2022, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include <arm_sve.h>
30+
31+
#include "common.h"
32+
33+
/*
 * The implementation file is included twice: first with ALPHA_ONE defined,
 * then again after it has been undefined. The order matters.
 * NOTE(review): presumably each inclusion emits one of the two kernels called
 * by CNAME below (the "_alpha_one" and "_alpha" variants) - confirm in the
 * implementation file.
 */
#define ALPHA_ONE
34+
#include "sbgemm_kernel_8x4_neoversen2_impl.c"
35+
#undef ALPHA_ONE
36+
#include "sbgemm_kernel_8x4_neoversen2_impl.c"
37+
38+
/*
 * SBGEMM kernel entry point: choose between two kernel variants based on
 * alpha and forward every argument unchanged.
 *
 *   alpha == 1.0f  -> sbgemm_kernel_neoversen2_alpha_one
 *   otherwise      -> sbgemm_kernel_neoversen2_alpha
 *
 * Returns whatever the selected kernel returns.
 *
 * NOTE(review): the "_alpha_one" variant is presumably a specialization that
 * skips the multiplication by alpha - confirm in the implementation file.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT *A, IFLOAT *B,
          FLOAT *C, BLASLONG ldc) {
  /* Exact floating-point comparison: only alpha exactly equal to 1.0f takes
   * the "_alpha_one" path. */
  if (alpha == 1.0f) {
    return sbgemm_kernel_neoversen2_alpha_one(m, n, k, alpha, A, B, C, ldc);
  }

  return sbgemm_kernel_neoversen2_alpha(m, n, k, alpha, A, B, C, ldc);
}

0 commit comments

Comments
 (0)