Skip to content

Commit 4379a6f

Browse files
aditew01 authored and taoye9 committed
* checkpoint sbgemm for SVE-256
1 parent c139b63 commit 4379a6f

10 files changed

+1277
-4
lines changed

cmake/system.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,10 +291,10 @@ if (DEFINED TARGET)
291291

292292
if (${TARGET} STREQUAL NEOVERSEV1)
293293
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
294-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
294+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
295295
else ()
296296
if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.4)
297-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
297+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
298298
else ()
299299
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} does not support Neoverse V1.")
300300
endif()

kernel/arm64/KERNEL.NEOVERSEV1

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
11
include $(KERNELDIR)/KERNEL.ARMV8SVE
22

3-
SGEMVTKERNEL = gemv_t_sve_v1x3.c
4-
DGEMVTKERNEL = gemv_t_sve_v1x3.c
3+
SGEMVTKERNEL = gemv_t_sve.c
4+
DGEMVTKERNEL = gemv_t_sve.c
5+
SBGEMM_BETA = sbgemm_beta_neoversev1.c
6+
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversev1.c
7+
SBGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversev1.c
8+
SBGEMMITCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_M)_neoversev1.c
9+
SBGEMMONCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_N)_neoversev1.c
10+
SBGEMMOTCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_N)_neoversev1.c
11+
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
12+
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
13+
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
14+
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)

kernel/arm64/sbgemm_beta_neoversev1.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/***************************************************************************
2+
* Copyright (c) 2024, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include "common.h"
30+
31+
/*
 * SBGEMM beta kernel: scale the output matrix C in place by beta.
 *
 * C is walked as n columns of m contiguous FLOAT elements, with consecutive
 * columns starting ldc elements apart.
 *
 * Parameters:
 *   m, n    - number of elements per column / number of columns to process.
 *   beta    - scale factor applied to every processed element of C.
 *   c, ldc  - base pointer of C and the element stride between columns.
 *   dummy1..dummy5 - unused here; present only to match the kernel signature.
 *
 * When beta == ZERO the elements are overwritten with ZERO instead of being
 * multiplied, so whatever C held before (including NaN/Inf) is discarded.
 *
 * Returns 0 unconditionally.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, IFLOAT *dummy2,
          BLASLONG dummy3, IFLOAT *dummy4, BLASLONG dummy5, FLOAT *c,
          BLASLONG ldc) {

  BLASLONG i, j;
  BLASLONG chunk, remain;
  FLOAT *c_offset1, *c_offset;

  c_offset = c;

  /* Each column is handled as `chunk` groups of 8 elements plus a tail. */
  chunk = m >> 3;
  remain = m & 7;

  if (beta == ZERO) {
    /* Store zeros directly rather than computing c * 0. */
    for (j = n; j > 0; j--) {
      c_offset1 = c_offset;
      c_offset += ldc; /* advance to the start of the next column */

      for (i = chunk; i > 0; i--) {
        *(c_offset1 + 0) = ZERO;
        *(c_offset1 + 1) = ZERO;
        *(c_offset1 + 2) = ZERO;
        *(c_offset1 + 3) = ZERO;
        *(c_offset1 + 4) = ZERO;
        *(c_offset1 + 5) = ZERO;
        *(c_offset1 + 6) = ZERO;
        *(c_offset1 + 7) = ZERO;
        c_offset1 += 8;
      }

      /* Tail: the m % 8 elements left over after the unrolled loop. */
      for (i = remain; i > 0; i--) {
        *c_offset1 = ZERO;
        c_offset1++;
      }
    }
  } else {
    for (j = n; j > 0; j--) {
      c_offset1 = c_offset;
      c_offset += ldc; /* advance to the start of the next column */

      for (i = chunk; i > 0; i--) {
        *(c_offset1 + 0) *= beta;
        *(c_offset1 + 1) *= beta;
        *(c_offset1 + 2) *= beta;
        *(c_offset1 + 3) *= beta;
        *(c_offset1 + 4) *= beta;
        *(c_offset1 + 5) *= beta;
        *(c_offset1 + 6) *= beta;
        *(c_offset1 + 7) *= beta;
        c_offset1 += 8;
      }

      /* Tail: the m % 8 elements left over after the unrolled loop. */
      for (i = remain; i > 0; i--) {
        *c_offset1 *= beta;
        c_offset1++;
      }
    }
  }

  return 0;
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/***************************************************************************
2+
* Copyright (c) 2024, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
29+
#include <arm_sve.h>
30+
31+
#include "common.h"
32+
33+
#define ALPHA_ONE
34+
#include "sbgemm_kernel_8x4_neoversev1_impl.c"
35+
#undef ALPHA_ONE
36+
#include "sbgemm_kernel_8x4_neoversev1_impl.c"
37+
38+
/*
 * SBGEMM kernel entry point: forwards to one of two implementations
 * depending on alpha.
 *
 * When alpha compares exactly equal to 1.0f the call goes to
 * sbgemm_kernel_neoversev1_alpha_one(); otherwise it goes to
 * sbgemm_kernel_neoversev1_alpha(). Both receive the arguments unchanged and
 * their return value is passed straight back to the caller.
 *
 * NOTE(review): the two callees presumably come from the two inclusions of
 * sbgemm_kernel_8x4_neoversev1_impl.c earlier in this file (once with
 * ALPHA_ONE defined, once without) -- confirm against that file.
 */
int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT *A, IFLOAT *B,
          FLOAT *C, BLASLONG ldc) {
  /* Exact floating-point comparison: only alpha == 1 selects this variant. */
  if (alpha == 1.0f)
    return sbgemm_kernel_neoversev1_alpha_one(m, n, k, alpha, A, B, C, ldc);

  return sbgemm_kernel_neoversev1_alpha(m, n, k, alpha, A, B, C, ldc);
}
46+

0 commit comments

Comments
 (0)