Skip to content

Commit 3d31887

Browse files
authored
Merge pull request #5362 from Mousius/fix-bf16
Fix SBGEMM BFLOAT16 build
2 parents: 0ddf8eb + 552e1c7; commit: 3d31887

File tree

8 files changed: 16 additions (+16) and 13 deletions (-13).

.github/workflows/arm64_graviton.yml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -88,13 +88,14 @@ jobs:
8888
run: |
8989
case "${{ matrix.build }}" in
9090
"make")
91-
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
91+
make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
9292
;;
9393
"cmake")
9494
mkdir build && cd build
9595
cmake -DDYNAMIC_ARCH=1 \
9696
-DNOFORTRAN=0 \
9797
-DBUILD_WITHOUT_LAPACK=0 \
98+
-DBUILD_BFLOAT16=1 \
9899
-DCMAKE_VERBOSE_MAKEFILE=ON \
99100
-DCMAKE_BUILD_TYPE=Release \
100101
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \

cmake/cc.cmake

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -211,14 +211,14 @@ endif ()
211211
if (${CORE} STREQUAL NEOVERSEV1)
212212
if (NOT DYNAMIC_ARCH)
213213
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
214-
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
214+
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
215215
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE)
216216
set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v1")
217217
else ()
218218
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
219-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
219+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
220220
else ()
221-
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
221+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve+bf16")
222222
endif()
223223
endif()
224224
endif ()

cmake/system.cmake

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -291,10 +291,10 @@ if (DEFINED TARGET)
291291

292292
if (${TARGET} STREQUAL NEOVERSEV1)
293293
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
294-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
294+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
295295
else ()
296296
if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.4)
297-
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
297+
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1")
298298
else ()
299299
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} does not support Neoverse V1.")
300300
endif()

driver/level3/level3.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2025 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -305,7 +306,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
305306
}
306307

307308
BLASLONG pad_min_l = min_l;
308-
#if defined(HALF)
309+
#if defined(BFLOAT16)
309310
#if defined(DYNAMIC_ARCH)
310311
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
311312
#else

driver/level3/level3_thread.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3-
/* Copyright 2023 The OpenBLAS Project. */
3+
/* Copyright 2023, 2025 The OpenBLAS Project. */
44
/* All rights reserved. */
55
/* */
66
/* Redistribution and use in source and binary forms, with or */
@@ -324,7 +324,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
324324

325325
BLASLONG pad_min_l = min_l;
326326

327-
#if defined(HALF)
327+
#if defined(BFLOAT16)
328328
#if defined(DYNAMIC_ARCH)
329329
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
330330
#else

getarch.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*****************************************************************************
2-
Copyright (c) 2011-2014, The OpenBLAS Project
2+
Copyright (c) 2011-2014, 2025 The OpenBLAS Project
33
All rights reserved.
44
55
Redistribution and use in source and binary forms, with or without
@@ -1476,7 +1476,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
14761476
"-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
14771477
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
14781478
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \
1479-
"-march=armv8.4-a+sve -mtune=neoverse-v1"
1479+
"-march=armv8.4-a+sve+bf16 -mtune=neoverse-v1"
14801480
#define LIBNAME "neoversev1"
14811481
#define CORENAME "NEOVERSEV1"
14821482
#endif

lapack/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa
5252
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)
5353

5454
foreach (float_type ${FLOAT_TYPES})
55-
if (${float_type} STREQUAL "HALF")
55+
if (${float_type} STREQUAL "BFLOAT16")
5656
continue()
5757
endif()
5858
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})

lapack/potrf/potrf_parallel.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2025 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -405,7 +406,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
405406
#elif defined(DOUBLE)
406407
mode = BLAS_DOUBLE | BLAS_REAL;
407408
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1;
408-
#elif defined(HALF)
409+
#elif defined(BFLOAT16)
409410
mode = BLAS_HALF | BLAS_REAL;
410411
mask = MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N) - 1;
411412
#else

0 commit comments

Comments
 (0)