diff --git a/.github/workflows/arm64_graviton.yml b/.github/workflows/arm64_graviton.yml index 6928312b56..4b4e151672 100644 --- a/.github/workflows/arm64_graviton.yml +++ b/.github/workflows/arm64_graviton.yml @@ -88,13 +88,14 @@ jobs: run: | case "${{ matrix.build }}" in "make") - make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" + make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" ;; "cmake") mkdir build && cd build cmake -DDYNAMIC_ARCH=1 \ -DNOFORTRAN=0 \ -DBUILD_WITHOUT_LAPACK=0 \ + -DBUILD_BFLOAT16=1 \ -DCMAKE_VERBOSE_MAKEFILE=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 91037e8511..3679278b05 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -211,14 +211,14 @@ endif () if (${CORE} STREQUAL NEOVERSEV1) if (NOT DYNAMIC_ARCH) if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) - set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1") + set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") elseif (${CMAKE_C_COMPILER_ID} STREQUAL "NVC" AND NOT NO_SVE) set (CCOMMON_OPT "${CCOMMON_OPT} -tp=neoverse-v1") else () if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) - set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1") + set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") else () - set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve") + set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve+bf16") endif() endif() endif () diff --git a/cmake/system.cmake b/cmake/system.cmake index bac756901f..6ad73525a6 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -291,10 +291,10 @@ if (DEFINED TARGET) if (${TARGET} STREQUAL NEOVERSEV1) if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1") + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") else () if (CMAKE_C_COMPILER_VERSION VERSION_GREATER 10.4 OR CMAKE_C_COMPILER_VERSION VERSION_EQUAL 10.4) - set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1") + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve+bf16 -mtune=neoverse-v1") else () message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_VERSION} does not support Neoverse V1.") endif() diff --git a/driver/level3/level3.c b/driver/level3/level3.c index b7328876b4..5d3438450b 100644 --- a/driver/level3/level3.c +++ b/driver/level3/level3.c @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2025 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -305,7 +306,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, } BLASLONG pad_min_l = min_l; -#if defined(HALF) +#if defined(BFLOAT16) #if defined(DYNAMIC_ARCH) pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); #else diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index db3bffc10a..5ede6153ef 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -1,6 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ -/* Copyright 2023 The OpenBLAS Project. */ +/* Copyright 2023, 2025 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -324,7 +324,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, BLASLONG pad_min_l = min_l; -#if defined(HALF) +#if defined(BFLOAT16) #if defined(DYNAMIC_ARCH) pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); #else diff --git a/getarch.c b/getarch.c index cb0b3cd7ca..3fdbc9799e 100644 --- a/getarch.c +++ b/getarch.c @@ -1,5 +1,5 @@ /***************************************************************************** -Copyright (c) 2011-2014, The OpenBLAS Project +Copyright (c) 2011-2014, 2025 The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without @@ -1476,7 +1476,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. "-DL2_SIZE=1048576 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 " \ - "-march=armv8.4-a+sve -mtune=neoverse-v1" + "-march=armv8.4-a+sve+bf16 -mtune=neoverse-v1" #define LIBNAME "neoversev1" #define CORENAME "NEOVERSEV1" #endif diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 71df986af5..f4a4ee5be1 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -52,7 +52,7 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3) foreach (float_type ${FLOAT_TYPES}) -if (${float_type} STREQUAL "HALF") +if (${float_type} STREQUAL "BFLOAT16") continue() endif() GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type}) diff --git a/lapack/potrf/potrf_parallel.c b/lapack/potrf/potrf_parallel.c index c38a2632d5..2bbbc7bb3e 100644 --- a/lapack/potrf/potrf_parallel.c +++ b/lapack/potrf/potrf_parallel.c @@ -1,5 +1,6 @@ /*********************************************************************/ /* Copyright 2009, 2010 The University of Texas at Austin. */ +/* Copyright 2025 The OpenBLAS Project. */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -405,7 +406,7 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){ #elif defined(DOUBLE) mode = BLAS_DOUBLE | BLAS_REAL; mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; -#elif defined(HALF) +#elif defined(BFLOAT16) mode = BLAS_HALF | BLAS_REAL; mask = MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N) - 1; #else