From dadceeb53a3aa70577ee5dd8096ff7343fa751a8 Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 12:35:43 +0000 Subject: [PATCH 01/26] feat: add CMake options for RISC-V and RVV1.0 --- GraphBLAS/CMakeLists.txt | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/GraphBLAS/CMakeLists.txt b/GraphBLAS/CMakeLists.txt index dd146ad193..f296adbd1a 100644 --- a/GraphBLAS/CMakeLists.txt +++ b/GraphBLAS/CMakeLists.txt @@ -162,6 +162,26 @@ if ( DEFINED GBAVX512F ) endif ( ) endif ( ) +#------------------------------------------------------------------------------- +# RISC-V +#------------------------------------------------------------------------------- + +if ( DEFINED GBRISCV64 ) + if ( GBRISCV64 ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRISCV64=0 " ) + endif ( ) +endif ( ) + +if ( DEFINED GBRVV ) + if ( GBRVV ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=1 " ) + else ( ) + set ( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGBRVV=0 " ) + endif ( ) +endif ( ) + #------------------------------------------------------------------------------- # check compiler features #------------------------------------------------------------------------------- From 2cf8670523e13dcbdc98602b9f1374bf42dd0576 Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 12:42:53 +0000 Subject: [PATCH 02/26] feat: add riscv vectorization support in global values --- GraphBLAS/Source/global/GB_Global.c | 32 ++++++++++++++++++++++++++++- GraphBLAS/Source/global/GB_Global.h | 1 + 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/GraphBLAS/Source/global/GB_Global.c b/GraphBLAS/Source/global/GB_Global.c index f0b53e9962..f66a4066a0 100644 --- a/GraphBLAS/Source/global/GB_Global.c +++ b/GraphBLAS/Source/global/GB_Global.c @@ -131,6 +131,7 @@ typedef struct bool cpu_features_avx2 ; // x86_64 with AVX2 bool cpu_features_avx512f ; // x86_64 with AVX512f + bool cpu_features_rvv ; // RISC-V with RVV1.0 //-------------------------------------------------------------------------- // CUDA (DRAFT: in progress): @@ -215,6 +216,7 @@ static GB_Global_struct GB_Global = // CPU features .cpu_features_avx2 = false, // x86_64 with AVX2 .cpu_features_avx512f = false, // x86_64 with AVX512f + .cpu_features_rvv = false, // RISC-V with RVV1.0 // CUDA environment (DRAFT: in progress) .gpu_count = 0, // # of GPUs in the system @@ -292,6 +294,7 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx2 = false ; } #endif + #if defined ( GBAVX512F ) { // the build system asserts whether or not AVX512F is available @@ -303,19 +306,41 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx512f = false ; } #endif + } #endif + } + #endif + #if GBRISCV64 + { + //---------------------------------------------------------------------- + // xRISC-V architecture: see if RVV1.0 is supported + //---------------------------------------------------------------------- + + #if defined ( GBRVV ) + { + // the build system asserts whether or not RVV1.0 is available + GB_Global.cpu_features_rvv = (bool) (GBRVV) ; + } + #else + { + // RVV1.0 not available + GB_Global.cpu_features_rvv = false ; + } + #endif + } #else { //---------------------------------------------------------------------- - // not on the x86_64 architecture, so no AVX2 or AVX512F acceleration + // not on the x86_64 or RISC-V architecture, so no AVX2, AVX512F or RVV1.0 acceleration //---------------------------------------------------------------------- GB_Global.cpu_features_avx2 = false ; GB_Global.cpu_features_avx512f = false ; + GB_Global.cpu_features_rvv = false ; } #endif @@ -331,6 +356,11 @@ bool GB_Global_cpu_features_avx512f (void) return (GB_Global.cpu_features_avx512f) ; } +bool GB_Global_cpu_features_rvv (void) +{ + return (GB_Global.cpu_features_rvv) ; +} + //------------------------------------------------------------------------------ // hyper_switch //------------------------------------------------------------------------------ diff --git a/GraphBLAS/Source/global/GB_Global.h b/GraphBLAS/Source/global/GB_Global.h index 0f42b8767c..c5c5685077 100644 --- a/GraphBLAS/Source/global/GB_Global.h +++ b/GraphBLAS/Source/global/GB_Global.h @@ -17,6 +17,7 @@ void GB_Global_cpu_features_query (void) ; bool GB_Global_cpu_features_avx2 (void) ; bool GB_Global_cpu_features_avx512f (void) ; +bool GB_Global_cpu_features_rvv (void) ; void GB_Global_mode_set (GrB_Mode mode) ; GrB_Mode GB_Global_mode_get (void) ; From 5e6d5f9d9fa3d1820a30a44057aac2f1608c5cfa Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 12:48:05 +0000 Subject: [PATCH 03/26] feat: add defines for RVV1.0 --- GraphBLAS/Source/include/GB_compiler.h | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/GraphBLAS/Source/include/GB_compiler.h b/GraphBLAS/Source/include/GB_compiler.h index ecccdc96a1..c4116c424e 100644 --- a/GraphBLAS/Source/include/GB_compiler.h +++ b/GraphBLAS/Source/include/GB_compiler.h @@ -245,6 +245,16 @@ #endif +#if !defined ( GBRISCV64 ) + + #if defined(__riscv) + #define GBRISCV64 1 + #else + #define GBRISCV64 0 + #endif + +#endif + //------------------------------------------------------------------------------ // AVX2 and AVX512F support for the x86_64 architecture //------------------------------------------------------------------------------ @@ -306,6 +316,31 @@ #define GB_TARGET_AVX2 #endif +//------------------------------------------------------------------------------ +// RVV1.0 support for the RISC-V architecture +//------------------------------------------------------------------------------ + +#if GBRISCV64 + #if GB_COMPILER_GCC + // TODO: add other compilers + #if __GNUC__ >= 13 + #define GB_COMPILER_SUPPORTS_RVV1 1 + #else + #define GB_COMPILER_SUPPORTS_RVV1 0 + #endif + #endif +#else + // non-RISC-V architecture + #define GB_COMPILER_SUPPORTS_RVV1 0 +#endif + +// prefix for function with target rvv1.0 +#if GB_COMPILER_SUPPORTS_RVV1 + #define GB_TARGET_RVV1 __attribute__ ((target ("arch=rv64gcv"))) +#else + #define GB_TARGET_RVV1 +#endif + //------------------------------------------------------------------------------ // disable Google's cpu_featgures on some compilers //------------------------------------------------------------------------------ From 3eb13f9fc657acb8d8bfbc36c97c7e12e334a9ed Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Fri, 29 Nov 2024 13:02:46 +0000 Subject: [PATCH 04/26] feat: add rvv function templates --- .../FactoryKernels/GB_AxB__plus_times_fp32.c | 33 +++++++++++++++ .../FactoryKernels/GB_AxB__plus_times_fp64.c | 33 +++++++++++++++ GraphBLAS/GraphBLAS/rename/GB_rename.h | 1 + GraphBLAS/Source/codegen/Generator/GB_AxB.c | 34 +++++++++++++++ .../template/GB_jit_kernel_AxB_saxpy5.c | 42 +++++++++++++++++++ .../Source/mxm/factory/GB_AxB_saxpy5_meta.c | 11 +++++ 6 files changed, 154 insertions(+) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c index f2fa0f0bfc..13392e8bec 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -282,6 +282,39 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) #endif + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + printf("riscvhype!\n"); + #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + } + + #endif + //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 40d1d7d2ae..8e8ea782a6 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -282,6 +282,39 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) #endif + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + printf("riscvhype!\n"); + #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + } + + #endif + //---------------------------------------------------------------------- // saxpy5 method unrolled, with no vectors //---------------------------------------------------------------------- diff --git a/GraphBLAS/GraphBLAS/rename/GB_rename.h b/GraphBLAS/GraphBLAS/rename/GB_rename.h index 2767635b09..3c86ace6f3 100644 --- a/GraphBLAS/GraphBLAS/rename/GB_rename.h +++ b/GraphBLAS/GraphBLAS/rename/GB_rename.h @@ -389,6 +389,7 @@ #define GB_Global_calloc_function_set GM_Global_calloc_function_set #define GB_Global_cpu_features_avx2 GM_Global_cpu_features_avx2 #define GB_Global_cpu_features_avx512f GM_Global_cpu_features_avx512f +#define GB_Global_cpu_features_rvv GM_Global_cpu_features_rvv #define GB_Global_cpu_features_query GM_Global_cpu_features_query #define GB_Global_flush_get GM_Global_flush_get #define GB_Global_flush_set GM_Global_flush_set diff --git a/GraphBLAS/Source/codegen/Generator/GB_AxB.c b/GraphBLAS/Source/codegen/Generator/GB_AxB.c index c3d8f1b0af..5a52374c88 100644 --- a/GraphBLAS/Source/codegen/Generator/GB_AxB.c +++ b/GraphBLAS/Source/codegen/Generator/GB_AxB.c @@ -310,6 +310,40 @@ m4_divert(if_semiring_has_avx) } #endif +m4_divert(if_semiring_has_rvv) + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + printf("riscvhype!\n"); + #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + } + + #endif + m4_divert(if_saxpy5_enabled) //---------------------------------------------------------------------- diff --git a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index 4b3acc6653..ed56379f39 100644 --- a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -82,6 +82,38 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; } #endif + + //---------------------------------------------------------------------- + // saxpy5 method with RISC-V vectors + //---------------------------------------------------------------------- + #define GB_V16_256 (16 * GB_Z_NBITS <= 256) + #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) + #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) + + #undef GB_V16 + #undef GB_V8 + #undef GB_V4 + + #define GB_V16 GB_V16_256 + #define GB_V8 GB_V8_256 + #define GB_V4 GB_V4_256 + + #if GB_COMPILER_SUPPORTS_RVV1 + + GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_rvv + ( + GrB_Matrix C, + const GrB_Matrix A, + const GrB_Matrix B, + const int ntasks, + const int nthreads, + const int64_t *B_slice + ) + { + #include "template/GB_AxB_saxpy5_unrolled.c" + } + + #endif #endif @@ -168,6 +200,16 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) return (GrB_SUCCESS) ; } #endif + + #if GB_COMPILER_SUPPORTS_RVV1 + if (cpu_has_avx2) + { + // RISC-V64 with RVV1.0 + GB_AxB_saxpy5_unrolled_rvv (C, A, B, ntasks, nthreads, + B_slice) ; + return (GrB_SUCCESS) ; + } + #endif } #endif diff --git a/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c index 9a48d43859..1141c2a8f1 100644 --- a/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -143,6 +143,17 @@ } #endif #endif + + #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION + #if GB_COMPILER_SUPPORTS_RVV1 + if (GB_Global_cpu_features_rvv ( )) + { + GB_AxB_saxpy5_unrolled_rvv (C, A, B, + ntasks, nthreads, B_slice) ; + return (GrB_SUCCESS) ; + } + #endif + #endif // any architecture and any built-in semiring GB_AxB_saxpy5_unrolled_vanilla (C, A, B, ntasks, nthreads, B_slice) ; From 78957e39c51cfb593f6b28e5b66346607215e3a9 Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Sat, 30 Nov 2024 09:57:11 +0000 Subject: [PATCH 05/26] test: add test --- test/test.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 test/test.c diff --git a/test/test.c b/test/test.c new file mode 100644 index 0000000000..b9fe12bc59 --- /dev/null +++ b/test/test.c @@ -0,0 +1,118 @@ +#include +#include +#include +#include +// #include "../GraphBLAS/Source/include/GB_compiler.h" + +int main() +{ + GrB_Info info; + GrB_Matrix A, B, C; + + GrB_Index nrows = 1000, ncols = 1000; + int test_count = 20; + + info = GrB_init(GrB_NONBLOCKING); + if (info != GrB_SUCCESS) + { + printf("Initialization failed!\n"); + return 1; + } + GrB_Matrix_new(&A, GrB_FP64, nrows, ncols); + GrB_Matrix_new(&B, GrB_FP64, nrows, ncols); + GrB_Matrix_new(&C, GrB_FP64, nrows, ncols); + + GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); + // INITIALIZE MATRICES + srand(52); + clock_t start = clock(); + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(A, value, i, j); + } + } + + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(B, value, i, j); + } + } + + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(C, 0, i, j); + } + } + // printf("!%d\n",GBX86); + // printf("!%d\n",GBRISCV64); + clock_t end = clock(); + float seconds = (float)(end - start) / CLOCKS_PER_SEC; + printf("==============MATRIX SIZE: %lux%lu==============\n", nrows, ncols); + printf("==============INITIALIZING TIME: %f==============\n\n", seconds); + + // Set Matrices type + + GrB_set(A, GxB_SPARSE, GxB_SPARSITY_CONTROL); + GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); + GrB_set(C, GxB_FULL, GxB_SPARSITY_CONTROL); + int32_t sparsityA; + GrB_get(A, &sparsityA, GxB_SPARSITY_STATUS); + //printf("A matrix type: %d\n", sparsityA); + int32_t sparsityB; + GrB_get(B, &sparsityB, GxB_SPARSITY_STATUS); + // printf("B matrix type: %d\n", sparsityB); + int32_t sparsityC; + GrB_get(C, &sparsityC, GxB_SPARSITY_STATUS); +// printf("C matrix type: %d\n", sparsityB); + + float average_time = 0.0; + printf("=================NUBMER OF TESTS: %d=================\n", test_count); + for (int i = 0; i < test_count; i++) + { + // double element; + // GrB_Matrix_extractElement_FP64(&element,C,0,0); + // printf("first C element %f\n",element); + clock_t start = clock(); + info = GrB_mxm(C, NULL, GrB_PLUS_FP64, GxB_PLUS_TIMES_FP64, A, B, NULL); + if (info != GrB_SUCCESS) + { + printf("Multiplication failed!\n"); + return 1; + } + clock_t end = clock(); + float seconds = (float)(end - start) / CLOCKS_PER_SEC; + // printf("test %d: time: %f\n", i + 1, seconds); + printf("%f;\n", seconds); + average_time += seconds; + for (GrB_Index i = 0; i < nrows; i++) + { + for (GrB_Index j = 0; j < ncols; j++) + { + double value = (double)rand() / RAND_MAX; + info = GrB_Matrix_setElement_FP64(C, 0, i, j); + } + } + if (i==0){ + average_time -= seconds; + } + } + printf("average time: %f\n", average_time / (test_count-1)); + // printf("%f\n", average_time / test_count); + + GrB_Matrix_free(&A); + GrB_Matrix_free(&B); + GrB_Matrix_free(&C); + + GrB_finalize(); + + return 0; +} From cc334ff91cc15f7f3d4c0a85cf62bfb881b2a3e8 Mon Sep 17 00:00:00 2001 From: Suvorov Rodion Date: Sat, 30 Nov 2024 11:39:42 +0000 Subject: [PATCH 06/26] fix: add defines for RVV implementation --- GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c | 1 + GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c | 1 + GraphBLAS/Source/GB_control.h | 3 +-- .../Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c | 6 +++++- GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c | 2 +- GraphBLAS/Source/mxm/include/GB_mxm_shared_definitions.h | 6 ++++++ 6 files changed, 15 insertions(+), 4 deletions(-) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c index 13392e8bec..0162dd5cde 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -43,6 +43,7 @@ // special case semirings: #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 1 +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 1 // monoid properties: #define GB_Z_TYPE float diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 8e8ea782a6..91bc06ad84 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -43,6 +43,7 @@ // special case semirings: #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 1 +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 1 // monoid properties: #define GB_Z_TYPE double diff --git a/GraphBLAS/Source/GB_control.h b/GraphBLAS/Source/GB_control.h index 7d07af3cad..65dc39f3f1 100644 --- a/GraphBLAS/Source/GB_control.h +++ b/GraphBLAS/Source/GB_control.h @@ -2397,5 +2397,4 @@ #define GxB_NO_TIMES_SECONDJ_INT32 1 #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 - #define GxB_NO_TIMES_SECONDJ1_INT64 1 - + #define GxB_NO_TIMES_SECONDJ1_INT64 1 \ No newline at end of file diff --git a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index ed56379f39..1f42be68d0 100644 --- a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -200,8 +200,12 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) return (GrB_SUCCESS) ; } #endif + } + #endif - #if GB_COMPILER_SUPPORTS_RVV1 + #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION + { + #if GB_COMPILER_SUPPORTS_RVV1 if (cpu_has_avx2) { // RISC-V64 with RVV1.0 diff --git a/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c index 1141c2a8f1..8bf7223eac 100644 --- a/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -144,7 +144,7 @@ #endif #endif - #if GB_SEMIRING_HAS_AVX_IMPLEMENTATION + #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION #if GB_COMPILER_SUPPORTS_RVV1 if (GB_Global_cpu_features_rvv ( )) { diff --git a/GraphBLAS/Source/mxm/include/GB_mxm_shared_definitions.h b/GraphBLAS/Source/mxm/include/GB_mxm_shared_definitions.h index 5b530993c6..d709470418 100644 --- a/GraphBLAS/Source/mxm/include/GB_mxm_shared_definitions.h +++ b/GraphBLAS/Source/mxm/include/GB_mxm_shared_definitions.h @@ -71,6 +71,12 @@ #define GB_SEMIRING_HAS_AVX_IMPLEMENTATION 0 #endif +//1 if the semiring has a RVV1.0 implementation +#ifndef GB_SEMIRING_HAS_RVV_IMPLEMENTATION +#define GB_SEMIRING_HAS_RVV_IMPLEMENTATION 0 +#endif + + //------------------------------------------------------------------------------ // special multiply operators //------------------------------------------------------------------------------ From 4b4329dac6e541c44bd8582165ff2283a18251c6 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 14 Dec 2024 15:56:19 +0300 Subject: [PATCH 07/26] feat: implement vectorized with rvv saxpy function --- .../FactoryKernels/GB_AxB__plus_times_fp64.c | 18 ++----- .../Source/mxm/template/GB_AxB_saxpy5_lv.c | 54 +++++++++++++++++++ 2 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 91bc06ad84..65319e8527 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -6,7 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ - +#include #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" @@ -286,18 +286,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 - + #if GB_COMPILER_SUPPORTS_RVV1 GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv @@ -310,8 +299,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("riscvhype!\n"); - #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + #include "mxm/template/GB_AxB_saxpy5_lv.c" } #endif diff --git a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c new file mode 100644 index 0000000000..6d13b14f02 --- /dev/null +++ b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -0,0 +1,54 @@ +{ + + //-------------------------------------------------------------------------- + // get C, A, and B + //-------------------------------------------------------------------------- + + const int64_t m = C->vlen; // # of rows of C and A + const int64_t *restrict Bp = B->p; + const int64_t *restrict Bh = B->h; + const int64_t *restrict Bi = B->i; +#ifdef GB_JIT_KERNEL +#define B_iso GB_B_ISO +#else + const bool B_iso = B->iso; +#endif + const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; +#if !GB_B_IS_PATTERN + const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; +#endif + GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; + int tid; +#pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) + for (tid = 0; tid < ntasks; tid++) + { + // get the task descriptor + const int64_t jB_start = B_slice[tid]; + const int64_t jB_end = B_slice[tid + 1]; + + // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) + for (int64_t jB = jB_start; jB < jB_end; jB++) + { + // get B(:,j) and C(:,j) + const int64_t j = GBH_B(Bh, jB); + GB_C_TYPE *restrict Cxj = Cx + (j * m); + const int64_t pB_start = Bp[jB]; + const int64_t pB_end = Bp[jB + 1]; + + size_t vl = __riscv_vsetvl_e64m8(m); + + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj, vl); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = Bi[pB]; + GB_DECLAREB(bkj); + GB_GETB(bkj, Bx, pB, B_iso); + // const GB_B_TYPE bkj = Bx[pB]; + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + k * m, vl); + vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + } + + __riscv_vse64_v_f64m8(Cxj, vc, vl); + } + } +} From 31a7ed704aeab47316c9d845f0299e0db2f10172 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 14 Dec 2024 17:52:02 +0300 Subject: [PATCH 08/26] feat: correct include --- GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 65319e8527..330363d05a 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -6,7 +6,9 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ +#ifdef GBRISCV64 #include +#endif #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" From 80fd86a27deac44e2fffda5b89328f71dd360063 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 18 Dec 2024 22:55:53 +0300 Subject: [PATCH 09/26] feat: correct support of saxpy5 lv in factory kernels. --- .../FactoryKernels/GB_AxB__plus_times_fp32.c | 20 ++++++------------- .../FactoryKernels/GB_AxB__plus_times_fp64.c | 4 ++++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c index 0162dd5cde..21eae2fa72 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -6,7 +6,10 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ - +#ifdef GBRISCV64 +#include +#endif +#include "stdio.h" #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" @@ -285,18 +288,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors - //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 + //--------------------------------------------------------------------- #if GB_COMPILER_SUPPORTS_RVV1 @@ -311,7 +303,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) ) { printf("riscvhype!\n"); - #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + #include "mxm/template/GB_AxB_saxpy5_lv.c" } #endif diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 330363d05a..129bcc924c 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -9,6 +9,7 @@ #ifdef GBRISCV64 #include #endif +#include "stdio.h" #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" @@ -280,6 +281,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("avx2\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } @@ -301,6 +303,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("rvv\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } @@ -328,6 +331,7 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { + printf("vanilla\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } From 00fad560c37bd42233b16e9fc918ed9a168231f7 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 18 Dec 2024 22:57:52 +0300 Subject: [PATCH 10/26] fix: correct vector extension define in global structure --- GraphBLAS/Source/global/GB_Global.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GraphBLAS/Source/global/GB_Global.c b/GraphBLAS/Source/global/GB_Global.c index f66a4066a0..9eaaff03f0 100644 --- a/GraphBLAS/Source/global/GB_Global.c +++ b/GraphBLAS/Source/global/GB_Global.c @@ -311,8 +311,7 @@ void GB_Global_cpu_features_query (void) #endif } - #endif - #if GBRISCV64 + #elif GBRISCV64 { //---------------------------------------------------------------------- // xRISC-V architecture: see if RVV1.0 is supported From 85217a40276d8b361b51cee1c23b40a39210e529 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 18 Dec 2024 22:58:53 +0300 Subject: [PATCH 11/26] fix: saxpy function with rvv --- .../Source/mxm/template/GB_AxB_saxpy5_lv.c | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c index 6d13b14f02..6ab7de5124 100644 --- a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -1,54 +1,55 @@ { - - //-------------------------------------------------------------------------- - // get C, A, and B - //-------------------------------------------------------------------------- - const int64_t m = C->vlen; // # of rows of C and A const int64_t *restrict Bp = B->p; const int64_t *restrict Bh = B->h; const int64_t *restrict Bi = B->i; -#ifdef GB_JIT_KERNEL -#define B_iso GB_B_ISO -#else - const bool B_iso = B->iso; -#endif const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; -#if !GB_B_IS_PATTERN const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; -#endif + size_t vl = __riscv_vsetvl_e64m8(m); GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; - int tid; + #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) - for (tid = 0; tid < ntasks; tid++) + for (int tid = 0; tid < ntasks; tid++) { - // get the task descriptor const int64_t jB_start = B_slice[tid]; const int64_t jB_end = B_slice[tid + 1]; - // C(:,jB_start:jB_end-1) += A * B(:,jB_start:jB_end-1) for (int64_t jB = jB_start; jB < jB_end; jB++) { - // get B(:,j) and C(:,j) const int64_t j = GBH_B(Bh, jB); GB_C_TYPE *restrict Cxj = Cx + (j * m); const int64_t pB_start = Bp[jB]; const int64_t pB_end = Bp[jB + 1]; + for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) + { + vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), vl); - size_t vl = __riscv_vsetvl_e64m8(m); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = Bi[pB]; + const GB_B_TYPE bkj = Bx[pB]; + vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), vl); + vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + } - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj, vl); - for (int64_t pB = pB_start; pB < pB_end; pB++) - { - const int64_t k = Bi[pB]; - GB_DECLAREB(bkj); - GB_GETB(bkj, Bx, pB, B_iso); - // const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + k * m, vl); - vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); } + int64_t remaining = m % vl; + if (remaining > 0) + { + int64_t i = m - remaining; + vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), remaining); - __riscv_vse64_v_f64m8(Cxj, vc, vl); + for (int64_t pB = pB_start; pB < pB_end; pB++) + { + const int64_t k = Bi[pB]; + const GB_B_TYPE bkj = Bx[pB]; + vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); + vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); + } + + __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); + } } } } From e6070fe612199695617969ca55f4f20560876e14 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Fri, 27 Dec 2024 12:32:26 +0300 Subject: [PATCH 12/26] refactor: remove straided functions --- GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c | 3 --- GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c | 14 +++++++------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 129bcc924c..25fe08225d 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -281,7 +281,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("avx2\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } @@ -303,7 +302,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("rvv\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } @@ -331,7 +329,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) const int64_t *B_slice ) { - printf("vanilla\n"); #include "mxm/template/GB_AxB_saxpy5_unrolled.c" } diff --git a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c index 6ab7de5124..5ef338c915 100644 --- a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -1,5 +1,5 @@ { - const int64_t m = C->vlen; // # of rows of C and A + const int64_t m = C->vlen; const int64_t *restrict Bp = B->p; const int64_t *restrict Bh = B->h; const int64_t *restrict Bi = B->i; @@ -22,33 +22,33 @@ const int64_t pB_end = Bp[jB + 1]; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { - vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), vl); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), vl); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), vl); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); } - __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); + __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); } int64_t remaining = m % vl; if (remaining > 0) { int64_t i = m - remaining; - vfloat64m8_t vc = __riscv_vlse64_v_f64m8(Cxj + i, sizeof(double), remaining); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vlse64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); } - __riscv_vsse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); + __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); } } } From 65936eae42a3c420bba1ccaa0be22b8a3bb76388 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 30 Dec 2024 19:09:32 +0300 Subject: [PATCH 13/26] fix: redundant arguments --- GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c index 5ef338c915..3ca0e2d717 100644 --- a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -22,33 +22,33 @@ const int64_t pB_end = Bp[jB + 1]; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), vl); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), vl); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, vl); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); } - __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, vl); + __riscv_vse64_v_f64m8(Cxj + i, vc, vl); } int64_t remaining = m % vl; if (remaining > 0) { int64_t i = m - remaining; - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, sizeof(double), remaining); + vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, sizeof(double), remaining); + vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, remaining); vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); } - __riscv_vse64_v_f64m8(Cxj + i, sizeof(double), vc, remaining); + __riscv_vse64_v_f64m8(Cxj + i, vc, remaining); } } } From c57ccb20db2dcb906b303ed4bd426496ce4d14ba Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 14:57:33 +0300 Subject: [PATCH 14/26] refactor: remove test files --- test/test.c | 118 ---------------------------------------------------- 1 file changed, 118 deletions(-) delete mode 100644 test/test.c diff --git a/test/test.c b/test/test.c deleted file mode 100644 index b9fe12bc59..0000000000 --- a/test/test.c +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -// #include "../GraphBLAS/Source/include/GB_compiler.h" - -int main() -{ - GrB_Info info; - GrB_Matrix A, B, C; - - GrB_Index nrows = 1000, ncols = 1000; - int test_count = 20; - - info = GrB_init(GrB_NONBLOCKING); - if (info != GrB_SUCCESS) - { - printf("Initialization failed!\n"); - return 1; - } - GrB_Matrix_new(&A, GrB_FP64, nrows, ncols); - GrB_Matrix_new(&B, GrB_FP64, nrows, ncols); - GrB_Matrix_new(&C, GrB_FP64, nrows, ncols); - - GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); - // INITIALIZE MATRICES - srand(52); - clock_t start = clock(); - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(A, value, i, j); - } - } - - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(B, value, i, j); - } - } - - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(C, 0, i, j); - } - } - // printf("!%d\n",GBX86); - // printf("!%d\n",GBRISCV64); - clock_t end = clock(); - float seconds = (float)(end - start) / CLOCKS_PER_SEC; - printf("==============MATRIX SIZE: %lux%lu==============\n", nrows, ncols); - printf("==============INITIALIZING TIME: %f==============\n\n", seconds); - - // Set Matrices type - - GrB_set(A, GxB_SPARSE, GxB_SPARSITY_CONTROL); - GrB_set(B, GxB_FULL, GxB_SPARSITY_CONTROL); - GrB_set(C, GxB_FULL, GxB_SPARSITY_CONTROL); - int32_t sparsityA; - GrB_get(A, &sparsityA, GxB_SPARSITY_STATUS); - //printf("A matrix type: %d\n", sparsityA); - int32_t sparsityB; - GrB_get(B, &sparsityB, GxB_SPARSITY_STATUS); - // printf("B matrix type: %d\n", sparsityB); - int32_t sparsityC; - GrB_get(C, &sparsityC, GxB_SPARSITY_STATUS); -// printf("C matrix type: %d\n", sparsityB); - - float average_time = 0.0; - printf("=================NUBMER OF TESTS: %d=================\n", test_count); - for (int i = 0; i < test_count; i++) - { - // double element; - // GrB_Matrix_extractElement_FP64(&element,C,0,0); - // printf("first C element %f\n",element); - clock_t start = clock(); - info = GrB_mxm(C, NULL, GrB_PLUS_FP64, GxB_PLUS_TIMES_FP64, A, B, NULL); - if (info != GrB_SUCCESS) - { - printf("Multiplication failed!\n"); - return 1; - } - clock_t end = clock(); - float seconds = (float)(end - start) / CLOCKS_PER_SEC; - // printf("test %d: time: %f\n", i + 1, seconds); - printf("%f;\n", seconds); - average_time += seconds; - for (GrB_Index i = 0; i < nrows; i++) - { - for (GrB_Index j = 0; j < ncols; j++) - { - double value = (double)rand() / RAND_MAX; - info = GrB_Matrix_setElement_FP64(C, 0, i, j); - } - } - if (i==0){ - average_time -= seconds; - } - } - printf("average time: %f\n", average_time / (test_count-1)); - // printf("%f\n", average_time / test_count); - - GrB_Matrix_free(&A); - GrB_Matrix_free(&B); - GrB_Matrix_free(&C); - - GrB_finalize(); - - return 0; -} From bee2b98e776d9d009b034404a0892c48ebef0f83 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 14:57:59 +0300 Subject: [PATCH 15/26] refactor: remove debug prints --- GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c | 2 -- GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c | 1 - 2 files changed, 3 deletions(-) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c index 21eae2fa72..3ee25daf5a 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -9,7 +9,6 @@ #ifdef GBRISCV64 #include #endif -#include "stdio.h" #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" @@ -302,7 +301,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp32) const int64_t *B_slice ) { - printf("riscvhype!\n"); #include "mxm/template/GB_AxB_saxpy5_lv.c" } diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 25fe08225d..330363d05a 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -9,7 +9,6 @@ #ifdef GBRISCV64 #include #endif -#include "stdio.h" #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" From 14d84da75515b787aa3fc0f163ecb45baa249ca8 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:15:56 +0300 Subject: [PATCH 16/26] fix: codegen rvv support --- GraphBLAS/Source/GB_control.h | 3 ++- GraphBLAS/Source/codegen/Generator/GB_AxB.c | 17 ++++------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/GraphBLAS/Source/GB_control.h b/GraphBLAS/Source/GB_control.h index 65dc39f3f1..03842b1656 100644 --- a/GraphBLAS/Source/GB_control.h +++ b/GraphBLAS/Source/GB_control.h @@ -2397,4 +2397,5 @@ #define GxB_NO_TIMES_SECONDJ_INT32 1 #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 - #define GxB_NO_TIMES_SECONDJ1_INT64 1 \ No newline at end of file + #define GxB_NO_TIMES_SECONDJ1_INT64 1 + \ No newline at end of file diff --git a/GraphBLAS/Source/codegen/Generator/GB_AxB.c b/GraphBLAS/Source/codegen/Generator/GB_AxB.c index 5a52374c88..a038f3dc96 100644 --- a/GraphBLAS/Source/codegen/Generator/GB_AxB.c +++ b/GraphBLAS/Source/codegen/Generator/GB_AxB.c @@ -7,6 +7,9 @@ //------------------------------------------------------------------------------ +#ifdef GBRISCV64 +#include +#endif #include "GB.h" #include "GB_control.h" #include "mxm/GB_AxB_saxpy.h" @@ -314,17 +317,6 @@ m4_divert(if_semiring_has_rvv) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 #if GB_COMPILER_SUPPORTS_RVV1 @@ -338,8 +330,7 @@ m4_divert(if_semiring_has_rvv) const int64_t *B_slice ) { - printf("riscvhype!\n"); - #include "mxm/template/GB_AxB_saxpy5_unrolled.c" + #include "mxm/template/GB_AxB_saxpy5_lv.c" } #endif From 4e5d24beb4b2ba63227952789e35e8f74bfa94d4 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:21:10 +0300 Subject: [PATCH 17/26] small refactor --- GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c | 1 + GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c | 1 + 2 files changed, 2 insertions(+) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c index 3ee25daf5a..7fc619d4ab 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -6,6 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ + #ifdef GBRISCV64 #include #endif diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 330363d05a..6a019e2624 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -6,6 +6,7 @@ // SPDX-License-Identifier: Apache-2.0 //------------------------------------------------------------------------------ + #ifdef GBRISCV64 #include #endif From 686f76add34450328f83e1b30a666ad880487cb2 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:24:46 +0300 Subject: [PATCH 18/26] refactor: jit kernel rvv support --- .../template/GB_jit_kernel_AxB_saxpy5.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index 1f42be68d0..a7342cc350 100644 --- a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -8,6 +8,9 @@ //------------------------------------------------------------------------------ #include "include/GB_AxB_saxpy3_template.h" +#ifdef GBRISCV64 +#include +#endif GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; @@ -86,17 +89,6 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #define GB_V16_256 (16 * GB_Z_NBITS <= 256) - #define GB_V8_256 ( 8 * GB_Z_NBITS <= 256) - #define GB_V4_256 ( 4 * GB_Z_NBITS <= 256) - - #undef GB_V16 - #undef GB_V8 - #undef GB_V4 - - #define GB_V16 GB_V16_256 - #define GB_V8 GB_V8_256 - #define GB_V4 GB_V4_256 #if GB_COMPILER_SUPPORTS_RVV1 @@ -110,7 +102,7 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; const int64_t *B_slice ) { - #include "template/GB_AxB_saxpy5_unrolled.c" + #include "template/GB_AxB_saxpy5_lv.c" } #endif From 13ff474b91e49878792de2d4c18129e6da093770 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Thu, 2 Jan 2025 15:25:49 +0300 Subject: [PATCH 19/26] refactor: new line symbol --- GraphBLAS/Source/GB_control.h | 1 - 1 file changed, 1 deletion(-) diff --git a/GraphBLAS/Source/GB_control.h b/GraphBLAS/Source/GB_control.h index 03842b1656..2cfabe3e0c 100644 --- a/GraphBLAS/Source/GB_control.h +++ b/GraphBLAS/Source/GB_control.h @@ -2398,4 +2398,3 @@ #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 #define GxB_NO_TIMES_SECONDJ1_INT64 1 - \ No newline at end of file From c8e0894f2f1c041f24ebb6085d2c7e2b16e3251b Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 4 Jan 2025 21:55:42 +0300 Subject: [PATCH 20/26] refactor: very small --- GraphBLAS/Source/global/GB_Global.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/GraphBLAS/Source/global/GB_Global.c b/GraphBLAS/Source/global/GB_Global.c index 9eaaff03f0..2fa8269bdb 100644 --- a/GraphBLAS/Source/global/GB_Global.c +++ b/GraphBLAS/Source/global/GB_Global.c @@ -294,7 +294,6 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx2 = false ; } #endif - #if defined ( GBAVX512F ) { // the build system asserts whether or not AVX512F is available @@ -306,7 +305,6 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx512f = false ; } #endif - } #endif From 72564cf012ca9d84ed66589c0a16346669adf8ca Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Wed, 5 Feb 2025 19:32:44 +0300 Subject: [PATCH 21/26] feat: implement float support --- .../FactoryKernels/GB_AxB__plus_times_fp32.c | 8 ++++++++ .../FactoryKernels/GB_AxB__plus_times_fp64.c | 9 ++++++++- .../Source/mxm/template/GB_AxB_saxpy5_lv.c | 20 +++++++++---------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c index 7fc619d4ab..17d00f1083 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp32.c @@ -17,6 +17,14 @@ #include "assign/GB_bitmap_assign_methods.h" #include "FactoryKernels/GB_AxB__include2.h" +// riscv intrinsics + +#define VSETVL(x) __riscv_vsetvl_e32m8(x) +#define VLE(x,y) __riscv_vle32_v_f32m8(x, y) +#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f32m8(x, y, z, w) +#define VSE(x,y,z) __riscv_vse32_v_f32m8(x, y, z) +#define VECTORTYPE vfloat32m8_t + // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z += (a*b) #define GB_MULT(z,a,b,i,k,j) z = (a*b) diff --git a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c index 6a019e2624..b772916144 100644 --- a/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c +++ b/GraphBLAS/FactoryKernels/GB_AxB__plus_times_fp64.c @@ -17,6 +17,14 @@ #include "assign/GB_bitmap_assign_methods.h" #include "FactoryKernels/GB_AxB__include2.h" +// riscv intrinsics + +#define VSETVL(x) __riscv_vsetvl_e64m8(x) +#define VLE(x,y) __riscv_vle64_v_f64m8(x, y) +#define VFMACC(x,y,z,w) __riscv_vfmacc_vf_f64m8(x, y, z, w) +#define VSE(x,y,z) __riscv_vse64_v_f64m8(x, y, z) +#define VECTORTYPE vfloat64m8_t + // semiring operators: #define GB_MULTADD(z,a,b,i,k,j) z += (a*b) #define GB_MULT(z,a,b,i,k,j) z = (a*b) @@ -289,7 +297,6 @@ GrB_Info GB (_Asaxpy4B__plus_times_fp64) //---------------------------------------------------------------------- // saxpy5 method with RISC-V vectors //---------------------------------------------------------------------- - #if GB_COMPILER_SUPPORTS_RVV1 GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv diff --git a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c index 3ca0e2d717..dde2f6e836 100644 --- a/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c +++ b/GraphBLAS/Source/mxm/template/GB_AxB_saxpy5_lv.c @@ -5,7 +5,7 @@ const int64_t *restrict Bi = B->i; const GB_A_TYPE *restrict Ax = (GB_A_TYPE *)A->x; const GB_B_TYPE *restrict Bx = (GB_B_TYPE *)B->x; - size_t vl = __riscv_vsetvl_e64m8(m); + size_t vl = VSETVL(m); GB_C_TYPE *restrict Cx = (GB_C_TYPE *)C->x; #pragma omp parallel for num_threads(nthreads) schedule(dynamic, 1) @@ -22,33 +22,31 @@ const int64_t pB_end = Bp[jB + 1]; for (int64_t i = 0; i < m && (m - i) >= vl; i += vl) { - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, vl); - + VECTORTYPE vc = VLE(Cxj + i, vl); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, vl); - vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, vl); + VECTORTYPE va = VLE(Ax + i + k * m, vl); + vc = VFMACC(vc, bkj, va, vl); } - __riscv_vse64_v_f64m8(Cxj + i, vc, vl); + VSE(Cxj + i, vc, vl); } int64_t remaining = m % vl; if (remaining > 0) { int64_t i = m - remaining; - vfloat64m8_t vc = __riscv_vle64_v_f64m8(Cxj + i, remaining); - + VECTORTYPE vc = VLE(Cxj + i, remaining); for (int64_t pB = pB_start; pB < pB_end; pB++) { const int64_t k = Bi[pB]; const GB_B_TYPE bkj = Bx[pB]; - vfloat64m8_t va = __riscv_vle64_v_f64m8(Ax + i + k * m, remaining); - vc = __riscv_vfmacc_vf_f64m8(vc, bkj, va, remaining); + VECTORTYPE va = VLE(Ax + i + k * m, remaining); + vc = VFMACC(vc, bkj, va, remaining); } - __riscv_vse64_v_f64m8(Cxj + i, vc, remaining); + VSE(Cxj + i, vc, remaining); } } } From 9d566fed644713f054844015a9d6ef1d748f6bbe Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Sat, 8 Feb 2025 14:52:03 +0300 Subject: [PATCH 22/26] feat: implement riscv support in cpufeatures --- GraphBLAS/Source/cpu/GB_cpu_features_impl.c | 1 + GraphBLAS/cpu_features/CMakeLists.txt | 5 + .../cpu_features/include/cpuinfo_riscv.h | 72 ++++++++++++ GraphBLAS/cpu_features/src/impl_riscv_linux.c | 111 ++++++++++++++++++ .../src/utils/list_cpu_features.c | 2 + 5 files changed, 191 insertions(+) create mode 100644 GraphBLAS/cpu_features/include/cpuinfo_riscv.h create mode 100644 GraphBLAS/cpu_features/src/impl_riscv_linux.c diff --git a/GraphBLAS/Source/cpu/GB_cpu_features_impl.c b/GraphBLAS/Source/cpu/GB_cpu_features_impl.c index a8da662b12..1d6d39ab17 100644 --- a/GraphBLAS/Source/cpu/GB_cpu_features_impl.c +++ b/GraphBLAS/Source/cpu/GB_cpu_features_impl.c @@ -41,6 +41,7 @@ #include "src/impl_x86_freebsd.c" #include "src/impl_x86_linux_or_android.c" #include "src/impl_x86_windows.c" + #include "src/impl_riscv_linux.c" #if GBX86 #if (defined(__apple__) || defined(__APPLE__) || defined(__MACH__)) // needed for src/impl_x86_macos.c: diff --git a/GraphBLAS/cpu_features/CMakeLists.txt b/GraphBLAS/cpu_features/CMakeLists.txt index ac3c0a5498..942a1140f0 100644 --- a/GraphBLAS/cpu_features/CMakeLists.txt +++ b/GraphBLAS/cpu_features/CMakeLists.txt @@ -49,6 +49,7 @@ set(PROCESSOR_IS_ARM FALSE) set(PROCESSOR_IS_AARCH64 FALSE) set(PROCESSOR_IS_X86 FALSE) set(PROCESSOR_IS_POWER FALSE) +set(PROCESSOR_IS_RISCV FALSE) if(CMAKE_SYSTEM_PROCESSOR MATCHES "^mips") set(PROCESSOR_IS_MIPS TRUE) @@ -60,6 +61,8 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64|amd64)|(^i.86$)") set(PROCESSOR_IS_X86 TRUE) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)") set(PROCESSOR_IS_POWER TRUE) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv") + set(PROCESSOR_IS_RISCV TRUE) endif() macro(add_cpu_features_headers_and_sources HDRS_LIST_NAME SRCS_LIST_NAME) @@ -78,6 +81,8 @@ macro(add_cpu_features_headers_and_sources HDRS_LIST_NAME SRCS_LIST_NAME) list(APPEND ${SRCS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/internal/cpuid_x86.h) elseif(PROCESSOR_IS_POWER) list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_ppc.h) + elseif(PROCESSOR_IS_RISCV) + list(APPEND ${HDRS_LIST_NAME} ${PROJECT_SOURCE_DIR}/include/cpuinfo_riscv.h) else() message(FATAL_ERROR "Unsupported architectures ${CMAKE_SYSTEM_PROCESSOR}") endif() diff --git a/GraphBLAS/cpu_features/include/cpuinfo_riscv.h b/GraphBLAS/cpu_features/include/cpuinfo_riscv.h new file mode 100644 index 0000000000..8c943fe0fb --- /dev/null +++ b/GraphBLAS/cpu_features/include/cpuinfo_riscv.h @@ -0,0 +1,72 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef CPU_FEATURES_INCLUDE_CPUINFO_RISCV_H_ +#define CPU_FEATURES_INCLUDE_CPUINFO_RISCV_H_ + +#include "cpu_features_cache_info.h" +#include "cpu_features_macros.h" + +#if !defined(CPU_FEATURES_ARCH_RISCV) +#error "Including cpuinfo_riscv.h from a non-riscv target." +#endif + +CPU_FEATURES_START_CPP_NAMESPACE + +typedef struct { + // Base + int RV32I : 1; // Base Integer Instruction Set, 32-bit + int RV64I : 1; // Base Integer Instruction Set, 64-bit + + // Extension + int M : 1; // Standard Extension for Integer Multiplication/Division + int A : 1; // Standard Extension for Atomic Instructions + int F : 1; // Standard Extension for Single-Precision Floating-Point + int D : 1; // Standard Extension for Double-Precision Floating-Point + int Q : 1; // Standard Extension for Quad-Precision Floating-Point + int C : 1; // Standard Extension for Compressed Instructions + int V : 1; // Standard Extension for Vector Instructions + int Zicsr : 1; // Control and Status Register (CSR) + int Zifencei : 1; // Instruction-Fetch Fence +} RiscvFeatures; + +typedef struct { + RiscvFeatures features; + char uarch[64]; // 0 terminated string + char vendor[64]; // 0 terminated string +} RiscvInfo; + +typedef enum { + RISCV_RV32I, + RISCV_RV64I, + RISCV_M, + RISCV_A, + RISCV_F, + RISCV_D, + RISCV_Q, + RISCV_C, + RISCV_V, + RISCV_Zicsr, + RISCV_Zifencei, + RISCV_LAST_, +} RiscvFeaturesEnum; + +RiscvInfo GetRiscvInfo(void); +int GetRiscvFeaturesEnumValue(const RiscvFeatures* features, + RiscvFeaturesEnum value); +const char* GetRiscvFeaturesEnumName(RiscvFeaturesEnum); + +CPU_FEATURES_END_CPP_NAMESPACE + +#endif // CPU_FEATURES_INCLUDE_CPUINFO_RISCV_H_ \ No newline at end of file diff --git a/GraphBLAS/cpu_features/src/impl_riscv_linux.c b/GraphBLAS/cpu_features/src/impl_riscv_linux.c new file mode 100644 index 0000000000..dcfb1d17e8 --- /dev/null +++ b/GraphBLAS/cpu_features/src/impl_riscv_linux.c @@ -0,0 +1,111 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cpu_features_macros.h" + +#ifdef CPU_FEATURES_ARCH_RISCV +#if defined(CPU_FEATURES_OS_LINUX) + +#include "cpuinfo_riscv.h" + +// According to +// https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindings/riscv/cpus.yaml +// isa string should match the following regex +// ^rv(?:64|32)imaf?d?q?c?b?v?k?h?(?:_[hsxz](?:[a-z])+)*$ +// +// This means we can test for features in this exact order except for Z +// extensions. + +//////////////////////////////////////////////////////////////////////////////// +// Definitions for introspection. +//////////////////////////////////////////////////////////////////////////////// +#define INTROSPECTION_TABLE \ + LINE(RISCV_RV32I, RV32I, "rv32i", RISCV_HWCAP_32, 0) \ + LINE(RISCV_RV64I, RV64I, "rv64i", RISCV_HWCAP_64, 0) \ + LINE(RISCV_M, M, "m", RISCV_HWCAP_M, 0) \ + LINE(RISCV_A, A, "a", RISCV_HWCAP_A, 0) \ + LINE(RISCV_F, F, "f", RISCV_HWCAP_F, 0) \ + LINE(RISCV_D, D, "d", RISCV_HWCAP_D, 0) \ + LINE(RISCV_Q, Q, "q", RISCV_HWCAP_Q, 0) \ + LINE(RISCV_C, C, "c", RISCV_HWCAP_C, 0) \ + LINE(RISCV_V, V, "v", RISCV_HWCAP_V, 0) \ + LINE(RISCV_Zicsr, Zicsr, "_zicsr", 0, 0) \ + LINE(RISCV_Zifencei, Zifencei, "_zifencei", 0, 0) +#define INTROSPECTION_PREFIX Riscv +#define INTROSPECTION_ENUM_PREFIX RISCV +#include "define_introspection_and_hwcaps.inl" + +//////////////////////////////////////////////////////////////////////////////// +// Implementation. +//////////////////////////////////////////////////////////////////////////////// + +#include +#include + +#include "internal/filesystem.h" +#include "internal/stack_line_reader.h" + +static const RiscvInfo kEmptyRiscvInfo; + +static void HandleRiscVIsaLine(StringView line, RiscvFeatures* const features) { + for (size_t i = 0; i < RISCV_LAST_; ++i) { + StringView flag = str(kCpuInfoFlags[i]); + int index_of_flag = CpuFeatures_StringView_IndexOf(line, flag); + bool is_set = index_of_flag != -1; + kSetters[i](features, is_set); + if (is_set) + line = CpuFeatures_StringView_PopFront(line, index_of_flag + flag.size); + } +} + +static bool HandleRiscVLine(const LineResult result, RiscvInfo* const info) { + StringView line = result.line; + StringView key, value; + if (CpuFeatures_StringView_GetAttributeKeyValue(line, &key, &value)) { + if (CpuFeatures_StringView_IsEquals(key, str("isa"))) { + HandleRiscVIsaLine(value, &info->features); + } else if (CpuFeatures_StringView_IsEquals(key, str("uarch"))) { + int index = CpuFeatures_StringView_IndexOfChar(value, ','); + if (index == -1) return true; + StringView vendor = CpuFeatures_StringView_KeepFront(value, index); + StringView uarch = CpuFeatures_StringView_PopFront(value, index + 1); + CpuFeatures_StringView_CopyString(vendor, info->vendor, + sizeof(info->vendor)); + CpuFeatures_StringView_CopyString(uarch, info->uarch, + sizeof(info->uarch)); + } + } + return !result.eof; +} + +static void FillProcCpuInfoData(RiscvInfo* const info) { + const int fd = CpuFeatures_OpenFile("/proc/cpuinfo"); + if (fd >= 0) { + StackLineReader reader; + StackLineReader_Initialize(&reader, fd); + for (;;) { + if (!HandleRiscVLine(StackLineReader_NextLine(&reader), info)) break; + } + CpuFeatures_CloseFile(fd); + } +} + +RiscvInfo GetRiscvInfo(void) { + RiscvInfo info = kEmptyRiscvInfo; + FillProcCpuInfoData(&info); + return info; +} + +#endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) +#endif // CPU_FEATURES_ARCH_RISCV \ No newline at end of file diff --git a/GraphBLAS/cpu_features/src/utils/list_cpu_features.c b/GraphBLAS/cpu_features/src/utils/list_cpu_features.c index 4389f20249..c9d567269b 100644 --- a/GraphBLAS/cpu_features/src/utils/list_cpu_features.c +++ b/GraphBLAS/cpu_features/src/utils/list_cpu_features.c @@ -35,6 +35,8 @@ #include "cpuinfo_mips.h" #elif defined(CPU_FEATURES_ARCH_PPC) #include "cpuinfo_ppc.h" +#elif defined(CPU_FEATURES_ARCH_RISCV) +#include "cpuinfo_riscv.h" #endif // Design principles From 40a9aa531acc5f0338dcdf41cb8480d41cf96b24 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov <107667059+suvorovrain@users.noreply.github.com> Date: Sun, 9 Feb 2025 15:38:19 +0300 Subject: [PATCH 23/26] refactor: add new line symbol --- GraphBLAS/Source/GB_control.h | 1 + 1 file changed, 1 insertion(+) diff --git a/GraphBLAS/Source/GB_control.h b/GraphBLAS/Source/GB_control.h index 2cfabe3e0c..7d07af3cad 100644 --- a/GraphBLAS/Source/GB_control.h +++ b/GraphBLAS/Source/GB_control.h @@ -2398,3 +2398,4 @@ #define GxB_NO_TIMES_SECONDJ_INT64 1 #define GxB_NO_TIMES_SECONDJ1_INT32 1 #define GxB_NO_TIMES_SECONDJ1_INT64 1 + From 0c67d9a46f701c3e685016bfc596f517e3dc3e58 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Mon, 10 Feb 2025 20:17:19 +0300 Subject: [PATCH 24/26] fix: update target --- .../Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c index a7342cc350..d172d3ed7c 100644 --- a/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c +++ b/GraphBLAS/Source/jit_kernels/template/GB_jit_kernel_AxB_saxpy5.c @@ -92,7 +92,7 @@ GB_JIT_GLOBAL GB_JIT_KERNEL_AXB_SAXPY5_PROTO (GB_jit_kernel) ; #if GB_COMPILER_SUPPORTS_RVV1 - GB_TARGET_AVX2 static inline void GB_AxB_saxpy5_unrolled_rvv + GB_TARGET_RVV1 static inline void GB_AxB_saxpy5_unrolled_rvv ( GrB_Matrix C, const GrB_Matrix A, From 1894cb07122861897d94f9e93cb26f17169e7fae Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 11 Feb 2025 00:26:52 +0300 Subject: [PATCH 25/26] fix: new line characters --- GraphBLAS/cpu_features/include/cpuinfo_riscv.h | 2 +- GraphBLAS/cpu_features/src/impl_riscv_linux.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GraphBLAS/cpu_features/include/cpuinfo_riscv.h b/GraphBLAS/cpu_features/include/cpuinfo_riscv.h index 8c943fe0fb..1fa7aa5135 100644 --- a/GraphBLAS/cpu_features/include/cpuinfo_riscv.h +++ b/GraphBLAS/cpu_features/include/cpuinfo_riscv.h @@ -69,4 +69,4 @@ const char* GetRiscvFeaturesEnumName(RiscvFeaturesEnum); CPU_FEATURES_END_CPP_NAMESPACE -#endif // CPU_FEATURES_INCLUDE_CPUINFO_RISCV_H_ \ No newline at end of file +#endif // CPU_FEATURES_INCLUDE_CPUINFO_RISCV_H_ diff --git a/GraphBLAS/cpu_features/src/impl_riscv_linux.c b/GraphBLAS/cpu_features/src/impl_riscv_linux.c index dcfb1d17e8..8abec6eb9c 100644 --- a/GraphBLAS/cpu_features/src/impl_riscv_linux.c +++ b/GraphBLAS/cpu_features/src/impl_riscv_linux.c @@ -108,4 +108,4 @@ RiscvInfo GetRiscvInfo(void) { } #endif // defined(CPU_FEATURES_OS_LINUX) || defined(CPU_FEATURES_OS_ANDROID) -#endif // CPU_FEATURES_ARCH_RISCV \ No newline at end of file +#endif // CPU_FEATURES_ARCH_RISCV From d856df904d1a19602c282d124b25307e41cfe1e9 Mon Sep 17 00:00:00 2001 From: Rodion Suvorov Date: Tue, 11 Feb 2025 00:31:44 +0300 Subject: [PATCH 26/26] refactor: rename global rvv var --- GraphBLAS/GraphBLAS/rename/GB_rename.h | 2 +- GraphBLAS/Source/global/GB_Global.c | 14 +++++++------- GraphBLAS/Source/global/GB_Global.h | 2 +- GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/GraphBLAS/GraphBLAS/rename/GB_rename.h b/GraphBLAS/GraphBLAS/rename/GB_rename.h index 3c86ace6f3..3c8fe4fdf5 100644 --- a/GraphBLAS/GraphBLAS/rename/GB_rename.h +++ b/GraphBLAS/GraphBLAS/rename/GB_rename.h @@ -389,7 +389,7 @@ #define GB_Global_calloc_function_set GM_Global_calloc_function_set #define GB_Global_cpu_features_avx2 GM_Global_cpu_features_avx2 #define GB_Global_cpu_features_avx512f GM_Global_cpu_features_avx512f -#define GB_Global_cpu_features_rvv GM_Global_cpu_features_rvv +#define GB_Global_cpu_features_rvv_1_0 GM_Global_cpu_features_rvv_1_0 #define GB_Global_cpu_features_query GM_Global_cpu_features_query #define GB_Global_flush_get GM_Global_flush_get #define GB_Global_flush_set GM_Global_flush_set diff --git a/GraphBLAS/Source/global/GB_Global.c b/GraphBLAS/Source/global/GB_Global.c index 2fa8269bdb..346dcbd796 100644 --- a/GraphBLAS/Source/global/GB_Global.c +++ b/GraphBLAS/Source/global/GB_Global.c @@ -131,7 +131,7 @@ typedef struct bool cpu_features_avx2 ; // x86_64 with AVX2 bool cpu_features_avx512f ; // x86_64 with AVX512f - bool cpu_features_rvv ; // RISC-V with RVV1.0 + bool cpu_features_rvv_1_0 ; // RISC-V with RVV1.0 //-------------------------------------------------------------------------- // CUDA (DRAFT: in progress): @@ -216,7 +216,7 @@ static GB_Global_struct GB_Global = // CPU features .cpu_features_avx2 = false, // x86_64 with AVX2 .cpu_features_avx512f = false, // x86_64 with AVX512f - .cpu_features_rvv = false, // RISC-V with RVV1.0 + .cpu_features_rvv_1_0 = false, // RISC-V with RVV1.0 // CUDA environment (DRAFT: in progress) .gpu_count = 0, // # of GPUs in the system @@ -318,12 +318,12 @@ void GB_Global_cpu_features_query (void) #if defined ( GBRVV ) { // the build system asserts whether or not RVV1.0 is available - GB_Global.cpu_features_rvv = (bool) (GBRVV) ; + GB_Global.cpu_features_rvv_1_0 = (bool) (GBRVV) ; } #else { // RVV1.0 not available - GB_Global.cpu_features_rvv = false ; + GB_Global.cpu_features_rvv_1_0 = false ; } #endif @@ -337,7 +337,7 @@ void GB_Global_cpu_features_query (void) GB_Global.cpu_features_avx2 = false ; GB_Global.cpu_features_avx512f = false ; - GB_Global.cpu_features_rvv = false ; + GB_Global.cpu_features_rvv_1_0 = false ; } #endif @@ -353,9 +353,9 @@ bool GB_Global_cpu_features_avx512f (void) return (GB_Global.cpu_features_avx512f) ; } -bool GB_Global_cpu_features_rvv (void) +bool GB_Global_cpu_features_rvv_1_0 (void) { - return (GB_Global.cpu_features_rvv) ; + return (GB_Global.cpu_features_rvv_1_0) ; } //------------------------------------------------------------------------------ diff --git a/GraphBLAS/Source/global/GB_Global.h b/GraphBLAS/Source/global/GB_Global.h index c5c5685077..fb0d759495 100644 --- a/GraphBLAS/Source/global/GB_Global.h +++ b/GraphBLAS/Source/global/GB_Global.h @@ -17,7 +17,7 @@ void GB_Global_cpu_features_query (void) ; bool GB_Global_cpu_features_avx2 (void) ; bool GB_Global_cpu_features_avx512f (void) ; -bool GB_Global_cpu_features_rvv (void) ; +bool GB_Global_cpu_features_rvv_1_0 (void) ; void GB_Global_mode_set (GrB_Mode mode) ; GrB_Mode GB_Global_mode_get (void) ; diff --git a/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c b/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c index 8bf7223eac..d0b4646efb 100644 --- a/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c +++ b/GraphBLAS/Source/mxm/factory/GB_AxB_saxpy5_meta.c @@ -146,7 +146,7 @@ #if GB_SEMIRING_HAS_RVV_IMPLEMENTATION #if GB_COMPILER_SUPPORTS_RVV1 - if (GB_Global_cpu_features_rvv ( )) + if (GB_Global_cpu_features_rvv_1_0 ( )) { GB_AxB_saxpy5_unrolled_rvv (C, A, B, ntasks, nthreads, B_slice) ;