From 1ca77276a93e4067cfe0ac5ee6aa43555436960c Mon Sep 17 00:00:00 2001 From: Ilya Zilberter Date: Tue, 18 Feb 2025 21:33:53 -0700 Subject: [PATCH 1/3] Add fallback to lower-memory cuSparse SpGEMM algorithm Address an issue where the default CuSPARSE SpGEMM algorithm estimates an overly large memory buffer for matrices greater than ~4 million rows, causing a memory allocation exception regardless of the actual GPU memory capacity. Since CUDA 12.0, alternate, less memory-intensive algorithms for SpGEMM have been introduced to fix the issue. The spgemm and advanced_spgemm cuda routines now attempt to compute the matrix product using the default CUSPARSE_SPGEMM_ALG1 algorithm, and if it fails, fall back to CUSPARSE_SPGEMM_ALG2. Update the CuSparse bindings for spgemm-related functions to take the algorithm as an argument. --- .../cuda_hip/matrix/csr_kernels.template.cpp | 153 +++++++++++++----- cuda/base/cusparse_bindings.hpp | 43 +++-- 2 files changed, 146 insertions(+), 50 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.template.cpp b/common/cuda_hip/matrix/csr_kernels.template.cpp index cdf363f6a87..8aa8d644482 100644 --- a/common/cuda_hip/matrix/csr_kernels.template.cpp +++ b/common/cuda_hip/matrix/csr_kernels.template.cpp @@ -2468,7 +2468,7 @@ void spgemm(std::shared_ptr exec, } else { GKO_NOT_IMPLEMENTED; } -#else // GKO_COMPILING_CUDA +#else // GKO_COMPILING_CUDA auto a_vals = a->get_const_values(); auto a_row_ptrs = a->get_const_row_ptrs(); auto a_col_idxs = a->get_const_col_idxs(); @@ -2503,26 +2503,63 @@ void spgemm(std::shared_ptr exec, const_cast(b_col_idxs), const_cast(b_vals)); auto c_descr = sparselib::create_csr(m, n, zero_nnz, null_index, null_index, null_value); + auto spgemm_alg = CUSPARSE_SPGEMM_ALG1; - // estimate work size_type buffer1_size{}; - sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, &beta, - c_descr, spgemm_descr, buffer1_size, - nullptr); - array buffer1{exec, buffer1_size}; - 
sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, &beta, - c_descr, spgemm_descr, buffer1_size, - buffer1.get_data()); - - // compute spgemm size_type buffer2_size{}; - sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, c_descr, - spgemm_descr, buffer1.get_data(), buffer2_size, - nullptr); - array buffer2{exec, buffer2_size}; - sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, c_descr, - spgemm_descr, buffer1.get_data(), buffer2_size, - buffer2.get_data()); + array buffer1{exec}; + array buffer2{exec}; + + // Try CUSPARSE_SPGEMM_ALG1 first as it is fastest for small matrices + try { + // Memory estimate for Alg1 + sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, + &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, nullptr); + buffer1.resize_and_reset(buffer1_size); + sparselib::spgemm_work_estimation( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, buffer1.get_data()); + sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, + c_descr, spgemm_descr, spgemm_alg, + buffer1.get_data(), buffer2_size, nullptr); + // compute spgemm + buffer2.resize_and_reset(buffer2_size); + sparselib::spgemm_compute( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1.get_data(), buffer2_size, buffer2.get_data()); + } + + catch (const CusparseError& cse) { + // If estimated buffer size is too large and CUDA > 12.0, fall back to + // ALG2 +#if CUDA_VERSION >= 12000 + spgemm_alg = CUSPARSE_SPGEMM_ALG2; + // Memory estimate for Alg2/Alg3 + sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, + &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, nullptr); + buffer1.resize_and_reset(buffer1_size); + sparselib::spgemm_work_estimation( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, buffer1.get_data()); + size_type buffer3_size{}; + 
sparselib::spgemm_estimate_memory( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr); + array buffer3{exec, buffer3_size}; + sparselib::spgemm_estimate_memory( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, 1.0f, buffer3_size, buffer3.get_data(), &buffer2_size); + buffer2.resize_and_reset(buffer2_size); + // compute spgemm + sparselib::spgemm_compute( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1.get_data(), buffer2_size, buffer2.get_data()); +#else // CUDA_VERSION < 12000 + throw(cse); +#endif + } // copy data to result auto c_nnz = sparselib::sparse_matrix_nnz(c_descr); @@ -2533,7 +2570,7 @@ void spgemm(std::shared_ptr exec, c_vals_array.get_data()); sparselib::spgemm_copy(handle, &alpha, a_descr, b_descr, &beta, c_descr, - spgemm_descr); + spgemm_descr, spgemm_alg); sparselib::destroy(c_descr); sparselib::destroy(b_descr); @@ -2632,7 +2669,7 @@ void advanced_spgemm(std::shared_ptr exec, } else { GKO_NOT_IMPLEMENTED; } -#else // GKO_COMPILING_CUDA +#else // GKO_COMPILING_CUDA auto handle = exec->get_sparselib_handle(); sparselib::pointer_mode_guard pm_guard(handle); @@ -2669,26 +2706,66 @@ void advanced_spgemm(std::shared_ptr exec, const_cast(b_col_idxs), const_cast(b_vals)); auto c_descr = sparselib::create_csr(m, n, zero_nnz, null_index, null_index, null_value); + auto spgemm_alg = CUSPARSE_SPGEMM_ALG1; - // estimate work size_type buffer1_size{}; - sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, - &zero_val, c_descr, spgemm_descr, - buffer1_size, nullptr); - array buffer1{exec, buffer1_size}; - sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, - &zero_val, c_descr, spgemm_descr, - buffer1_size, buffer1.get_data()); - - // compute spgemm size_type buffer2_size{}; - sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val, - c_descr, spgemm_descr, 
buffer1.get_data(), - buffer2_size, nullptr); - array buffer2{exec, buffer2_size}; - sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val, - c_descr, spgemm_descr, buffer1.get_data(), - buffer2_size, buffer2.get_data()); + array buffer1{exec}; + array buffer2{exec}; + + // Try CUSPARSE_SPGEMM_ALG1 first as it is fastest for small matrices + try { + // Memory estimate for Alg1 + sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, + &zero_val, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, nullptr); + buffer1.resize_and_reset(buffer1_size); + sparselib::spgemm_work_estimation( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, buffer1_size, buffer1.get_data()); + sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val, + c_descr, spgemm_descr, spgemm_alg, + buffer1.get_data(), buffer2_size, nullptr); + // compute spgemm + buffer2.resize_and_reset(buffer2_size); + sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val, + c_descr, spgemm_descr, spgemm_alg, + buffer1.get_data(), buffer2_size, + buffer2.get_data()); + } + + catch (const CusparseError& cse) { + // If estimated buffer size is too large and CUDA > 12.0, fall back to + // ALG2 +#if CUDA_VERSION >= 12000 + spgemm_alg = CUSPARSE_SPGEMM_ALG2; + // Memory estimate for Alg2/Alg3 + sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, + &zero_val, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, nullptr); + buffer1.resize_and_reset(buffer1_size); + sparselib::spgemm_work_estimation( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, buffer1_size, buffer1.get_data()); + size_type buffer3_size{}; + sparselib::spgemm_estimate_memory( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr); + array buffer3{exec, buffer3_size}; + sparselib::spgemm_estimate_memory(handle, &one_val, a_descr, 
b_descr, + &zero_val, c_descr, spgemm_descr, + spgemm_alg, 1.0f, buffer3_size, + buffer3.get_data(), &buffer2_size); + buffer2.resize_and_reset(buffer2_size); + // compute spgemm + sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val, + c_descr, spgemm_descr, spgemm_alg, + buffer1.get_data(), buffer2_size, + buffer2.get_data()); +#else // CUDA_VERSION < 12000 + throw(cse); +#endif + } // write result to temporary storage auto c_tmp_nnz = sparselib::sparse_matrix_nnz(c_descr); @@ -2700,7 +2777,7 @@ void advanced_spgemm(std::shared_ptr exec, c_tmp_vals_array.get_data()); sparselib::spgemm_copy(handle, &one_val, a_descr, b_descr, &zero_val, - c_descr, spgemm_descr); + c_descr, spgemm_descr, spgemm_alg); sparselib::destroy(c_descr); sparselib::destroy(b_descr); diff --git a/cuda/base/cusparse_bindings.hpp b/cuda/base/cusparse_bindings.hpp index 4be00b88aaf..fe58716a02c 100644 --- a/cuda/base/cusparse_bindings.hpp +++ b/cuda/base/cusparse_bindings.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -208,13 +208,31 @@ void spgemm_work_estimation(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t b_descr, const ValueType* beta, cusparseSpMatDescr_t c_descr, cusparseSpGEMMDescr_t spgemm_descr, + cusparseSpGEMMAlg_t spgemm_alg, size_type& buffer1_size, void* buffer1) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_workEstimation( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta, - c_descr, cuda_data_type(), CUSPARSE_SPGEMM_DEFAULT, - spgemm_descr, &buffer1_size, buffer1)); + c_descr, cuda_data_type(), spgemm_alg, spgemm_descr, + &buffer1_size, buffer1)); +} + +template +void spgemm_estimate_memory(cusparseHandle_t handle, const ValueType* alpha, + cusparseSpMatDescr_t a_descr, + cusparseSpMatDescr_t b_descr, const ValueType* beta, + 
cusparseSpMatDescr_t c_descr, + cusparseSpGEMMDescr_t spgemm_descr, + cusparseSpGEMMAlg_t spgemm_alg, + float chunk_fraction, size_type& buffer3_size, + void* buffer3, size_type* buffer2_size) +{ + GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_estimateMemory( + handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta, + c_descr, cuda_data_type(), spgemm_alg, spgemm_descr, + chunk_fraction, &buffer3_size, buffer3, buffer2_size)); } @@ -222,14 +240,15 @@ template void spgemm_compute(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t a_descr, cusparseSpMatDescr_t b_descr, const ValueType* beta, cusparseSpMatDescr_t c_descr, - cusparseSpGEMMDescr_t spgemm_descr, void* buffer1, + cusparseSpGEMMDescr_t spgemm_descr, + cusparseSpGEMMAlg_t spgemm_alg, void* buffer1, size_type& buffer2_size, void* buffer2) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_compute( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta, - c_descr, cuda_data_type(), CUSPARSE_SPGEMM_DEFAULT, - spgemm_descr, &buffer2_size, buffer2)); + c_descr, cuda_data_type(), spgemm_alg, spgemm_descr, + &buffer2_size, buffer2)); } @@ -237,13 +256,13 @@ template void spgemm_copy(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t a_descr, cusparseSpMatDescr_t b_descr, const ValueType* beta, cusparseSpMatDescr_t c_descr, - cusparseSpGEMMDescr_t spgemm_descr) + cusparseSpGEMMDescr_t spgemm_descr, + cusparseSpGEMMAlg_t spgemm_alg) { - GKO_ASSERT_NO_CUSPARSE_ERRORS( - cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, - CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, - b_descr, beta, c_descr, cuda_data_type(), - CUSPARSE_SPGEMM_DEFAULT, spgemm_descr)); + GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_copy( + handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta, + c_descr, cuda_data_type(), spgemm_alg, 
spgemm_descr)); } From 8c9ddb3f78f8d61102fd05e9dc71afcc79143210 Mon Sep 17 00:00:00 2001 From: Ilya Zilberter Date: Thu, 20 Feb 2025 09:42:25 -0700 Subject: [PATCH 2/3] Fallback cusparse_spgemm algorithm only if CUSPARSE_STATUS_INSUFFICIENT_RESOURCES --- .../cuda_hip/matrix/csr_kernels.template.cpp | 104 ++++++++++-------- cuda/base/exception.cpp | 3 +- 2 files changed, 60 insertions(+), 47 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.template.cpp b/common/cuda_hip/matrix/csr_kernels.template.cpp index 8aa8d644482..ed68a30898f 100644 --- a/common/cuda_hip/matrix/csr_kernels.template.cpp +++ b/common/cuda_hip/matrix/csr_kernels.template.cpp @@ -2534,28 +2534,35 @@ void spgemm(std::shared_ptr exec, // If estimated buffer size is too large and CUDA > 12.0, fall back to // ALG2 #if CUDA_VERSION >= 12000 - spgemm_alg = CUSPARSE_SPGEMM_ALG2; - // Memory estimate for Alg2/Alg3 - sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, - &beta, c_descr, spgemm_descr, - spgemm_alg, buffer1_size, nullptr); - buffer1.resize_and_reset(buffer1_size); - sparselib::spgemm_work_estimation( - handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, - spgemm_alg, buffer1_size, buffer1.get_data()); - size_type buffer3_size{}; - sparselib::spgemm_estimate_memory( - handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, - spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr); - array buffer3{exec, buffer3_size}; - sparselib::spgemm_estimate_memory( - handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, - spgemm_alg, 1.0f, buffer3_size, buffer3.get_data(), &buffer2_size); - buffer2.resize_and_reset(buffer2_size); - // compute spgemm - sparselib::spgemm_compute( - handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, - spgemm_alg, buffer1.get_data(), buffer2_size, buffer2.get_data()); + const char* error_code = "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES"; + if (strstr(cse.what(), error_code)) { + spgemm_alg = CUSPARSE_SPGEMM_ALG2; 
+ // Memory estimate for Alg2/Alg3 + sparselib::spgemm_work_estimation( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, nullptr); + buffer1.resize_and_reset(buffer1_size); + sparselib::spgemm_work_estimation( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, buffer1_size, buffer1.get_data()); + size_type buffer3_size{}; + sparselib::spgemm_estimate_memory( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr); + array buffer3{exec, buffer3_size}; + sparselib::spgemm_estimate_memory( + handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr, + spgemm_alg, 1.0f, buffer3_size, buffer3.get_data(), + &buffer2_size); + buffer2.resize_and_reset(buffer2_size); + // compute spgemm + sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, + c_descr, spgemm_descr, spgemm_alg, + buffer1.get_data(), buffer2_size, + buffer2.get_data()); + } else { + throw(cse); + } #else // CUDA_VERSION < 12000 throw(cse); #endif @@ -2738,30 +2745,35 @@ void advanced_spgemm(std::shared_ptr exec, // If estimated buffer size is too large and CUDA > 12.0, fall back to // ALG2 #if CUDA_VERSION >= 12000 - spgemm_alg = CUSPARSE_SPGEMM_ALG2; - // Memory estimate for Alg2/Alg3 - sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr, - &zero_val, c_descr, spgemm_descr, - spgemm_alg, buffer1_size, nullptr); - buffer1.resize_and_reset(buffer1_size); - sparselib::spgemm_work_estimation( - handle, &one_val, a_descr, b_descr, &zero_val, c_descr, - spgemm_descr, spgemm_alg, buffer1_size, buffer1.get_data()); - size_type buffer3_size{}; - sparselib::spgemm_estimate_memory( - handle, &one_val, a_descr, b_descr, &zero_val, c_descr, - spgemm_descr, spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr); - array buffer3{exec, buffer3_size}; - sparselib::spgemm_estimate_memory(handle, &one_val, a_descr, b_descr, - &zero_val, c_descr, spgemm_descr, - 
spgemm_alg, 1.0f, buffer3_size, - buffer3.get_data(), &buffer2_size); - buffer2.resize_and_reset(buffer2_size); - // compute spgemm - sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val, - c_descr, spgemm_descr, spgemm_alg, - buffer1.get_data(), buffer2_size, - buffer2.get_data()); + const char* error_code = "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES"; + if (strstr(cse.what(), error_code)) { + spgemm_alg = CUSPARSE_SPGEMM_ALG2; + // Memory estimate for Alg2/Alg3 + sparselib::spgemm_work_estimation( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, buffer1_size, nullptr); + buffer1.resize_and_reset(buffer1_size); + sparselib::spgemm_work_estimation( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, buffer1_size, buffer1.get_data()); + size_type buffer3_size{}; + sparselib::spgemm_estimate_memory( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr); + array buffer3{exec, buffer3_size}; + sparselib::spgemm_estimate_memory( + handle, &one_val, a_descr, b_descr, &zero_val, c_descr, + spgemm_descr, spgemm_alg, 1.0f, buffer3_size, + buffer3.get_data(), &buffer2_size); + buffer2.resize_and_reset(buffer2_size); + // compute spgemm + sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, + &zero_val, c_descr, spgemm_descr, + spgemm_alg, buffer1.get_data(), + buffer2_size, buffer2.get_data()); + } else { + throw(cse); + } #else // CUDA_VERSION < 12000 throw(cse); #endif diff --git a/cuda/base/exception.cpp b/cuda/base/exception.cpp index 7bb7fae5bd5..1f7d90113a4 100644 --- a/cuda/base/exception.cpp +++ b/cuda/base/exception.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -6,6 +6,7 @@ #include +#include #include #include #include From f4d20f626b7259c26d277c8902355bbd269b803c 
Mon Sep 17 00:00:00 2001 From: Ilya Zilberter Date: Thu, 20 Feb 2025 15:19:47 -0700 Subject: [PATCH 3/3] Add accessor for CusparseError error code Use this to check for CUSPARSE_STATUS_INSUFFICIENT_RESOURCES when falling back to spgemm ALG2. --- common/cuda_hip/matrix/csr_kernels.template.cpp | 6 ++---- contributors.txt | 1 + include/ginkgo/core/base/exception.hpp | 11 +++++++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/common/cuda_hip/matrix/csr_kernels.template.cpp b/common/cuda_hip/matrix/csr_kernels.template.cpp index ed68a30898f..906be6ff3b5 100644 --- a/common/cuda_hip/matrix/csr_kernels.template.cpp +++ b/common/cuda_hip/matrix/csr_kernels.template.cpp @@ -2534,8 +2534,7 @@ void spgemm(std::shared_ptr exec, // If estimated buffer size is too large and CUDA > 12.0, fall back to // ALG2 #if CUDA_VERSION >= 12000 - const char* error_code = "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES"; - if (strstr(cse.what(), error_code)) { + if (cse.get_error_code() == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) { spgemm_alg = CUSPARSE_SPGEMM_ALG2; // Memory estimate for Alg2/Alg3 sparselib::spgemm_work_estimation( @@ -2745,8 +2744,7 @@ void advanced_spgemm(std::shared_ptr exec, // If estimated buffer size is too large and CUDA > 12.0, fall back to // ALG2 #if CUDA_VERSION >= 12000 - const char* error_code = "CUSPARSE_STATUS_INSUFFICIENT_RESOURCES"; - if (strstr(cse.what(), error_code)) { + if (cse.get_error_code() == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) { spgemm_alg = CUSPARSE_SPGEMM_ALG2; // Memory estimate for Alg2/Alg3 sparselib::spgemm_work_estimation( diff --git a/contributors.txt b/contributors.txt index aec120d93dd..2d9be655046 100644 --- a/contributors.txt +++ b/contributors.txt @@ -25,3 +25,4 @@ Olenik Gregor HPSim Ribizel Tobias Karlsruhe Institute of Technology Riemer Lukas Karlsruhe Institute of Technology Tsai Yuhsiang National Taiwan University +Zilberter Ilya Tech-X Corporation diff --git a/include/ginkgo/core/base/exception.hpp
b/include/ginkgo/core/base/exception.hpp index febc5e17034..c60834c23dc 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -250,11 +250,18 @@ class CusparseError : public Error { */ CusparseError(const std::string& file, int line, const std::string& func, int64 error_code) - : Error(file, line, func + ": " + get_error(error_code)) + : Error(file, line, func + ": " + get_error(error_code)), + err_code(error_code) {} + /** + * Returns the error code + */ + int64 get_error_code() const noexcept { return err_code; } + private: static std::string get_error(int64 error_code); + const int64 err_code; };