Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 125 additions & 38 deletions common/cuda_hip/matrix/csr_kernels.template.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2468,7 +2468,7 @@ void spgemm(std::shared_ptr<const DefaultExecutor> exec,
} else {
GKO_NOT_IMPLEMENTED;
}
#else // GKO_COMPILING_CUDA
#else // GKO_COMPILING_CUDA
auto a_vals = a->get_const_values();
auto a_row_ptrs = a->get_const_row_ptrs();
auto a_col_idxs = a->get_const_col_idxs();
Expand Down Expand Up @@ -2503,26 +2503,69 @@ void spgemm(std::shared_ptr<const DefaultExecutor> exec,
const_cast<IndexType*>(b_col_idxs), const_cast<ValueType*>(b_vals));
auto c_descr = sparselib::create_csr(m, n, zero_nnz, null_index, null_index,
null_value);
auto spgemm_alg = CUSPARSE_SPGEMM_ALG1;

// estimate work
size_type buffer1_size{};
sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, &beta,
c_descr, spgemm_descr, buffer1_size,
nullptr);
array<char> buffer1{exec, buffer1_size};
sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr, &beta,
c_descr, spgemm_descr, buffer1_size,
buffer1.get_data());

// compute spgemm
size_type buffer2_size{};
sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, c_descr,
spgemm_descr, buffer1.get_data(), buffer2_size,
nullptr);
array<char> buffer2{exec, buffer2_size};
sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta, c_descr,
spgemm_descr, buffer1.get_data(), buffer2_size,
buffer2.get_data());
array<char> buffer1{exec};
array<char> buffer2{exec};

// Try CUSPARSE_SPGEMM_ALG1 first as it is fastest for small matrices
try {
// Memory estimate for Alg1
sparselib::spgemm_work_estimation(handle, &alpha, a_descr, b_descr,
&beta, c_descr, spgemm_descr,
spgemm_alg, buffer1_size, nullptr);
buffer1.resize_and_reset(buffer1_size);
sparselib::spgemm_work_estimation(
handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr,
spgemm_alg, buffer1_size, buffer1.get_data());
sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta,
c_descr, spgemm_descr, spgemm_alg,
buffer1.get_data(), buffer2_size, nullptr);
// compute spgemm
buffer2.resize_and_reset(buffer2_size);
sparselib::spgemm_compute(
handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr,
spgemm_alg, buffer1.get_data(), buffer2_size, buffer2.get_data());
}

catch (const CusparseError& cse) {
// If estimated buffer size is too large and CUDA >= 12.0, fall back to
// ALG2
#if CUDA_VERSION >= 12000
if (cse.get_error_code() == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) {
spgemm_alg = CUSPARSE_SPGEMM_ALG2;
// Memory estimate for Alg2/Alg3
sparselib::spgemm_work_estimation(
handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr,
spgemm_alg, buffer1_size, nullptr);
buffer1.resize_and_reset(buffer1_size);
sparselib::spgemm_work_estimation(
handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr,
spgemm_alg, buffer1_size, buffer1.get_data());
size_type buffer3_size{};
sparselib::spgemm_estimate_memory(
handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr,
spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr);
array<char> buffer3{exec, buffer3_size};
sparselib::spgemm_estimate_memory(
handle, &alpha, a_descr, b_descr, &beta, c_descr, spgemm_descr,
spgemm_alg, 1.0f, buffer3_size, buffer3.get_data(),
&buffer2_size);
buffer2.resize_and_reset(buffer2_size);
// compute spgemm
sparselib::spgemm_compute(handle, &alpha, a_descr, b_descr, &beta,
c_descr, spgemm_descr, spgemm_alg,
buffer1.get_data(), buffer2_size,
buffer2.get_data());
} else {
throw(cse);
}
#else // CUDA_VERSION < 12000
throw(cse);
#endif
}

// copy data to result
auto c_nnz = sparselib::sparse_matrix_nnz(c_descr);
Expand All @@ -2533,7 +2576,7 @@ void spgemm(std::shared_ptr<const DefaultExecutor> exec,
c_vals_array.get_data());

sparselib::spgemm_copy(handle, &alpha, a_descr, b_descr, &beta, c_descr,
spgemm_descr);
spgemm_descr, spgemm_alg);

sparselib::destroy(c_descr);
sparselib::destroy(b_descr);
Expand Down Expand Up @@ -2632,7 +2675,7 @@ void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec,
} else {
GKO_NOT_IMPLEMENTED;
}
#else // GKO_COMPILING_CUDA
#else // GKO_COMPILING_CUDA
auto handle = exec->get_sparselib_handle();
sparselib::pointer_mode_guard pm_guard(handle);

Expand Down Expand Up @@ -2669,26 +2712,70 @@ void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec,
const_cast<IndexType*>(b_col_idxs), const_cast<ValueType*>(b_vals));
auto c_descr = sparselib::create_csr(m, n, zero_nnz, null_index, null_index,
null_value);
auto spgemm_alg = CUSPARSE_SPGEMM_ALG1;

// estimate work
size_type buffer1_size{};
sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr,
&zero_val, c_descr, spgemm_descr,
buffer1_size, nullptr);
array<char> buffer1{exec, buffer1_size};
sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr,
&zero_val, c_descr, spgemm_descr,
buffer1_size, buffer1.get_data());

// compute spgemm
size_type buffer2_size{};
sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val,
c_descr, spgemm_descr, buffer1.get_data(),
buffer2_size, nullptr);
array<char> buffer2{exec, buffer2_size};
sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val,
c_descr, spgemm_descr, buffer1.get_data(),
buffer2_size, buffer2.get_data());
array<char> buffer1{exec};
array<char> buffer2{exec};

// Try CUSPARSE_SPGEMM_ALG1 first as it is fastest for small matrices
try {
// Memory estimate for Alg1
sparselib::spgemm_work_estimation(handle, &one_val, a_descr, b_descr,
&zero_val, c_descr, spgemm_descr,
spgemm_alg, buffer1_size, nullptr);
buffer1.resize_and_reset(buffer1_size);
sparselib::spgemm_work_estimation(
handle, &one_val, a_descr, b_descr, &zero_val, c_descr,
spgemm_descr, spgemm_alg, buffer1_size, buffer1.get_data());
sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val,
c_descr, spgemm_descr, spgemm_alg,
buffer1.get_data(), buffer2_size, nullptr);
// compute spgemm
buffer2.resize_and_reset(buffer2_size);
sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr, &zero_val,
c_descr, spgemm_descr, spgemm_alg,
buffer1.get_data(), buffer2_size,
buffer2.get_data());
}

catch (const CusparseError& cse) {
// If estimated buffer size is too large and CUDA >= 12.0, fall back to
// ALG2
#if CUDA_VERSION >= 12000
if (cse.get_error_code() == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) {
spgemm_alg = CUSPARSE_SPGEMM_ALG2;
// Memory estimate for Alg2/Alg3
sparselib::spgemm_work_estimation(
handle, &one_val, a_descr, b_descr, &zero_val, c_descr,
spgemm_descr, spgemm_alg, buffer1_size, nullptr);
buffer1.resize_and_reset(buffer1_size);
sparselib::spgemm_work_estimation(
handle, &one_val, a_descr, b_descr, &zero_val, c_descr,
spgemm_descr, spgemm_alg, buffer1_size, buffer1.get_data());
size_type buffer3_size{};
sparselib::spgemm_estimate_memory(
handle, &one_val, a_descr, b_descr, &zero_val, c_descr,
spgemm_descr, spgemm_alg, 1.0f, buffer3_size, nullptr, nullptr);
array<char> buffer3{exec, buffer3_size};
sparselib::spgemm_estimate_memory(
handle, &one_val, a_descr, b_descr, &zero_val, c_descr,
spgemm_descr, spgemm_alg, 1.0f, buffer3_size,
buffer3.get_data(), &buffer2_size);
buffer2.resize_and_reset(buffer2_size);
// compute spgemm
sparselib::spgemm_compute(handle, &one_val, a_descr, b_descr,
&zero_val, c_descr, spgemm_descr,
spgemm_alg, buffer1.get_data(),
buffer2_size, buffer2.get_data());
} else {
throw(cse);
}
#else // CUDA_VERSION < 12000
throw(cse);
#endif
}

// write result to temporary storage
auto c_tmp_nnz = sparselib::sparse_matrix_nnz(c_descr);
Expand All @@ -2700,7 +2787,7 @@ void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec,
c_tmp_vals_array.get_data());

sparselib::spgemm_copy(handle, &one_val, a_descr, b_descr, &zero_val,
c_descr, spgemm_descr);
c_descr, spgemm_descr, spgemm_alg);

sparselib::destroy(c_descr);
sparselib::destroy(b_descr);
Expand Down
1 change: 1 addition & 0 deletions contributors.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ Olenik Gregor <go@hpsim.de> HPSim
Ribizel Tobias <mail@upsj.de> Karlsruhe Institute of Technology
Riemer Lukas <lksriemer@gmail.com> Karlsruhe Institute of Technology
Tsai Yuhsiang <yhmtsai@gmail.com> National Taiwan University
Ilya Zilberter <izilberter@txcorp.com> Tech-X Corporation
43 changes: 31 additions & 12 deletions cuda/base/cusparse_bindings.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -208,42 +208,61 @@ void spgemm_work_estimation(cusparseHandle_t handle, const ValueType* alpha,
cusparseSpMatDescr_t b_descr, const ValueType* beta,
cusparseSpMatDescr_t c_descr,
cusparseSpGEMMDescr_t spgemm_descr,
cusparseSpGEMMAlg_t spgemm_alg,
size_type& buffer1_size, void* buffer1)
{
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_workEstimation(
handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta,
c_descr, cuda_data_type<ValueType>(), CUSPARSE_SPGEMM_DEFAULT,
spgemm_descr, &buffer1_size, buffer1));
c_descr, cuda_data_type<ValueType>(), spgemm_alg, spgemm_descr,
&buffer1_size, buffer1));
}

/**
 * Thin wrapper around cusparseSpGEMM_estimateMemory.
 *
 * Both operands are passed as non-transposed and the compute type is taken
 * from cuda_data_type<ValueType>(). All remaining arguments are forwarded
 * unchanged, except buffer3_size, which is handed to the library by address.
 *
 * @param handle  the cuSPARSE handle the call is issued on
 * @param alpha  forwarded unchanged
 * @param a_descr  descriptor of the first operand
 * @param b_descr  descriptor of the second operand
 * @param beta  forwarded unchanged
 * @param c_descr  descriptor of the result matrix
 * @param spgemm_descr  the SpGEMM descriptor
 * @param spgemm_alg  the SpGEMM algorithm selector
 * @param chunk_fraction  forwarded unchanged
 * @param buffer3_size  passed to the library by address
 * @param buffer3  forwarded unchanged
 * @param buffer2_size  forwarded unchanged
 *
 * @note The returned status is checked via GKO_ASSERT_NO_CUSPARSE_ERRORS.
 *       NOTE(review): presumably this raises CusparseError on failure -
 *       confirm against the macro definition.
 */
template <typename ValueType>
void spgemm_estimate_memory(cusparseHandle_t handle, const ValueType* alpha,
                            cusparseSpMatDescr_t a_descr,
                            cusparseSpMatDescr_t b_descr, const ValueType* beta,
                            cusparseSpMatDescr_t c_descr,
                            cusparseSpGEMMDescr_t spgemm_descr,
                            cusparseSpGEMMAlg_t spgemm_alg,
                            float chunk_fraction, size_type& buffer3_size,
                            void* buffer3, size_type* buffer2_size)
{
    // Neither operand is transposed.
    constexpr auto no_transpose = CUSPARSE_OPERATION_NON_TRANSPOSE;
    const auto compute_type = cuda_data_type<ValueType>();
    size_type* const buffer3_size_ptr = &buffer3_size;

    GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_estimateMemory(
        handle, no_transpose, no_transpose, alpha, a_descr, b_descr, beta,
        c_descr, compute_type, spgemm_alg, spgemm_descr, chunk_fraction,
        buffer3_size_ptr, buffer3, buffer2_size));
}


/**
 * Thin wrapper around cusparseSpGEMM_compute.
 *
 * Both operands are passed as non-transposed and the compute type is taken
 * from cuda_data_type<ValueType>(). buffer2_size is handed to the library by
 * address and buffer2 is forwarded unchanged; buffer1 is accepted as a
 * parameter but is not passed to the cuSPARSE call in this wrapper.
 *
 * @note The returned status is checked via GKO_ASSERT_NO_CUSPARSE_ERRORS.
 */
template <typename ValueType>
void spgemm_compute(cusparseHandle_t handle, const ValueType* alpha,
cusparseSpMatDescr_t a_descr, cusparseSpMatDescr_t b_descr,
const ValueType* beta, cusparseSpMatDescr_t c_descr,
cusparseSpGEMMDescr_t spgemm_descr, void* buffer1,
cusparseSpGEMMDescr_t spgemm_descr,
cusparseSpGEMMAlg_t spgemm_alg, void* buffer1,
size_type& buffer2_size, void* buffer2)
{
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_compute(
handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta,
c_descr, cuda_data_type<ValueType>(), CUSPARSE_SPGEMM_DEFAULT,
spgemm_descr, &buffer2_size, buffer2));
c_descr, cuda_data_type<ValueType>(), spgemm_alg, spgemm_descr,
&buffer2_size, buffer2));
}


/**
 * Thin wrapper around cusparseSpGEMM_copy.
 *
 * Both operands are passed as non-transposed and the compute type is taken
 * from cuda_data_type<ValueType>(); the descriptors and scalars are forwarded
 * unchanged.
 *
 * @note The returned status is checked via GKO_ASSERT_NO_CUSPARSE_ERRORS.
 */
template <typename ValueType>
void spgemm_copy(cusparseHandle_t handle, const ValueType* alpha,
cusparseSpMatDescr_t a_descr, cusparseSpMatDescr_t b_descr,
const ValueType* beta, cusparseSpMatDescr_t c_descr,
cusparseSpGEMMDescr_t spgemm_descr)
cusparseSpGEMMDescr_t spgemm_descr,
cusparseSpGEMMAlg_t spgemm_alg)
{
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseSpGEMM_copy(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr,
b_descr, beta, c_descr, cuda_data_type<ValueType>(),
CUSPARSE_SPGEMM_DEFAULT, spgemm_descr));
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_copy(
handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
CUSPARSE_OPERATION_NON_TRANSPOSE, alpha, a_descr, b_descr, beta,
c_descr, cuda_data_type<ValueType>(), spgemm_alg, spgemm_descr));
}


Expand Down
3 changes: 2 additions & 1 deletion cuda/base/exception.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#include "ginkgo/core/base/exception.hpp"

#include <string>

#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cufft.h>
Expand Down
11 changes: 9 additions & 2 deletions include/ginkgo/core/base/exception.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

Expand Down Expand Up @@ -250,11 +250,18 @@ class CusparseError : public Error {
*/
CusparseError(const std::string& file, int line, const std::string& func,
int64 error_code)
: Error(file, line, func + ": " + get_error(error_code))
: Error(file, line, func + ": " + get_error(error_code)),
err_code(error_code)
{}

/**
* Returns the error code
*/
int64 get_error_code() const noexcept { return err_code; }

private:
static std::string get_error(int64 error_code);
const int64 err_code;
};


Expand Down