Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions benchmark/sparse_blas/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,7 @@ class SpgemmOperation : public BenchmarkOperation {
std::pair<bool, double> validate() const override
{
auto ref = gko::ReferenceExecutor::create();
auto correct = Mtx::create(ref, mtx_out_->get_size());
gko::make_temporary_clone(ref, mtx_)->apply(mtx2_, correct);
auto correct = gko::make_temporary_clone(ref, mtx_)->multiply(mtx2_);
return validate_result(correct, mtx_out_);
}

Expand Down Expand Up @@ -179,22 +178,33 @@ class SpgemmOperation : public BenchmarkOperation {
(sizeof(etype) + sizeof(itype));
}

void prepare() override
{
mtx_out_ =
Mtx::create(mtx_->get_executor(),
gko::dim<2>{mtx_->get_size()[0], mtx2_->get_size()[1]});
}
void prepare() override {}

void run() override { mtx_->apply(mtx2_, mtx_out_); }
void run() override { mtx_out_ = mtx_->multiply(mtx2_); }

private:
protected:
const Mtx* mtx_;
std::unique_ptr<Mtx> mtx2_;
std::unique_ptr<Mtx> mtx_out_;
};


class SpgemmReuseOperation : public SpgemmOperation {
public:
using SpgemmOperation::SpgemmOperation;

void prepare() override
{
std::tie(mtx_out_, reuse_) = mtx_->multiply_reuse(mtx2_);
}

void run() override { reuse_.update_values(mtx_, mtx2_, mtx_out_); }

protected:
Mtx::multiply_reuse_info reuse_;
};


class SpgeamOperation : public BenchmarkOperation {
public:
explicit SpgeamOperation(const Mtx* mtx) : mtx_{mtx}
Expand Down Expand Up @@ -772,6 +782,10 @@ const std::map<std::string,
operation_map{
{"spgemm",
[](const Mtx* mtx) { return std::make_unique<SpgemmOperation>(mtx); }},
{"spgemm_reuse",
[](const Mtx* mtx) {
return std::make_unique<SpgemmReuseOperation>(mtx);
}},
{"spgeam",
[](const Mtx* mtx) { return std::make_unique<SpgeamOperation>(mtx); }},
{"transpose",
Expand Down
3 changes: 2 additions & 1 deletion benchmark/sparse_blas/sparse_blas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ using mat_data = gko::matrix_data<etype, itype>;

const char* operations_string =
"Comma-separated list of operations to be benchmarked. Can be "
"spgemm, spgeam, transpose, sort, is_sorted, generate_lookup, "
"spgemm, spgemm_reuse, spgeam, transpose, sort, is_sorted, "
"generate_lookup, "
"lookup, symbolic_lu, symbolic_lu_near_symm, symbolic_cholesky, "
"symbolic_cholesky_symmetric, reorder_rcm, "
#if GKO_HAVE_METIS
Expand Down
2 changes: 2 additions & 0 deletions common/cuda_hip/matrix/csr_kernels.instantiate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_SPGEMM_REUSE_KERNEL);
GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
89 changes: 89 additions & 0 deletions common/cuda_hip/matrix/csr_kernels.template.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/components/uninitialized_array.hpp"
#include "core/base/array_access.hpp"
#include "core/base/index_range.hpp"
#include "core/base/mixed_precision_types.hpp"
#include "core/components/fill_array_kernels.hpp"
#include "core/components/format_conversion_kernels.hpp"
Expand Down Expand Up @@ -2735,6 +2736,94 @@ void advanced_spgemm(std::shared_ptr<const DefaultExecutor> exec,
}


namespace kernel {


template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void spgemm_reuse(
const IndexType* __restrict__ a_row_ptrs,
const IndexType* __restrict__ a_cols, const ValueType* __restrict__ a_vals,
const IndexType* __restrict__ b_row_ptrs,
const IndexType* __restrict__ b_cols, const ValueType* __restrict__ b_vals,
const IndexType* __restrict__ c_row_ptrs,
const IndexType* __restrict__ c_cols, ValueType* __restrict__ c_vals,
const IndexType* __restrict__ lookup_storage_offsets,
const int32* __restrict__ lookup_storage,
const int64* __restrict__ lookup_descs, IndexType num_rows)
{
constexpr auto subwarp_size = config::warp_size;
const auto subwarp =
group::tiled_partition<subwarp_size>(group::this_thread_block());
const auto row = thread::get_subwarp_id_flat<subwarp_size, IndexType>();
const auto lane = static_cast<IndexType>(subwarp.thread_rank());
if (row >= num_rows) {
return;
}
const auto a_begin = a_row_ptrs[row];
const auto a_end = a_row_ptrs[row + 1];
const auto c_begin = c_row_ptrs[row];
const auto c_end = c_row_ptrs[row + 1];
const auto c_row_lookup = matrix::csr::device_sparsity_lookup<IndexType>{
c_row_ptrs, c_cols, lookup_storage_offsets,
lookup_storage, lookup_descs, static_cast<size_type>(row)};
for (auto i = c_begin + lane; i < c_end; i += subwarp_size) {
c_vals[i] = zero<ValueType>();
}
for (const auto a_nz : irange{a_begin, a_end}) {
const auto a_col = a_cols[a_nz];
const auto a_val = a_vals[a_nz];
const auto b_begin = b_row_ptrs[a_col];
const auto b_end = b_row_ptrs[a_col + 1];
for (auto b_nz = b_begin + lane; b_nz < b_end; b_nz += subwarp_size) {
const auto b_col = b_cols[b_nz];
const auto b_val = b_vals[b_nz];
const auto rel_nz = c_row_lookup[b_col];
GKO_ASSERT(rel_nz != invalid_index<IndexType>());
c_vals[c_begin + rel_nz] += a_val * b_val;
}
// this is necessary to avoid data races between two rows of B
// sharing the same column index
subwarp.sync();
}
}


} // namespace kernel


template <typename ValueType, typename IndexType>
void spgemm_reuse(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* a,
const matrix::Csr<ValueType, IndexType>* b,
const matrix::csr::lookup_data<IndexType>& c_lookup,
matrix::Csr<ValueType, IndexType>* c)
{
const auto num_rows = static_cast<IndexType>(c->get_size()[0]);
const auto a_row_ptrs = a->get_const_row_ptrs();
const auto b_row_ptrs = b->get_const_row_ptrs();
const auto c_row_ptrs = c->get_const_row_ptrs();
const auto a_cols = a->get_const_col_idxs();
const auto b_cols = b->get_const_col_idxs();
const auto c_cols = c->get_const_col_idxs();
const auto a_vals = as_device_type(a->get_const_values());
const auto b_vals = as_device_type(b->get_const_values());
const auto c_vals = as_device_type(c->get_values());
const auto lookup_storage_offsets =
c_lookup.storage_offsets.get_const_data();
const auto lookup_storage = c_lookup.storage.get_const_data();
const auto lookup_descs = c_lookup.row_descs.get_const_data();
if (num_rows > 0) {
const auto num_blocks =
ceildiv(num_rows, default_block_size / config::warp_size);
kernel::spgemm_reuse<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(
a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, c_row_ptrs,
c_cols, c_vals, lookup_storage_offsets, lookup_storage,
lookup_descs, num_rows);
}
}


template <typename ValueType, typename IndexType>
void transpose(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Csr<ValueType, IndexType>* orig,
Expand Down
1 change: 1 addition & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,7 @@ GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL);
GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_REUSE_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL);
GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_CONVERT_TO_ELL_KERNEL);
Expand Down
105 changes: 105 additions & 0 deletions core/matrix/csr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
#include "ginkgo/core/matrix/csr.hpp"

#include <ginkgo/core/base/array.hpp>
#include <ginkgo/core/base/exception.hpp>
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/executor.hpp>
#include <ginkgo/core/base/index_set.hpp>
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/precision_dispatch.hpp>
#include <ginkgo/core/base/temporary_clone.hpp>
#include <ginkgo/core/base/utils.hpp>
#include <ginkgo/core/matrix/coo.hpp>
#include <ginkgo/core/matrix/dense.hpp>
Expand Down Expand Up @@ -45,6 +47,7 @@ GKO_REGISTER_OPERATION(spmv, csr::spmv);
GKO_REGISTER_OPERATION(advanced_spmv, csr::advanced_spmv);
GKO_REGISTER_OPERATION(spgemm, csr::spgemm);
GKO_REGISTER_OPERATION(advanced_spgemm, csr::advanced_spgemm);
GKO_REGISTER_OPERATION(spgemm_reuse, csr::spgemm_reuse);
GKO_REGISTER_OPERATION(spgeam, csr::spgeam);
GKO_REGISTER_OPERATION(convert_idxs_to_ptrs, components::convert_idxs_to_ptrs);
GKO_REGISTER_OPERATION(convert_ptrs_to_idxs, components::convert_ptrs_to_idxs);
Expand Down Expand Up @@ -643,6 +646,108 @@ void Csr<ValueType, IndexType>::write(mat_data& data) const
}


template <typename ValueType, typename IndexType>
std::unique_ptr<Csr<ValueType, IndexType>> Csr<ValueType, IndexType>::multiply(
ptr_param<const Csr> other) const
{
GKO_ASSERT_CONFORMANT(this, other);
auto result_size = gko::dim<2>{this->get_size()[0], other->get_size()[1]};
auto exec = this->get_executor();
auto local_other = make_temporary_clone(exec, other);
auto result = Csr::create(exec, result_size);
exec->run(csr::make_spgemm(this, local_other.get(), result.get()));
return result;
}


template <typename ValueType, typename IndexType>
struct Csr<ValueType, IndexType>::multiply_reuse_info::lookup_data {
dim<2> size1;
dim<2> size2;
dim<2> size_out;
size_type nnz1;
size_type nnz2;
size_type nnz_out;
csr::lookup_data<IndexType> data;
};


template <typename ValueType, typename IndexType>
Csr<ValueType, IndexType>::multiply_reuse_info::~multiply_reuse_info() =
default;


template <typename ValueType, typename IndexType>
Csr<ValueType, IndexType>::multiply_reuse_info::multiply_reuse_info(
multiply_reuse_info&&) = default;


template <typename ValueType, typename IndexType>
typename Csr<ValueType, IndexType>::multiply_reuse_info&
Csr<ValueType, IndexType>::multiply_reuse_info::operator=(
multiply_reuse_info&&) = default;


template <typename ValueType, typename IndexType>
Csr<ValueType, IndexType>::multiply_reuse_info::multiply_reuse_info()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

=default ?

{}


template <typename ValueType, typename IndexType>
Csr<ValueType, IndexType>::multiply_reuse_info::multiply_reuse_info(
std::unique_ptr<lookup_data> data)
: internal{std::move(data)}
{}


template <typename ValueType, typename IndexType>
void Csr<ValueType, IndexType>::multiply_reuse_info::update_values(
ptr_param<const Csr> mtx1, ptr_param<const Csr> mtx2,
ptr_param<Csr> out) const
{
if (!internal) {
throw InvalidStateError{
__FILE__, __LINE__, __func__,
"Attempting to use uninitialized multiply_reuse_info"};
}
GKO_ASSERT_EQUAL_DIMENSIONS(mtx1, internal->size1);
GKO_ASSERT_EQUAL_DIMENSIONS(mtx2, internal->size2);
GKO_ASSERT_EQUAL_DIMENSIONS(out, internal->size_out);
GKO_ASSERT_EQ(mtx1->get_num_stored_elements(), internal->nnz1);
GKO_ASSERT_EQ(mtx2->get_num_stored_elements(), internal->nnz2);
GKO_ASSERT_EQ(out->get_num_stored_elements(), internal->nnz_out);
auto exec = internal->data.storage.get_executor();
auto local_mtx1 = make_temporary_clone(exec, mtx1);
auto local_mtx2 = make_temporary_clone(exec, mtx2);
auto local_out = make_temporary_clone(exec, out);
exec->run(csr::make_spgemm_reuse(local_mtx1.get(), local_mtx2.get(),
internal->data, local_out.get()));
}


template <typename ValueType, typename IndexType>
std::pair<std::unique_ptr<Csr<ValueType, IndexType>>,
typename Csr<ValueType, IndexType>::multiply_reuse_info>
Csr<ValueType, IndexType>::multiply_reuse(ptr_param<const Csr> other) const
{
GKO_ASSERT_CONFORMANT(this, other);
auto result_size = gko::dim<2>{this->get_size()[0], other->get_size()[1]};
auto exec = this->get_executor();
auto local_other = make_temporary_clone(exec, other);
auto result = Csr::create(exec, result_size);
exec->run(csr::make_spgemm(this, local_other.get(), result.get()));
auto lookup = csr::build_lookup(result.get());
auto reuse_info = multiply_reuse_info{
std::make_unique<typename multiply_reuse_info::lookup_data>(
typename multiply_reuse_info::lookup_data{
this->get_size(), other->get_size(), result_size,
this->get_num_stored_elements(),
other->get_num_stored_elements(),
result->get_num_stored_elements(), std::move(lookup)})};
return std::make_pair(std::move(result), std::move(reuse_info));
}


template <typename ValueType, typename IndexType, typename TransformClosure>
std::pair<std::unique_ptr<Csr<ValueType, IndexType>>,
typename Csr<ValueType, IndexType>::permuting_reuse_info>
Expand Down
9 changes: 9 additions & 0 deletions core/matrix/csr_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ namespace kernels {
const matrix::Csr<ValueType, IndexType>* d, \
matrix::Csr<ValueType, IndexType>* c)

#define GKO_DECLARE_CSR_SPGEMM_REUSE_KERNEL(ValueType, IndexType) \
void spgemm_reuse(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Csr<ValueType, IndexType>* a, \
const matrix::Csr<ValueType, IndexType>* b, \
const matrix::csr::lookup_data<IndexType>& c_lookup, \
matrix::Csr<ValueType, IndexType>* c)

#define GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType) \
void spgeam(std::shared_ptr<const DefaultExecutor> exec, \
const matrix::Dense<ValueType>* alpha, \
Expand Down Expand Up @@ -278,6 +285,8 @@ namespace kernels {
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_SPGEMM_REUSE_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL(ValueType, IndexType); \
Expand Down
10 changes: 3 additions & 7 deletions core/preconditioner/isai.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ std::shared_ptr<Csr> extend_sparsity(std::shared_ptr<const Executor>& exec,
return {std::move(mtx->clone())};
}
auto id_power = mtx->clone();
auto tmp = Csr::create(exec, mtx->get_size());
// accumulates mtx * the remainder from odd powers
auto acc = mtx->clone();
// compute id^(n-1) using square-and-multiply
Expand All @@ -77,18 +76,15 @@ std::shared_ptr<Csr> extend_sparsity(std::shared_ptr<const Executor>& exec,
if (i % 2 != 0) {
// store one power in acc:
// i^(2n+1) -> i*i^2n
id_power->apply(acc, tmp);
std::swap(acc, tmp);
acc = id_power->multiply(acc);
i--;
}
// square id_power: i^2n -> (i^2)^n
id_power->apply(id_power, tmp);
std::swap(id_power, tmp);
id_power = id_power->multiply(id_power);
i /= 2;
}
// combine acc and id_power again
id_power->apply(acc, tmp);
return {std::move(tmp)};
return id_power->multiply(acc);
}


Expand Down
Loading
Loading