From 7cce4a0e251638a70e4014274cc4aa607cf30921 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 26 Mar 2025 15:03:56 +0100 Subject: [PATCH 1/2] turn sort_row_major to handle ptr and add coo transpose --- .../base/device_matrix_data_kernels.cpp | 13 ++-- common/unified/matrix/coo_kernels.cpp | 17 ++++- core/base/device_matrix_data.cpp | 6 +- core/base/device_matrix_data_kernels.hpp | 5 +- core/device_hooks/common_kernels.inc.cpp | 1 + core/matrix/coo.cpp | 25 ++++++- core/matrix/coo_kernels.hpp | 35 ++++++---- dpcpp/base/device_matrix_data_kernels.dp.cpp | 17 +++-- include/ginkgo/core/matrix/coo.hpp | 8 ++- omp/base/device_matrix_data_kernels.cpp | 15 ++-- reference/base/device_matrix_data_kernels.cpp | 16 ++--- reference/matrix/coo_kernels.cpp | 14 +++- reference/test/matrix/coo_kernels.cpp | 70 ++++++++++++++++++- test/matrix/coo_kernels.cpp | 26 ++++++- 14 files changed, 213 insertions(+), 55 deletions(-) diff --git a/common/cuda_hip/base/device_matrix_data_kernels.cpp b/common/cuda_hip/base/device_matrix_data_kernels.cpp index 6d30e330415..e64828acaf8 100644 --- a/common/cuda_hip/base/device_matrix_data_kernels.cpp +++ b/common/cuda_hip/base/device_matrix_data_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -118,13 +118,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_row_major(std::shared_ptr exec, - device_matrix_data& data) + size_type num_elems, IndexType* row_idxs, + IndexType* col_idxs, ValueType* vals) { - auto it = thrust::make_zip_iterator( - thrust::make_tuple(data.get_row_idxs(), data.get_col_idxs())); - auto vals = as_device_type(data.get_values()); - thrust::sort_by_key(thrust_policy(exec), it, - it + data.get_num_stored_elements(), vals); + auto it = thrust::make_zip_iterator(thrust::make_tuple(row_idxs, col_idxs)); + auto vals_it = as_device_type(vals); + 
thrust::sort_by_key(thrust_policy(exec), it, it + num_elems, vals_it); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/common/unified/matrix/coo_kernels.cpp b/common/unified/matrix/coo_kernels.cpp index ce13d7500ab..5d4de372e70 100644 --- a/common/unified/matrix/coo_kernels.cpp +++ b/common/unified/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -62,6 +62,21 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_COO_FILL_IN_DENSE_KERNEL); +template +void conj_array(std::shared_ptr exec, + size_type num_elems, ValueType* values) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto tidx, auto values) { + values[tidx] = conj(values[tidx]); + }, + num_elems, values); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COO_CONJ_ARRAY_KERNEL); + + } // namespace coo } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/base/device_matrix_data.cpp b/core/base/device_matrix_data.cpp index 4c71fffe275..f0f2e00bfed 100644 --- a/core/base/device_matrix_data.cpp +++ b/core/base/device_matrix_data.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -105,7 +105,9 @@ void device_matrix_data::fill_zero() template void device_matrix_data::sort_row_major() { - this->values_.get_executor()->run(components::make_sort_row_major(*this)); + this->values_.get_executor()->run(components::make_sort_row_major( + this->get_num_stored_elements(), this->get_row_idxs(), + this->get_col_idxs(), this->get_values())); } diff --git a/core/base/device_matrix_data_kernels.hpp b/core/base/device_matrix_data_kernels.hpp index bcaeebdf0cb..21d50f14265 100644 --- a/core/base/device_matrix_data_kernels.hpp +++ b/core/base/device_matrix_data_kernels.hpp @@ -1,4 +1,4 @@ -// 
SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -45,7 +45,8 @@ namespace kernels { #define GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL(ValueType, \ IndexType) \ void sort_row_major(std::shared_ptr exec, \ - device_matrix_data& data) + size_type num_elems, IndexType* row_idxs, \ + IndexType* col_idxs, ValueType* values) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index c0f90120d9e..e276723e220 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -810,6 +810,7 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_ADVANCED_SPMV2_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_FILL_IN_DENSE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_COO_CONJ_ARRAY_KERNEL); } // namespace coo diff --git a/core/matrix/coo.cpp b/core/matrix/coo.cpp index 6316e8e948a..4ad3980f79c 100644 --- a/core/matrix/coo.cpp +++ b/core/matrix/coo.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -42,6 +42,8 @@ GKO_REGISTER_OPERATION(inplace_absolute_array, GKO_REGISTER_OPERATION(outplace_absolute_array, components::outplace_absolute_array); GKO_REGISTER_OPERATION(aos_to_soa, components::aos_to_soa); +GKO_REGISTER_OPERATION(sort_row_major, components::sort_row_major); +GKO_REGISTER_OPERATION(conj_array, coo::conj_array); } // anonymous namespace @@ -378,6 +380,27 @@ void Coo::write(mat_data& data) const } } +template +std::unique_ptr Coo::transpose() const +{ + auto coo = this->clone(); + std::swap(coo->row_idxs_, coo->col_idxs_); + auto size = this->get_size(); + 
coo->set_size(dim<2>{size[1], size[0]}); + coo->get_executor()->run(coo::make_sort_row_major( + coo->get_num_stored_elements(), coo->get_row_idxs(), + coo->get_col_idxs(), coo->get_values())); + return coo; +} + +template +std::unique_ptr Coo::conj_transpose() const +{ + auto coo = as(this->transpose()); + coo->get_executor()->run(coo::make_conj_array( + coo->get_num_stored_elements(), coo->get_values())); + return coo; +} template std::unique_ptr> diff --git a/core/matrix/coo_kernels.hpp b/core/matrix/coo_kernels.hpp index a2cc44b74d9..d68e838e833 100644 --- a/core/matrix/coo_kernels.hpp +++ b/core/matrix/coo_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -54,19 +54,26 @@ namespace kernels { const matrix::Coo* orig, \ matrix::Diagonal* diag) -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_COO_SPMV_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_COO_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_COO_SPMV2_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_COO_ADVANCED_SPMV2_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_COO_FILL_IN_DENSE_KERNEL(ValueType, IndexType); \ - template \ - GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL(ValueType, IndexType) +#define GKO_DECLARE_COO_CONJ_ARRAY_KERNEL(ValueType) \ + void conj_array(std::shared_ptr exec, \ + size_type num_elems, ValueType* values) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_COO_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_COO_ADVANCED_SPMV_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_COO_SPMV2_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_COO_ADVANCED_SPMV2_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_COO_FILL_IN_DENSE_KERNEL(ValueType, IndexType); \ + template \ + 
GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_COO_CONJ_ARRAY_KERNEL(ValueType) GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(coo, GKO_DECLARE_ALL_AS_TEMPLATES); diff --git a/dpcpp/base/device_matrix_data_kernels.dp.cpp b/dpcpp/base/device_matrix_data_kernels.dp.cpp index 2c26bfeeba2..0e77d16bc65 100644 --- a/dpcpp/base/device_matrix_data_kernels.dp.cpp +++ b/dpcpp/base/device_matrix_data_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -103,16 +103,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_row_major(std::shared_ptr exec, - device_matrix_data& data) + size_type num_elems, IndexType* row_idxs, + IndexType* col_idxs, ValueType* vals) { auto policy = onedpl_policy(exec); - auto input_it = oneapi::dpl::make_zip_iterator( - data.get_row_idxs(), data.get_col_idxs(), data.get_values()); - std::sort(policy, input_it, input_it + data.get_num_stored_elements(), - [](auto a, auto b) { - return std::tie(std::get<0>(a), std::get<1>(a)) < - std::tie(std::get<0>(b), std::get<1>(b)); - }); + auto input_it = oneapi::dpl::make_zip_iterator(row_idxs, col_idxs, vals); + std::sort(policy, input_it, input_it + num_elems, [](auto a, auto b) { + return std::tie(std::get<0>(a), std::get<1>(a)) < + std::tie(std::get<0>(b), std::get<1>(b)); + }); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/include/ginkgo/core/matrix/coo.hpp b/include/ginkgo/core/matrix/coo.hpp index 89e94568f0f..49c329af368 100644 --- a/include/ginkgo/core/matrix/coo.hpp +++ b/include/ginkgo/core/matrix/coo.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -58,6 +58,7 @@ class Coo : public EnableLinOp>, public DiagonalExtractable, public ReadableFromMatrixData, 
public WritableToMatrixData, + public Transposable, public EnableAbsoluteComputation< remove_complex>> { friend class EnablePolymorphicObject; @@ -80,6 +81,7 @@ class Coo : public EnableLinOp>, using value_type = ValueType; using index_type = IndexType; + using transposed_type = Coo; using mat_data = matrix_data; using device_mat_data = device_matrix_data; using absolute_type = remove_complex; @@ -122,6 +124,10 @@ class Coo : public EnableLinOp>, void write(mat_data& data) const override; + std::unique_ptr transpose() const override; + + std::unique_ptr conj_transpose() const override; + std::unique_ptr> extract_diagonal() const override; std::unique_ptr compute_absolute() const override; diff --git a/omp/base/device_matrix_data_kernels.cpp b/omp/base/device_matrix_data_kernels.cpp index bce89e2f409..1cf4a473786 100644 --- a/omp/base/device_matrix_data_kernels.cpp +++ b/omp/base/device_matrix_data_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -9,6 +9,7 @@ #include #include "core/base/allocator.hpp" +#include "core/base/iterator_factory.hpp" #include "core/components/format_conversion_kernels.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -133,13 +134,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_row_major(std::shared_ptr exec, - device_matrix_data& data) + size_type num_elems, IndexType* row_idxs, + IndexType* col_idxs, ValueType* vals) { - array> tmp{ - exec, data.get_num_stored_elements()}; - soa_to_aos(exec, data, tmp); - std::sort(tmp.get_data(), tmp.get_data() + tmp.get_size()); - aos_to_soa(exec, tmp, data); + auto it = detail::make_zip_iterator(row_idxs, col_idxs, vals); + std::stable_sort(it, it + num_elems, [](auto a, auto b) { + return std::tie(get<0>(a), get<1>(a)) < std::tie(get<0>(b), get<1>(b)); + }); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git 
a/reference/base/device_matrix_data_kernels.cpp b/reference/base/device_matrix_data_kernels.cpp index f9a23b35e69..069fe513f4f 100644 --- a/reference/base/device_matrix_data_kernels.cpp +++ b/reference/base/device_matrix_data_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -8,9 +8,9 @@ #include +#include "core/base/iterator_factory.hpp" #include "core/components/prefix_sum_kernels.hpp" - namespace gko { namespace kernels { namespace reference { @@ -133,13 +133,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_row_major(std::shared_ptr exec, - device_matrix_data& data) + size_type num_elems, IndexType* row_idxs, + IndexType* col_idxs, ValueType* vals) { - array> tmp{ - exec, data.get_num_stored_elements()}; - soa_to_aos(exec, data, tmp); - std::sort(tmp.get_data(), tmp.get_data() + tmp.get_size()); - aos_to_soa(exec, tmp, data); + auto it = detail::make_zip_iterator(row_idxs, col_idxs, vals); + std::stable_sort(it, it + num_elems, [](auto a, auto b) { + return std::tie(get<0>(a), get<1>(a)) < std::tie(get<0>(b), get<1>(b)); + }); } GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/reference/matrix/coo_kernels.cpp b/reference/matrix/coo_kernels.cpp index f9bf9f5f33d..6ebc4a282d6 100644 --- a/reference/matrix/coo_kernels.cpp +++ b/reference/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -140,6 +140,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL); +template +void conj_array(std::shared_ptr exec, + size_type num_elems, ValueType* values) +{ + for (size_type idx = 0; idx < num_elems; idx++) { + values[idx] = conj(values[idx]); + } +} + 
+GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COO_CONJ_ARRAY_KERNEL); + + } // namespace coo } // namespace reference } // namespace kernels diff --git a/reference/test/matrix/coo_kernels.cpp b/reference/test/matrix/coo_kernels.cpp index f7063317a73..32f609f2e1d 100644 --- a/reference/test/matrix/coo_kernels.cpp +++ b/reference/test/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -900,6 +900,34 @@ TYPED_TEST(Coo, ApplyAddsScaledToMixedComplex) } +TYPED_TEST(Coo, Transpose) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto transposed_ans = gko::initialize( + {I{1.0, 0.0}, I{3.0, 5.0}, I{2.0, 0.0}}, this->exec); + + auto result = gko::as(this->mtx->transpose()); + + GKO_ASSERT_MTX_EQ_SPARSITY(result, transposed_ans); + GKO_ASSERT_MTX_NEAR(result, transposed_ans, 0.0); +} + + +TYPED_TEST(Coo, ConjTranspose) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto conj_transposed_ans = gko::initialize( + {I{1.0, 0.0}, I{3.0, 5.0}, I{2.0, 0.0}}, this->exec); + + auto result = gko::as(this->mtx->conj_transpose()); + + GKO_ASSERT_MTX_EQ_SPARSITY(result, conj_transposed_ans); + GKO_ASSERT_MTX_NEAR(result, conj_transposed_ans, 0.0); +} + + template class CooComplex : public ::testing::Test { protected: @@ -954,4 +982,44 @@ TYPED_TEST(CooComplex, InplaceAbsolute) } +TYPED_TEST(CooComplex, Transpose) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = gko::ReferenceExecutor::create(); + auto mtx = gko::initialize({{T{1.0, 1.0}, T{3.0, -1.0}, T{2.0, 2.0}}, + {T{0.0, 0.0}, T{5.0, -3.0}, T{0.0, 0.0}}}, + exec); + auto transposed_ans = gko::initialize( + {I{T{1.0, 1.0}, T{0.0, 0.0}}, I{T{3.0, -1.0}, T{5.0, -3.0}}, + I{T{2.0, 2.0}, T{0.0, 0.0}}}, + exec); + + auto result = 
gko::as(mtx->transpose()); + + GKO_ASSERT_MTX_EQ_SPARSITY(result, transposed_ans); + GKO_ASSERT_MTX_NEAR(result, transposed_ans, 0.0); +} + + +TYPED_TEST(CooComplex, ConjTranspose) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + auto exec = gko::ReferenceExecutor::create(); + auto mtx = gko::initialize({{T{1.0, 1.0}, T{3.0, -1.0}, T{2.0, 2.0}}, + {T{0.0, 0.0}, T{5.0, -3.0}, T{0.0, 0.0}}}, + exec); + auto conj_transposed_ans = gko::initialize( + {I{T{1.0, -1.0}, T{0.0, 0.0}}, I{T{3.0, 1.0}, T{5.0, 3.0}}, + I{T{2.0, -2.0}, T{0.0, 0.0}}}, + exec); + + auto result = gko::as(mtx->conj_transpose()); + + GKO_ASSERT_MTX_EQ_SPARSITY(result, conj_transposed_ans); + GKO_ASSERT_MTX_NEAR(result, conj_transposed_ans, 0.0); +} + + } // namespace diff --git a/test/matrix/coo_kernels.cpp b/test/matrix/coo_kernels.cpp index 091f95544e6..fc0e38fb510 100644 --- a/test/matrix/coo_kernels.cpp +++ b/test/matrix/coo_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -347,3 +347,27 @@ TEST_F(Coo, OutplaceAbsoluteMatrixIsEquivalentToRef) GKO_ASSERT_MTX_NEAR(abs_mtx, dabs_mtx, r::value); } + + +TEST_F(Coo, TransposeIsEquivalentToRef) +{ + set_up_apply_data(); + + auto trans = gko::as(mtx->transpose()); + auto dtrans = gko::as(dmtx->transpose()); + + GKO_ASSERT_MTX_EQ_SPARSITY(dtrans, trans); + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0.0); +} + + +TEST_F(Coo, ConjugateTransposeIsEquivalentToRef) +{ + set_up_apply_data(); + + auto trans = gko::as(mtx->conj_transpose()); + auto dtrans = gko::as(dmtx->conj_transpose()); + + GKO_ASSERT_MTX_EQ_SPARSITY(dtrans, trans); + GKO_ASSERT_MTX_NEAR(dtrans, trans, 0.0); +} From fc280abd951ed1b605e21119b431eaa46344bcaa Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. 
Tsai" Date: Mon, 7 Apr 2025 15:51:04 +0200 Subject: [PATCH 2/2] reuse the components sort row major in pgm --- common/cuda_hip/multigrid/pgm_kernels.cpp | 18 +----------------- core/device_hooks/common_kernels.inc.cpp | 1 - core/multigrid/pgm.cpp | 7 ++++++- core/multigrid/pgm_kernels.hpp | 9 +-------- dpcpp/multigrid/pgm_kernels.dp.cpp | 21 +-------------------- omp/multigrid/pgm_kernels.cpp | 15 +-------------- reference/multigrid/pgm_kernels.cpp | 15 +-------------- 7 files changed, 11 insertions(+), 75 deletions(-) diff --git a/common/cuda_hip/multigrid/pgm_kernels.cpp b/common/cuda_hip/multigrid/pgm_kernels.cpp index 61a2f9ac74a..93659cb1f6e 100644 --- a/common/cuda_hip/multigrid/pgm_kernels.cpp +++ b/common/cuda_hip/multigrid/pgm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -81,22 +81,6 @@ void sort_agg(std::shared_ptr exec, IndexType num, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PGM_SORT_AGG_KERNEL); -template -void sort_row_major(std::shared_ptr exec, size_type nnz, - IndexType* row_idxs, IndexType* col_idxs, ValueType* vals) -{ - auto vals_it = as_device_type(vals); - auto it = thrust::make_zip_iterator(thrust::make_tuple(row_idxs, col_idxs)); - // Because reduce_by_key is not deterministic, so we do not need - // stable_sort_by_key - // TODO: If we have deterministic reduce_by_key, it should be - // stable_sort_by_key - thrust::sort_by_key(thrust_policy(exec), it, it + nnz, vals_it); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_SORT_ROW_MAJOR); - - template void compute_coarse_coo(std::shared_ptr exec, size_type fine_nnz, const IndexType* row_idxs, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index e276723e220..f5b41ff19c9 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ 
-1097,7 +1097,6 @@ GKO_STUB_INDEX_TYPE(GKO_DECLARE_PGM_COUNT_UNREPEATED_NNZ_KERNEL); GKO_STUB_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PGM_FIND_STRONGEST_NEIGHBOR); GKO_STUB_NON_COMPLEX_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_ASSIGN_TO_EXIST_AGG); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_SORT_ROW_MAJOR); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_COMPUTE_COARSE_COO); GKO_STUB_INDEX_TYPE(GKO_DECLARE_PGM_GATHER_INDEX); diff --git a/core/multigrid/pgm.cpp b/core/multigrid/pgm.cpp index 7d28409134e..fddefd3ddf0 100644 --- a/core/multigrid/pgm.cpp +++ b/core/multigrid/pgm.cpp @@ -23,6 +23,7 @@ #include #include +#include "core/base/device_matrix_data_kernels.hpp" #include "core/base/dispatch_helper.hpp" #include "core/base/iterator_factory.hpp" #include "core/base/utils.hpp" @@ -48,7 +49,7 @@ GKO_REGISTER_OPERATION(assign_to_exist_agg, pgm::assign_to_exist_agg); GKO_REGISTER_OPERATION(sort_agg, pgm::sort_agg); GKO_REGISTER_OPERATION(map_row, pgm::map_row); GKO_REGISTER_OPERATION(map_col, pgm::map_col); -GKO_REGISTER_OPERATION(sort_row_major, pgm::sort_row_major); +GKO_REGISTER_OPERATION(sort_row_major, components::sort_row_major); GKO_REGISTER_OPERATION(count_unrepeated_nnz, pgm::count_unrepeated_nnz); GKO_REGISTER_OPERATION(compute_coarse_coo, pgm::compute_coarse_coo); GKO_REGISTER_OPERATION(fill_array, components::fill_array); @@ -115,6 +116,10 @@ std::shared_ptr> generate_coarse( non_local_agg.get_const_data(), col_idxs.get_data())); // sort by row, col + // Because reduce_by_key is not deterministic, we do not need + // stable_sort_by_key + // TODO: If reduce_by_key becomes deterministic, we might consider + // stable_sort_by_key exec->run(pgm::make_sort_row_major(nnz, row_idxs.get_data(), col_idxs.get_data(), vals.get_data())); // compute the total nnz and create the fine csr diff --git a/core/multigrid/pgm_kernels.hpp b/core/multigrid/pgm_kernels.hpp index a7a0a4aa099..214ee05da83 100644 --- a/core/multigrid/pgm_kernels.hpp +++ 
b/core/multigrid/pgm_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -69,11 +69,6 @@ namespace pgm { const matrix::Diagonal* diag, array& agg, \ array& intermediate_agg) -#define GKO_DECLARE_PGM_SORT_ROW_MAJOR(ValueType, IndexType) \ - void sort_row_major(std::shared_ptr exec, \ - size_type nnz, IndexType* row_idxs, \ - IndexType* col_idxs, ValueType* vals) - #define GKO_DECLARE_PGM_COMPUTE_COARSE_COO(ValueType, IndexType) \ void compute_coarse_coo(std::shared_ptr exec, \ size_type fine_nnz, const IndexType* row_idxs, \ @@ -106,8 +101,6 @@ namespace pgm { template \ GKO_DECLARE_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType); \ template \ - GKO_DECLARE_PGM_SORT_ROW_MAJOR(ValueType, IndexType); \ - template \ GKO_DECLARE_PGM_COMPUTE_COARSE_COO(ValueType, IndexType); \ template \ GKO_DECLARE_PGM_GATHER_INDEX(IndexType) diff --git a/dpcpp/multigrid/pgm_kernels.dp.cpp b/dpcpp/multigrid/pgm_kernels.dp.cpp index 398fc5255e2..08760da557f 100644 --- a/dpcpp/multigrid/pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/pgm_kernels.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -70,25 +70,6 @@ void sort_agg(std::shared_ptr exec, IndexType num, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PGM_SORT_AGG_KERNEL); -template -void sort_row_major(std::shared_ptr exec, size_type nnz, - IndexType* row_idxs, IndexType* col_idxs, ValueType* vals) -{ - auto policy = onedpl_policy(exec); - auto it = oneapi::dpl::make_zip_iterator(row_idxs, col_idxs, vals); - // Because reduce_by_segment is not deterministic, so we do not need - // stable_sort - // TODO: If we have deterministic reduce_by_segment, it should be - // stable_sort - std::sort(policy, it, it + nnz, [](auto a, auto b) { - return std::tie(std::get<0>(a), 
std::get<1>(a)) < - std::tie(std::get<0>(b), std::get<1>(b)); - }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_SORT_ROW_MAJOR); - - template class coarse_coo_policy {}; diff --git a/omp/multigrid/pgm_kernels.cpp b/omp/multigrid/pgm_kernels.cpp index fb64796c4f7..7c7fc1be818 100644 --- a/omp/multigrid/pgm_kernels.cpp +++ b/omp/multigrid/pgm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -62,19 +62,6 @@ void sort_agg(std::shared_ptr exec, IndexType num, GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_PGM_SORT_AGG_KERNEL); -template -void sort_row_major(std::shared_ptr exec, size_type nnz, - IndexType* row_idxs, IndexType* col_idxs, ValueType* vals) -{ - auto it = detail::make_zip_iterator(row_idxs, col_idxs, vals); - std::stable_sort(it, it + nnz, [](auto a, auto b) { - return std::tie(get<0>(a), get<1>(a)) < std::tie(get<0>(b), get<1>(b)); - }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_SORT_ROW_MAJOR); - - template void compute_coarse_coo(std::shared_ptr exec, size_type fine_nnz, const IndexType* row_idxs, diff --git a/reference/multigrid/pgm_kernels.cpp b/reference/multigrid/pgm_kernels.cpp index bff2a776c6b..ab0d86b1831 100644 --- a/reference/multigrid/pgm_kernels.cpp +++ b/reference/multigrid/pgm_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -264,19 +264,6 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PGM_ASSIGN_TO_EXIST_AGG); -template -void sort_row_major(std::shared_ptr exec, size_type nnz, - IndexType* row_idxs, IndexType* col_idxs, ValueType* vals) -{ - auto it = detail::make_zip_iterator(row_idxs, col_idxs, vals); - std::stable_sort(it, it + nnz, [](auto a, auto b) { - return 
std::tie(get<0>(a), get<1>(a)) < std::tie(get<0>(b), get<1>(b)); - }); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_PGM_SORT_ROW_MAJOR); - - template void compute_coarse_coo(std::shared_ptr exec, size_type fine_nnz, const IndexType* row_idxs,