From 91979f513c60d214cb695ba5a31f1fe15641c145 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 27 Jun 2023 16:38:42 +0200 Subject: [PATCH 01/11] adds Dense::row_scatter currently version with index_set as input is not implemented for devices Co-authored-by: Yu-Hsiang M. Tsai Signed-off-by: Marcel Koch --- .../unified/matrix/dense_kernels.template.cpp | 20 ++++ core/device_hooks/common_kernels.inc.cpp | 3 + core/matrix/dense.cpp | 72 ++++++++++++- core/matrix/dense_kernels.hpp | 20 +++- dpcpp/matrix/dense_kernels.dp.cpp | 11 ++ include/ginkgo/core/base/index_set.hpp | 29 +++++ include/ginkgo/core/matrix/dense.hpp | 42 +++++++- omp/matrix/dense_kernels.cpp | 12 +++ reference/matrix/dense_kernels.cpp | 42 ++++++++ reference/test/matrix/dense_kernels.cpp | 102 ++++++++++++++++++ test/matrix/dense_kernels.cpp | 83 +++++++++++++- 11 files changed, 431 insertions(+), 5 deletions(-) diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index 577a89ca693..a0f82007ec0 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -468,6 +468,26 @@ void advanced_row_gather(std::shared_ptr exec, } +template +void row_scatter(std::shared_ptr exec, + const array* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto row, auto col, auto orig, auto rows, + auto scattered) { + scattered(rows[row], col) = orig(row, col); + }, + dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, orig, *row_idxs, + target); +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); + + template void col_permute(std::shared_ptr exec, const IndexType* permutation_indices, diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 2e0f096cf7d..1cbcf0e8957 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -476,6 +476,9 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); +GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); +GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 308b5e8f11e..c13c3c8688a 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -74,6 +74,7 @@ GKO_REGISTER_OPERATION(nonsymm_permute, dense::nonsymm_permute); GKO_REGISTER_OPERATION(inv_nonsymm_permute, dense::inv_nonsymm_permute); GKO_REGISTER_OPERATION(row_gather, dense::row_gather); GKO_REGISTER_OPERATION(advanced_row_gather, dense::advanced_row_gather); +GKO_REGISTER_OPERATION(row_scatter, dense::row_scatter); GKO_REGISTER_OPERATION(col_permute, dense::col_permute); GKO_REGISTER_OPERATION(inverse_row_permute, dense::inv_row_permute); GKO_REGISTER_OPERATION(inverse_col_permute, dense::inv_col_permute); @@ -1310,6 +1311,41 @@ void Dense::row_gather_impl(const Dense* alpha, } +template +template +void Dense::row_scatter_impl(const array* row_idxs, + Dense* target) const +{ + auto exec = this->get_executor(); + dim<2> expected_dim{row_idxs->get_num_elems(), this->get_size()[1]}; + GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this); + GKO_ASSERT_EQUAL_COLS(this, target); + // @todo check that indices are inbounds for target + + exec->run(dense::make_row_scatter( + make_temporary_clone(exec, row_idxs).get(), this, + make_temporary_clone(exec, target).get())); +} + + +template +template +void Dense::row_scatter_impl(const index_set* row_idxs, + Dense* target) const +{ + auto exec = this->get_executor(); + dim<2> expected_dim{static_cast(row_idxs->get_num_elems()), + this->get_size()[1]}; + GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this); + GKO_ASSERT_EQUAL_COLS(this, target); + // @todo check that indices are inbounds for target + + exec->run(dense::make_row_scatter( + make_temporary_clone(exec, row_idxs).get(), this, + make_temporary_clone(exec, target).get())); +} + + template std::unique_ptr Dense::permute( const array* permutation_indices) const @@ -1613,6 +1649,28 @@ void Dense::row_gather(ptr_param alpha, } +template +template +void Dense::row_scatter(const array* row_idxs, + ptr_param row_collection) const +{ + gather_mixed_real_complex( + [&](auto dense) { this->row_scatter_impl(row_idxs, dense); }, + row_collection.get()); +} + + +template +template +void Dense::row_scatter(const index_set* row_idxs, + ptr_param row_collection) const +{ + gather_mixed_real_complex( + [&](auto dense) { this->row_scatter_impl(row_idxs, dense); }, + row_collection.get()); +} + + template std::unique_ptr Dense::column_permute( const array* permutation_indices) const @@ -2058,6 +2116,18 @@ Dense::Dense(std::shared_ptr exec, #define GKO_DECLARE_DENSE_MATRIX(_type) class Dense<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_MATRIX); +#define GKO_DECLARE_DENSE_ROW_SCATTER_ARRAY(_vtype, _itype) \ + void Dense<_vtype>::row_scatter(const array<_itype>* row_idxs, \ + ptr_param row_collection) const +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_ROW_SCATTER_ARRAY); + +#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET(_vtype, _itype) \ + void Dense<_vtype>::row_scatter(const index_set<_itype>* row_idxs, \ + ptr_param row_collection) const +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET); + } // namespace matrix } // namespace gko diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 7422b431aa0..0645f8d3a81 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -294,6 +295,18 @@ namespace kernels { const matrix::Dense<_vtype>* orig, const matrix::Dense<_vtype>* beta, \ matrix::Dense<_otype>* row_collection) +#define GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL(_vtype, _otype, _itype) \ + void row_scatter(std::shared_ptr exec, \ + const array<_itype>* gather_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_otype>* target) + +#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \ + void row_scatter(std::shared_ptr exec, \ + const index_set<_itype>* gather_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_otype>* target) + #define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \ void col_permute(std::shared_ptr exec, \ const _itype* permutation_indices, \ @@ -432,6 +445,11 @@ namespace kernels { template \ GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(ValueType, OutputType, \ IndexType); \ + template \ + GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL(ValueType, OutputType, IndexType); \ + template \ + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(ValueType, OutputType, \ + IndexType); \ template \ GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(ValueType, IndexType); \ template \ diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 99aac7064e5..e762a54fe21 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -18,6 +18,7 @@ #include #include +#include "core/base/mixed_precision_types.hpp" #include "core/components/prefix_sum_kernels.hpp" #include "dpcpp/base/config.hpp" #include "dpcpp/base/dim3.dp.hpp" @@ -586,6 +587,16 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void row_scatter(std::shared_ptr exec, + const index_set* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); + + } // namespace dense } // namespace dpcpp } // namespace kernels diff --git a/include/ginkgo/core/base/index_set.hpp b/include/ginkgo/core/base/index_set.hpp index f21f8df644a..804a6e56136 100644 --- a/include/ginkgo/core/base/index_set.hpp +++ b/include/ginkgo/core/base/index_set.hpp @@ -418,6 +418,35 @@ class index_set { }; +namespace detail { + + +template +struct temporary_clone_helper> { + static std::unique_ptr> create( + std::shared_ptr exec, index_set* ptr, bool copy_data) + { + if (copy_data) { + return std::make_unique>(std::move(exec), *ptr); + } else { + GKO_NOT_IMPLEMENTED; + } + } +}; + +template +struct temporary_clone_helper> { + static std::unique_ptr> create( + std::shared_ptr exec, const index_set* ptr, bool) + { + return std::make_unique>(std::move(exec), *ptr); + } +}; + + +} // namespace detail + + } // namespace gko diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 9e19109e82a..34ba5ec31f6 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -41,6 +41,10 @@ class VectorCache; } // namespace experimental +template +class index_set; + + namespace matrix { @@ -711,6 +715,42 @@ class Dense ptr_param beta, ptr_param row_collection) const; + /** + * Copies this matrix into the given rows of the target matrix. + * + * @tparam IndexType the index type, either int32 or int64 + * + * @param scatter_indices row indices of the target matrix. It must + * have the same number of indices as rows in + * this matrix. + * @param target matrix where the scattered rows are stored, i.e. + * `target(scatter_indices[i], j) = this(i, j)` + * + * @warning scatter_indices may not contain duplicates, unless if + * for indices `i, j` with `scatter_indices[i] == + * scatter_indices[j]` the rows `i, j` of this matrix are + * identical. + */ + template + void row_scatter(const array* scatter_indices, + ptr_param target) const; + + /** + * Copies this matrix into the given rows of the target matrix. + * + * @tparam IndexType the index type, either int32 or int64 + * + * @param scatter_indices row indices of the target matrix. It must + * have the same number of indices as rows in + * this matrix. + * @param target matrix where the scattered rows are stored, i.e. + * `target(scatter_indices.get_global_index(i), j) + * = this(i, j)` + */ + template + void row_scatter(const index_set* scatter_indices, + ptr_param target) const; + std::unique_ptr column_permute( const array* permutation_indices) const override; diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index fe1f58ef93d..24f786ed938 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -22,6 +22,7 @@ #include "accessor/block_col_major.hpp" #include "accessor/range.hpp" +#include "core/base/mixed_precision_types.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -465,6 +466,17 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_COUNT_NONZERO_BLOCKS_PER_ROW_KERNEL); +template +void row_scatter(std::shared_ptr exec, + const index_set* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target) +{} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); + + } // namespace dense } // namespace omp } // namespace kernels diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 7c36d9101d5..a45d124e631 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -953,6 +953,48 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); +template +void row_scatter(std::shared_ptr exec, + const array* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target) +{ + auto rows = row_idxs->get_const_data(); + for (size_type i = 0; i < row_idxs->get_size(); ++i) { + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + target->at(rows[i], j) = orig->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); + + +template +void row_scatter(std::shared_ptr exec, + const index_set* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target) +{ + auto set_begins = row_idxs->get_subsets_begin(); + auto set_ends = row_idxs->get_subsets_end(); + auto set_offsets = row_idxs->get_superset_indices(); + for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { + for (int target_row = set_begins[set]; target_row < set_ends[set]; + ++target_row) { + auto orig_row = target_row - set_begins[set] + set_offsets[set]; + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + target->at(target_row, j) = orig->at(orig_row, j); + } + } + } +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); + + template void col_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Dense* orig, diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 2ce7b023a1c..e0c5d11e921 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -2700,6 +2700,108 @@ TYPED_TEST(DenseWithIndexType, } +TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto row_collection = + gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); + gko::array permute_idxs{exec, {2, 0}}; + + row_collection->row_scatter(&permute_idxs, this->mtx5); + + GKO_ASSERT_MTX_NEAR( + this->mtx5, l({{0.7, 1.1, 4.0}, {-2.0, 2.0, 4.5}, {3.0, 2.7, 6.5}}), + 0.0); +} + + +TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsIntoDenseSubmatrix) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto row_collection = gko::initialize(I>{{3.0, 2.7}}, exec); + gko::array permute_idxs{exec, {0}}; + + row_collection->row_scatter(&permute_idxs, + this->mtx5->create_submatrix({2}, {1, 3})); + + GKO_ASSERT_MTX_NEAR( + this->mtx5, + l({{1.0, -1.0, -0.5}, {-2.0, 2.0, 4.5}, {2.1, 3.0, 2.7}}), 0.0); +} + + +TYPED_TEST(DenseWithIndexType, MatrixScatterRowsFailsWithWrongDimensions) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto row_collection1 = + gko::initialize(I>{{3.0, 2.7}, {0.7, 1.1}}, exec); + auto row_collection2 = + gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); + gko::array permute_idxs1{exec, {2, 0}}; + gko::array permute_idxs2{exec, {1}}; + + ASSERT_THROW(row_collection1->row_scatter(&permute_idxs1, this->mtx5), + gko::DimensionMismatch); + ASSERT_THROW(row_collection2->row_scatter(&permute_idxs2, this->mtx5), + gko::DimensionMismatch); +} + + +TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsUsingIndexSetIntoDense) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto mtx = gko::initialize({{2.2, 6.9, 7.8}, + {4.7, 1.3, 7.6}, + {9.2, 8.6, 4.5}, + {8.1, 9.4, 6.8}, + {9.6, 7.1, 2.5}}, + exec); + auto row_collection = gko::initialize( + {{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}, {3.4, 3.8, 7.8}}, exec); + gko::index_set permute_idxs{exec, {1, 0, 4}}; + + row_collection->row_scatter(&permute_idxs, mtx); + + GKO_ASSERT_MTX_NEAR(mtx, + l({{3.0, 2.7, 6.5}, + {0.7, 1.1, 4.0}, + {9.2, 8.6, 4.5}, + {8.1, 9.4, 6.8}, + {3.4, 3.8, 7.8}}), + 0.0); +} + + +TYPED_TEST(DenseWithIndexType, MatrixGatherScatterIsIdentity) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto mtx = this->template gen_mtx(23, 4); + gko::array idxs{exec, {3, 6, 11, 9, 22, 8}}; + + auto gather = mtx->row_gather(&idxs); + mtx->fill(-gko::one()); + gather->row_scatter(&idxs, mtx); + auto result = mtx->row_gather(&idxs); + + GKO_ASSERT_MTX_NEAR(gather, result, 0.0); +} + + TYPED_TEST(DenseWithIndexType, SquareMatrixIsPermutable) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 76e6487aa89..eff41a5e2b2 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -98,6 +98,7 @@ class Dense : public CommonTestFixture { x = gen_mtx(65, 25); y = gen_mtx(25, 35); c_x = gen_mtx(65, 25); + u = gen_mtx(7, 25); alpha = gko::initialize({2.0}, ref); beta = gko::initialize({-1.0}, ref); result = gen_mtx(65, 35); @@ -105,6 +106,7 @@ class Dense : public CommonTestFixture { dx = gko::clone(exec, x); dy = gko::clone(exec, y); dc_x = gko::clone(exec, c_x); + du = gko::clone(exec, u); dresult = gko::clone(exec, result); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); @@ -117,7 +119,7 @@ class Dense : public CommonTestFixture { std::vector tmp2(x->get_size()[1], 0); std::iota(tmp2.begin(), tmp2.end(), 0); std::shuffle(tmp2.begin(), tmp2.end(), rng); - std::vector tmp3(x->get_size()[0] / 10); + std::vector tmp3(u->get_size()[0]); std::vector scale_factors(tmp.size()); std::vector scale_factors2(tmp2.size()); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); @@ -125,6 +127,9 @@ class Dense : public CommonTestFixture { for (auto& i : tmp3) { i = row_dist(rng); } + std::vector tmp4(sub_rows.length()); + std::iota(tmp4.begin(), tmp4.end(), 0); + std::shuffle(tmp4.begin(), tmp4.end(), rng); for (auto& s : scale_factors) { s = scale_dist(rng); } @@ -137,6 +142,10 @@ class Dense : public CommonTestFixture { std::unique_ptr(new Arr{ref, tmp2.begin(), tmp2.end()}); rgather_idxs = std::unique_ptr(new Arr{ref, tmp3.begin(), tmp3.end()}); + rscatter_idxs = std::unique_ptr( + new Arr{ref, tmp.begin(), tmp.begin() + u->get_size()[0]}); + rscatter_idxs_sub = std::unique_ptr( + new Arr{ref, tmp4.begin(), tmp4.begin() + u->get_size()[0]}); rpermutation = Permutation::create(ref, *rpermute_idxs); cpermutation = Permutation::create(ref, *cpermute_idxs); rspermutation = ScaledPermutation::create( @@ -166,6 +175,7 @@ class Dense : public CommonTestFixture { std::unique_ptr c_y; std::unique_ptr c_alpha; std::unique_ptr y; + std::unique_ptr u; std::unique_ptr alpha; std::unique_ptr beta; std::unique_ptr result; @@ -175,6 +185,7 @@ class Dense : public CommonTestFixture { std::unique_ptr dc_y; std::unique_ptr dc_alpha; std::unique_ptr dy; + std::unique_ptr du; std::unique_ptr dalpha; std::unique_ptr dbeta; std::unique_ptr dresult; @@ -186,6 +197,11 @@ class Dense : public CommonTestFixture { std::unique_ptr rspermutation; std::unique_ptr cspermutation; std::unique_ptr rgather_idxs; + std::unique_ptr rscatter_idxs; + std::unique_ptr rscatter_idxs_sub; + + gko::span sub_rows{5, 43}; + gko::span sub_cols{3, 19}; }; @@ -1276,6 +1292,69 @@ TEST_F(Dense, CanAdvancedGatherRowsIntoMixedDenseCrossExecutor) } +TEST_F(Dense, CanScatterRowsIntoDense) +{ + set_up_apply_data(); + + u->row_scatter(rscatter_idxs.get(), x); + du->row_scatter(rscatter_idxs.get(), dx); + + GKO_ASSERT_MTX_NEAR(x, dx, 0); +} + + +TEST_F(Dense, CanScatterRowsIntoDenseSubmatrix) +{ + set_up_apply_data(); + auto sx = x->create_submatrix(sub_rows, sub_cols); + auto dsx = dx->create_submatrix(sub_rows, sub_cols); + + u->create_submatrix({0, u->get_size()[0]}, sub_cols) + ->row_scatter(rscatter_idxs_sub.get(), sx); + du->create_submatrix({0, du->get_size()[0]}, sub_cols) + ->row_scatter(rscatter_idxs_sub.get(), dsx); + + GKO_ASSERT_MTX_NEAR(sx, dsx, 0); +} + + +TEST_F(Dense, CanScatterRowsIntoDenseCrossExecutor) +{ + set_up_apply_data(); + + u->row_scatter(rscatter_idxs.get(), x); + u->row_scatter(rscatter_idxs.get(), dx); + + GKO_ASSERT_MTX_NEAR(x, dx, 0); +} + + +TEST_F(Dense, CanScatterRowsIntoDenseUsingIndexSet) +{ + set_up_apply_data(); + auto rindices = std::make_unique>( + ref, x->get_size()[0], *rscatter_idxs); + + u->row_scatter(rindices.get(), x); + du->row_scatter(rindices.get(), dx); + + GKO_ASSERT_MTX_NEAR(x, dx, 0); +} + + +TEST_F(Dense, GatherScatterIsIdentity) +{ + set_up_apply_data(); + + auto gather = dx->row_gather(rgather_idxs.get()); + dx->fill(-gko::one()); + gather->row_scatter(rgather_idxs.get(), dx); + auto result = dx->row_gather(rgather_idxs.get()); + + GKO_ASSERT_MTX_NEAR(gather, result, 0); +} + + TEST_F(Dense, IsGenericPermutable) { using gko::matrix::permute_mode; From ea6825c0e26761b03c1fa506be0978d094e31a57 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 15 Dec 2023 09:31:30 +0000 Subject: [PATCH 02/11] adds device index set kernels Signed-off-by: Marcel Koch --- common/cuda_hip/matrix/dense_kernels.cpp | 54 ++++++++++++++++++- .../unified/matrix/dense_kernels.template.cpp | 2 +- dpcpp/matrix/dense_kernels.dp.cpp | 41 +++++++++++++- omp/matrix/dense_kernels.cpp | 16 +++++- 4 files changed, 109 insertions(+), 4 deletions(-) diff --git a/common/cuda_hip/matrix/dense_kernels.cpp b/common/cuda_hip/matrix/dense_kernels.cpp index d8391ace023..6a6419aa27c 100644 --- a/common/cuda_hip/matrix/dense_kernels.cpp +++ b/common/cuda_hip/matrix/dense_kernels.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -24,6 +24,7 @@ #include "common/cuda_hip/components/reduction.hpp" #include "common/cuda_hip/components/thread_ids.hpp" #include "common/cuda_hip/components/uninitialized_array.hpp" +#include "core/base/mixed_precision_types.hpp" #include "core/base/utils.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -432,6 +433,34 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( } +template +__global__ __launch_bounds__(default_block_size) void row_scatter( + size_type num_sets, IndexType* __restrict__ row_set_begins, + IndexType* __restrict__ row_set_offsets, size_type orig_num_rows, + size_type num_cols, size_type orig_stride, + const ValueType* __restrict__ orig_values, size_type target_stride, + OutputType* __restrict__ target_values) +{ + auto id = thread::get_thread_id_flat(); + auto row = id / num_cols; + auto col = id % num_cols; + + if (row >= orig_num_rows) { + return; + } + + auto set_id = + binary_search( + 0, num_sets + 1, [=](auto i) { return row < row_set_offsets[i]; }) - + 1; + auto set_local_row = row - row_set_offsets[set_id]; + auto target_row = set_local_row + row_set_begins[set_id]; + + target_values[target_row * target_stride + col] = + orig_values[row * orig_stride + col]; +} + + } // namespace kernel @@ -840,6 +869,29 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); +template +void row_scatter(std::shared_ptr exec, + const index_set* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target) +{ + auto size = orig->get_size(); + if (size) { + constexpr auto block_size = default_block_size; + auto num_blocks = ceildiv(size[0] * size[1], block_size); + kernel::row_scatter<<get_stream()>>>( + row_idxs->get_num_subsets(), + as_device_type(row_idxs->get_subsets_begin()), + as_device_type(row_idxs->get_superset_indices()), size[0], size[1], + orig->get_stride(), as_device_type(orig->get_const_values()), + target->get_stride(), as_device_type(target->get_values())); + } +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); + + } // namespace dense } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index a0f82007ec0..d236088ae9c 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -480,7 +480,7 @@ void row_scatter(std::shared_ptr exec, auto scattered) { scattered(rows[row], col) = orig(row, col); }, - dim<2>{row_idxs->get_num_elems(), orig->get_size()[1]}, orig, *row_idxs, + dim<2>{row_idxs->get_size(), orig->get_size()[1]}, orig, *row_idxs, target); } diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index e762a54fe21..f3636b29108 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -28,6 +28,7 @@ #include "dpcpp/base/types.hpp" #include "dpcpp/components/cooperative_groups.dp.hpp" #include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/searching.dp.hpp" #include "dpcpp/components/thread_ids.dp.hpp" #include "dpcpp/components/uninitialized_array.hpp" #include "dpcpp/synthesizer/implementation_selection.hpp" @@ -591,7 +592,45 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) GKO_NOT_IMPLEMENTED; + matrix::Dense* target) +{ + const auto num_sets = row_idxs->get_num_subsets(); + const auto num_rows = row_idxs->get_num_elems(); + const auto num_cols = orig->get_size()[1]; + + const auto* row_set_begins = row_idxs->get_subsets_begin(); + const auto* row_set_offsets = row_idxs->get_superset_indices(); + + const auto orig_stride = orig->get_stride(); + const auto* orig_values = orig->get_const_values(); + + const auto target_stride = target->get_stride(); + auto* target_values = target->get_values(); + + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + static_cast(num_rows * num_cols), + [=](sycl::item<1> item) { + const auto row = static_cast(item[0]) / num_cols; + const auto col = static_cast(item[0]) % num_cols; + + if (row >= num_rows) { + return; + } + + auto set_id = + binary_search( + 0, num_sets + 1, + [=](auto i) { return row < row_set_offsets[i]; }) - + 1; + auto set_local_row = row - row_set_offsets[set_id]; + auto target_row = set_local_row + row_set_begins[set_id]; + + target_values[target_row * target_stride + col] = + orig_values[row * orig_stride + col]; + }); + }); +} GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 24f786ed938..1ed7b472dba 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -471,7 +471,21 @@ void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, matrix::Dense* target) -{} +{ + auto set_begins = row_idxs->get_subsets_begin(); + auto set_ends = row_idxs->get_subsets_end(); + auto set_offsets = row_idxs->get_superset_indices(); +#pragma omp parallel for + for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { + for (int target_row = set_begins[set]; target_row < set_ends[set]; + ++target_row) { + auto orig_row = target_row - set_begins[set] + set_offsets[set]; + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + target->at(target_row, j) = orig->at(orig_row, j); + } + } + } +} GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); From eca9b9761cb72ac328ef8e70f73e8d335d912b95 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Mon, 7 Aug 2023 17:02:28 +0200 Subject: [PATCH 03/11] adds in-bounds check to kernels Co-authored-by: Yu-Hsiang M. Tsai --- common/cuda_hip/matrix/dense_kernels.cpp | 36 +++++--- .../unified/matrix/dense_kernels.template.cpp | 15 +++- core/matrix/dense.cpp | 53 ++++++------ core/matrix/dense_kernels.hpp | 4 +- dpcpp/matrix/dense_kernels.dp.cpp | 85 +++++++++++-------- omp/matrix/dense_kernels.cpp | 14 ++- reference/matrix/dense_kernels.cpp | 13 ++- reference/test/matrix/dense_kernels.cpp | 37 ++++++++ test/matrix/dense_kernels.cpp | 26 ++++++ 9 files changed, 201 insertions(+), 82 deletions(-) diff --git a/common/cuda_hip/matrix/dense_kernels.cpp b/common/cuda_hip/matrix/dense_kernels.cpp index 6a6419aa27c..7b55a04461d 100644 --- a/common/cuda_hip/matrix/dense_kernels.cpp +++ b/common/cuda_hip/matrix/dense_kernels.cpp @@ -436,16 +436,16 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( template __global__ __launch_bounds__(default_block_size) void row_scatter( size_type num_sets, IndexType* __restrict__ row_set_begins, - IndexType* __restrict__ row_set_offsets, size_type orig_num_rows, - size_type num_cols, size_type orig_stride, + IndexType* __restrict__ row_set_offsets, size_type target_num_rows, + size_type num_cols, size_type orig_num_rows, size_type orig_stride, const ValueType* __restrict__ orig_values, size_type target_stride, - OutputType* __restrict__ target_values) + OutputType* __restrict__ target_values, bool* __restrict__ invalid_access) { auto id = thread::get_thread_id_flat(); auto row = id / num_cols; auto col = id % num_cols; - if (row >= orig_num_rows) { + if (row >= orig_num_rows || *invalid_access) { return; } @@ -456,6 +456,11 @@ __global__ __launch_bounds__(default_block_size) void row_scatter( auto set_local_row = row - row_set_offsets[set_id]; auto target_row = set_local_row + row_set_begins[set_id]; + if (target_row >= target_num_rows) { + *invalid_access = true; + return; + } + target_values[target_row * target_stride + col] = orig_values[row * orig_stride + col]; } @@ -873,19 +878,28 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { - auto size = orig->get_size(); - if (size) { + auto orig_size = orig->get_size(); + auto target_size = target->get_size(); + + array invalid_access_arr(exec, {false}); + + if (orig_size) { constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(size[0] * size[1], block_size); + auto num_blocks = ceildiv(orig_size[0] * orig_size[1], block_size); kernel::row_scatter<<get_stream()>>>( row_idxs->get_num_subsets(), as_device_type(row_idxs->get_subsets_begin()), - as_device_type(row_idxs->get_superset_indices()), size[0], size[1], - orig->get_stride(), as_device_type(orig->get_const_values()), - target->get_stride(), as_device_type(target->get_values())); + as_device_type(row_idxs->get_superset_indices()), target_size[0], + target_size[1], orig_size[0], orig->get_stride(), + as_device_type(orig->get_const_values()), target->get_stride(), + as_device_type(target->get_values()), + as_device_type(invalid_access_arr.get_data())); } + + invalid_access = + exec->copy_val_to_host(invalid_access_arr.get_const_data()); } GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index d236088ae9c..1445d3ad919 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -472,16 +472,23 @@ template void row_scatter(std::shared_ptr exec, const array* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { + array invalid_access_arr{exec, {false}}; run_kernel( exec, - [] GKO_KERNEL(auto row, auto col, auto orig, auto rows, - auto scattered) { + [num_rows = target->get_size()[0]] GKO_KERNEL( + auto row, auto col, auto orig, auto rows, auto scattered, + auto* invalid_access_ptr) { + if (rows[row] >= num_rows) { + *invalid_access_ptr = true; + return; + } scattered(rows[row], col) = orig(row, col); }, dim<2>{row_idxs->get_size(), orig->get_size()[1]}, orig, *row_idxs, - target); + target, invalid_access_arr.get_data()); + invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); } GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index c13c3c8688a..797edfffb3a 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1311,38 +1311,39 @@ void Dense::row_gather_impl(const Dense* alpha, } -template -template -void Dense::row_scatter_impl(const array* row_idxs, - Dense* target) const +template +size_type get_size(const array* arr) { - auto exec = this->get_executor(); - dim<2> expected_dim{row_idxs->get_num_elems(), this->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this); - GKO_ASSERT_EQUAL_COLS(this, target); - // @todo check that indices are inbounds for target + return arr->get_size(); +} - exec->run(dense::make_row_scatter( - make_temporary_clone(exec, row_idxs).get(), this, - make_temporary_clone(exec, target).get())); +template +size_type get_size(const index_set* is) +{ + return is->get_num_elems(); } -template -template -void Dense::row_scatter_impl(const index_set* row_idxs, - Dense* target) const +template +void row_scatter_impl(const IndexContainer* row_idxs, + const Dense* orig, Dense* target) { - auto exec = this->get_executor(); - dim<2> expected_dim{static_cast(row_idxs->get_num_elems()), - this->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, this); - GKO_ASSERT_EQUAL_COLS(this, target); - // @todo check that indices are inbounds for target + auto exec = orig->get_executor(); + dim<2> expected_dim{static_cast(get_size(row_idxs)), + orig->get_size()[1]}; + GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, orig); + GKO_ASSERT_EQUAL_COLS(orig, target); + + bool invalid_access = false; exec->run(dense::make_row_scatter( - make_temporary_clone(exec, row_idxs).get(), this, - make_temporary_clone(exec, target).get())); + make_temporary_clone(exec, row_idxs).get(), orig, + make_temporary_clone(exec, target).get(), invalid_access)); + + if (invalid_access) { + GKO_INVALID_STATE( + "Out-of-bounds access detected during kernel execution."); + } } @@ -1655,7 +1656,7 @@ void Dense::row_scatter(const array* row_idxs, ptr_param row_collection) const { gather_mixed_real_complex( - [&](auto dense) { this->row_scatter_impl(row_idxs, dense); }, + [&](auto dense) { row_scatter_impl(row_idxs, this, dense); }, row_collection.get()); } @@ -1666,7 +1667,7 @@ void Dense::row_scatter(const index_set* row_idxs, ptr_param row_collection) const { gather_mixed_real_complex( - [&](auto dense) { this->row_scatter_impl(row_idxs, dense); }, + [&](auto dense) { row_scatter_impl(row_idxs, this, dense); }, row_collection.get()); } diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 0645f8d3a81..337d182c046 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -299,13 +299,13 @@ namespace kernels { void row_scatter(std::shared_ptr exec, \ const array<_itype>* gather_indices, \ const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* target) + matrix::Dense<_otype>* target, bool& invalid_access) #define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \ void row_scatter(std::shared_ptr exec, \ const index_set<_itype>* gather_indices, \ const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* target) + matrix::Dense<_otype>* target, bool& invalid_access) #define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \ void col_permute(std::shared_ptr exec, \ diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index f3636b29108..f52e7b608c9 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -170,6 +170,51 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, dcfg_sq_list); +template +void row_scatter_impl(std::shared_ptr exec, + const index_set* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target, bool* invalid_access) +{ + const auto num_sets = row_idxs->get_num_subsets(); + const auto num_rows = row_idxs->get_num_elems(); + const auto num_cols = orig->get_size()[1]; + + const auto* row_set_begins = row_idxs->get_subsets_begin(); + const auto* row_set_offsets = row_idxs->get_superset_indices(); + + const auto orig_stride = orig->get_stride(); + const auto* orig_values = orig->get_const_values(); + + const auto target_stride = target->get_stride(); + auto* target_values = target->get_values(); + + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + static_cast(num_rows * num_cols), + [=](sycl::item<1> item) { + const auto row = static_cast(item[0]) / num_cols; + const auto col = static_cast(item[0]) % num_cols; + + if (row >= num_rows) { + return; + } + + auto set_id = + binary_search( + 0, num_sets + 1, + [=](auto i) { return row < row_set_offsets[i]; }) - + 1; + auto set_local_row = row - row_set_offsets[set_id]; + auto target_row = set_local_row + row_set_begins[set_id]; + + target_values[target_row * target_stride + col] = + orig_values[row * orig_stride + col]; + }); + }); +} + + } // namespace kernel @@ -592,44 +637,14 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { - const auto num_sets = row_idxs->get_num_subsets(); - const auto num_rows = row_idxs->get_num_elems(); - const auto num_cols = orig->get_size()[1]; + array invalid_access_arr{exec, {false}}; - const auto* row_set_begins = row_idxs->get_subsets_begin(); - const auto* row_set_offsets = row_idxs->get_superset_indices(); + kernel::row_scatter_impl(exec, row_idxs, orig, target, + invalid_access_arr.get_data()); - const auto orig_stride = orig->get_stride(); - const auto* orig_values = orig->get_const_values(); - - const auto target_stride = target->get_stride(); - auto* target_values = target->get_values(); - - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - static_cast(num_rows * num_cols), - [=](sycl::item<1> item) { - const auto row = static_cast(item[0]) / num_cols; - const auto col = static_cast(item[0]) % num_cols; - - if (row >= num_rows) { - return; - } - - auto set_id = - binary_search( - 0, num_sets + 1, - [=](auto i) { return row < row_set_offsets[i]; }) - - 1; - auto set_local_row = row - row_set_offsets[set_id]; - auto target_row = set_local_row + row_set_begins[set_id]; - - target_values[target_row * target_stride + col] = - orig_values[row * orig_stride + col]; - }); - }); + invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); } GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 1ed7b472dba..53197a9d435 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -470,16 +470,26 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { auto set_begins = row_idxs->get_subsets_begin(); auto set_ends = row_idxs->get_subsets_end(); auto set_offsets = row_idxs->get_superset_indices(); -#pragma omp parallel for + invalid_access = false; +#pragma omp parallel for shared(invalid_access) for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { + if (invalid_access) { + continue; + } for (int target_row = set_begins[set]; target_row < set_ends[set]; ++target_row) { + if (invalid_access || target_row >= target->get_size()[0]) { + invalid_access = true; + break; + } + auto orig_row = target_row - set_begins[set] + set_offsets[set]; + for (size_type j = 0; j < orig->get_size()[1]; ++j) { target->at(target_row, j) = orig->at(orig_row, j); } diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index a45d124e631..a0a142d7fdf 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -957,10 +957,14 @@ template void row_scatter(std::shared_ptr exec, const array* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { auto rows = row_idxs->get_const_data(); for (size_type i = 0; i < row_idxs->get_size(); ++i) { + if (rows[i] >= target->get_size()[0]) { + invalid_access = true; + return; + } for (size_type j = 0; j < orig->get_size()[1]; ++j) { target->at(rows[i], j) = orig->at(i, j); } @@ -975,14 +979,19 @@ template void row_scatter(std::shared_ptr exec, const index_set* row_idxs, const matrix::Dense* orig, - matrix::Dense* target) + matrix::Dense* target, bool& invalid_access) { auto set_begins = row_idxs->get_subsets_begin(); auto set_ends = row_idxs->get_subsets_end(); auto set_offsets = row_idxs->get_superset_indices(); + invalid_access = false; for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { for (int target_row = set_begins[set]; target_row < set_ends[set]; ++target_row) { + if (target_row >= target->get_size()[0]) { + invalid_access = true; + return; + } auto orig_row = target_row - set_begins[set] + set_offsets[set]; for (size_type j = 0; j < orig->get_size()[1]; ++j) { target->at(target_row, j) = orig->at(orig_row, j); diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index e0c5d11e921..6001363b50a 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -2756,6 +2756,21 @@ TYPED_TEST(DenseWithIndexType, MatrixScatterRowsFailsWithWrongDimensions) } +TYPED_TEST(DenseWithIndexType, MatrixScatterRowsFailsWithInvalidState) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto row_collection = + gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); + gko::array permute_idxs{exec, {200, 0}}; + + ASSERT_THROW(row_collection->row_scatter(&permute_idxs, this->mtx5), + gko::InvalidStateError); +} + + TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsUsingIndexSetIntoDense) { using Mtx = typename TestFixture::Mtx; @@ -2784,6 +2799,28 @@ TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsUsingIndexSetIntoDense) } +TYPED_TEST(DenseWithIndexType, + MatrixScatterRowsUsingIndexSetFailsWithInvalidState) +{ + using Mtx = typename TestFixture::Mtx; + using T = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + auto exec = this->mtx5->get_executor(); + auto mtx = gko::initialize({{2.2, 6.9, 7.8}, + {4.7, 1.3, 7.6}, + {9.2, 8.6, 4.5}, + {8.1, 9.4, 6.8}, + {9.6, 7.1, 2.5}}, + exec); + auto row_collection = gko::initialize( + {{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}, {3.4, 3.8, 7.8}}, exec); + gko::index_set permute_idxs{exec, {1, 0, 44}}; + + ASSERT_THROW(row_collection->row_scatter(&permute_idxs, mtx), + gko::InvalidStateError); +} + + TYPED_TEST(DenseWithIndexType, MatrixGatherScatterIsIdentity) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index eff41a5e2b2..c8fd9ad3639 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1303,6 +1303,16 @@ TEST_F(Dense, CanScatterRowsIntoDense) } +TEST_F(Dense, CanScatterRowsIntoDenseFailsWithInvalidState) +{ + set_up_apply_data(); + gko::array out_of_bounds(ref, du->get_size()[0]); + out_of_bounds.get_data()[0] = dx->get_size()[0] * 40; + + ASSERT_THROW(du->row_scatter(&out_of_bounds, dx), gko::InvalidStateError); +} + + TEST_F(Dense, CanScatterRowsIntoDenseSubmatrix) { set_up_apply_data(); @@ -1342,6 +1352,22 @@ TEST_F(Dense, CanScatterRowsIntoDenseUsingIndexSet) } +#ifdef NDEBUG +// this test can only be run if C asserts are disabled. Otherwise, +// an assert in the constructor of index_set may fail. +TEST_F(Dense, CanScatterRowsIntoDenseUsingIndexSetFailsWithInvalidState) +{ + set_up_apply_data(); + gko::array out_of_bounds(ref, du->get_size()[0]); + out_of_bounds.get_data()[0] = dx->get_size()[0] * 40; + auto rindices = std::make_unique>( + ref, x->get_size()[0], out_of_bounds); + + ASSERT_THROW(du->row_scatter(&out_of_bounds, dx), gko::InvalidStateError); +} +#endif + + TEST_F(Dense, GatherScatterIsIdentity) { set_up_apply_data(); From 0efaf63de631261acebf23f67476260e0992c617 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 23 Oct 2024 16:51:16 +0200 Subject: [PATCH 04/11] remove index-set overload Co-authored-by: Tobias Ribizel --- common/cuda_hip/matrix/dense_kernels.cpp | 65 ------------------------ core/device_hooks/common_kernels.inc.cpp | 2 - core/matrix/dense.cpp | 17 ------- core/matrix/dense_kernels.hpp | 9 ---- dpcpp/matrix/dense_kernels.dp.cpp | 63 ----------------------- include/ginkgo/core/matrix/dense.hpp | 16 ------ omp/matrix/dense_kernels.cpp | 35 ------------- reference/matrix/dense_kernels.cpp | 29 ----------- reference/test/matrix/dense_kernels.cpp | 50 ------------------ test/matrix/dense_kernels.cpp | 13 ----- 10 files changed, 299 deletions(-) diff --git a/common/cuda_hip/matrix/dense_kernels.cpp b/common/cuda_hip/matrix/dense_kernels.cpp index 7b55a04461d..192bf5177f2 100644 --- a/common/cuda_hip/matrix/dense_kernels.cpp +++ b/common/cuda_hip/matrix/dense_kernels.cpp @@ -433,39 +433,6 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( } -template -__global__ __launch_bounds__(default_block_size) void row_scatter( - size_type num_sets, IndexType* __restrict__ row_set_begins, - IndexType* __restrict__ row_set_offsets, size_type target_num_rows, - size_type num_cols, size_type orig_num_rows, size_type orig_stride, - const ValueType* __restrict__ orig_values, size_type target_stride, - OutputType* __restrict__ target_values, bool* __restrict__ invalid_access) -{ - auto id = thread::get_thread_id_flat(); - auto row = id / num_cols; - auto col = id % num_cols; - - if (row >= orig_num_rows || *invalid_access) { - return; - } - - auto set_id = - binary_search( - 0, num_sets + 1, [=](auto i) { return row < row_set_offsets[i]; }) - - 1; - auto set_local_row = row - row_set_offsets[set_id]; - auto target_row = set_local_row + row_set_begins[set_id]; - - if (target_row >= target_num_rows) { - *invalid_access = true; - return; - } - - target_values[target_row * target_stride + col] = - orig_values[row * orig_stride + col]; -} - - } // namespace kernel @@ -874,38 +841,6 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); -template -void row_scatter(std::shared_ptr exec, - const index_set* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool& invalid_access) -{ - auto orig_size = orig->get_size(); - auto target_size = target->get_size(); - - array invalid_access_arr(exec, {false}); - - if (orig_size) { - constexpr auto block_size = default_block_size; - auto num_blocks = ceildiv(orig_size[0] * orig_size[1], block_size); - kernel::row_scatter<<get_stream()>>>( - row_idxs->get_num_subsets(), - as_device_type(row_idxs->get_subsets_begin()), - as_device_type(row_idxs->get_superset_indices()), target_size[0], - target_size[1], orig_size[0], orig->get_stride(), - as_device_type(orig->get_const_values()), target->get_stride(), - as_device_type(target->get_values()), - as_device_type(invalid_access_arr.get_data())); - } - - invalid_access = - exec->copy_val_to_host(invalid_access_arr.get_const_data()); -} - -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); - - } // namespace dense } // namespace GKO_DEVICE_NAMESPACE } // namespace kernels diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 1cbcf0e8957..f17b25d8306 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -477,8 +477,6 @@ GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); -GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL); diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 797edfffb3a..6c800c0a02d 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1661,17 +1661,6 @@ void Dense::row_scatter(const array* row_idxs, } -template -template -void Dense::row_scatter(const index_set* row_idxs, - ptr_param row_collection) const -{ - gather_mixed_real_complex( - [&](auto dense) { row_scatter_impl(row_idxs, this, dense); }, - row_collection.get()); -} - - template std::unique_ptr Dense::column_permute( const array* permutation_indices) const @@ -2123,12 +2112,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_MATRIX); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_ROW_SCATTER_ARRAY); -#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET(_vtype, _itype) \ - void Dense<_vtype>::row_scatter(const index_set<_itype>* row_idxs, \ - ptr_param row_collection) const -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET); - } // namespace matrix } // namespace gko diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 337d182c046..8c4bfef2cee 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -301,12 +301,6 @@ namespace kernels { const matrix::Dense<_vtype>* orig, \ matrix::Dense<_otype>* target, bool& invalid_access) -#define GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(_vtype, _otype, _itype) \ - void row_scatter(std::shared_ptr exec, \ - const index_set<_itype>* gather_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* target, bool& invalid_access) - #define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \ void col_permute(std::shared_ptr exec, \ const _itype* permutation_indices, \ @@ -447,9 +441,6 @@ namespace kernels { IndexType); \ template \ GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL(ValueType, OutputType, IndexType); \ - template \ - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL(ValueType, OutputType, \ - IndexType); \ template \ GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(ValueType, IndexType); \ template \ diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index f52e7b608c9..464d0af40a3 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -170,51 +170,6 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, dcfg_sq_list); -template -void row_scatter_impl(std::shared_ptr exec, - const index_set* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool* invalid_access) -{ - const auto num_sets = row_idxs->get_num_subsets(); - const auto num_rows = row_idxs->get_num_elems(); - const auto num_cols = orig->get_size()[1]; - - const auto* row_set_begins = row_idxs->get_subsets_begin(); - const auto* row_set_offsets = row_idxs->get_superset_indices(); - - const auto orig_stride = orig->get_stride(); - const auto* orig_values = orig->get_const_values(); - - const auto target_stride = target->get_stride(); - auto* target_values = target->get_values(); - - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - static_cast(num_rows * num_cols), - [=](sycl::item<1> item) { - const auto row = static_cast(item[0]) / num_cols; - const auto col = static_cast(item[0]) % num_cols; - - if (row >= num_rows) { - return; - } - - auto set_id = - binary_search( - 0, num_sets + 1, - [=](auto i) { return row < row_set_offsets[i]; }) - - 1; - auto set_local_row = row - row_set_offsets[set_id]; - auto target_row = set_local_row + row_set_begins[set_id]; - - target_values[target_row * target_stride + col] = - orig_values[row * orig_stride + col]; - }); - }); -} - - } // namespace kernel @@ -633,24 +588,6 @@ void conj_transpose(std::shared_ptr exec, GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); -template -void row_scatter(std::shared_ptr exec, - const index_set* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool& invalid_access) -{ - array invalid_access_arr{exec, {false}}; - - kernel::row_scatter_impl(exec, row_idxs, orig, target, - invalid_access_arr.get_data()); - - invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); -} - -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); - - } // namespace dense } // namespace dpcpp } // namespace kernels diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 34ba5ec31f6..085231d211b 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -735,22 +735,6 @@ class Dense void row_scatter(const array* scatter_indices, ptr_param target) const; - /** - * Copies this matrix into the given rows of the target matrix. - * - * @tparam IndexType the index type, either int32 or int64 - * - * @param scatter_indices row indices of the target matrix. It must - * have the same number of indices as rows in - * this matrix. - * @param target matrix where the scattered rows are stored, i.e. - * `target(scatter_indices.get_global_index(i), j) - * = this(i, j)` - */ - template - void row_scatter(const index_set* scatter_indices, - ptr_param target) const; - std::unique_ptr column_permute( const array* permutation_indices) const override; diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index 53197a9d435..5ae199bdac1 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -466,41 +466,6 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_DENSE_COUNT_NONZERO_BLOCKS_PER_ROW_KERNEL); -template -void row_scatter(std::shared_ptr exec, - const index_set* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool& invalid_access) -{ - auto set_begins = row_idxs->get_subsets_begin(); - auto set_ends = row_idxs->get_subsets_end(); - auto set_offsets = row_idxs->get_superset_indices(); - invalid_access = false; -#pragma omp parallel for shared(invalid_access) - for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { - if (invalid_access) { - continue; - } - for (int target_row = set_begins[set]; target_row < set_ends[set]; - ++target_row) { - if (invalid_access || target_row >= target->get_size()[0]) { - invalid_access = true; - break; - } - - auto orig_row = target_row - set_begins[set] + set_offsets[set]; - - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - target->at(target_row, j) = orig->at(orig_row, j); - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); - - } // namespace dense } // namespace omp } // namespace kernels diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index a0a142d7fdf..69c5d2a204d 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -975,35 +975,6 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); -template -void row_scatter(std::shared_ptr exec, - const index_set* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool& invalid_access) -{ - auto set_begins = row_idxs->get_subsets_begin(); - auto set_ends = row_idxs->get_subsets_end(); - auto set_offsets = row_idxs->get_superset_indices(); - invalid_access = false; - for (size_type set = 0; set < row_idxs->get_num_subsets(); ++set) { - for (int target_row = set_begins[set]; target_row < set_ends[set]; - ++target_row) { - if (target_row >= target->get_size()[0]) { - invalid_access = true; - return; - } - auto orig_row = target_row - set_begins[set] + set_offsets[set]; - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - target->at(target_row, j) = orig->at(orig_row, j); - } - } - } -} - -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_INDEX_SET_KERNEL); - - template void col_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Dense* orig, diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 6001363b50a..5d65d16366b 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -2771,56 +2771,6 @@ TYPED_TEST(DenseWithIndexType, MatrixScatterRowsFailsWithInvalidState) } -TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsUsingIndexSetIntoDense) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - auto mtx = gko::initialize({{2.2, 6.9, 7.8}, - {4.7, 1.3, 7.6}, - {9.2, 8.6, 4.5}, - {8.1, 9.4, 6.8}, - {9.6, 7.1, 2.5}}, - exec); - auto row_collection = gko::initialize( - {{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}, {3.4, 3.8, 7.8}}, exec); - gko::index_set permute_idxs{exec, {1, 0, 4}}; - - row_collection->row_scatter(&permute_idxs, mtx); - - GKO_ASSERT_MTX_NEAR(mtx, - l({{3.0, 2.7, 6.5}, - {0.7, 1.1, 4.0}, - {9.2, 8.6, 4.5}, - {8.1, 9.4, 6.8}, - {3.4, 3.8, 7.8}}), - 0.0); -} - - -TYPED_TEST(DenseWithIndexType, - MatrixScatterRowsUsingIndexSetFailsWithInvalidState) -{ - using Mtx = typename TestFixture::Mtx; - using T = typename TestFixture::value_type; - using index_type = typename TestFixture::index_type; - auto exec = this->mtx5->get_executor(); - auto mtx = gko::initialize({{2.2, 6.9, 7.8}, - {4.7, 1.3, 7.6}, - {9.2, 8.6, 4.5}, - {8.1, 9.4, 6.8}, - {9.6, 7.1, 2.5}}, - exec); - auto row_collection = gko::initialize( - {{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}, {3.4, 3.8, 7.8}}, exec); - gko::index_set permute_idxs{exec, {1, 0, 44}}; - - ASSERT_THROW(row_collection->row_scatter(&permute_idxs, mtx), - gko::InvalidStateError); -} - - TYPED_TEST(DenseWithIndexType, MatrixGatherScatterIsIdentity) { using Mtx = typename TestFixture::Mtx; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index c8fd9ad3639..981b14a89b0 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -1339,19 +1339,6 @@ TEST_F(Dense, CanScatterRowsIntoDenseCrossExecutor) } -TEST_F(Dense, CanScatterRowsIntoDenseUsingIndexSet) -{ - set_up_apply_data(); - auto rindices = std::make_unique>( - ref, x->get_size()[0], *rscatter_idxs); - - u->row_scatter(rindices.get(), x); - du->row_scatter(rindices.get(), dx); - - GKO_ASSERT_MTX_NEAR(x, dx, 0); -} - - #ifdef NDEBUG // this test can only be run if C asserts are disabled. Otherwise, // an assert in the constructor of index_set may fail. From 14d04ed87b3341e09108bcee5619e02c61f81ecd Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 23 Oct 2024 16:51:53 +0200 Subject: [PATCH 05/11] review updates: - add todo - more specific error message Co-authored-by: Tobias Ribizel --- core/matrix/dense.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 6c800c0a02d..d3afb005720 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -1340,9 +1340,9 @@ void row_scatter_impl(const IndexContainer* row_idxs, make_temporary_clone(exec, row_idxs).get(), orig, make_temporary_clone(exec, target).get(), invalid_access)); + // TODO: find a uniform way to handle device-side errors if (invalid_access) { - GKO_INVALID_STATE( - "Out-of-bounds access detected during kernel execution."); + GKO_INVALID_STATE("Out-of-bounds scatter index detected."); } } From 0a831d771cab35101ce3415653b354559653863f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 23 Oct 2024 17:35:20 +0200 Subject: [PATCH 06/11] wip: use row-scatterer type Signed-off-by: Marcel Koch --- core/CMakeLists.txt | 1 + core/matrix/row_scatterer.cpp | 37 ++++++++++++++++ include/ginkgo/core/matrix/row_scatterer.hpp | 45 ++++++++++++++++++++ 3 files changed, 83 insertions(+) create mode 100644 core/matrix/row_scatterer.cpp create mode 100644 include/ginkgo/core/matrix/row_scatterer.hpp diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index afe96d1538d..ba770aebb09 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -90,6 +90,7 @@ target_sources( matrix/identity.cpp matrix/permutation.cpp matrix/row_gatherer.cpp + matrix/row_scatterer.cpp matrix/scaled_permutation.cpp matrix/sellp.cpp matrix/sparsity_csr.cpp diff --git a/core/matrix/row_scatterer.cpp b/core/matrix/row_scatterer.cpp new file mode 100644 index 00000000000..9f187a4b6de --- /dev/null +++ b/core/matrix/row_scatterer.cpp @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: 2024 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "ginkgo/core/matrix/row_scatterer.hpp" + +namespace gko { +namespace matrix { + + +template +RowScatterer::RowScatterer(std::shared_ptr exec) + : EnableLinOp(std::move(exec)) +{} + + +template +RowScatterer::RowScatterer(std::shared_ptr exec, + array idxs, size_type to_size) + : EnableLinOp(exec, {to_size, idxs.get_size()}), + idxs_(exec, std::move(idxs)) +{} + + +template +void RowScatterer::apply_impl(const LinOp* b, LinOp* x) const +{} + + +template +void RowScatterer::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const +{} + + +} // namespace matrix +} // namespace gko diff --git a/include/ginkgo/core/matrix/row_scatterer.hpp b/include/ginkgo/core/matrix/row_scatterer.hpp new file mode 100644 index 00000000000..1925bb65a39 --- /dev/null +++ b/include/ginkgo/core/matrix/row_scatterer.hpp @@ -0,0 +1,45 @@ +// SPDX-FileCopyrightText: 2024 - 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +namespace gko { + +namespace matrix { + + +template +class RowScatterer : public EnableLinOp> { + friend class EnablePolymorphicObject; + +public: + static std::unique_ptr create( + std::shared_ptr exec, array idxs, + size_type to_size) + { + return std::unique_ptr( + new RowScatterer(std::move(exec), std::move(idxs), to_size)); + } + +protected: + void apply_impl(const LinOp* b, LinOp* x) const override; + + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; + +private: + explicit RowScatterer(std::shared_ptr exec); + + explicit RowScatterer(std::shared_ptr exec, + array idxs, size_type to_size); + + array idxs_; +}; + + +} // namespace matrix + +} // namespace gko From 4a4610deac2982feabc8c1f3be5d444d7e39362c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 12 Feb 2025 12:35:23 +0100 Subject: [PATCH 07/11] move row_scatter kernels into own file --- common/unified/CMakeLists.txt | 1 + .../unified/matrix/dense_kernels.template.cpp | 27 -------- common/unified/matrix/row_scatterer.cpp | 46 +++++++++++++ core/device_hooks/common_kernels.inc.cpp | 12 +++- core/matrix/dense.cpp | 68 +++++++++---------- core/matrix/dense_kernels.hpp | 8 --- core/matrix/row_scatterer.cpp | 58 +++++++++++++++- core/matrix/row_scatterer_kernels.hpp | 36 ++++++++++ include/ginkgo/core/matrix/dense.hpp | 18 ++++- include/ginkgo/core/matrix/row_scatterer.hpp | 10 +-- reference/CMakeLists.txt | 1 + reference/matrix/dense_kernels.cpp | 22 ------ reference/matrix/row_scatterer_kernels.cpp | 40 +++++++++++ 13 files changed, 239 insertions(+), 108 deletions(-) create mode 100644 common/unified/matrix/row_scatterer.cpp create mode 100644 core/matrix/row_scatterer_kernels.hpp create mode 100644 reference/matrix/row_scatterer_kernels.cpp diff --git a/common/unified/CMakeLists.txt b/common/unified/CMakeLists.txt index 8795476158b..69da35b7600 100644 --- a/common/unified/CMakeLists.txt +++ b/common/unified/CMakeLists.txt @@ -15,6 +15,7 @@ set(UNIFIED_SOURCES matrix/ell_kernels.cpp matrix/hybrid_kernels.cpp matrix/permutation_kernels.cpp + matrix/row_scatterer.cpp matrix/scaled_permutation_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp diff --git a/common/unified/matrix/dense_kernels.template.cpp b/common/unified/matrix/dense_kernels.template.cpp index 1445d3ad919..577a89ca693 100644 --- a/common/unified/matrix/dense_kernels.template.cpp +++ b/common/unified/matrix/dense_kernels.template.cpp @@ -468,33 +468,6 @@ void advanced_row_gather(std::shared_ptr exec, } -template -void row_scatter(std::shared_ptr exec, - const array* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool& invalid_access) -{ - array invalid_access_arr{exec, {false}}; - run_kernel( - exec, - [num_rows = target->get_size()[0]] GKO_KERNEL( - auto row, auto col, auto orig, auto rows, auto scattered, - auto* invalid_access_ptr) { - if (rows[row] >= num_rows) { - *invalid_access_ptr = true; - return; - } - scattered(rows[row], col) = orig(row, col); - }, - dim<2>{row_idxs->get_size(), orig->get_size()[1]}, orig, *row_idxs, - target, invalid_access_arr.get_data()); - invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); -} - -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); - - template void col_permute(std::shared_ptr exec, const IndexType* permutation_indices, diff --git a/common/unified/matrix/row_scatterer.cpp b/common/unified/matrix/row_scatterer.cpp new file mode 100644 index 00000000000..aa8399fad6d --- /dev/null +++ b/common/unified/matrix/row_scatterer.cpp @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "common/unified/base/kernel_launch.hpp" +#include "core/base/mixed_precision_types.hpp" +#include "core/matrix/row_scatterer_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +namespace row_scatter { + + +template +void row_scatter(std::shared_ptr exec, + const array* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target, bool& invalid_access) +{ + array invalid_access_arr{exec, {false}}; + run_kernel( + exec, + [num_rows = target->get_size()[0]] GKO_KERNEL( + auto row, auto col, auto orig, auto rows, auto scattered, + auto* invalid_access_ptr) { + if (rows[row] >= num_rows) { + *invalid_access_ptr = true; + return; + } + scattered(rows[row], col) = orig(row, col); + }, + dim<2>{row_idxs->get_size(), orig->get_size()[1]}, orig, *row_idxs, + target, invalid_access_arr.get_data()); + invalid_access = exec->copy_val_to_host(invalid_access_arr.get_data()); +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_ROW_SCATTER_SIMPLE_APPLY); + + +} // namespace row_scatter +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index f17b25d8306..1eabb23caf3 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -4,6 +4,8 @@ #include +#include + #include #include @@ -476,7 +478,6 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_GATHER_KERNEL); GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); -GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL); GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_DENSE_INV_COL_PERMUTE_KERNEL); @@ -1115,6 +1116,15 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL); } // namespace implicit_residual_norm + + +namespace row_scatter { + + +GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE_2(GKO_DECLARE_ROW_SCATTER_SIMPLE_APPLY); + + +} } // namespace GKO_HOOK_MODULE } // namespace kernels } // namespace gko diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index d3afb005720..37c84d933fc 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -74,7 +75,6 @@ GKO_REGISTER_OPERATION(nonsymm_permute, dense::nonsymm_permute); GKO_REGISTER_OPERATION(inv_nonsymm_permute, dense::inv_nonsymm_permute); GKO_REGISTER_OPERATION(row_gather, dense::row_gather); GKO_REGISTER_OPERATION(advanced_row_gather, dense::advanced_row_gather); -GKO_REGISTER_OPERATION(row_scatter, dense::row_scatter); GKO_REGISTER_OPERATION(col_permute, dense::col_permute); GKO_REGISTER_OPERATION(inverse_row_permute, dense::inv_row_permute); GKO_REGISTER_OPERATION(inverse_col_permute, dense::inv_col_permute); @@ -1324,29 +1324,6 @@ size_type get_size(const index_set* is) } -template -void row_scatter_impl(const IndexContainer* row_idxs, - const Dense* orig, Dense* target) -{ - auto exec = orig->get_executor(); - dim<2> expected_dim{static_cast(get_size(row_idxs)), - orig->get_size()[1]}; - GKO_ASSERT_EQUAL_DIMENSIONS(expected_dim, orig); - GKO_ASSERT_EQUAL_COLS(orig, target); - - bool invalid_access = false; - - exec->run(dense::make_row_scatter( - make_temporary_clone(exec, row_idxs).get(), orig, - make_temporary_clone(exec, target).get(), invalid_access)); - - // TODO: find a uniform way to handle device-side errors - if (invalid_access) { - GKO_INVALID_STATE("Out-of-bounds scatter index detected."); - } -} - - template std::unique_ptr Dense::permute( const array* permutation_indices) const @@ -1651,13 +1628,38 @@ void Dense::row_gather(ptr_param alpha, template -template -void Dense::row_scatter(const array* row_idxs, - ptr_param row_collection) const +void Dense::row_scatter( + ptr_param> scatterer, ptr_param target) { - gather_mixed_real_complex( - [&](auto dense) { row_scatter_impl(row_idxs, this, dense); }, - row_collection.get()); + scatterer->apply(this, target); +} + + +template +void Dense::row_scatter( + ptr_param alpha, + ptr_param> scatterer, ptr_param beta, + ptr_param target) +{ + scatterer->apply(alpha, this, beta, target); +} + + +template +void Dense::row_scatter( + ptr_param> scatterer, ptr_param target) +{ + scatterer->apply(this, target); +} + + +template +void Dense::row_scatter( + ptr_param alpha, + ptr_param> scatterer, ptr_param beta, + ptr_param target) +{ + scatterer->apply(alpha, this, beta, target); } @@ -2106,12 +2108,6 @@ Dense::Dense(std::shared_ptr exec, #define GKO_DECLARE_DENSE_MATRIX(_type) class Dense<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_MATRIX); -#define GKO_DECLARE_DENSE_ROW_SCATTER_ARRAY(_vtype, _itype) \ - void Dense<_vtype>::row_scatter(const array<_itype>* row_idxs, \ - ptr_param row_collection) const -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_DENSE_ROW_SCATTER_ARRAY); - } // namespace matrix } // namespace gko diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 8c4bfef2cee..0def2e244ab 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -295,12 +295,6 @@ namespace kernels { const matrix::Dense<_vtype>* orig, const matrix::Dense<_vtype>* beta, \ matrix::Dense<_otype>* row_collection) -#define GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL(_vtype, _otype, _itype) \ - void row_scatter(std::shared_ptr exec, \ - const array<_itype>* gather_indices, \ - const matrix::Dense<_vtype>* orig, \ - matrix::Dense<_otype>* target, bool& invalid_access) - #define GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(_vtype, _itype) \ void col_permute(std::shared_ptr exec, \ const _itype* permutation_indices, \ @@ -439,8 +433,6 @@ namespace kernels { template \ GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL(ValueType, OutputType, \ IndexType); \ - template \ - GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL(ValueType, OutputType, IndexType); \ template \ GKO_DECLARE_DENSE_COL_PERMUTE_KERNEL(ValueType, IndexType); \ template \ diff --git a/core/matrix/row_scatterer.cpp b/core/matrix/row_scatterer.cpp index 9f187a4b6de..01aa2addbae 100644 --- a/core/matrix/row_scatterer.cpp +++ b/core/matrix/row_scatterer.cpp @@ -4,27 +4,75 @@ #include "ginkgo/core/matrix/row_scatterer.hpp" +#include +#include + +#include "core/base/dispatch_helper.hpp" +#include "core/matrix/row_scatterer_kernels.hpp" + namespace gko { namespace matrix { +namespace { + + +GKO_REGISTER_OPERATION(row_scatter, row_scatter::row_scatter); + + +} + + +template +std::unique_ptr> RowScatterer::create( + std::shared_ptr exec, array idxs, + size_type to_size) +{ + return std::unique_ptr( + new RowScatterer(std::move(exec), std::move(idxs), to_size)); +} template RowScatterer::RowScatterer(std::shared_ptr exec) - : EnableLinOp(std::move(exec)) + : EnableLinOp>(std::move(exec)) {} template RowScatterer::RowScatterer(std::shared_ptr exec, array idxs, size_type to_size) - : EnableLinOp(exec, {to_size, idxs.get_size()}), + : EnableLinOp>(exec, {to_size, idxs.get_size()}), idxs_(exec, std::move(idxs)) {} template void RowScatterer::apply_impl(const LinOp* b, LinOp* x) const -{} +{ + auto impl = [&](const auto* orig, auto* target) { + auto exec = orig->get_executor(); + bool invalid_access = false; + + exec->run(make_row_scatter( + make_temporary_clone(exec, &idxs_).get(), orig, + make_temporary_clone(exec, target).get(), invalid_access)); + + // TODO: find a uniform way to handle device-side errors + if (invalid_access) { + GKO_INVALID_STATE("Out-of-bounds scatter index detected."); + } + }; + + run, +#endif + float, double, std::complex, std::complex>( + b, [&](auto* orig) { + using value_type = + typename std::decay_t::value_type; + mixed_precision_dispatch_real_complex(impl, orig, x); + }); +} template @@ -33,5 +81,9 @@ void RowScatterer::apply_impl(const LinOp* alpha, const LinOp* b, {} +#define GKO_DECLARE_ROW_SCATTER(_type) class RowScatterer<_type> +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_ROW_SCATTER); + + } // namespace matrix } // namespace gko diff --git a/core/matrix/row_scatterer_kernels.hpp b/core/matrix/row_scatterer_kernels.hpp new file mode 100644 index 00000000000..13f4576efd6 --- /dev/null +++ b/core/matrix/row_scatterer_kernels.hpp @@ -0,0 +1,36 @@ +// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + + +#include + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + +#define GKO_DECLARE_ROW_SCATTER_SIMPLE_APPLY(_vtype, _otype, _itype) \ + void row_scatter(std::shared_ptr exec, \ + const array<_itype>* gather_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_otype>* target, bool& invalid_access) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_ROW_SCATTER_SIMPLE_APPLY(ValueType, OutputType, IndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(row_scatter, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 085231d211b..4f750ddda8d 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -72,6 +72,8 @@ class Sellp; template class SparsityCsr; +template +class RowScatterer; /** * Dense is a matrix format which explicitly stores all values of the matrix. @@ -731,9 +733,19 @@ class Dense * scatter_indices[j]` the rows `i, j` of this matrix are * identical. */ - template - void row_scatter(const array* scatter_indices, - ptr_param target) const; + void row_scatter(ptr_param> scatterer, + ptr_param target); + + void row_scatter(ptr_param alpha, + ptr_param> scatterer, + ptr_param beta, ptr_param target); + + void row_scatter(ptr_param> scatterer, + ptr_param target); + + void row_scatter(ptr_param alpha, + ptr_param> scatterer, + ptr_param beta, ptr_param target); std::unique_ptr column_permute( const array* permutation_indices) const override; diff --git a/include/ginkgo/core/matrix/row_scatterer.hpp b/include/ginkgo/core/matrix/row_scatterer.hpp index 1925bb65a39..09e2b76e830 100644 --- a/include/ginkgo/core/matrix/row_scatterer.hpp +++ b/include/ginkgo/core/matrix/row_scatterer.hpp @@ -7,22 +7,17 @@ #include namespace gko { - namespace matrix { template class RowScatterer : public EnableLinOp> { - friend class EnablePolymorphicObject; + friend class EnablePolymorphicObject, LinOp>; public: static std::unique_ptr create( std::shared_ptr exec, array idxs, - size_type to_size) - { - return std::unique_ptr( - new RowScatterer(std::move(exec), std::move(idxs), to_size)); - } + size_type to_size); protected: void apply_impl(const LinOp* b, LinOp* x) const override; @@ -41,5 +36,4 @@ class RowScatterer : public EnableLinOp> { } // namespace matrix - } // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 7696de3c8dd..f739a0868a7 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -42,6 +42,7 @@ target_sources( matrix/fft_kernels.cpp matrix/hybrid_kernels.cpp matrix/permutation_kernels.cpp + matrix/row_scatterer_kernels.cpp matrix/scaled_permutation_kernels.cpp matrix/sellp_kernels.cpp matrix/sparsity_csr_kernels.cpp diff --git a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index 69c5d2a204d..7c36d9101d5 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -953,28 +953,6 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( GKO_DECLARE_DENSE_ADVANCED_ROW_GATHER_KERNEL); -template -void row_scatter(std::shared_ptr exec, - const array* row_idxs, - const matrix::Dense* orig, - matrix::Dense* target, bool& invalid_access) -{ - auto rows = row_idxs->get_const_data(); - for (size_type i = 0; i < row_idxs->get_size(); ++i) { - if (rows[i] >= target->get_size()[0]) { - invalid_access = true; - return; - } - for (size_type j = 0; j < orig->get_size()[1]; ++j) { - target->at(rows[i], j) = orig->at(i, j); - } - } -} - -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( - GKO_DECLARE_DENSE_ROW_SCATTER_KERNEL); - - template void col_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Dense* orig, diff --git a/reference/matrix/row_scatterer_kernels.cpp b/reference/matrix/row_scatterer_kernels.cpp new file mode 100644 index 00000000000..90237c68b54 --- /dev/null +++ b/reference/matrix/row_scatterer_kernels.cpp @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: 2025 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/row_scatterer_kernels.hpp" + +#include "core/base/mixed_precision_types.hpp" + +namespace gko { +namespace kernels { +namespace reference { +namespace row_scatter { + + +template +void row_scatter(std::shared_ptr exec, + const array* row_idxs, + const matrix::Dense* orig, + matrix::Dense* target, bool& invalid_access) +{ + auto rows = row_idxs->get_const_data(); + for (size_type i = 0; i < row_idxs->get_size(); ++i) { + if (rows[i] >= target->get_size()[0]) { + invalid_access = true; + return; + } + for (size_type j = 0; j < orig->get_size()[1]; ++j) { + target->at(rows[i], j) = orig->at(i, j); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE_2( + GKO_DECLARE_ROW_SCATTER_SIMPLE_APPLY); + + +} // namespace row_scatter +} // namespace reference +} // namespace kernels +} // namespace gko From 19cf8f1f92c17ee9775369251430da6d97c8a65e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 12 Feb 2025 12:49:52 +0100 Subject: [PATCH 08/11] update tests to new interface --- reference/test/matrix/dense_kernels.cpp | 35 ++++++++++++++++--------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 5d65d16366b..ed9260bdc68 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2708,9 +2709,10 @@ TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsIntoDense) auto exec = this->mtx5->get_executor(); auto row_collection = gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); - gko::array permute_idxs{exec, {2, 0}}; + auto scatter = gko::matrix::RowScatterer::create( + exec, gko::array{exec, {2, 0}}, this->mtx5->get_size()[0]); - row_collection->row_scatter(&permute_idxs, this->mtx5); + row_collection->row_scatter(scatter, this->mtx5); GKO_ASSERT_MTX_NEAR( this->mtx5, l({{0.7, 1.1, 4.0}, {-2.0, 2.0, 4.5}, {3.0, 2.7, 6.5}}), @@ -2725,10 +2727,11 @@ TYPED_TEST(DenseWithIndexType, MatrixCanScatterRowsIntoDenseSubmatrix) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); auto row_collection = gko::initialize(I>{{3.0, 2.7}}, exec); - gko::array permute_idxs{exec, {0}}; + auto submtx = this->mtx5->create_submatrix({2}, {1, 3}); + auto scatter = gko::matrix::RowScatterer::create( + exec, gko::array{exec, {0}}, submtx->get_size()[0]); - row_collection->row_scatter(&permute_idxs, - this->mtx5->create_submatrix({2}, {1, 3})); + row_collection->row_scatter(scatter, submtx); GKO_ASSERT_MTX_NEAR( this->mtx5, @@ -2746,12 +2749,14 @@ TYPED_TEST(DenseWithIndexType, MatrixScatterRowsFailsWithWrongDimensions) gko::initialize(I>{{3.0, 2.7}, {0.7, 1.1}}, exec); auto row_collection2 = gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); - gko::array permute_idxs1{exec, {2, 0}}; - gko::array permute_idxs2{exec, {1}}; + auto scatter1 = gko::matrix::RowScatterer::create( + exec, gko::array{exec, {2, 0}}, this->mtx5->get_size()[0]); + auto scatter2 = gko::matrix::RowScatterer::create( + exec, gko::array{exec, {1}}, 2); - ASSERT_THROW(row_collection1->row_scatter(&permute_idxs1, this->mtx5), + ASSERT_THROW(row_collection1->row_scatter(scatter1, this->mtx5), gko::DimensionMismatch); - ASSERT_THROW(row_collection2->row_scatter(&permute_idxs2, this->mtx5), + ASSERT_THROW(row_collection2->row_scatter(scatter2, this->mtx5), gko::DimensionMismatch); } @@ -2764,9 +2769,11 @@ TYPED_TEST(DenseWithIndexType, MatrixScatterRowsFailsWithInvalidState) auto exec = this->mtx5->get_executor(); auto row_collection = gko::initialize({{3.0, 2.7, 6.5}, {0.7, 1.1, 4.0}}, exec); - gko::array permute_idxs{exec, {200, 0}}; + auto scatter = gko::matrix::RowScatterer::create( + exec, gko::array{exec, {200, 0}}, + this->mtx5->get_size()[0]); - ASSERT_THROW(row_collection->row_scatter(&permute_idxs, this->mtx5), + ASSERT_THROW(row_collection->row_scatter(scatter, this->mtx5), gko::InvalidStateError); } @@ -2778,11 +2785,13 @@ TYPED_TEST(DenseWithIndexType, MatrixGatherScatterIsIdentity) using index_type = typename TestFixture::index_type; auto exec = this->mtx5->get_executor(); auto mtx = this->template gen_mtx(23, 4); - gko::array idxs{exec, {3, 6, 11, 9, 22, 8}}; + auto idxs = gko::array{exec, {3, 6, 11, 9, 22, 8}}; + auto scatter = gko::matrix::RowScatterer::create( + exec, idxs, mtx->get_size()[0]); auto gather = mtx->row_gather(&idxs); mtx->fill(-gko::one()); - gather->row_scatter(&idxs, mtx); + gather->row_scatter(scatter, mtx); auto result = mtx->row_gather(&idxs); GKO_ASSERT_MTX_NEAR(gather, result, 0.0); From 5ce5b3d45f39634077d99a8f07856e6b2deb549f Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 12 Feb 2025 16:15:01 +0100 Subject: [PATCH 09/11] add reference kernel test --- core/test/utils.hpp | 42 +++++++++++ reference/test/matrix/CMakeLists.txt | 1 + .../test/matrix/row_scatterer_kernels.cpp | 72 +++++++++++++++++++ 3 files changed, 115 insertions(+) create mode 100644 reference/test/matrix/row_scatterer_kernels.cpp diff --git a/core/test/utils.hpp b/core/test/utils.hpp index fb42540fa64..582318bebb2 100644 --- a/core/test/utils.hpp +++ b/core/test/utils.hpp @@ -174,6 +174,24 @@ struct add_inner_wrapper> { }; +/** + * @see duplicate_t for details + */ +template +struct duplicate {}; + +template