Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions common/cuda_hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ set(CUDA_HIP_SOURCES
base/device_matrix_data_kernels.cpp
base/index_set_kernels.cpp
components/prefix_sum_kernels.cpp
components/bitvector.cpp
distributed/assembly_kernels.cpp
distributed/index_map_kernels.cpp
distributed/matrix_kernels.cpp
Expand Down
22 changes: 22 additions & 0 deletions common/cuda_hip/components/bitvector.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// SPDX-FileCopyrightText: 2025 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#include "common/cuda_hip/components/bitvector.hpp"

namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace bitvector {


// Explicit instantiations of from_sorted_indices for raw (const) int32/int64
// pointer iterators. They provide the definitions for the `extern template`
// declarations in bitvector.hpp, so that this Thrust-based kernel is compiled
// once inside the library instead of being instantiated again by the tests.
template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(gko::int32*);
template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(gko::int64*);
template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(const gko::int32*);
template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(const gko::int64*);


}  // namespace bitvector
}  // namespace GKO_DEVICE_NAMESPACE
}  // namespace kernels
}  // namespace gko
25 changes: 25 additions & 0 deletions common/cuda_hip/components/bitvector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,13 @@ from_sorted_indices(
typename std::iterator_traits<IndexIterator>::difference_type count,
typename std::iterator_traits<IndexIterator>::value_type size)
{
// EXEC_TYPE serves as an indicator that this header is being compiled as part
// of a test. The routine is pre-compiled in the library for the types used by
// the tests, to avoid Thrust issues before CUDA 12.4.
#ifdef EXEC_TYPE
static_assert(std::is_same_v<IndexIterator, void>,
"must only compile this kernel in ginkgo library");
#else
using index_type = typename std::iterator_traits<IndexIterator>::value_type;
using storage_type = typename device_bitvector<index_type>::storage_type;
constexpr auto block_size = device_bitvector<index_type>::block_size;
Expand Down Expand Up @@ -170,8 +177,26 @@ from_sorted_indices(
ranks.get_data(), index_type{});

return gko::bitvector<index_type>{std::move(bits), std::move(ranks), size};
#endif
}

// Expands to the declaration of from_sorted_indices for the given
// IndexIterator type (without a trailing semicolon). Prefix it with `template`
// to explicitly instantiate the kernel, or with `extern template` to declare
// that the instantiation is provided by another translation unit.
#define GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(IndexIterator) \
gko::bitvector<typename std::iterator_traits<IndexIterator>::value_type> \
from_sorted_indices( \
std::shared_ptr<const DefaultExecutor> exec, IndexIterator it, \
typename std::iterator_traits<IndexIterator>::difference_type count, \
typename std::iterator_traits<IndexIterator>::value_type size)

// Before CUDA 12.4 (i.e. CCCL 2.3), THRUST_CUB_WRAPPED_NAMESPACE is required
// to keep the Thrust implementations in different shared libraries separate.
// The tests also compile Thrust kernels, which leads to Thrust issues between
// the test binaries and the Ginkgo library. To work around this, the kernels
// used by the tests are compiled in the library, and the declarations below
// prevent them from being implicitly instantiated elsewhere.
extern template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(gko::int32*);
extern template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(gko::int64*);
extern template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(const gko::int32*);
extern template GKO_DECLARE_BITVECTOR_FROM_SORTED_INDICES(const gko::int64*);


} // namespace bitvector
} // namespace GKO_DEVICE_NAMESPACE
Expand Down