Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
204 commits
Select commit Hold shift + click to select a range
a233e35
add kernels for intermediate norm computation
MarcelKoch Nov 30, 2021
c9af255
add distributed vector class
MarcelKoch Nov 30, 2021
0e94339
add tests for distributed vector
MarcelKoch Dec 22, 2021
71c9737
rework read distributed reference kernels
MarcelKoch Dec 22, 2021
4326e3d
fix formatting
MarcelKoch Jan 24, 2022
929eadc
make name of constant accessor more reflective
MarcelKoch Jan 24, 2022
3c92d68
update local vector typename
MarcelKoch Jan 24, 2022
dda490c
minor refactoring
MarcelKoch Jan 31, 2022
b0f25b9
Format files
ginkgo-bot Feb 3, 2022
60ab604
use GPU aware MPI if specified
MarcelKoch Feb 4, 2022
b853799
review updates
MarcelKoch Feb 8, 2022
ab295fd
adds DenseCache to reuse a dense vector without repeated allocations
MarcelKoch Feb 10, 2022
7407092
fixes switch to soa of device matrix data from rebase
MarcelKoch Feb 10, 2022
e75553b
remove partition from read* and make partition mandatory in constructor
MarcelKoch Feb 10, 2022
b33932a
remove partition member from vector
MarcelKoch Feb 11, 2022
64744f1
Format files
ginkgo-bot Feb 14, 2022
fb83613
keep communicator on assignment
MarcelKoch Feb 15, 2022
4ecea25
review updates
MarcelKoch Feb 16, 2022
b7ff630
refactor cmake mpi test handling
MarcelKoch Feb 16, 2022
0785f70
adds missing typedef documentation
MarcelKoch Feb 17, 2022
c374113
adds exec initializer with MPI for common test
MarcelKoch Feb 17, 2022
ec2635b
fix need for host buffer check
MarcelKoch Feb 17, 2022
f20af86
add common mpi test for vector
MarcelKoch Feb 17, 2022
263727f
adds constructor from local vector
MarcelKoch Feb 25, 2022
7d072b5
adds create_real_view to distributed vector
MarcelKoch Feb 25, 2022
6e77249
move read_distributed impl to .cpp
MarcelKoch Feb 25, 2022
59f4ce3
use unique ptr for local vector parameter
MarcelKoch Feb 25, 2022
31a1bfa
add constructor tests
MarcelKoch Feb 25, 2022
58c351a
review updates
MarcelKoch Feb 28, 2022
fc90d8c
add read_distributed device kernels
MarcelKoch Mar 9, 2022
3b1de7a
add test to check create_real_view behaviour
MarcelKoch Feb 28, 2022
e15b1e4
fix formatting and test
MarcelKoch Mar 9, 2022
8779b20
review updates
MarcelKoch Mar 2, 2022
9bd2919
always mark lambdas as host device
MarcelKoch May 23, 2022
156aa8c
merge reference and common mpi tests
MarcelKoch Mar 2, 2022
fb27038
wip: update dpcpp kernels
MarcelKoch May 23, 2022
62fc32e
review updates
MarcelKoch Mar 3, 2022
f441fdf
disable dpcpp kernels
MarcelKoch May 23, 2022
18e8a66
only allow const access to local vector
MarcelKoch Mar 4, 2022
76317cb
Format files
ginkgo-bot Jun 2, 2022
ab90bd8
fixes sonarcloud issues where it makes sense
MarcelKoch Mar 9, 2022
ebdd6d5
review updates:
MarcelKoch Jun 20, 2022
b1dacd1
Format files
ginkgo-bot Apr 21, 2022
5daaddc
Format files
ginkgo-bot Jun 20, 2022
a81454d
Merge distributed vector
MarcelKoch Apr 21, 2022
395167b
add test generator for device matrix data
MarcelKoch Feb 11, 2022
33c0275
Merge device (CUDA/HIP) kernel for Vector read_distributed
MarcelKoch Jun 28, 2022
588c8bf
adds row distributed matrix class
MarcelKoch Feb 11, 2022
6667894
add omp read_distributed matrix kernel
MarcelKoch Jul 6, 2022
0e9e741
add distributed matrix apply test
MarcelKoch Feb 14, 2022
add26cf
remove partition member from distributed matrix
MarcelKoch Feb 14, 2022
94886e3
fixes handling of multiple right-hand-sides for distributed apply
MarcelKoch Feb 15, 2022
3662d7c
updates documentation
MarcelKoch Feb 18, 2022
fb97f94
adds convert to next precision
MarcelKoch Feb 18, 2022
73c2852
use MPI_COMM_WORLD as default communicator for matrix
MarcelKoch Feb 18, 2022
3f0eef4
updates distributed matrix mpi reference tests
MarcelKoch Feb 18, 2022
c73ba31
fix read_distribution host buffer check and convert_to
MarcelKoch Feb 18, 2022
8b0fe71
fix cmake after rebase
MarcelKoch Aug 16, 2022
18ab198
adds distributed matrix common mpi test
MarcelKoch Feb 18, 2022
68b5b66
adds non const real_view to distributed vector
MarcelKoch Apr 21, 2022
61f07d4
formatting & read_distributed kernel fix
MarcelKoch Feb 21, 2022
dd3928a
adds tmp array to compute_squared_norm2
MarcelKoch Apr 21, 2022
b66f3a2
move device_matrix_data resize_and_reset into header
MarcelKoch Feb 22, 2022
914d7cf
adds reduction with tmp array to distributed vector
MarcelKoch Apr 21, 2022
971ee50
Add long long support
pratikvn Feb 14, 2022
b9fe227
adds dispatch for distributed vector
MarcelKoch Feb 23, 2022
9ee139f
remove not implemented and unused kernel declaration
MarcelKoch Feb 22, 2022
8ab88d4
adds helper functions to access local data of dense/dist::vector
MarcelKoch Feb 23, 2022
df13542
small read_distributed clean up
MarcelKoch Feb 22, 2022
8f24902
adds create_with_config_of and get_stride to distributed vector
MarcelKoch Jul 11, 2022
b473df0
don't use device_matrix_data.resize in kernel
MarcelKoch Mar 10, 2022
2eee134
adds distributed capabilities to some solvers
MarcelKoch Feb 23, 2022
65b7cfe
remove mutable local matrix access
MarcelKoch Mar 10, 2022
bc85236
add distributed dispatch to residual norm criteria
MarcelKoch Feb 23, 2022
7070b88
Revert "move device_matrix_data resize_and_reset into header"
MarcelKoch Mar 10, 2022
4a336e8
adds distributed solver example
MarcelKoch Feb 23, 2022
3ee07e3
fixes missing implementation of vector::get_[const_]local_values
MarcelKoch Mar 10, 2022
8bd6348
small rename
MarcelKoch Feb 24, 2022
08fd00c
add value semantics
MarcelKoch Mar 10, 2022
2238b21
add distributed dispatch to identity
MarcelKoch Feb 24, 2022
189b705
allows specifying different partition for rows and cols
MarcelKoch Mar 14, 2022
798ada8
add generic distributed solver tests
MarcelKoch Feb 24, 2022
a38b2d1
adds documentation of the main class
MarcelKoch Mar 24, 2022
7141399
fixes residual norm dispatch
MarcelKoch Feb 24, 2022
649f93e
switch to runtime matrix types
MarcelKoch Mar 24, 2022
4085ea5
adds mixed + complex apply to solver tests
MarcelKoch Feb 25, 2022
77e2eed
review updates
MarcelKoch Mar 24, 2022
241041c
adds complex-to-real dispatch for distributed
MarcelKoch Feb 25, 2022
1479b9c
fix doc handling of predefined macros
MarcelKoch Feb 23, 2022
afd7578
fixes non-mpi residual norm dispatch
MarcelKoch Mar 1, 2022
cd6ef94
fix some distributed documentation
MarcelKoch Feb 23, 2022
b1f1ed5
adds precision dispatch to distributed matrix apply
MarcelKoch Apr 22, 2022
e21da06
review updates
MarcelKoch Mar 25, 2022
d458f7b
fix formatting
MarcelKoch Apr 25, 2022
2fcf8de
add documentation for runtime local matrix type
MarcelKoch Apr 21, 2022
889c32f
review updates
MarcelKoch May 5, 2022
00976fb
add omp read_distributed matrix kernel
MarcelKoch Feb 14, 2022
c9a6169
add communicator size check to copy/move assignment
MarcelKoch Apr 21, 2022
efb18b7
Format files
ginkgo-bot May 5, 2022
2ddc06a
kernel return send sizes instead of offsets
MarcelKoch Jun 2, 2022
98a9601
use arrays instead of device_matrix_data in read_distributed kernels
MarcelKoch Apr 22, 2022
c7a8483
review updates
MarcelKoch May 5, 2022
1a99c5b
thrust implementation of read_distributed kernels
MarcelKoch Jul 8, 2022
4c7b997
Format files
ginkgo-bot Apr 25, 2022
2f6488c
review updates
MarcelKoch May 9, 2022
ce221ef
remove unnecessary iteration over all input elements
MarcelKoch Mar 31, 2022
eb7212f
fix Array->array renaming
MarcelKoch May 4, 2022
d26bef1
add i_send/i_recv with datatypes
MarcelKoch May 11, 2022
4433198
adds common test for distributed matrix kernels
MarcelKoch Jul 8, 2022
ca39451
remove noexcept from distributed matrix move assignment
MarcelKoch May 4, 2022
e5edbd1
use template vector type for Idr iterate
MarcelKoch May 5, 2022
8c3d6bc
update dpcpp kernels
MarcelKoch May 23, 2022
bf3493c
review updates
MarcelKoch May 23, 2022
f5de4d0
use device allocation mode and disable device reset for distributed t…
MarcelKoch May 23, 2022
db4de8e
remove dpcpp kernels
MarcelKoch May 23, 2022
bd46a78
review updates
MarcelKoch May 31, 2022
5e19549
Format files
ginkgo-bot Jun 14, 2022
3914cd4
Format files
ginkgo-bot May 23, 2022
345e3bb
synchronize device before all-to-all
MarcelKoch May 23, 2022
c398bdc
Format files
ginkgo-bot Jul 12, 2022
8acda25
review updates:
MarcelKoch Jun 23, 2022
45f6a91
review updates:
MarcelKoch Jun 14, 2022
18d8b06
fixes matrix' copy and move assignment
MarcelKoch Aug 24, 2022
6cbc03f
unify distributed matrix tests
MarcelKoch Jun 23, 2022
c095b41
rename locally stored matrices
MarcelKoch Jun 15, 2022
0407086
adds distributed example kind
MarcelKoch Aug 26, 2022
ee4cf1b
update to logger changes
MarcelKoch Jul 8, 2022
a0c1bc9
review updates:
MarcelKoch Jun 22, 2022
c124517
removes template apply_impl of Bicg
MarcelKoch Aug 26, 2022
a4ef007
Format files
ginkgo-bot Jul 11, 2022
4accc7f
review updates:
MarcelKoch Jun 23, 2022
8da0935
review updates:
MarcelKoch Aug 26, 2022
84d67fd
review updates:
MarcelKoch Jul 12, 2022
f1382fe
review updates:
MarcelKoch Jul 6, 2022
13bd3d0
fixes residual_norm precision dispatch for non-mpi
MarcelKoch Sep 19, 2022
ebcb060
review updates:
MarcelKoch Jul 18, 2022
8df0534
Format files
ginkgo-bot Jul 6, 2022
19bdb73
adds test with different partition types
MarcelKoch Sep 19, 2022
16a5cd3
Format files
ginkgo-bot Jul 18, 2022
e52ec14
review updates:
MarcelKoch Jul 7, 2022
4de6953
removes special case if no non-local matrix
MarcelKoch Sep 20, 2022
3e2f60b
fix distributed tests
upsj Aug 12, 2022
b765814
Merge distributed matrix
MarcelKoch Jul 8, 2022
123f3aa
frees mpi request and makes it move-only
MarcelKoch Sep 20, 2022
6353aa3
Merge distributed matrix kernels
upsj Aug 12, 2022
9cdd7d0
review updates:
MarcelKoch Sep 26, 2022
8ec9757
fix cmake after rebase
MarcelKoch Aug 16, 2022
f90097e
Merge distributed solvers
MarcelKoch Sep 28, 2022
2033445
adds RAII move-only type to set device id generically
MarcelKoch May 24, 2022
d2f9568
remove old device guard
MarcelKoch May 24, 2022
bfcf8ce
add mapping from rank to device id
MarcelKoch Jun 1, 2022
14e9d42
add mapping if not building mpi
MarcelKoch Jun 1, 2022
06e26ad
fix non-mpi rank->device_id map and mpi wait
MarcelKoch Jun 2, 2022
46c6646
fix header guard
MarcelKoch Jul 11, 2022
8a5f9d7
remove default constructor for communicator
MarcelKoch Jul 18, 2022
df6c417
fix device guard in cuda linops for benchmarks
MarcelKoch Jul 19, 2022
4d28610
adds documentation to scoped device id classes
MarcelKoch Jul 19, 2022
df99ae3
add tests for scoped_device_id
MarcelKoch Jul 20, 2022
812d5ce
fix various compilation issues
MarcelKoch Jul 21, 2022
67119ce
review updates:
MarcelKoch Jul 22, 2022
072a311
enables move operations for device guard at all times
MarcelKoch Aug 24, 2022
930d567
removes initialized check in map_rank_to_device_id
MarcelKoch Aug 24, 2022
abb3c39
sets scoped_device_id for MPI calls with buffers
MarcelKoch Aug 24, 2022
f2af99c
adds traits struct for polymorphic object implementations
MarcelKoch Jul 22, 2022
0d87f72
use traits for conversion
MarcelKoch Jul 22, 2022
142099d
implements polymorphic_object_traits for distributed types
MarcelKoch Jul 22, 2022
3687038
updates comm call sites with executor argument
MarcelKoch Aug 25, 2022
047610c
review updates:
MarcelKoch Aug 26, 2022
a72146d
review updates:
MarcelKoch Aug 26, 2022
5a1f735
Format files
ginkgo-bot Sep 28, 2022
7e0b576
review updates:
MarcelKoch Sep 29, 2022
0ff799e
moves scoped_device_id implementations into module code
MarcelKoch Sep 29, 2022
028b528
changes friend class po_traits -> friend struct po_traits
MarcelKoch Sep 30, 2022
e33fb00
Format files
ginkgo-bot Sep 30, 2022
2d72d1d
adds guard suffix to scope classes
MarcelKoch Sep 30, 2022
cdf89ae
fixes guard use in non-member function
MarcelKoch Sep 30, 2022
7a51a13
fixes renaming
MarcelKoch Oct 2, 2022
dd7fc44
adds omp_device to reference lib
MarcelKoch Oct 4, 2022
1f4e8ec
moves implementation of ReferenceExecutor method back into header
MarcelKoch Oct 4, 2022
d477926
Format files
ginkgo-bot Oct 4, 2022
06b1b4a
Merge adding device-id guard to MPI communicator and polymorphic_obje…
MarcelKoch Oct 5, 2022
95377ca
fixup cmake after rebase
MarcelKoch Oct 5, 2022
258267f
adjusts distributed test to test updates
MarcelKoch Oct 5, 2022
ac9f49f
adds tests for rank to device-id mapping
MarcelKoch Oct 6, 2022
5e42487
moves new distributed additions into experimental namespace
MarcelKoch Oct 6, 2022
6095d88
Format files
ginkgo-bot Oct 6, 2022
e307c8a
makes generic_scoped_device_id_guard destructor noexcept
MarcelKoch Oct 7, 2022
92b8958
fixes MPI tests
MarcelKoch Oct 7, 2022
15185ff
checks correct comm size only when necessary in tests
MarcelKoch Oct 7, 2022
3b90dbe
returns correct exit code from mpi tests
MarcelKoch Oct 7, 2022
ff631e0
always initializes scalar solver variables (CG/CGS/FCG/BICG/BICGSTAB)
MarcelKoch Oct 8, 2022
289c01d
adds all_to_all_v overload with mpi datatypes
MarcelKoch Oct 10, 2022
3ebf663
uses blocking comm if openmpi version is less than 4.1
MarcelKoch Oct 10, 2022
2c05a79
removes false positives in test for openmpi version
MarcelKoch Oct 14, 2022
9731f45
review updates:
MarcelKoch Oct 20, 2022
2bc941c
moves Partition into experimental namespace
MarcelKoch Oct 25, 2022
a9c6076
moves distributed dispatch into experimental namespace
MarcelKoch Oct 25, 2022
db86d19
review updates:
MarcelKoch Oct 25, 2022
7757bf5
review updates:
MarcelKoch Oct 27, 2022
832d184
moves mpi wrapper into experimental namespace
MarcelKoch Oct 27, 2022
8844c90
Format files
ginkgo-bot Oct 27, 2022
ca3538a
adds explicit error message to device id guard destructor failure
ginkgo-bot Oct 27, 2022
8f8f5a0
replaces extremely slow horeka test with nla-gpu
MarcelKoch Oct 31, 2022
b59a9dd
fixes scoped device id guard test
MarcelKoch Oct 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 4 additions & 35 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -341,38 +341,6 @@ build/cuda102/nompi/intel/cuda/debug/static:
CUDA_ARCH: 35

# cuda 11.0 and friends on HoreKa with tests
build/cuda110/mvapich2/gcc/cuda/debug/shared:
extends:
- .build_template
- .default_variables
- .full_test_condition
- .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020
variables:
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_MPI: "ON"
BUILD_TYPE: "Debug"
FAST_TESTS: "ON"
CUDA_ARCH: 80
USE_NAME: "cuda110-mvapich2-gcc-${CI_PIPELINE_ID}"
KEEP_CONTAINER: "ON"
USE_SLURM: 0

test/cuda110/mvapich2/gcc/cuda/debug/shared:
extends:
- .horeka_test_template
- .default_variables
- .full_test_condition
- .use_gko-cuda110-mvapich2-gnu9-llvm9-intel2020
variables:
USE_NAME: "cuda110-mvapich2-gcc-${CI_PIPELINE_ID}"
SLURM_PARTITION: "accelerated"
SLURM_GRES: "gpu:1"
SLURM_TIME: "00:45:00"
dependencies: null
needs: [ "build/cuda110/mvapich2/gcc/cuda/debug/shared" ]


build/cuda110/nompi/clang/cuda/release/static:
extends:
- .build_template
Expand Down Expand Up @@ -533,13 +501,15 @@ build/amd/openmpi/clang/rocm502/release/shared:
extends:
- .build_and_test_template
- .default_variables
- .quick_test_condition
- .use_gko-rocm502-openmpi-gnu11-llvm11
- .full_test_condition
- .use_gko-rocm502-openmpi-gnu11-llvm11-multi-gpu
variables:
C_COMPILER: "clang"
CXX_COMPILER: "clang++"
BUILD_OMP: "ON"
BUILD_HIP: "ON"
BUILD_MPI: "ON"
MPI_AS_ROOT: "ON"
RUN_EXAMPLES: "ON"
BUILD_TYPE: "Release"

Expand Down Expand Up @@ -834,7 +804,6 @@ iwyu:
variables:
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_CUDA: "HIP"
EXTRA_CMAKE_FLAGS: '-DGINKGO_WITH_IWYU=ON'
allow_failure: yes

Expand Down
6 changes: 6 additions & 0 deletions .gitlab/image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@
- amdci
- gpu

.use_gko-rocm502-openmpi-gnu11-llvm11-multi-gpu:
image: ginkgohub/rocm:502-openmpi-gnu11-llvm11
tags:
- private_ci
- nla-gpu

.use_gko-oneapi-cpu:
image: ginkgohub/oneapi:latest
tags:
Expand Down
41 changes: 33 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ option(GINKGO_DPCPP_SINGLE_MODE "Do not compile double kernels for the DPC++ bac
option(GINKGO_INSTALL_RPATH "Set the RPATH when installing its libraries." ON)
option(GINKGO_INSTALL_RPATH_ORIGIN "Add $ORIGIN (Linux) or @loader_path (MacOS) to the installation RPATH." ON)
option(GINKGO_INSTALL_RPATH_DEPENDENCIES "Add dependencies to the installation RPATH." OFF)
option(GINKGO_FORCE_GPU_AWARE_MPI "Assert that the MPI library is GPU aware. This forces Ginkgo to assume that GPU aware functionality is available (OFF (default) or ON), but may fail
catastrophically in case the MPI implementation is not GPU Aware, and GPU aware functionality has been forced" OFF)

# load executor-specific configuration
if(GINKGO_BUILD_CUDA)
Expand All @@ -107,10 +109,10 @@ include(cmake/build_type_helpers.cmake)
include(cmake/build_helpers.cmake)
include(cmake/install_helpers.cmake)

if (MSVC)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
endif()
if (MINGW OR CYGWIN)
if(MINGW OR CYGWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mbig-obj")
endif()

Expand Down Expand Up @@ -204,8 +206,31 @@ else()
message(STATUS "HWLOC is being forcibly switched off")
endif()

set(GINKGO_HAVE_GPU_AWARE_MPI OFF)
set(GINKGO_FORCE_SPMV_BLOCKING_COMM OFF)
if(GINKGO_BUILD_MPI)
find_package(MPI REQUIRED)
if(GINKGO_FORCE_GPU_AWARE_MPI)
set(GINKGO_HAVE_GPU_AWARE_MPI ON)
else()
set(GINKGO_HAVE_GPU_AWARE_MPI OFF)
endif()

try_run(uses_openmpi gko_result_unused
${PROJECT_BINARY_DIR}
${CMAKE_SOURCE_DIR}/cmake/openmpi_test.cpp
LINK_LIBRARIES MPI::MPI_CXX
RUN_OUTPUT_VARIABLE openmpi_version
)
if(uses_openmpi)
if(openmpi_version VERSION_LESS "4.1")
message(WARNING
"OpenMPI v4.0.x has a bug that forces us to use blocking communication in our distributed "
"matrix class. To enable faster, non-blocking communication, consider updating your OpenMPI version or "
"switch to a different vendor.")
set(GINKGO_FORCE_SPMV_BLOCKING_COMM ON)
endif()
endif()
endif()

# Try to find the third party packages before using our subdirectories
Expand Down Expand Up @@ -241,21 +266,21 @@ add_subdirectory(common) # Import list of unified kernel source files
if(GINKGO_BUILD_CUDA)
add_subdirectory(cuda) # High-performance kernels for NVIDIA GPUs
endif()
if (GINKGO_BUILD_REFERENCE)
if(GINKGO_BUILD_REFERENCE)
add_subdirectory(reference) # Reference kernel implementations
endif()
if(GINKGO_BUILD_HIP)
add_subdirectory(hip) # High-performance kernels for AMD or NVIDIA GPUs
endif()
if (GINKGO_BUILD_DPCPP)
if(GINKGO_BUILD_DPCPP)
add_subdirectory(dpcpp) # High-performance DPC++ kernels
endif()
if (GINKGO_BUILD_OMP)
if(GINKGO_BUILD_OMP)
add_subdirectory(omp) # High-performance omp kernels
endif()
add_subdirectory(core) # Core Ginkgo types and top-level functions
add_subdirectory(include) # Public API self-contained check
if (GINKGO_BUILD_TESTS)
if(GINKGO_BUILD_TESTS)
add_subdirectory(test) # Tests running on all executors
endif()

Expand Down Expand Up @@ -323,7 +348,7 @@ endif()
configure_file(${Ginkgo_SOURCE_DIR}/cmake/ginkgo.pc.in
${Ginkgo_BINARY_DIR}/ginkgo.pc.in @ONLY)
file(GENERATE OUTPUT ${Ginkgo_BINARY_DIR}/ginkgo_$<CONFIG>.pc
INPUT ${Ginkgo_BINARY_DIR}/ginkgo.pc.in)
INPUT ${Ginkgo_BINARY_DIR}/ginkgo.pc.in)

# WINDOWS NVCC has " inside the string, add escape character
# to avoid config problem.
Expand Down Expand Up @@ -356,7 +381,7 @@ endif()
file(MAKE_DIRECTORY "${GINKGO_TEST_INSTALL_BIN_DIR}")
file(MAKE_DIRECTORY "${GINKGO_TEST_EXPORTBUILD_BIN_DIR}")
set(TOOLSET "")
if (NOT "${CMAKE_GENERATOR_TOOLSET}" STREQUAL "")
if(NOT "${CMAKE_GENERATOR_TOOLSET}" STREQUAL "")
set(TOOLSET "-T${CMAKE_GENERATOR_TOOLSET}")
endif()
add_custom_target(test_install
Expand Down
56 changes: 22 additions & 34 deletions benchmark/utils/cuda_linops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "benchmark/utils/sparselib_linops.hpp"
#include "benchmark/utils/types.hpp"
#include "cuda/base/cusparse_bindings.hpp"
#include "cuda/base/device_guard.hpp"
#include "cuda/base/pointer_mode_guard.hpp"
#include "cuda/base/types.hpp"

Expand Down Expand Up @@ -102,12 +101,12 @@ protected:

void initialize_descr()
{
const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto exec = this->get_gpu_exec();
auto guard = exec->get_scoped_device_id_guard();
this->descr_ = handle_manager<cusparseMatDescr>(
gko::kernels::cuda::cusparse::create_mat_descr(),
[id](cusparseMatDescr_t descr) {
gko::cuda::device_guard g{id};
[exec](cusparseMatDescr_t descr) {
auto guard = exec->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::destroy(descr);
});
}
Expand All @@ -130,7 +129,7 @@ class CusparseCsrmp
public gko::ReadableFromMatrixData<ValueType, IndexType>,
public gko::EnableCreateMethod<CusparseCsrmp<ValueType, IndexType>> {
friend class gko::EnableCreateMethod<CusparseCsrmp>;
friend class gko::EnablePolymorphicObject<CusparseCsrmp, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseCsrmp>;

public:
using csr = gko::matrix::Csr<ValueType, IndexType>;
Expand Down Expand Up @@ -166,8 +165,7 @@ protected:
auto db = dense_b->get_const_values();
auto dx = dense_x->get_values();

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmv_mp(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_size()[0], this->get_size()[1],
Expand Down Expand Up @@ -205,7 +203,7 @@ class CusparseCsr
public gko::EnableCreateMethod<CusparseCsr<ValueType, IndexType>>,
public gko::ReadableFromMatrixData<ValueType, IndexType> {
friend class gko::EnableCreateMethod<CusparseCsr>;
friend class gko::EnablePolymorphicObject<CusparseCsr, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseCsr>;

public:
using csr = gko::matrix::Csr<ValueType, IndexType>;
Expand Down Expand Up @@ -241,8 +239,7 @@ protected:
auto db = dense_b->get_const_values();
auto dx = dense_x->get_values();

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmv(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_size()[0], this->get_size()[1],
Expand Down Expand Up @@ -281,7 +278,7 @@ class CusparseCsrmm
public gko::EnableCreateMethod<CusparseCsrmm<ValueType, IndexType>>,
public gko::ReadableFromMatrixData<ValueType, IndexType> {
friend class gko::EnableCreateMethod<CusparseCsrmm>;
friend class gko::EnablePolymorphicObject<CusparseCsrmm, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseCsrmm>;

public:
using csr = gko::matrix::Csr<ValueType, IndexType>;
Expand Down Expand Up @@ -317,8 +314,7 @@ protected:
auto db = dense_b->get_const_values();
auto dx = dense_x->get_values();

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmm(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_size()[0], dense_b->get_size()[1], this->get_size()[1],
Expand Down Expand Up @@ -361,7 +357,7 @@ class CusparseCsrEx
public gko::EnableCreateMethod<CusparseCsrEx<ValueType, IndexType>>,
public gko::ReadableFromMatrixData<ValueType, IndexType> {
friend class gko::EnableCreateMethod<CusparseCsrEx>;
friend class gko::EnablePolymorphicObject<CusparseCsrEx, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseCsrEx>;

public:
using csr = gko::matrix::Csr<ValueType, IndexType>;
Expand Down Expand Up @@ -404,8 +400,7 @@ protected:
ValueType beta = gko::zero<ValueType>();
gko::size_type buffer_size = 0;

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
auto handle = this->get_gpu_exec()->get_cusparse_handle();
// This function seems to require the pointer mode to be set to HOST.
// Ginkgo uses pointer mode DEVICE by default, so we change this
Expand Down Expand Up @@ -468,7 +463,7 @@ class CusparseHybrid
CusparseHybrid<ValueType, IndexType, Partition, Threshold>>,
public gko::ReadableFromMatrixData<ValueType, IndexType> {
friend class gko::EnableCreateMethod<CusparseHybrid>;
friend class gko::EnablePolymorphicObject<CusparseHybrid, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseHybrid>;

public:
using csr = gko::matrix::Csr<ValueType, IndexType>;
Expand All @@ -492,8 +487,7 @@ public:
t_csr->read(data);
this->set_size(t_csr->get_size());

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::csr2hyb(
this->get_gpu_exec()->get_cusparse_handle(), this->get_size()[0],
this->get_size()[1], this->get_descr(), t_csr->get_const_values(),
Expand All @@ -503,9 +497,8 @@ public:

~CusparseHybrid() override
{
const auto id = this->get_gpu_exec()->get_device_id();
try {
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_));
} catch (const std::exception& e) {
std::cerr << "Error when unallocating CusparseHybrid hyb_ matrix: "
Expand All @@ -525,8 +518,7 @@ protected:
auto db = dense_b->get_const_values();
auto dx = dense_x->get_values();

const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmv(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
&scalars.get_const_data()[0], this->get_descr(), hyb_, db,
Expand All @@ -542,8 +534,7 @@ protected:
: gko::EnableLinOp<CusparseHybrid, CusparseBase>(exec, size),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
{
const auto id = this->get_gpu_exec()->get_device_id();
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateHybMat(&hyb_));
}

Expand Down Expand Up @@ -576,8 +567,7 @@ void cusparse_generic_spmv(std::shared_ptr<const gko::CudaExecutor> gpu_exec,
auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
auto db = dense_b->get_const_values();
auto dx = dense_x->get_values();
const auto id = gpu_exec->get_device_id();
gko::cuda::device_guard g{id};
auto guard = gpu_exec->get_scoped_device_id_guard();
cusparseDnVecDescr_t vecb, vecx;
GKO_ASSERT_NO_CUSPARSE_ERRORS(
cusparseCreateDnVec(&vecx, dense_x->get_num_stored_elements(),
Expand Down Expand Up @@ -612,7 +602,7 @@ class CusparseGenericCsr
CusparseGenericCsr<ValueType, IndexType, Alg>>,
public gko::ReadableFromMatrixData<ValueType, IndexType> {
friend class gko::EnableCreateMethod<CusparseGenericCsr>;
friend class gko::EnablePolymorphicObject<CusparseGenericCsr, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseGenericCsr>;

public:
using csr = gko::matrix::Csr<ValueType, IndexType>;
Expand Down Expand Up @@ -653,9 +643,8 @@ public:

~CusparseGenericCsr() override
{
const auto id = this->get_gpu_exec()->get_device_id();
try {
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_));
} catch (const std::exception& e) {
std::cerr
Expand Down Expand Up @@ -705,7 +694,7 @@ class CusparseGenericCoo
public gko::EnableCreateMethod<CusparseGenericCoo<ValueType, IndexType>>,
public gko::ReadableFromMatrixData<ValueType, IndexType> {
friend class gko::EnableCreateMethod<CusparseGenericCoo>;
friend class gko::EnablePolymorphicObject<CusparseGenericCoo, CusparseBase>;
friend class gko::polymorphic_object_traits<CusparseGenericCoo>;

public:
using coo = gko::matrix::Coo<ValueType, IndexType>;
Expand Down Expand Up @@ -746,9 +735,8 @@ public:

~CusparseGenericCoo() override
{
const auto id = this->get_gpu_exec()->get_device_id();
try {
gko::cuda::device_guard g{id};
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_));
} catch (const std::exception& e) {
std::cerr
Expand Down
Loading