
Commit 8e19009

Convert part of RMM to a precompiled library (#1896)
This PR is a starting point for #1779. It converts RMM to a precompiled shared library, and moves some implementations into `src/` files.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Matthew Murray (https://github.com/Matt711)

URL: #1896
1 parent dbd8cc7 commit 8e19009
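At a high level, the change repeats one pattern across the headers: a function that used to be defined in a header (often `inline` or `constexpr`) becomes a plain declaration, and its body moves into a `src/*.cpp` file compiled into the new `librmm.so`. A minimal sketch of that split, using `is_pow2` from the `aligned.hpp` diff below; the `src/` side here is reconstructed from the deleted header code, not copied from the PR:

```cpp
#include <cstddef>

// Header (aligned.hpp): declaration only. Note that constexpr is dropped;
// a function whose body lives in the shared library can no longer be
// evaluated at compile time by callers that only see this declaration.
namespace rmm {
[[nodiscard]] bool is_pow2(std::size_t value) noexcept;
}  // namespace rmm

// Library (src/aligned.cpp): the definition, compiled once into librmm.so.
namespace rmm {
[[nodiscard]] bool is_pow2(std::size_t value) noexcept
{
  return (value != 0U) && ((value & (value - 1)) == 0U);
}
}  // namespace rmm
```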

File tree: 15 files changed, +470 −199 lines changed


ci/build_wheel_python.sh

Lines changed: 5 additions & 1 deletion
@@ -34,8 +34,12 @@ rapids-telemetry-record sccache-stats.txt sccache --show-adv-stats
 
 EXCLUDE_ARGS=(
   --exclude "librapids_logger.so"
+  --exclude "librmm.so"
 )
-python -m auditwheel repair "${EXCLUDE_ARGS[@]}" -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" dist/*
+python -m auditwheel repair \
+  "${EXCLUDE_ARGS[@]}" \
+  -w "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}" \
+  dist/*
 
 ../../ci/validate_wheel.sh "${RAPIDS_WHEEL_BLD_OUTPUT_DIR}"
 

conda/recipes/librmm/recipe.yaml

Lines changed: 12 additions & 0 deletions
@@ -69,19 +69,31 @@ outputs:
       - cmake --install cpp/build
     dynamic_linking:
       overlinking_behavior: "error"
+    prefix_detection:
+      ignore:
+        # See https://github.com/rapidsai/build-planning/issues/160
+        - lib/librmm.so
     requirements:
       build:
         - cmake ${{ cmake_version }}
+        - ${{ stdlib("c") }}
       host:
         - cuda-version =${{ cuda_version }}
+        - if: cuda_major == "11"
+          then: cudatoolkit
+          else: cuda-cudart-dev
       run:
         - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
         - if: cuda_major == "11"
           then: cudatoolkit
+          else: cuda-cudart
         - rapids-logger =0.1
       run_exports:
         - ${{ pin_subpackage("librmm", upper_bound="x.x") }}
       ignore_run_exports:
+        from_package:
+          - if: cuda_major != "11"
+            then: cuda-cudart-dev
         by_name:
           - cuda-version
           - if: cuda_major == "11"

cpp/CMakeLists.txt

Lines changed: 21 additions & 12 deletions
@@ -83,31 +83,40 @@ include(cmake/thirdparty/get_nvtx.cmake)
 # ##################################################################################################
 # * library targets --------------------------------------------------------------------------------
 
-add_library(rmm INTERFACE)
+add_library(rmm src/aligned.cpp src/cuda_device.cpp src/cuda_stream_pool.cpp
+            src/cuda_stream_view.cpp src/cuda_stream.cpp)
 add_library(rmm::rmm ALIAS rmm)
 
 target_include_directories(
   rmm
-  INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
-            "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>" "$<INSTALL_INTERFACE:include>")
+  PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
+         "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>"
+  INTERFACE "$<INSTALL_INTERFACE:include>")
 
 if(CUDA_STATIC_RUNTIME)
   message(STATUS "RMM: Enabling static linking of cudart")
-  target_link_libraries(rmm INTERFACE CUDA::cudart_static)
+  target_link_libraries(rmm PUBLIC CUDA::cudart_static)
 else()
-  target_link_libraries(rmm INTERFACE CUDA::cudart)
+  target_link_libraries(rmm PUBLIC CUDA::cudart)
 endif()
 
-target_link_libraries(rmm INTERFACE CCCL::CCCL)
-target_link_libraries(rmm INTERFACE dl)
-target_link_libraries(rmm INTERFACE nvtx3::nvtx3-cpp)
-target_link_libraries(rmm INTERFACE rapids_logger::rapids_logger)
-target_compile_features(rmm INTERFACE cxx_std_17 $<BUILD_INTERFACE:cuda_std_17>)
-target_compile_definitions(rmm INTERFACE LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
+target_link_libraries(rmm PUBLIC CCCL::CCCL ${CMAKE_DL_LIBS} nvtx3::nvtx3-cpp
+                      rapids_logger::rapids_logger)
+
+set_target_properties(
+  rmm
+  PROPERTIES BUILD_RPATH "\$ORIGIN"
+             INSTALL_RPATH "\$ORIGIN"
+             CXX_STANDARD 17
+             CXX_STANDARD_REQUIRED ON
+             CXX_VISIBILITY_PRESET hidden
+             POSITION_INDEPENDENT_CODE ON
+             INTERFACE_POSITION_INDEPENDENT_CODE ON)
+target_compile_definitions(rmm PUBLIC LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE)
 
 # Enable NVTX if necessary
 if(RMM_NVTX)
-  target_compile_definitions(rmm INTERFACE RMM_NVTX)
+  target_compile_definitions(rmm PUBLIC RMM_NVTX)
 endif()
 
 # ##################################################################################################
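Two of the new target properties work together: `CXX_VISIBILITY_PRESET hidden` hides every symbol of the library by default, so the public API has to be re-exported explicitly, which the headers do by attaching an export attribute to the `rmm` namespace (the `RMM_EXPORT` spelling visible in the header diffs below). A hedged sketch of how such a macro can be defined for GCC/Clang; RMM's actual `rmm/detail/export.hpp` may spell it differently:

```cpp
#include <cstddef>

// Hypothetical export macro; the real rmm/detail/export.hpp may differ.
#if defined(__GNUC__) || defined(__clang__)
#define RMM_EXPORT [[gnu::visibility("default")]]
#else
#define RMM_EXPORT
#endif

// Attached to the namespace (allowed since C++17), the attribute gives
// every declaration inside default visibility even though the library is
// compiled with hidden visibility by default.
namespace RMM_EXPORT rmm {
[[nodiscard]] bool is_pow2(std::size_t value) noexcept;  // exported from librmm.so
}  // namespace RMM_EXPORT rmm
```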

cpp/include/rmm/aligned.hpp

Lines changed: 10 additions & 32 deletions
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
 #include <cstddef>
 #include <cstdint>
 
-namespace RMM_NAMESPACE {
+namespace RMM_EXPORT rmm {
 
 /**
  * @addtogroup utilities
@@ -49,10 +49,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @return True if the input is a power of two with non-negative integer exponent, false otherwise.
  */
-[[nodiscard]] constexpr bool is_pow2(std::size_t value) noexcept
-{
-  return (value != 0U) && ((value & (value - 1)) == 0U);
-}
+[[nodiscard]] bool is_pow2(std::size_t value) noexcept;
 
 /**
  * @brief Returns whether or not `alignment` is a valid memory alignment.
@@ -61,10 +58,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @return True if the alignment is valid, false otherwise.
  */
-[[nodiscard]] constexpr bool is_supported_alignment(std::size_t alignment) noexcept
-{
-  return is_pow2(alignment);
-}
+[[nodiscard]] bool is_supported_alignment(std::size_t alignment) noexcept;
 
 /**
  * @brief Align up to nearest multiple of specified power of 2
@@ -74,11 +68,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @return the aligned value
 */
-[[nodiscard]] constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
-{
-  assert(is_supported_alignment(alignment));
-  return (value + (alignment - 1)) & ~(alignment - 1);
-}
+[[nodiscard]] std::size_t align_up(std::size_t value, std::size_t alignment) noexcept;
 
 /**
  * @brief Align down to the nearest multiple of specified power of 2
@@ -88,11 +78,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @return the aligned value
 */
-[[nodiscard]] constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
-{
-  assert(is_supported_alignment(alignment));
-  return value & ~(alignment - 1);
-}
+[[nodiscard]] std::size_t align_down(std::size_t value, std::size_t alignment) noexcept;
 
 /**
  * @brief Checks whether a value is aligned to a multiple of a specified power of 2
@@ -102,11 +88,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @return true if aligned
 */
-[[nodiscard]] constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept
-{
-  assert(is_supported_alignment(alignment));
-  return value == align_down(value, alignment);
-}
+[[nodiscard]] bool is_aligned(std::size_t value, std::size_t alignment) noexcept;
 
 /**
  * @brief Checks whether the provided pointer is aligned to a specified @p alignment
@@ -116,13 +98,9 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256};
  *
  * @return true if the pointer is aligned
 */
-[[nodiscard]] inline bool is_pointer_aligned(
-  void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) noexcept
-{
-  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
-  return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
-}
+[[nodiscard]] bool is_pointer_aligned(void* ptr,
+                                      std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) noexcept;
 
 /** @} */  // end of group
 
-}  // namespace RMM_NAMESPACE
+}  // namespace RMM_EXPORT rmm
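The deleted inline bodies presumably land in the new `src/aligned.cpp` with little change. A sketch of what that file plausibly contains, reconstructed from the definitions removed above rather than copied from the PR (note the default argument of `is_pointer_aligned` stays in the header; the out-of-line definition must omit it):

```cpp
// Plausible src/aligned.cpp, reconstructed from the inline bodies
// deleted from aligned.hpp above.
#include <rmm/aligned.hpp>

#include <cassert>
#include <cstddef>
#include <cstdint>

namespace rmm {

[[nodiscard]] bool is_pow2(std::size_t value) noexcept
{
  return (value != 0U) && ((value & (value - 1)) == 0U);
}

[[nodiscard]] bool is_supported_alignment(std::size_t alignment) noexcept
{
  return is_pow2(alignment);
}

[[nodiscard]] std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return (value + (alignment - 1)) & ~(alignment - 1);
}

[[nodiscard]] std::size_t align_down(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return value & ~(alignment - 1);
}

[[nodiscard]] bool is_aligned(std::size_t value, std::size_t alignment) noexcept
{
  assert(is_supported_alignment(alignment));
  return value == align_down(value, alignment);
}

[[nodiscard]] bool is_pointer_aligned(void* ptr, std::size_t alignment) noexcept
{
  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
  return is_aligned(reinterpret_cast<std::uintptr_t>(ptr), alignment);
}

}  // namespace rmm
```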

cpp/include/rmm/cuda_device.hpp

Lines changed: 9 additions & 42 deletions
@@ -16,18 +16,15 @@
 #pragma once
 
 #include <rmm/aligned.hpp>
-#include <rmm/detail/error.hpp>
 #include <rmm/detail/export.hpp>
 
-#include <cuda_runtime_api.h>
-
 #include <cstddef>
 #include <utility>
 
-namespace RMM_NAMESPACE {
+namespace RMM_EXPORT rmm {
 
 struct cuda_device_id;
-inline cuda_device_id get_current_cuda_device();
+cuda_device_id get_current_cuda_device();
 
 /**
  * @addtogroup cuda_device_management
@@ -96,37 +93,21 @@ struct cuda_device_id {
  *
  * @return `cuda_device_id` for the current device
 */
-inline cuda_device_id get_current_cuda_device()
-{
-  cuda_device_id::value_type dev_id{-1};
-  RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id));
-  return cuda_device_id{dev_id};
-}
+cuda_device_id get_current_cuda_device();
 
 /**
  * @brief Returns the number of CUDA devices in the system
  *
  * @return Number of CUDA devices in the system
 */
-inline int get_num_cuda_devices()
-{
-  cuda_device_id::value_type num_dev{-1};
-  RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev));
-  return num_dev;
-}
+int get_num_cuda_devices();
 
 /**
  * @brief Returns the available and total device memory in bytes for the current device
  *
  * @return The available and total device memory in bytes for the current device as a std::pair.
 */
-inline std::pair<std::size_t, std::size_t> available_device_memory()
-{
-  std::size_t free{};
-  std::size_t total{};
-  RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
-  return {free, total};
-}
+std::pair<std::size_t, std::size_t> available_device_memory();
 
 /**
  * @brief Returns the approximate specified percent of available device memory on the current CUDA
@@ -136,13 +117,7 @@ inline std::pair<std::size_t, std::size_t> available_device_memory()
  *
  * @return The recommended initial device memory pool size in bytes.
 */
-inline std::size_t percent_of_free_device_memory(int percent)
-{
-  [[maybe_unused]] auto const [free, total] = rmm::available_device_memory();
-  auto fraction = static_cast<double>(percent) / 100.0;
-  return rmm::align_down(static_cast<std::size_t>(static_cast<double>(free) * fraction),
-                         rmm::CUDA_ALLOCATION_ALIGNMENT);
-}
+std::size_t percent_of_free_device_memory(int percent);
 
 /**
  * @brief RAII class that sets the current CUDA device to the specified device on construction
@@ -154,19 +129,11 @@ struct cuda_set_device_raii {
  *
  * @param dev_id The device to set as the current CUDA device
 */
-explicit cuda_set_device_raii(cuda_device_id dev_id)
-  : old_device_{get_current_cuda_device()},
-    needs_reset_{dev_id.value() >= 0 && old_device_ != dev_id}
-{
-  if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(dev_id.value())); }
-}
+explicit cuda_set_device_raii(cuda_device_id dev_id);
 /**
  * @brief Reactivates the previous CUDA device
 */
-~cuda_set_device_raii() noexcept
-{
-  if (needs_reset_) { RMM_ASSERT_CUDA_SUCCESS(cudaSetDevice(old_device_.value())); }
-}
+~cuda_set_device_raii() noexcept;
 
 cuda_set_device_raii(cuda_set_device_raii const&) = delete;
 cuda_set_device_raii& operator=(cuda_set_device_raii const&) = delete;
@@ -179,4 +146,4 @@
 };
 
 /** @} */  // end of group
-}  // namespace RMM_NAMESPACE
+}  // namespace RMM_EXPORT rmm
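Similarly, the device-query bodies presumably move into `src/cuda_device.cpp`, which is also why `<rmm/detail/error.hpp>` and `<cuda_runtime_api.h>` disappear from the public header: only the translation unit that defines these functions still needs the CUDA runtime API and RMM's error-checking macros. A sketch of part of that file, reconstructed from the definitions removed above, not copied from the PR:

```cpp
// Plausible excerpt of src/cuda_device.cpp, reconstructed from the inline
// bodies deleted from cuda_device.hpp above. The CUDA runtime header and
// the error-checking macros are now private to this translation unit.
#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <utility>

namespace rmm {

cuda_device_id get_current_cuda_device()
{
  cuda_device_id::value_type dev_id{-1};
  RMM_ASSERT_CUDA_SUCCESS(cudaGetDevice(&dev_id));
  return cuda_device_id{dev_id};
}

int get_num_cuda_devices()
{
  cuda_device_id::value_type num_dev{-1};
  RMM_ASSERT_CUDA_SUCCESS(cudaGetDeviceCount(&num_dev));
  return num_dev;
}

std::pair<std::size_t, std::size_t> available_device_memory()
{
  std::size_t free{};
  std::size_t total{};
  RMM_CUDA_TRY(cudaMemGetInfo(&free, &total));
  return {free, total};
}

}  // namespace rmm
```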
