Skip to content

Commit 9ad4e24

Browse files
authored
Optional support for nvcomp. (#11460)
No integration yet. This PR implements basic compression and decompression.
1 parent dde2772 commit 9ad4e24

13 files changed

+923
-19
lines changed

CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ option(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR "Output build artifacts in CMake binar
7272
## CUDA
7373
option(USE_CUDA "Build with GPU acceleration" OFF)
7474
option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
75+
option(USE_NVCOMP "Build with nvcomp to enable sparse data compression. (experimental)" OFF)
7576
# This is specifically designed for PyPI binary release and should be disabled for most of the cases.
7677
option(USE_DLOPEN_NCCL "Whether to load nccl dynamically." OFF)
7778
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
@@ -124,6 +125,9 @@ endif()
124125
if(USE_NCCL AND (NOT USE_CUDA))
125126
message(SEND_ERROR "`USE_NCCL` must be enabled with `USE_CUDA` flag.")
126127
endif()
128+
if(USE_NVCOMP AND (NOT USE_CUDA))
129+
message(SEND_ERROR "`USE_NVCOMP` must be enabled with `USE_CUDA` flag.")
130+
endif()
127131
if(USE_DEVICE_DEBUG AND (NOT USE_CUDA))
128132
message(SEND_ERROR "`USE_DEVICE_DEBUG` must be enabled with `USE_CUDA` flag.")
129133
endif()
@@ -234,6 +238,14 @@ if(USE_CUDA)
234238
find_package(CUDAToolkit REQUIRED)
235239
endif()
236240

241+
if(USE_NVCOMP)
242+
find_package(nvcomp REQUIRED)
243+
# Pass the variable *name* so that if() dereferences it itself. Expanding it with ${} yields an
# empty argument list when the CUDA language is not enabled, which is a hard if() syntax error.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.8)
244+
message(SEND_ERROR "NVComp support requires CUDA >= 12.8")
245+
endif()
246+
endif()
247+
248+
237249
if(FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
238250
((CMAKE_CXX_COMPILER_ID STREQUAL "GNU") OR
239251
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))

cmake/Utils.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,10 @@ macro(xgboost_target_defs target)
236236
if(PLUGIN_RMM)
237237
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_RMM=1)
238238
endif()
239+
240+
if(USE_NVCOMP)
241+
target_compile_definitions(objxgboost PUBLIC -DXGBOOST_USE_NVCOMP=1)
242+
endif()
239243
endmacro()
240244

241245
# handles dependencies
@@ -262,6 +266,10 @@ macro(xgboost_target_link_libraries target)
262266
target_link_libraries(${target} PRIVATE rmm::rmm)
263267
endif()
264268

269+
if(USE_NVCOMP)
270+
target_link_libraries(${target} PRIVATE nvcomp::nvcomp)
271+
endif()
272+
265273
if(USE_NCCL)
266274
xgboost_link_nccl(${target})
267275
endif()

src/common/common.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,13 +178,19 @@ class Range {
178178

179179
inline void AssertGPUSupport() {
180180
#ifndef XGBOOST_USE_CUDA
181-
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
181+
LOG(FATAL) << "XGBoost version not compiled with GPU support.";
182+
#endif // XGBOOST_USE_CUDA
183+
}
184+
185+
/**
 * @brief Emit a LOG(FATAL) message when XGBOOST_USE_NVCOMP is not defined; otherwise do nothing.
 */
inline void AssertNvCompSupport() {
186+
#ifndef XGBOOST_USE_NVCOMP
187+
LOG(FATAL) << "XGBoost is not compiled with NVCOMP support.";
182188
#endif // XGBOOST_USE_NVCOMP
183189
}
184190

185191
inline void AssertNCCLSupport() {
186192
#if !defined(XGBOOST_USE_NCCL)
187-
LOG(FATAL) << "XGBoost version not compiled with NCCL support.";
193+
LOG(FATAL) << "XGBoost version not compiled with NCCL support.";
188194
#endif // !defined(XGBOOST_USE_NCCL)
189195
}
190196

src/common/cuda_dr_utils.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ CuDriverApi::CuDriverApi() {
4040
safe_load("cuGetErrorName", &this->cuGetErrorName);
4141
safe_load("cuDeviceGetAttribute", &this->cuDeviceGetAttribute);
4242
safe_load("cuDeviceGet", &this->cuDeviceGet);
43-
43+
#if defined(CUDA_HW_DECOM_AVAILABLE)
44+
safe_load("cuMemBatchDecompressAsync", &this->cuMemBatchDecompressAsync);
45+
#endif // defined(CUDA_HW_DECOM_AVAILABLE)
4446
CHECK(this->cuMemGetAllocationGranularity);
4547
}
4648

src/common/cuda_dr_utils.h

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515

1616
#include "xgboost/string_view.h" // for StringView
1717

18+
#if CUDART_VERSION >= 12080
19+
#define CUDA_HW_DECOM_AVAILABLE 1
20+
#endif
21+
1822
namespace xgboost::cudr {
1923
/**
2024
* @brief A struct for retrieving CUDA driver API from the runtime API.
@@ -44,28 +48,39 @@ struct CuDriverApi {
4448
using DeviceGetAttribute = CUresult(int *pi, CUdevice_attribute attrib, CUdevice dev);
4549
using DeviceGet = CUresult(CUdevice *device, int ordinal);
4650

51+
#if defined(CUDA_HW_DECOM_AVAILABLE)
52+
using BatchDecompressAsync = CUresult(CUmemDecompressParams *paramsArray, size_t count,
53+
unsigned int flags, size_t *errorIndex, CUstream stream);
54+
#endif // defined(CUDA_HW_DECOM_AVAILABLE)
55+
4756
MemGetAllocationGranularityFn *cuMemGetAllocationGranularity{nullptr}; // NOLINT
4857
MemCreateFn *cuMemCreate{nullptr}; // NOLINT
4958
/**
5059
* @param[in] offset - Must be zero.
5160
*/
52-
MemMapFn *cuMemMap{nullptr}; // NOLINT
61+
MemMapFn *cuMemMap{nullptr}; // NOLINT
5362
/**
5463
* @param[out] ptr - Resulting pointer to start of virtual address range allocated
5564
* @param[in] size - Size of the reserved virtual address range requested
5665
* @param[in] alignment - Alignment of the reserved virtual address range requested
5766
* @param[in] addr - Fixed starting address range requested
5867
* @param[in] flags - Currently unused, must be zero
5968
*/
60-
MemAddressReserveFn *cuMemAddressReserve{nullptr}; // NOLINT
61-
MemSetAccessFn *cuMemSetAccess{nullptr}; // NOLINT
62-
MemUnmapFn *cuMemUnmap{nullptr}; // NOLINT
63-
MemReleaseFn *cuMemRelease{nullptr}; // NOLINT
64-
MemAddressFreeFn *cuMemAddressFree{nullptr}; // NOLINT
65-
GetErrorString *cuGetErrorString{nullptr}; // NOLINT
66-
GetErrorName *cuGetErrorName{nullptr}; // NOLINT
67-
DeviceGetAttribute *cuDeviceGetAttribute{nullptr}; // NOLINT
68-
DeviceGet *cuDeviceGet{nullptr}; // NOLINT
69+
MemAddressReserveFn *cuMemAddressReserve{nullptr}; // NOLINT
70+
MemSetAccessFn *cuMemSetAccess{nullptr}; // NOLINT
71+
MemUnmapFn *cuMemUnmap{nullptr}; // NOLINT
72+
MemReleaseFn *cuMemRelease{nullptr}; // NOLINT
73+
MemAddressFreeFn *cuMemAddressFree{nullptr}; // NOLINT
74+
GetErrorString *cuGetErrorString{nullptr}; // NOLINT
75+
GetErrorName *cuGetErrorName{nullptr}; // NOLINT
76+
DeviceGetAttribute *cuDeviceGetAttribute{nullptr}; // NOLINT
77+
DeviceGet *cuDeviceGet{nullptr}; // NOLINT
78+
79+
#if defined(CUDA_HW_DECOM_AVAILABLE)
80+
81+
BatchDecompressAsync *cuMemBatchDecompressAsync{nullptr}; // NOLINT
82+
83+
#endif // defined(CUDA_HW_DECOM_AVAILABLE)
6984

7085
CuDriverApi();
7186

@@ -96,7 +111,7 @@ inline auto GetAllocGranularity(CUmemAllocationProp const *prop) {
96111
/**
97112
* @brief Obtain appropriate device ordinal for `CUmemLocation`.
98113
*/
99-
void MakeCuMemLocation(CUmemLocationType type, CUmemLocation* loc);
114+
void MakeCuMemLocation(CUmemLocationType type, CUmemLocation *loc);
100115

101116
/**
102117
* @brief Construct a `CUmemAllocationProp`.

src/common/cuda_pinned_allocator.cu

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,9 @@
1414
#endif // defined(XGBOOST_USE_CUDA)
1515

1616
#include "common.h"
17+
#include "cuda_dr_utils.h" // for CUDA_HW_DECOM_AVAILABLE
1718
#include "cuda_rt_utils.h" // for CurrentDevice
1819

19-
#if CUDART_VERSION >= 12080
20-
#define CUDA_HW_DECOM_AVAILABLE 1
21-
#endif
22-
2320
namespace xgboost::common::cuda_impl {
2421
[[nodiscard]] MemPoolHdl CreateHostMemPool() {
2522
auto mem_pool = std::unique_ptr<cudaMemPool_t, void (*)(cudaMemPool_t*)>{

0 commit comments

Comments
 (0)