Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit a1b6cc6

Browse files
nicolasvasilache, ftynse
authored and committed
Drop NVRTC_CUB in non-RTC paths
There is no need to use modified CUB in nvcc or LLVM paths because, unlike NVRTC, they can include system headers that the regular CUB depends upon.
1 parent 4e37c4d commit a1b6cc6

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

tc/core/cuda/cuda_libraries.h

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,9 +31,11 @@ namespace code {
3131
namespace c {
3232

3333
constexpr auto types = R"C(
34+
#ifndef __CUDACC_RTC__
3435
// Can't include system dependencies with NVRTC
3536
// Can't include cuda_fp16.h with NVRTC due to transitive system dependencies
36-
// #include <cuda_fp16.h>
37+
#include <cuda_fp16.h>
38+
#endif
3739
)C";
3840

3941
constexpr auto defines = R"C(
@@ -211,7 +213,12 @@ struct SegmentedReducer {
211213

212214
constexpr auto cubBlockReduce = R"CUDA(
213215
216+
#if __CUDACC_RTC__
214217
#include "cub/nvrtc_cub.cuh"
218+
#else
219+
#include <assert.h>
220+
#include "cub/cub.cuh"
221+
#endif
215222
216223
namespace __tc {
217224

tc/core/cuda/cuda_rtc.cc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,6 @@ static std::string llvmCompile(
128128
std::string("-I") + TC_STRINGIFY(TC_CUDA_INCLUDE_DIR),
129129
std::string("-I") + TC_STRINGIFY(TC_CUB_INCLUDE_DIR),
130130
tc::FLAGS_llvm_flags,
131-
"-DNVRTC_CUB=1",
132131
"-nocudalib",
133132
"-S",
134133
"-emit-llvm",

tc/core/flags.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ DEFINE_string(
4949
"compiler flags to set when llvm is used");
5050
DEFINE_string(
5151
nvcc_flags,
52-
"-std=c++11 -ptx -DNVRTC_CUB=1 --use_fast_math",
52+
"-std=c++11 -ptx --use_fast_math",
5353
"compiler flags to set when nvcc is used");
5454

5555
// CPU codegen options

0 commit comments

Comments
 (0)