Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit dfed608

Browse files
author
Theodoros Theodoridis
authored
Merge pull request #299 from facebookresearch/rtcfun-emplace-after-load
CudaRTCFunction: do not unload the module that was never loaded
2 parents 5e170c2 + 5d21837 commit dfed608

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

tc/core/cuda/cuda_rtc.cc

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -138,14 +138,14 @@ Duration CudaRTCFunction::Launch(
138 138      int dev;
139 139      TC_CUDA_RUNTIMEAPI_ENFORCE(cudaGetDevice(&dev));
140 140      if (perGpuModule_.count(dev) == 0) {
141     -      perGpuModule_.emplace(dev, CUmodule());
142     -      perGpuKernel_.emplace(dev, CUfunction());
143     -      TC_CUDA_DRIVERAPI_ENFORCE(cuModuleLoadDataEx(
144     -          &(perGpuModule_.at(dev)), nvrtc_ptx.data(), 0, 0, 0));
145     -      TC_CUDA_DRIVERAPI_ENFORCE(cuModuleGetFunction(
146     -          &(perGpuKernel_.at(dev)),
147     -          perGpuModule_.at(dev),
148     -          specializedName.c_str()));
    141 +      CUmodule module;
    142 +      CUfunction function;
    143 +      TC_CUDA_DRIVERAPI_ENFORCE(
    144 +          cuModuleLoadDataEx(&module, nvrtc_ptx.data(), 0, 0, 0));
    145 +      perGpuModule_.emplace(dev, module);
    146 +      TC_CUDA_DRIVERAPI_ENFORCE(
    147 +          cuModuleGetFunction(&function, module, specializedName.c_str()));
    148 +      perGpuKernel_.emplace(dev, function);
149 149      }
150 150
151 151      constexpr int kNumMaxParameters = 100;

0 commit comments

Comments
 (0)