@@ -30,7 +30,8 @@ DEFINE_bool(use_nvprof, false, "Start / stop nvprof");
30
30
31
31
namespace {
32
32
33
- std::tuple<std::vector<std::string>, std::vector<size_t >> init () {
33
+ std::tuple<std::vector<std::string>, std::vector<size_t >, std::vector<size_t >>
34
+ init () {
34
35
int deviceCount = 0 ;
35
36
auto err_id = cudaGetDeviceCount (&deviceCount);
36
37
if (err_id == 35 or err_id == 30 ) {
@@ -44,14 +45,16 @@ std::tuple<std::vector<std::string>, std::vector<size_t>> init() {
44
45
}
45
46
std::vector<std::string> gpuNames;
46
47
std::vector<size_t > sharedMemSizes;
48
+ std::vector<size_t > registersPerBlock;
47
49
gpuNames.reserve (deviceCount);
48
50
for (int i = 0 ; i < deviceCount; ++i) {
49
51
cudaDeviceProp deviceProp;
50
52
TC_CUDA_RUNTIMEAPI_ENFORCE (cudaGetDeviceProperties (&deviceProp, i));
51
53
gpuNames.emplace_back (deviceProp.name );
52
54
sharedMemSizes.emplace_back (deviceProp.sharedMemPerBlock );
55
+ registersPerBlock.emplace_back (deviceProp.regsPerBlock );
53
56
}
54
- return std::make_tuple (gpuNames, sharedMemSizes);
57
+ return std::make_tuple (gpuNames, sharedMemSizes, registersPerBlock );
55
58
}
56
59
57
60
} // namespace
@@ -61,8 +64,8 @@ CudaGPUInfo& CudaGPUInfo::GPUInfo() {
61
64
static thread_local bool inited = false ;
62
65
if (!inited) {
63
66
auto infos = init ();
64
- pInfo = std::unique_ptr<CudaGPUInfo>(
65
- new CudaGPUInfo ( std::get<0 >(infos), std::get<1 >(infos)));
67
+ pInfo = std::unique_ptr<CudaGPUInfo>(new CudaGPUInfo (
68
+ std::get<0 >(infos), std::get<1 >(infos), std::get< 2 >(infos)));
66
69
inited = true ;
67
70
}
68
71
return *pInfo;
@@ -102,4 +105,11 @@ size_t CudaGPUInfo::SharedMemorySize() const {
102
105
}
103
106
return sharedMemSizes_.at (CurrentGPUId ());
104
107
}
108
+
109
+ size_t CudaGPUInfo::RegistersPerBlock () const {
110
+ if (NumberGPUs () == 0 ) {
111
+ return 0 ; // no registers if no GPUs
112
+ }
113
+ return registersPerBlock_.at (CurrentGPUId ());
114
+ }
105
115
} // namespace tc
0 commit comments