Skip to content

Commit 0e24ab8

Browse files
authored
Merge pull request #931 from konradkusiak97/konradkusiak/LocalMemSizeQuery
[UR] [CUDA] Changed the output of querying localMemSize
2 parents a62423d + d5a4691 commit 0e24ab8

File tree

3 files changed

+17
-16
lines changed

3 files changed

+17
-16
lines changed

source/adapters/cuda/device.cpp

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -501,12 +501,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
501501
return ReturnValue(
502502
static_cast<uint64_t>(hDevice->getMaxChosenLocalMem()));
503503
} else {
504-
int LocalMemSize = 0;
505-
UR_CHECK_ERROR(cuDeviceGetAttribute(
506-
&LocalMemSize, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
507-
hDevice->get()));
508-
detail::ur::assertion(LocalMemSize >= 0);
509-
return ReturnValue(static_cast<uint64_t>(LocalMemSize));
504+
return ReturnValue(
505+
static_cast<uint64_t>(hDevice->getMaxCapacityLocalMem()));
510506
}
511507
}
512508
case UR_DEVICE_INFO_ERROR_CORRECTION_SUPPORT: {

source/adapters/cuda/device.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,9 @@ struct ur_device_handle_t_ {
4545
UR_CHECK_ERROR(cuDeviceGetAttribute(
4646
&MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
4747
cuDevice));
48+
UR_CHECK_ERROR(cuDeviceGetAttribute(
49+
&MaxCapacityLocalMem,
50+
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice));
4851

4952
// Set local mem max size if env var is present
5053
static const char *LocalMemSizePtrUR =
@@ -56,9 +59,6 @@ struct ur_device_handle_t_ {
5659
: (LocalMemSizePtrPI ? LocalMemSizePtrPI : nullptr);
5760

5861
if (LocalMemSizePtr) {
59-
cuDeviceGetAttribute(
60-
&MaxCapacityLocalMem,
61-
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice);
6262
MaxChosenLocalMem = std::atoi(LocalMemSizePtr);
6363
MaxLocalMemSizeChosen = true;
6464
}

source/adapters/cuda/enqueue.cpp

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -284,9 +284,15 @@ setKernelParams(const ur_context_handle_t Context,
284284
CudaImplicitOffset);
285285
}
286286

287-
if (Context->getDevice()->maxLocalMemSizeChosen()) {
287+
auto Device = Context->getDevice();
288+
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
289+
setErrorMessage("Excessive allocation of local memory on the device",
290+
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
291+
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
292+
}
293+
294+
if (Device->maxLocalMemSizeChosen()) {
288295
// Set up local memory requirements for kernel.
289-
auto Device = Context->getDevice();
290296
if (Device->getMaxChosenLocalMem() < 0) {
291297
bool EnvVarHasURPrefix =
292298
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
@@ -297,11 +303,6 @@ setKernelParams(const ur_context_handle_t Context,
297303
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
298304
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
299305
}
300-
if (LocalSize > static_cast<uint32_t>(Device->getMaxCapacityLocalMem())) {
301-
setErrorMessage("Too much local memory allocated for device",
302-
UR_RESULT_ERROR_ADAPTER_SPECIFIC);
303-
return UR_RESULT_ERROR_ADAPTER_SPECIFIC;
304-
}
305306
if (LocalSize > static_cast<uint32_t>(Device->getMaxChosenLocalMem())) {
306307
bool EnvVarHasURPrefix =
307308
std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE") != nullptr;
@@ -319,6 +320,10 @@ setKernelParams(const ur_context_handle_t Context,
319320
UR_CHECK_ERROR(cuFuncSetAttribute(
320321
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
321322
Device->getMaxChosenLocalMem()));
323+
324+
} else {
325+
UR_CHECK_ERROR(cuFuncSetAttribute(
326+
CuFunc, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, LocalSize));
322327
}
323328

324329
} catch (ur_result_t Err) {

0 commit comments

Comments
 (0)