Skip to content

Commit 9f62123

Browse files
committed
Fix UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL size confusion
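The spec says UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL returns an array of uint32_t, but the CUDA, HIP, OpenCL, and Level Zero adapters were returning arrays of size_t. Because size_t can be wider than uint32_t (e.g. 8 bytes on 64-bit targets), callers reading the result as uint32_t saw extra dummy zeroes in the output. The Native CPU adapter was already correct.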
1 parent 905804c commit 9f62123

File tree

4 files changed

+17
-8
lines changed

4 files changed

+17
-8
lines changed

source/adapters/cuda/device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
270270
int WarpSize = 0;
271271
UR_CHECK_ERROR(cuDeviceGetAttribute(
272272
&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get()));
273-
size_t Sizes[1] = {static_cast<size_t>(WarpSize)};
273+
uint32_t Sizes[1] = {static_cast<uint32_t>(WarpSize)};
274274
return ReturnValue(Sizes, 1);
275275
}
276276
case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: {

source/adapters/hip/device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
177177
int WarpSize = 0;
178178
UR_CHECK_ERROR(hipDeviceGetAttribute(&WarpSize, hipDeviceAttributeWarpSize,
179179
hDevice->get()));
180-
size_t Sizes[1] = {static_cast<size_t>(WarpSize)};
180+
uint32_t Sizes[1] = {static_cast<uint32_t>(WarpSize)};
181181
return ReturnValue(Sizes, 1);
182182
}
183183
case UR_DEVICE_INFO_MAX_CLOCK_FREQUENCY: {

source/adapters/level_zero/device.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -626,11 +626,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
626626
return ReturnValue(static_cast<ur_bool_t>(false));
627627
}
628628
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
629-
// ze_device_compute_properties.subGroupSizes is in uint32_t whereas the
630-
// expected return is size_t datatype. size_t can be 8 bytes of data.
631-
return ReturnValue.template operator()<size_t>(
632-
Device->ZeDeviceComputeProperties->subGroupSizes,
633-
Device->ZeDeviceComputeProperties->numSubGroupSizes);
629+
return ReturnValue(Device->ZeDeviceComputeProperties->subGroupSizes,
630+
Device->ZeDeviceComputeProperties->numSubGroupSizes);
634631
}
635632
case UR_DEVICE_INFO_IL_VERSION: {
636633
// Set to a space separated list of IL version strings of the form

source/adapters/opencl/device.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
910910
case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION:
911911
case UR_DEVICE_INFO_BUILT_IN_KERNELS:
912912
case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES:
913-
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL:
914913
case UR_DEVICE_INFO_IP_VERSION: {
915914
/* We can just use the OpenCL outputs because the sizes of OpenCL types
916915
* are the same as UR.
@@ -929,6 +928,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
929928

930929
return UR_RESULT_SUCCESS;
931930
}
931+
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
932+
// Have to convert size_t to uint32_t
933+
size_t SubGroupSizesSize = 0;
934+
CL_RETURN_ON_FAILURE(
935+
clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice), CLPropName, 0,
936+
nullptr, &SubGroupSizesSize));
937+
std::vector<size_t> SubGroupSizes(SubGroupSizesSize / sizeof(size_t));
938+
CL_RETURN_ON_FAILURE(
939+
clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice), CLPropName,
940+
SubGroupSizesSize, SubGroupSizes.data(), nullptr));
941+
return ReturnValue.template operator()<uint32_t>(SubGroupSizes.data(),
942+
SubGroupSizes.size());
943+
}
932944
case UR_DEVICE_INFO_EXTENSIONS: {
933945
cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
934946
size_t ExtSize = 0;

0 commit comments

Comments
 (0)