Skip to content

Commit a5bf268

Browse files
authored
[UR][L0] Add support for querying the Vector Width Size Properties (#19159)
- Adds support for the L0 extension ZE_extension_device_vector_sizes - Enables querying the preferred and native vector widths of each supported data type for one or more supported vector width sizes. - By default, the vector widths reported per data type are those of the largest supported vector width size. - UR_L0_VECTOR_WIDTH_SIZE = <vector_width_size> is a hint to return the preferred and native widths for a specific vector width size. If that size is not supported, then the defaults are returned. - If this extension is not supported, then the previous defaults are returned. Signed-off-by: Neil R. Spruit <neil.r.spruit@intel.com>
1 parent 7a275e4 commit a5bf268

File tree

5 files changed

+108
-8
lines changed

5 files changed

+108
-8
lines changed

unified-runtime/scripts/core/LEVEL_ZERO.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,12 @@ Environment Variables
146146
| | The wait-event path relies on | the immediate append path only for some devices when the | |
147147
| | zeCommandQueueExecuteCommandLists() | pre-requisites are met. | |
148148
+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+
149+
| UR_L0_VECTOR_WIDTH_SIZE | Specifies the size (in bits) of the vector width supported | Any positive integer: Indicates the maximum number of data | Device-specific |
150+
| | by the Level Zero device. This value indicates the maximum | elements that can be processed simultaneously in a single | |
151+
| | number of data elements that can be processed simultaneously | instruction. The value entered by the user is the desired | |
152+
| | in a single instruction, which is useful for optimizing | width size to config. If this width size is not supported, | |
153+
| | data-parallel workloads and understanding device caps. | then the default "max" will be used. | |
154+
+---------------------------------------------+--------------------------------------------------------------+--------------------------------------------------------------+------------------+
149155
Contributors
150156
------------
151157

unified-runtime/source/adapters/level_zero/common.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,11 @@ ze_structure_type_t getZeStructureType<ze_device_ip_version_ext_t>() {
303303
return ZE_STRUCTURE_TYPE_DEVICE_IP_VERSION_EXT;
304304
}
305305
// Specialization that maps ze_device_vector_width_properties_ext_t to its
// structure-type enumerator,
// ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT.
// NOTE(review): presumably consumed by ZeStruct<> to fill in `stype`; the
// primary template and its users are outside this hunk — confirm.
template <>
306+
ze_structure_type_t
307+
getZeStructureType<ze_device_vector_width_properties_ext_t>() {
308+
return ZE_STRUCTURE_TYPE_DEVICE_VECTOR_WIDTH_PROPERTIES_EXT;
309+
}
310+
// Specialization that maps ze_device_memory_access_properties_t to its
// structure-type enumerator, ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES.
template <>
306311
ze_structure_type_t getZeStructureType<ze_device_memory_access_properties_t>() {
307312
return ZE_STRUCTURE_TYPE_DEVICE_MEMORY_ACCESS_PROPERTIES;
308313
}

unified-runtime/source/adapters/level_zero/common.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ const int UrL0LeaksDebug = [] {
7878
return std::atoi(UrRet);
7979
}();
8080

81+
// Parses the textual value of the UR_L0_VECTOR_WIDTH_SIZE environment
// variable.
//
// Returns the leading decimal integer in Value when it is strictly positive
// and survives conversion to int. Returns 0, meaning "no specific width
// requested", when Value is null, does not start with a number, or is zero,
// negative, or too large for an int.
//
// std::strtol is used instead of std::atoi because atoi has undefined
// behavior when the converted value is not representable, and because a
// negative result would later be cast to an unsigned width by the consumer.
inline int parseUrL0VectorWidth(const char *Value) {
  if (!Value)
    return 0;

  char *End = nullptr;
  const long Parsed = std::strtol(Value, &End, 10);
  // End == Value means no digits were consumed at all.
  if (End == Value || Parsed <= 0)
    return 0;

  // Round-trip through int to reject values that do not fit.
  const int Width = static_cast<int>(Parsed);
  if (static_cast<long>(Width) != Parsed)
    return 0;
  return Width;
}

// Vector width size requested through UR_L0_VECTOR_WIDTH_SIZE, read once
// during static initialization. 0 when the variable is unset or invalid.
const int UrL0VectorWidth =
    parseUrL0VectorWidth(std::getenv("UR_L0_VECTOR_WIDTH_SIZE"));
87+
8188
// Enable for UR L0 Adapter to Init all L0 Drivers on the system with filtering
8289
// in place for only currently used Drivers.
8390
const int UrL0InitAllDrivers = [] {

unified-runtime/source/adapters/level_zero/device.cpp

Lines changed: 88 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -690,35 +690,54 @@ ur_result_t urDeviceGetInfo(
690690
case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE:
691691
return ReturnValue(
692692
size_t{Device->ZeDeviceImageProperties->maxImageArraySlices});
693-
// Handle SIMD widths, matching compute-runtime OpenCL implementation:
694-
// https://github.com/intel/compute-runtime/blob/291745cdf76d83f5dc40e7ef41d347366235ccdb/opencl/source/cl_device/cl_device_caps.cpp#L236
695693
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR:
694+
return ReturnValue(
695+
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_char);
696696
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR:
697-
return ReturnValue(uint32_t{16});
697+
return ReturnValue(
698+
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_char);
698699
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT:
700+
return ReturnValue(
701+
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_short);
699702
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_SHORT:
700-
return ReturnValue(uint32_t{8});
703+
return ReturnValue(
704+
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_short);
701705
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT:
706+
return ReturnValue(
707+
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_int);
702708
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_INT:
703-
return ReturnValue(uint32_t{4});
709+
return ReturnValue(
710+
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_int);
704711
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG:
712+
return ReturnValue(
713+
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_long);
705714
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_LONG:
706-
return ReturnValue(uint32_t{1});
715+
return ReturnValue(
716+
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_long);
707717
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT:
718+
return ReturnValue(
719+
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_float);
708720
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_FLOAT:
709-
return ReturnValue(uint32_t{1});
721+
return ReturnValue(
722+
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_float);
710723
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE:
711724
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_DOUBLE:
712725
// Must return 0 for *vector_width_double* if the device does not have fp64.
713726
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64))
714727
return ReturnValue(uint32_t{0});
715728
return ReturnValue(uint32_t{1});
716729
case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF:
730+
// Must return 0 for *vector_width_half* if the device does not have fp16.
731+
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16))
732+
return ReturnValue(uint32_t{0});
733+
return ReturnValue(
734+
Device->ZeDeviceVectorWidthPropertiesExt->native_vector_width_half);
717735
case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_HALF:
718736
// Must return 0 for *vector_width_half* if the device does not have fp16.
719737
if (!(Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16))
720738
return ReturnValue(uint32_t{0});
721-
return ReturnValue(uint32_t{8});
739+
return ReturnValue(
740+
Device->ZeDeviceVectorWidthPropertiesExt->preferred_vector_width_half);
722741
case UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS: {
723742
// Max_num_sub_Groups = maxTotalGroupSize/min(set of subGroupSizes);
724743
uint32_t MinSubGroupSize =
@@ -1857,6 +1876,67 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
18571876
};
18581877
#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME
18591878

1879+
auto UrPlatform = this->Platform;
1880+
ZeDeviceVectorWidthPropertiesExt.Compute =
1881+
[ZeDevice, UrPlatform](
1882+
ZeStruct<ze_device_vector_width_properties_ext_t> &Properties) {
1883+
// Set default vector width properties
1884+
Properties.preferred_vector_width_char = 16u;
1885+
Properties.preferred_vector_width_short = 8u;
1886+
Properties.preferred_vector_width_int = 4u;
1887+
Properties.preferred_vector_width_long = 1u;
1888+
Properties.preferred_vector_width_float = 1u;
1889+
Properties.preferred_vector_width_half = 8u;
1890+
Properties.native_vector_width_char = 16u;
1891+
Properties.native_vector_width_short = 8u;
1892+
Properties.native_vector_width_int = 4u;
1893+
Properties.native_vector_width_long = 1u;
1894+
Properties.native_vector_width_float = 1u;
1895+
Properties.native_vector_width_half = 8u;
1896+
1897+
if (UrPlatform->zeDriverExtensionMap.count(
1898+
ZE_DEVICE_VECTOR_SIZES_EXT_NAME)) {
1899+
uint32_t Count = 0;
1900+
ZE_CALL_NOCHECK(zeDeviceGetVectorWidthPropertiesExt,
1901+
(ZeDevice, &Count, nullptr));
1902+
1903+
std::vector<ZeStruct<ze_device_vector_width_properties_ext_t>>
1904+
PropertiesVector;
1905+
PropertiesVector.reserve(Count);
1906+
1907+
ZeStruct<ze_device_vector_width_properties_ext_t>
1908+
MaxVectorWidthProperties;
1909+
1910+
ZE_CALL_NOCHECK(zeDeviceGetVectorWidthPropertiesExt,
1911+
(ZeDevice, &Count, PropertiesVector.data()));
1912+
if (!PropertiesVector.empty()) {
1913+
// Find the largest vector_width_size property
1914+
uint32_t max_vector_width_size = 0;
1915+
for (const auto &prop : PropertiesVector) {
1916+
if (!max_vector_width_size) {
1917+
max_vector_width_size = prop.vector_width_size;
1918+
MaxVectorWidthProperties = prop;
1919+
} else if (prop.vector_width_size > max_vector_width_size) {
1920+
max_vector_width_size = prop.vector_width_size;
1921+
MaxVectorWidthProperties = prop;
1922+
}
1923+
}
1924+
Properties = MaxVectorWidthProperties;
1925+
// If the environment variable is set, use the specified vector
1926+
// width if it exists
1927+
if (UrL0VectorWidth) {
1928+
for (const auto &prop : PropertiesVector) {
1929+
if (prop.vector_width_size ==
1930+
static_cast<uint32_t>(UrL0VectorWidth)) {
1931+
Properties = prop;
1932+
break;
1933+
}
1934+
}
1935+
}
1936+
}
1937+
}
1938+
};
1939+
18601940
ImmCommandListUsed = this->useImmediateCommandLists();
18611941

18621942
uint32_t numQueueGroups = 0;

unified-runtime/source/adapters/level_zero/device.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,8 @@ struct ur_device_handle_t_ : ur_object {
231231
ZeCache<ZeStruct<ze_intel_device_block_array_exp_properties_t>>
232232
ZeDeviceBlockArrayProperties;
233233
#endif // ZE_INTEL_DEVICE_BLOCK_ARRAY_EXP_NAME
234+
// Vector-width properties (ze_device_vector_width_properties_ext_t) for this
// device. The Compute callback that fills it in is assigned in
// ur_device_handle_t_::initialize(); urDeviceGetInfo reads the
// preferred_/native_vector_width_* fields from it.
ZeCache<ZeStruct<ze_device_vector_width_properties_ext_t>>
235+
ZeDeviceVectorWidthPropertiesExt;
234236

235237
// Map device bindless image offset to corresponding host image handle.
236238
std::unordered_map<ur_exp_image_native_handle_t, ze_image_handle_t>

0 commit comments

Comments
 (0)