Skip to content

Commit 4f7787c

Browse files
authored
[SYCL] Support memory clock rate and memory bus width queries (#7412)
E2E test: intel/llvm-test-suite#1386
1 parent e7ed860 commit 4f7787c

File tree

15 files changed

+291
-8
lines changed

15 files changed

+291
-8
lines changed

sycl/doc/extensions/supported/sycl_ext_intel_device_info.md

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ The Feature Test Macro SYCL\_EXT\_INTEL\_DEVICE\_INFO will be defined as one of
1818
| 3 | HW threads per EU device query is supported |
1919
| 4 | Free device memory query is supported |
2020
| 5 | Device ID is supported |
21+
| 6 | Memory clock rate and bus width queries are supported |
2122

2223

2324

@@ -425,6 +426,80 @@ Then the free device memory can be obtained using the standard get\_info() inte
425426
auto FreeMemory = dev.get_info<ext::intel::info::device::free_memory>();
426427
}
427428

429+
430+
# Memory Clock Rate #
431+
432+
A new device descriptor is added which provides the maximum clock rate of device's global memory.
433+
434+
This new device descriptor is not available for devices in the OpenCL platform, and the matching aspect is false for those devices. The DPC++ default behavior is to expose GPU devices through the Level Zero platform.
435+
436+
## Version ##
437+
438+
The extension supports this query in version 6 and later.
439+
440+
441+
## Device Information Descriptors ##
442+
443+
| Device Descriptors | Return Type | Description |
444+
| ------------------ | ----------- | ----------- |
445+
| ext\:\:intel\:\:info\:\:device\:\:memory\_clock\_rate | uint32\_t| Returns the maximum clock rate of device's global memory in MHz. If device doesn't have memory then returns 0. If there are several memories on the device then the minimum of the clock rate values is returned. |
446+
447+
448+
## Aspects ##
449+
450+
A new aspect, ext\_intel\_memory\_clock\_rate, is added.
451+
452+
453+
## Error Condition ##
454+
455+
An invalid object runtime error is thrown if the device does not support aspect\:\:ext\_intel\_memory\_clock\_rate.
456+
457+
458+
## Example Usage ##
459+
460+
Then the memory clock rate can be obtained using the standard get\_info() interface.
461+
462+
if (dev.has(aspect::ext_intel_memory_clock_rate)) {
463+
auto MemoryClockRate = dev.get_info<ext::intel::info::device::memory_clock_rate>();
464+
}
465+
466+
467+
# Memory Bus Width #
468+
469+
A new device descriptor is added which provides the maximum bus width between device and memory.
470+
471+
This new device descriptor is not available for devices in the OpenCL platform, and the matching aspect is false for those devices. The DPC++ default behavior is to expose GPU devices through the Level Zero platform.
472+
473+
## Version ##
474+
475+
The extension supports this query in version 6 and later.
476+
477+
478+
## Device Information Descriptors ##
479+
480+
| Device Descriptors | Return Type | Description |
481+
| ------------------ | ----------- | ----------- |
482+
| ext\:\:intel\:\:info\:\:device\:\:memory\_bus\_width | uint32\_t| Returns the maximum bus width between device and memory in bits. If device doesn't have memory then returns 0. If there are several memories on the device then the minimum of the bus width values is returned. |
483+
484+
485+
## Aspects ##
486+
487+
A new aspect, ext\_intel\_memory\_bus\_width, is added.
488+
489+
490+
## Error Condition ##
491+
492+
An invalid object runtime error is thrown if the device does not support aspect\:\:ext\_intel\_memory\_bus\_width.
493+
494+
495+
## Example Usage ##
496+
497+
Then the memory bus width can be obtained using the standard get\_info() interface.
498+
499+
if (dev.has(aspect::ext_intel_memory_bus_width)) {
500+
auto MemoryBusWidth = dev.get_info<ext::intel::info::device::memory_bus_width>();
501+
}
502+
428503
# Deprecated queries #
429504

430505
The table below lists deprecated, that would soon be removed and their replacements:

sycl/include/sycl/detail/pi.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,12 @@
5353
// 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for
5454
// piDeviceGetInfo.
5555
// 11.15 piEventCreate creates even in the signalled state now.
56+
// 11.16 Add PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE and
57+
// PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH as an extension for
58+
// piDeviceGetInfo.
5659

5760
#define _PI_H_VERSION_MAJOR 11
58-
#define _PI_H_VERSION_MINOR 15
61+
#define _PI_H_VERSION_MINOR 16
5962

6063
#define _PI_STRING_HELPER(a) #a
6164
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -277,6 +280,12 @@ typedef enum {
277280
// Return true if sub-device should do its own program build
278281
PI_DEVICE_INFO_BUILD_ON_SUBDEVICE = 0x10028,
279282
PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY = 0x10029,
283+
// Return 0 if device doesn't have any memory modules. Return the minimum of
284+
// the clock rate values if there are several memory modules on the device.
285+
PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE = 0x10030,
286+
// Return 0 if device doesn't have any memory modules. Return the minimum of
287+
// the bus width values if there are several memory modules on the device.
288+
PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH = 0x10031,
280289
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
281290
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
282291
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,

sycl/include/sycl/feature_test.hpp.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
2929
// Feature test macro definitions
3030

3131
// TODO: Move these feature-test macros to compiler driver.
32-
#define SYCL_EXT_INTEL_DEVICE_INFO 5
32+
#define SYCL_EXT_INTEL_DEVICE_INFO 6
3333
#define SYCL_EXT_INTEL_DEVICE_ARCHITECTURE 1
3434
#define SYCL_EXT_ONEAPI_SUB_GROUP_MASK 1
3535
#define SYCL_EXT_ONEAPI_LOCAL_MEMORY 1

sycl/include/sycl/info/aspects.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,5 @@ __SYCL_ASPECT(ext_oneapi_cuda_async_barrier, 34)
3434
__SYCL_ASPECT(ext_oneapi_bfloat16, 35)
3535
__SYCL_ASPECT(ext_intel_free_memory, 36)
3636
__SYCL_ASPECT(ext_intel_device_id, 37)
37+
__SYCL_ASPECT(ext_intel_memory_clock_rate, 38)
38+
__SYCL_ASPECT(ext_intel_memory_bus_width, 39)

sycl/include/sycl/info/ext_intel_device_traits.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, gpu_hw_threads_per_eu, pi_uint32, P
1313
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_mem_bandwidth, pi_uint64, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH)
1414
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, uuid, detail::uuid_type, PI_DEVICE_INFO_UUID)
1515
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, pi_uint64, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY)
16+
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE)
17+
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH)
1618
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
1719
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
1820
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,6 +1926,25 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
19261926
return getInfo(param_value_size, param_value, param_value_size_ret,
19271927
FreeMemory);
19281928
}
1929+
case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: {
1930+
int value = 0;
1931+
sycl::detail::pi::assertion(
1932+
cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE,
1933+
device->get()) == CUDA_SUCCESS);
1934+
sycl::detail::pi::assertion(value >= 0);
1935+
// Convert kilohertz to megahertz when returning.
1936+
return getInfo(param_value_size, param_value, param_value_size_ret,
1937+
value / 1000);
1938+
}
1939+
case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: {
1940+
int value = 0;
1941+
sycl::detail::pi::assertion(
1942+
cuDeviceGetAttribute(&value,
1943+
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH,
1944+
device->get()) == CUDA_SUCCESS);
1945+
sycl::detail::pi::assertion(value >= 0);
1946+
return getInfo(param_value_size, param_value, param_value_size_ret, value);
1947+
}
19291948

19301949
// TODO: Investigate if this information is available on CUDA.
19311950
case PI_DEVICE_INFO_DEVICE_ID:

sycl/plugins/hip/pi_hip.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1815,6 +1815,26 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
18151815
FreeMemory);
18161816
}
18171817

1818+
case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: {
1819+
int value = 0;
1820+
sycl::detail::pi::assertion(
1821+
hipDeviceGetAttribute(&value, hipDeviceAttributeMemoryClockRate,
1822+
device->get()) == hipSuccess);
1823+
sycl::detail::pi::assertion(value >= 0);
1824+
// Convert kilohertz to megahertz when returning.
1825+
return getInfo(param_value_size, param_value, param_value_size_ret,
1826+
value / 1000);
1827+
}
1828+
1829+
case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: {
1830+
int value = 0;
1831+
sycl::detail::pi::assertion(
1832+
hipDeviceGetAttribute(&value, hipDeviceAttributeMemoryBusWidth,
1833+
device->get()) == hipSuccess);
1834+
sycl::detail::pi::assertion(value >= 0);
1835+
return getInfo(param_value_size, param_value, param_value_size_ret, value);
1836+
}
1837+
18181838
// TODO: Implement.
18191839
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
18201840
// TODO: Investigate if this information is available on HIP.

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3180,7 +3180,38 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
31803180
}
31813181
return ReturnValue(FreeMemory);
31823182
}
3183-
3183+
case PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE: {
3184+
// If there are not any memory modules then return 0.
3185+
if (Device->ZeDeviceMemoryProperties->empty())
3186+
return ReturnValue(pi_uint32{0});
3187+
3188+
// If there are multiple memory modules on the device then we have to report
3189+
// the value of the slowest memory.
3190+
auto Comp = [](const ze_device_memory_properties_t &A,
3191+
const ze_device_memory_properties_t &B) -> bool {
3192+
return A.maxClockRate < B.maxClockRate;
3193+
};
3194+
auto MinIt =
3195+
std::min_element(Device->ZeDeviceMemoryProperties->begin(),
3196+
Device->ZeDeviceMemoryProperties->end(), Comp);
3197+
return ReturnValue(pi_uint32{MinIt->maxClockRate});
3198+
}
3199+
case PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH: {
3200+
// If there are not any memory modules then return 0.
3201+
if (Device->ZeDeviceMemoryProperties->empty())
3202+
return ReturnValue(pi_uint32{0});
3203+
3204+
// If there are multiple memory modules on the device then we have to report
3205+
// the value of the slowest memory.
3206+
auto Comp = [](const ze_device_memory_properties_t &A,
3207+
const ze_device_memory_properties_t &B) -> bool {
3208+
return A.maxBusWidth < B.maxBusWidth;
3209+
};
3210+
auto MinIt =
3211+
std::min_element(Device->ZeDeviceMemoryProperties->begin(),
3212+
Device->ZeDeviceMemoryProperties->end(), Comp);
3213+
return ReturnValue(pi_uint32{MinIt->maxBusWidth});
3214+
}
31843215
case PI_DEVICE_INFO_GPU_EU_COUNT: {
31853216
pi_uint32 count = Device->ZeDeviceProperties->numEUsPerSubslice *
31863217
Device->ZeDeviceProperties->numSubslicesPerSlice *

sycl/source/detail/device_impl.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,14 @@ bool device_impl::has(aspect Aspect) const {
352352
return getPlugin().call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
353353
MDevice, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY, 0, nullptr,
354354
&return_size) == PI_SUCCESS;
355+
case aspect::ext_intel_memory_clock_rate:
356+
return getPlugin().call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
357+
MDevice, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE, 0, nullptr,
358+
&return_size) == PI_SUCCESS;
359+
case aspect::ext_intel_memory_bus_width:
360+
return getPlugin().call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
361+
MDevice, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH, 0, nullptr,
362+
&return_size) == PI_SUCCESS;
355363
case aspect::ext_intel_device_info_uuid: {
356364
auto Result = getPlugin().call_nocheck<detail::PiApiKind::piDeviceGetInfo>(
357365
MDevice, PI_DEVICE_INFO_UUID, 0, nullptr, &return_size);

sycl/source/detail/device_info.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,22 @@ inline uint64_t get_device_info_host<ext::intel::info::device::free_memory>() {
15731573
PI_ERROR_INVALID_DEVICE);
15741574
}
15751575

1576+
template <>
1577+
inline uint32_t
1578+
get_device_info_host<ext::intel::info::device::memory_clock_rate>() {
1579+
throw runtime_error(
1580+
"Obtaining the device memory clock rate is not supported on HOST device",
1581+
PI_ERROR_INVALID_DEVICE);
1582+
}
1583+
1584+
template <>
1585+
inline uint32_t
1586+
get_device_info_host<ext::intel::info::device::memory_bus_width>() {
1587+
throw runtime_error(
1588+
"Obtaining the device memory bus width is not supported on HOST device",
1589+
PI_ERROR_INVALID_DEVICE);
1590+
}
1591+
15761592
} // namespace detail
15771593
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
15781594
} // namespace sycl

0 commit comments

Comments
 (0)