Skip to content

Commit 647d973

Browse files
[NFCI][SYCL] Refactor device_impl::get_info (#18358)
Before this change `get_info` was implemented via several layers: 1. `device::get_info` (ABI boundary) 2. `device_impl::get_info` (Some ABI exported for no reason!) 3. `detail::get_device_info` 4. `struct get_device_info_impl` 4b. specialization of 4. for SYCL/UR types could be distinct, e.g. `get_device_info_impl<device, Desc>` calling `get_device_info_impl<ur_device_handle_t, Desc>` with different descriptors implemented by partially/fully specializing *either* 3 or 4 (with ABINeutral processing done at 1 as well).o This PR collapses 2-4* into the following flow: 1. `device::get_info` (ABI boundary) 2. `device_impl::get_info`, single impl via `if constexpr` dispatch 3. (optional) for the most complicated descriptors outlined implementation into dedicated named helpers with no template magic 4. Two uniform single-layer helpers `get_info_impl` and `get_info_impl_nocheck`. A tiny bit of magic to support different types (native UR/std::string/std::vector) is handled via `if constexpr` and is easy to follow. Old ABI exports are preserved with minor customizations based on preview mode. Different info descriptors are handled in the same order as they appear in the corresponding `*.def` files. I expect that having this new implementation in `device_impl.hpp` allows for much more inlining without LTO, which previously wasn't possible (some symbols resided in `device.cpp` while actual implementations were only instantiated in `device_impl.cpp`). Possible future changes based on top of this: * Pre-initialize some bits in the `device_impl` ctor, at least extensions string * Possibly remove `UR_` mappings from `*.def` distributed as part of headers (and maybe drop those files completely). This is an implementation detail and has no business being visible to customers. * Wider usage of `get_info_impl` helpers. At least some aspects code can benefit from a follow-up cleanup. * Use something like llvm::SmallString at the `device_impl` level
1 parent ed49b2f commit 647d973

File tree

8 files changed

+1402
-1785
lines changed

8 files changed

+1402
-1785
lines changed

sycl/include/sycl/info/device_traits.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ __SYCL_PARAM_TRAITS_SPEC(device, execution_capabilities,
135135
UR_DEVICE_INFO_EXECUTION_CAPABILITIES)
136136
__SYCL_PARAM_TRAITS_SPEC(device, queue_profiling, bool,
137137
UR_DEVICE_INFO_QUEUE_PROPERTIES)
138+
// TODO: UR_DEVICE_INFO_FORCE_UINT32 looks wrong here:
138139
__SYCL_PARAM_TRAITS_SPEC(device, built_in_kernel_ids,
139140
std::vector<sycl::kernel_id>, UR_DEVICE_INFO_FORCE_UINT32)
140141
__SYCL_PARAM_TRAITS_SPEC(device, built_in_kernels, std::vector<std::string>,

sycl/source/detail/allowlist.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
#include <detail/allowlist.hpp>
99
#include <detail/config.hpp>
1010
#include <detail/device_impl.hpp>
11-
#include <detail/device_info.hpp>
1211
#include <sycl/backend_types.hpp>
1312

1413
#include <algorithm>
1514
#include <regex>
15+
#include <sstream>
1616

1717
namespace sycl {
1818
inline namespace _V1 {
@@ -426,18 +426,18 @@ void applyAllowList(std::vector<ur_device_handle_t> &UrDevices,
426426
}
427427
// get DeviceVendorId value and put it to DeviceDesc
428428
uint32_t DeviceVendorIdUInt =
429-
sycl::detail::get_device_info<info::device::vendor_id>(DeviceImpl);
429+
DeviceImpl.get_info<info::device::vendor_id>();
430430
std::stringstream DeviceVendorIdHexStringStream;
431431
DeviceVendorIdHexStringStream << "0x" << std::hex << DeviceVendorIdUInt;
432432
const auto &DeviceVendorIdValue = DeviceVendorIdHexStringStream.str();
433433
DeviceDesc[DeviceVendorIdKeyName] = DeviceVendorIdValue;
434434
// get DriverVersion value and put it to DeviceDesc
435435
const std::string &DriverVersionValue =
436-
sycl::detail::get_device_info<info::device::driver_version>(DeviceImpl);
436+
DeviceImpl.get_info<info::device::driver_version>();
437437
DeviceDesc[DriverVersionKeyName] = DriverVersionValue;
438438
// get DeviceName value and put it to DeviceDesc
439439
const std::string &DeviceNameValue =
440-
sycl::detail::get_device_info<info::device::name>(DeviceImpl);
440+
DeviceImpl.get_info<info::device::name>();
441441
DeviceDesc[DeviceNameKeyName] = DeviceNameValue;
442442

443443
// check if we can allow device with such device description DeviceDesc

sycl/source/detail/device_impl.cpp

Lines changed: 93 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <detail/device_impl.hpp>
10-
#include <detail/device_info.hpp>
10+
#include <detail/jit_compiler.hpp>
1111
#include <detail/platform_impl.hpp>
1212
#include <detail/ur_info_code.hpp>
1313
#include <sycl/detail/ur.hpp>
@@ -78,30 +78,6 @@ platform device_impl::get_platform() const {
7878
return createSyclObjFromImpl<platform>(MPlatform);
7979
}
8080

81-
template <typename Param>
82-
typename Param::return_type device_impl::get_info() const {
83-
return get_device_info<Param>(*this);
84-
}
85-
// Explicitly instantiate all device info traits
86-
#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \
87-
template ReturnT device_impl::get_info<info::device::Desc>() const;
88-
89-
#define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, PiCode) \
90-
template ReturnT device_impl::get_info<info::device::Desc>() const;
91-
92-
#include <sycl/info/device_traits.def>
93-
#undef __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED
94-
#undef __SYCL_PARAM_TRAITS_SPEC
95-
96-
#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \
97-
template __SYCL_EXPORT ReturnT \
98-
device_impl::get_info<Namespace::info::DescType::Desc>() const;
99-
100-
#include <sycl/info/ext_codeplay_device_traits.def>
101-
#include <sycl/info/ext_intel_device_traits.def>
102-
#include <sycl/info/ext_oneapi_device_traits.def>
103-
#undef __SYCL_PARAM_TRAITS_SPEC
104-
10581
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
10682
template <>
10783
typename info::platform::version::return_type
@@ -146,7 +122,8 @@ device_impl::get_backend_info<info::device::backend_version>() const {
146122

147123
bool device_impl::has_extension(const std::string &ExtensionName) const {
148124
std::string AllExtensionNames =
149-
get_device_info_string(UR_DEVICE_INFO_EXTENSIONS);
125+
get_info_impl<std::string>(UR_DEVICE_INFO_EXTENSIONS);
126+
150127
return (AllExtensionNames.find(ExtensionName) != std::string::npos);
151128
}
152129

@@ -256,6 +233,32 @@ device_impl::create_sub_devices(const std::vector<size_t> &Counts) const {
256233
return create_sub_devices(&Properties, Counts.size());
257234
}
258235

236+
static inline std::string
237+
affinityDomainToString(info::partition_affinity_domain AffinityDomain) {
238+
switch (AffinityDomain) {
239+
#define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \
240+
case DOMAIN: \
241+
return #DOMAIN;
242+
243+
__SYCL_AFFINITY_DOMAIN_STRING_CASE(
244+
sycl::info::partition_affinity_domain::numa)
245+
__SYCL_AFFINITY_DOMAIN_STRING_CASE(
246+
sycl::info::partition_affinity_domain::L4_cache)
247+
__SYCL_AFFINITY_DOMAIN_STRING_CASE(
248+
sycl::info::partition_affinity_domain::L3_cache)
249+
__SYCL_AFFINITY_DOMAIN_STRING_CASE(
250+
sycl::info::partition_affinity_domain::L2_cache)
251+
__SYCL_AFFINITY_DOMAIN_STRING_CASE(
252+
sycl::info::partition_affinity_domain::L1_cache)
253+
__SYCL_AFFINITY_DOMAIN_STRING_CASE(
254+
sycl::info::partition_affinity_domain::next_partitionable)
255+
#undef __SYCL_AFFINITY_DOMAIN_STRING_CASE
256+
default:
257+
assert(false && "Missing case for affinity domain.");
258+
return "unknown";
259+
}
260+
}
261+
259262
std::vector<device> device_impl::create_sub_devices(
260263
info::partition_affinity_domain AffinityDomain) const {
261264
if (!is_partition_supported(
@@ -371,17 +374,15 @@ bool device_impl::has(aspect Aspect) const {
371374
case aspect::ext_oneapi_cuda_cluster_group:
372375
return get_info<info::device::ext_oneapi_cuda_cluster_group>();
373376
case aspect::usm_atomic_host_allocations:
374-
return (
375-
get_device_info_impl<ur_device_usm_access_capability_flags_t,
376-
info::device::usm_host_allocations>::get(*this) &
377-
UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
377+
return (get_info_impl<ur_device_usm_access_capability_flags_t>(
378+
UR_DEVICE_INFO_USM_HOST_SUPPORT) &
379+
UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
378380
case aspect::usm_shared_allocations:
379381
return get_info<info::device::usm_shared_allocations>();
380382
case aspect::usm_atomic_shared_allocations:
381-
return (
382-
get_device_info_impl<ur_device_usm_access_capability_flags_t,
383-
info::device::usm_shared_allocations>::get(*this) &
384-
UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
383+
return (get_info_impl<ur_device_usm_access_capability_flags_t>(
384+
UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT) &
385+
UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS);
385386
case aspect::usm_restricted_shared_allocations:
386387
return get_info<info::device::usm_restricted_shared_allocations>();
387388
case aspect::usm_system_allocations:
@@ -702,7 +703,7 @@ bool device_impl::has(aspect Aspect) const {
702703
return components.size() >= 2;
703704
}
704705
case aspect::ext_oneapi_is_component: {
705-
typename sycl_to_ur<device>::type Result;
706+
ur_device_handle_t Result;
706707
bool CallSuccessful =
707708
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
708709
getHandleRef(),
@@ -952,9 +953,7 @@ bool device_impl::supportsForwardProgress(
952953
ext::oneapi::experimental::forward_progress_guarantee guarantee,
953954
ext::oneapi::experimental::execution_scope threadScope,
954955
ext::oneapi::experimental::execution_scope coordinationScope) const {
955-
using ReturnT =
956-
std::vector<ext::oneapi::experimental::forward_progress_guarantee>;
957-
auto guarantees = getProgressGuaranteesUpTo<ReturnT>(
956+
auto guarantees = getProgressGuaranteesUpTo(
958957
getProgressGuarantee(threadScope, coordinationScope));
959958
return std::find(guarantees.begin(), guarantees.end(), guarantee) !=
960959
guarantees.end();
@@ -997,6 +996,62 @@ device_impl::getImmediateProgressGuarantee(
997996
return forward_progress_guarantee::weakly_parallel;
998997
}
999998

999+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
1000+
#define EXPORT_GET_INFO(PARAM) \
1001+
template <> \
1002+
__SYCL_EXPORT PARAM::return_type device_impl::get_info<PARAM>() const { \
1003+
return get_info_abi_workaround<PARAM>(); \
1004+
}
1005+
1006+
// clang-format off
1007+
EXPORT_GET_INFO(ext::intel::info::device::device_id)
1008+
EXPORT_GET_INFO(ext::intel::info::device::pci_address)
1009+
EXPORT_GET_INFO(ext::intel::info::device::gpu_eu_count)
1010+
EXPORT_GET_INFO(ext::intel::info::device::gpu_eu_simd_width)
1011+
EXPORT_GET_INFO(ext::intel::info::device::gpu_slices)
1012+
EXPORT_GET_INFO(ext::intel::info::device::gpu_subslices_per_slice)
1013+
EXPORT_GET_INFO(ext::intel::info::device::gpu_eu_count_per_subslice)
1014+
EXPORT_GET_INFO(ext::intel::info::device::gpu_hw_threads_per_eu)
1015+
EXPORT_GET_INFO(ext::intel::info::device::max_mem_bandwidth)
1016+
EXPORT_GET_INFO(ext::intel::info::device::uuid)
1017+
EXPORT_GET_INFO(ext::intel::info::device::free_memory)
1018+
EXPORT_GET_INFO(ext::intel::info::device::memory_clock_rate)
1019+
EXPORT_GET_INFO(ext::intel::info::device::memory_bus_width)
1020+
EXPORT_GET_INFO(ext::intel::info::device::max_compute_queue_indices)
1021+
EXPORT_GET_INFO(ext::intel::esimd::info::device::has_2d_block_io_support)
1022+
EXPORT_GET_INFO(ext::intel::info::device::current_clock_throttle_reasons)
1023+
EXPORT_GET_INFO(ext::intel::info::device::fan_speed)
1024+
EXPORT_GET_INFO(ext::intel::info::device::min_power_limit)
1025+
EXPORT_GET_INFO(ext::intel::info::device::max_power_limit)
1026+
1027+
EXPORT_GET_INFO(ext::codeplay::experimental::info::device::supports_fusion)
1028+
EXPORT_GET_INFO(ext::codeplay::experimental::info::device::max_registers_per_work_group)
1029+
1030+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_global_work_groups)
1031+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_work_groups<1>)
1032+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_work_groups<2>)
1033+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_work_groups<3>)
1034+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_group_progress_capabilities<ext::oneapi::experimental::execution_scope::root_group>)
1035+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::sub_group_progress_capabilities<ext::oneapi::experimental::execution_scope::root_group>)
1036+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::sub_group_progress_capabilities<ext::oneapi::experimental::execution_scope::work_group>)
1037+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_item_progress_capabilities<ext::oneapi::experimental::execution_scope::root_group>)
1038+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_item_progress_capabilities<ext::oneapi::experimental::execution_scope::work_group>)
1039+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_item_progress_capabilities<ext::oneapi::experimental::execution_scope::sub_group>)
1040+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::architecture)
1041+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::matrix_combinations)
1042+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::image_row_pitch_align)
1043+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_image_linear_row_pitch)
1044+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_image_linear_width)
1045+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_image_linear_height)
1046+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::mipmap_max_anisotropy)
1047+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::component_devices)
1048+
EXPORT_GET_INFO(ext::oneapi::experimental::info::device::composite_device)
1049+
EXPORT_GET_INFO(ext::oneapi::info::device::num_compute_units)
1050+
// clang-format on
1051+
1052+
#undef EXPORT_GET_INFO
1053+
#endif
1054+
10001055
} // namespace detail
10011056
} // namespace _V1
10021057
} // namespace sycl

0 commit comments

Comments
 (0)