|
7 | 7 | //===----------------------------------------------------------------------===//
|
8 | 8 |
|
9 | 9 | #include <detail/device_impl.hpp>
|
10 |
| -#include <detail/device_info.hpp> |
| 10 | +#include <detail/jit_compiler.hpp> |
11 | 11 | #include <detail/platform_impl.hpp>
|
12 | 12 | #include <detail/ur_info_code.hpp>
|
13 | 13 | #include <sycl/detail/ur.hpp>
|
@@ -78,30 +78,6 @@ platform device_impl::get_platform() const {
|
78 | 78 | return createSyclObjFromImpl<platform>(MPlatform);
|
79 | 79 | }
|
80 | 80 |
|
81 |
| -template <typename Param> |
82 |
| -typename Param::return_type device_impl::get_info() const { |
83 |
| - return get_device_info<Param>(*this); |
84 |
| -} |
85 |
| -// Explicitly instantiate all device info traits |
86 |
| -#define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ |
87 |
| - template ReturnT device_impl::get_info<info::device::Desc>() const; |
88 |
| - |
89 |
| -#define __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED(DescType, Desc, ReturnT, PiCode) \ |
90 |
| - template ReturnT device_impl::get_info<info::device::Desc>() const; |
91 |
| - |
92 |
| -#include <sycl/info/device_traits.def> |
93 |
| -#undef __SYCL_PARAM_TRAITS_SPEC_SPECIALIZED |
94 |
| -#undef __SYCL_PARAM_TRAITS_SPEC |
95 |
| - |
96 |
| -#define __SYCL_PARAM_TRAITS_SPEC(Namespace, DescType, Desc, ReturnT, PiCode) \ |
97 |
| - template __SYCL_EXPORT ReturnT \ |
98 |
| - device_impl::get_info<Namespace::info::DescType::Desc>() const; |
99 |
| - |
100 |
| -#include <sycl/info/ext_codeplay_device_traits.def> |
101 |
| -#include <sycl/info/ext_intel_device_traits.def> |
102 |
| -#include <sycl/info/ext_oneapi_device_traits.def> |
103 |
| -#undef __SYCL_PARAM_TRAITS_SPEC |
104 |
| - |
105 | 81 | #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
|
106 | 82 | template <>
|
107 | 83 | typename info::platform::version::return_type
|
@@ -146,7 +122,8 @@ device_impl::get_backend_info<info::device::backend_version>() const {
|
146 | 122 |
|
147 | 123 | bool device_impl::has_extension(const std::string &ExtensionName) const {
|
148 | 124 | std::string AllExtensionNames =
|
149 |
| - get_device_info_string(UR_DEVICE_INFO_EXTENSIONS); |
| 125 | + get_info_impl<std::string>(UR_DEVICE_INFO_EXTENSIONS); |
| 126 | + |
150 | 127 | return (AllExtensionNames.find(ExtensionName) != std::string::npos);
|
151 | 128 | }
|
152 | 129 |
|
@@ -256,6 +233,32 @@ device_impl::create_sub_devices(const std::vector<size_t> &Counts) const {
|
256 | 233 | return create_sub_devices(&Properties, Counts.size());
|
257 | 234 | }
|
258 | 235 |
|
| 236 | +static inline std::string |
| 237 | +affinityDomainToString(info::partition_affinity_domain AffinityDomain) { |
| 238 | + switch (AffinityDomain) { |
| 239 | +#define __SYCL_AFFINITY_DOMAIN_STRING_CASE(DOMAIN) \ |
| 240 | + case DOMAIN: \ |
| 241 | + return #DOMAIN; |
| 242 | + |
| 243 | + __SYCL_AFFINITY_DOMAIN_STRING_CASE( |
| 244 | + sycl::info::partition_affinity_domain::numa) |
| 245 | + __SYCL_AFFINITY_DOMAIN_STRING_CASE( |
| 246 | + sycl::info::partition_affinity_domain::L4_cache) |
| 247 | + __SYCL_AFFINITY_DOMAIN_STRING_CASE( |
| 248 | + sycl::info::partition_affinity_domain::L3_cache) |
| 249 | + __SYCL_AFFINITY_DOMAIN_STRING_CASE( |
| 250 | + sycl::info::partition_affinity_domain::L2_cache) |
| 251 | + __SYCL_AFFINITY_DOMAIN_STRING_CASE( |
| 252 | + sycl::info::partition_affinity_domain::L1_cache) |
| 253 | + __SYCL_AFFINITY_DOMAIN_STRING_CASE( |
| 254 | + sycl::info::partition_affinity_domain::next_partitionable) |
| 255 | +#undef __SYCL_AFFINITY_DOMAIN_STRING_CASE |
| 256 | + default: |
| 257 | + assert(false && "Missing case for affinity domain."); |
| 258 | + return "unknown"; |
| 259 | + } |
| 260 | +} |
| 261 | + |
259 | 262 | std::vector<device> device_impl::create_sub_devices(
|
260 | 263 | info::partition_affinity_domain AffinityDomain) const {
|
261 | 264 | if (!is_partition_supported(
|
@@ -371,17 +374,15 @@ bool device_impl::has(aspect Aspect) const {
|
371 | 374 | case aspect::ext_oneapi_cuda_cluster_group:
|
372 | 375 | return get_info<info::device::ext_oneapi_cuda_cluster_group>();
|
373 | 376 | case aspect::usm_atomic_host_allocations:
|
374 |
| - return ( |
375 |
| - get_device_info_impl<ur_device_usm_access_capability_flags_t, |
376 |
| - info::device::usm_host_allocations>::get(*this) & |
377 |
| - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS); |
| 377 | + return (get_info_impl<ur_device_usm_access_capability_flags_t>( |
| 378 | + UR_DEVICE_INFO_USM_HOST_SUPPORT) & |
| 379 | + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS); |
378 | 380 | case aspect::usm_shared_allocations:
|
379 | 381 | return get_info<info::device::usm_shared_allocations>();
|
380 | 382 | case aspect::usm_atomic_shared_allocations:
|
381 |
| - return ( |
382 |
| - get_device_info_impl<ur_device_usm_access_capability_flags_t, |
383 |
| - info::device::usm_shared_allocations>::get(*this) & |
384 |
| - UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS); |
| 383 | + return (get_info_impl<ur_device_usm_access_capability_flags_t>( |
| 384 | + UR_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT) & |
| 385 | + UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ATOMIC_CONCURRENT_ACCESS); |
385 | 386 | case aspect::usm_restricted_shared_allocations:
|
386 | 387 | return get_info<info::device::usm_restricted_shared_allocations>();
|
387 | 388 | case aspect::usm_system_allocations:
|
@@ -702,7 +703,7 @@ bool device_impl::has(aspect Aspect) const {
|
702 | 703 | return components.size() >= 2;
|
703 | 704 | }
|
704 | 705 | case aspect::ext_oneapi_is_component: {
|
705 |
| - typename sycl_to_ur<device>::type Result; |
| 706 | + ur_device_handle_t Result; |
706 | 707 | bool CallSuccessful =
|
707 | 708 | getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
|
708 | 709 | getHandleRef(),
|
@@ -952,9 +953,7 @@ bool device_impl::supportsForwardProgress(
|
952 | 953 | ext::oneapi::experimental::forward_progress_guarantee guarantee,
|
953 | 954 | ext::oneapi::experimental::execution_scope threadScope,
|
954 | 955 | ext::oneapi::experimental::execution_scope coordinationScope) const {
|
955 |
| - using ReturnT = |
956 |
| - std::vector<ext::oneapi::experimental::forward_progress_guarantee>; |
957 |
| - auto guarantees = getProgressGuaranteesUpTo<ReturnT>( |
| 956 | + auto guarantees = getProgressGuaranteesUpTo( |
958 | 957 | getProgressGuarantee(threadScope, coordinationScope));
|
959 | 958 | return std::find(guarantees.begin(), guarantees.end(), guarantee) !=
|
960 | 959 | guarantees.end();
|
@@ -997,6 +996,62 @@ device_impl::getImmediateProgressGuarantee(
|
997 | 996 | return forward_progress_guarantee::weakly_parallel;
|
998 | 997 | }
|
999 | 998 |
|
| 999 | +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES |
| 1000 | +#define EXPORT_GET_INFO(PARAM) \ |
| 1001 | + template <> \ |
| 1002 | + __SYCL_EXPORT PARAM::return_type device_impl::get_info<PARAM>() const { \ |
| 1003 | + return get_info_abi_workaround<PARAM>(); \ |
| 1004 | + } |
| 1005 | + |
| 1006 | +// clang-format off |
| 1007 | +EXPORT_GET_INFO(ext::intel::info::device::device_id) |
| 1008 | +EXPORT_GET_INFO(ext::intel::info::device::pci_address) |
| 1009 | +EXPORT_GET_INFO(ext::intel::info::device::gpu_eu_count) |
| 1010 | +EXPORT_GET_INFO(ext::intel::info::device::gpu_eu_simd_width) |
| 1011 | +EXPORT_GET_INFO(ext::intel::info::device::gpu_slices) |
| 1012 | +EXPORT_GET_INFO(ext::intel::info::device::gpu_subslices_per_slice) |
| 1013 | +EXPORT_GET_INFO(ext::intel::info::device::gpu_eu_count_per_subslice) |
| 1014 | +EXPORT_GET_INFO(ext::intel::info::device::gpu_hw_threads_per_eu) |
| 1015 | +EXPORT_GET_INFO(ext::intel::info::device::max_mem_bandwidth) |
| 1016 | +EXPORT_GET_INFO(ext::intel::info::device::uuid) |
| 1017 | +EXPORT_GET_INFO(ext::intel::info::device::free_memory) |
| 1018 | +EXPORT_GET_INFO(ext::intel::info::device::memory_clock_rate) |
| 1019 | +EXPORT_GET_INFO(ext::intel::info::device::memory_bus_width) |
| 1020 | +EXPORT_GET_INFO(ext::intel::info::device::max_compute_queue_indices) |
| 1021 | +EXPORT_GET_INFO(ext::intel::esimd::info::device::has_2d_block_io_support) |
| 1022 | +EXPORT_GET_INFO(ext::intel::info::device::current_clock_throttle_reasons) |
| 1023 | +EXPORT_GET_INFO(ext::intel::info::device::fan_speed) |
| 1024 | +EXPORT_GET_INFO(ext::intel::info::device::min_power_limit) |
| 1025 | +EXPORT_GET_INFO(ext::intel::info::device::max_power_limit) |
| 1026 | + |
| 1027 | +EXPORT_GET_INFO(ext::codeplay::experimental::info::device::supports_fusion) |
| 1028 | +EXPORT_GET_INFO(ext::codeplay::experimental::info::device::max_registers_per_work_group) |
| 1029 | + |
| 1030 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_global_work_groups) |
| 1031 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_work_groups<1>) |
| 1032 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_work_groups<2>) |
| 1033 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_work_groups<3>) |
| 1034 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_group_progress_capabilities<ext::oneapi::experimental::execution_scope::root_group>) |
| 1035 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::sub_group_progress_capabilities<ext::oneapi::experimental::execution_scope::root_group>) |
| 1036 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::sub_group_progress_capabilities<ext::oneapi::experimental::execution_scope::work_group>) |
| 1037 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_item_progress_capabilities<ext::oneapi::experimental::execution_scope::root_group>) |
| 1038 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_item_progress_capabilities<ext::oneapi::experimental::execution_scope::work_group>) |
| 1039 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::work_item_progress_capabilities<ext::oneapi::experimental::execution_scope::sub_group>) |
| 1040 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::architecture) |
| 1041 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::matrix_combinations) |
| 1042 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::image_row_pitch_align) |
| 1043 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_image_linear_row_pitch) |
| 1044 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_image_linear_width) |
| 1045 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::max_image_linear_height) |
| 1046 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::mipmap_max_anisotropy) |
| 1047 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::component_devices) |
| 1048 | +EXPORT_GET_INFO(ext::oneapi::experimental::info::device::composite_device) |
| 1049 | +EXPORT_GET_INFO(ext::oneapi::info::device::num_compute_units) |
| 1050 | +// clang-format on |
| 1051 | + |
| 1052 | +#undef EXPORT_GET_INFO |
| 1053 | +#endif |
| 1054 | + |
1000 | 1055 | } // namespace detail
|
1001 | 1056 | } // namespace _V1
|
1002 | 1057 | } // namespace sycl
|
0 commit comments