Skip to content

Commit 4fd438a

Browse files
authored
[UR] Fix creation of context with parent device and its sub-devices (#19223)
This use case appears to be allowed. For example, according to https://intel.github.io/llvm/MultiTileCardWithLevelZero.html#context: "Both root-devices and sub-devices can be within single context, but they all should be of the same SYCL platform." Before this change, CollectDevicesAndSubDevices returned duplicate devices when a parent device was passed together with its sub-devices, which led to an error in https://github.com/intel/llvm/blob/6af08fe9c6fd4dc8e433555646606898c71d92fb/unified-runtime/source/common/ur_pool_manager.hpp#L178-L179 because of the duplicate pool descriptors.
1 parent e98d8a0 commit 4fd438a

File tree

2 files changed

+58
-2
lines changed

2 files changed

+58
-2
lines changed

unified-runtime/source/adapters/level_zero/device.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include <stdarg.h>
1717
#include <string>
1818
#include <unordered_map>
19+
#include <unordered_set>
1920
#include <vector>
2021

2122
#include "adapters/level_zero/platform.hpp"
@@ -250,12 +251,16 @@ struct ur_device_handle_t_ : ur_object {
250251
inline std::vector<ur_device_handle_t>
251252
CollectDevicesAndSubDevices(const std::vector<ur_device_handle_t> &Devices) {
252253
std::vector<ur_device_handle_t> DevicesAndSubDevices;
254+
std::unordered_set<ur_device_handle_t> Seen;
253255
std::function<void(const std::vector<ur_device_handle_t> &)>
254256
CollectDevicesAndSubDevicesRec =
255257
[&](const std::vector<ur_device_handle_t> &Devices) {
256258
for (auto &Device : Devices) {
257-
DevicesAndSubDevices.push_back(Device);
258-
CollectDevicesAndSubDevicesRec(Device->SubDevices);
259+
// Only add the device if it has not been seen before.
260+
if (Seen.insert(Device).second) {
261+
DevicesAndSubDevices.push_back(Device);
262+
CollectDevicesAndSubDevicesRec(Device->SubDevices);
263+
}
259264
}
260265
};
261266
CollectDevicesAndSubDevicesRec(Devices);

unified-runtime/test/conformance/context/urContextCreate.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,57 @@ TEST_P(urContextCreateTest, InvalidEnumeration) {
4545
urContextCreate(1, &device, &properties, context.ptr()));
4646
}
4747

48+
// Checks that a single context can be created from a device together with the
// sub-devices obtained by partitioning that same device by NUMA affinity
// domain. The test is skipped when the device cannot be partitioned that way.
TEST_P(urContextCreateTest, SuccessParentAndSubDevices) {
  const bool can_partition = uur::hasDevicePartitionSupport(
      device, UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN);
  if (!can_partition) {
    GTEST_SKIP() << "Device \'" << device
                 << "\' does not support partitioning by affinity domain.\n";
  }

  // Query which affinity domains the device supports and require NUMA.
  ur_device_affinity_domain_flags_t domain = UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA;
  ur_device_affinity_domain_flags_t available_domains{0};
  ASSERT_SUCCESS(
      uur::GetDevicePartitionAffinityDomainFlags(device, available_domains));
  if ((domain & available_domains) == 0) {
    GTEST_SKIP() << static_cast<ur_device_affinity_domain_flag_t>(domain)
                 << " is not supported by the device: \'" << device << "\'.\n";
  }

  // Describe a partitioning consisting of exactly one affinity-domain property.
  ur_device_partition_property_t partition_property =
      uur::makePartitionByAffinityDomain(domain);
  ur_device_partition_properties_t partition_desc{
      UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES,
      nullptr,
      &partition_property,
      1,
  };

  // First call: only ask how many sub-devices the partitioning yields.
  uint32_t sub_device_count = 0;
  ASSERT_SUCCESS(urDevicePartition(device, &partition_desc, 0, nullptr,
                                   &sub_device_count));
  ASSERT_NE(sub_device_count, 0);

  // Second call: retrieve the sub-device handles themselves.
  std::vector<ur_device_handle_t> partitions(sub_device_count);
  ASSERT_SUCCESS(urDevicePartition(device, &partition_desc,
                                   static_cast<uint32_t>(partitions.size()),
                                   partitions.data(), nullptr));

  // The parent device goes first, followed by every sub-device.
  std::vector<ur_device_handle_t> context_devices{device};
  context_devices.insert(context_devices.end(), partitions.begin(),
                         partitions.end());

  uur::raii::Context context = nullptr;
  ASSERT_SUCCESS(urContextCreate(static_cast<uint32_t>(context_devices.size()),
                                 context_devices.data(), nullptr,
                                 context.ptr()));
  ASSERT_NE(nullptr, context);

  // Release the sub-device handles returned by urDevicePartition.
  for (ur_device_handle_t partition : partitions) {
    ASSERT_NE(partition, nullptr);
    ASSERT_SUCCESS(urDeviceRelease(partition));
  }
}
98+
4899
using urContextCreateMultiDeviceTest = uur::urAllDevicesTest;
49100
UUR_INSTANTIATE_PLATFORM_TEST_SUITE(urContextCreateMultiDeviceTest);
50101

0 commit comments

Comments
 (0)