Skip to content

Commit 123c00f

Browse files
committed
[L0] Fix the multi device event cache to allocate lists as pointers
- Fix a multi-device crash in which a large number of devices caused a stack overflow. The per-device event caches are now explicitly allocated. Signed-off-by: Spruit, Neil R <neil.r.spruit@intel.com>
1 parent 4814e71 commit 123c00f

File tree

4 files changed

+80
-27
lines changed

4 files changed

+80
-27
lines changed

source/adapters/level_zero/context.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -395,27 +395,29 @@ ur_result_t ur_context_handle_t_::finalize() {
395395

396396
if (!DisableEventsCaching) {
397397
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
398-
for (auto &EventCache : EventCaches) {
399-
for (auto &Event : EventCache) {
398+
for (auto EventCache : EventCaches) {
399+
for (auto Event : *EventCache) {
400400
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
401401
// Gracefully handle the case that L0 was already unloaded.
402402
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
403403
return ze2urResult(ZeResult);
404404
delete Event;
405405
}
406-
EventCache.clear();
406+
EventCache->clear();
407+
delete EventCache;
407408
}
408409
}
409410
{
410411
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
411-
for (auto &ZePoolCache : ZeEventPoolCache) {
412-
for (auto &ZePool : ZePoolCache) {
412+
for (auto ZePoolCache : ZeEventPoolCache) {
413+
for (auto ZePool : *ZePoolCache) {
413414
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
414415
// Gracefully handle the case that L0 was already unloaded.
415416
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
416417
return ze2urResult(ZeResult);
417418
}
418-
ZePoolCache.clear();
419+
ZePoolCache->clear();
420+
delete ZePoolCache;
419421
}
420422
}
421423

source/adapters/level_zero/context.hpp

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ struct ur_context_handle_t_ : _ur_object {
141141
// head.
142142
//
143143
// Cache of event pools to which host-visible events are added to.
144-
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
144+
std::vector<std::list<ze_event_pool_handle_t> *> ZeEventPoolCache;
145145
std::vector<std::unordered_map<ze_device_handle_t,
146146
std::list<ze_event_pool_handle_t> *>>
147147
ZeEventPoolCacheDeviceMap{4};
@@ -165,7 +165,7 @@ struct ur_context_handle_t_ : _ur_object {
165165
ur_mutex EventCacheMutex;
166166

167167
// Caches for events.
168-
std::vector<std::list<ur_event_handle_t>> EventCaches{4};
168+
std::vector<std::list<ur_event_handle_t> *> EventCaches;
169169
std::vector<
170170
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
171171
EventCachesDeviceMap{4};
@@ -207,31 +207,44 @@ struct ur_context_handle_t_ : _ur_object {
207207

208208
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling,
209209
ze_device_handle_t ZeDevice) {
210+
// Add the 4 initial global caches on first use, one per scope/profiling pair:
211+
// [0]/[1] host-visible with/without profiling, [2]/[3] device scope with/without profiling.
212+
if (ZeEventPoolCache.empty()) {
213+
for (int i = 0; i < 4; i++) {
214+
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
215+
new std::list<ze_event_pool_handle_t>;
216+
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
217+
}
218+
}
210219
if (HostVisible) {
211220
if (ZeDevice) {
212221
auto ZeEventPoolCacheMap = WithProfiling
213222
? &ZeEventPoolCacheDeviceMap[0]
214223
: &ZeEventPoolCacheDeviceMap[1];
215224
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
216-
ZeEventPoolCache.emplace_back();
217-
(*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back();
225+
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
226+
new std::list<ze_event_pool_handle_t>;
227+
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
228+
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
218229
}
219230
return (*ZeEventPoolCacheMap)[ZeDevice];
220231
} else {
221-
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
232+
return WithProfiling ? ZeEventPoolCache[0] : ZeEventPoolCache[1];
222233
}
223234
} else {
224235
if (ZeDevice) {
225236
auto ZeEventPoolCacheMap = WithProfiling
226237
? &ZeEventPoolCacheDeviceMap[2]
227238
: &ZeEventPoolCacheDeviceMap[3];
228239
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
229-
ZeEventPoolCache.emplace_back();
230-
(*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back();
240+
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
241+
new std::list<ze_event_pool_handle_t>;
242+
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
243+
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
231244
}
232245
return (*ZeEventPoolCacheMap)[ZeDevice];
233246
} else {
234-
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
247+
return WithProfiling ? ZeEventPoolCache[2] : ZeEventPoolCache[3];
235248
}
236249
}
237250
}
@@ -274,29 +287,42 @@ struct ur_context_handle_t_ : _ur_object {
274287
// Get the cache of events for a provided scope and profiling mode.
275288
auto getEventCache(bool HostVisible, bool WithProfiling,
276289
ur_device_handle_t Device) {
290+
// Add the 4 initial global caches on first use, one per scope/profiling pair:
291+
// [0]/[1] host-visible with/without profiling, [2]/[3] device scope with/without profiling.
292+
if (EventCaches.empty()) {
293+
for (int i = 0; i < 4; i++) {
294+
std::list<ur_event_handle_t> *deviceEventCache =
295+
new std::list<ur_event_handle_t>;
296+
EventCaches.push_back(deviceEventCache);
297+
}
298+
}
277299
if (HostVisible) {
278300
if (Device) {
279301
auto EventCachesMap =
280302
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
281303
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
282-
EventCaches.emplace_back();
283-
(*EventCachesMap)[Device] = &EventCaches.back();
304+
std::list<ur_event_handle_t> *deviceEventCache =
305+
new std::list<ur_event_handle_t>;
306+
EventCaches.push_back(deviceEventCache);
307+
(*EventCachesMap)[Device] = deviceEventCache;
284308
}
285309
return (*EventCachesMap)[Device];
286310
} else {
287-
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
311+
return WithProfiling ? EventCaches[0] : EventCaches[1];
288312
}
289313
} else {
290314
if (Device) {
291315
auto EventCachesMap =
292316
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
293317
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
294-
EventCaches.emplace_back();
295-
(*EventCachesMap)[Device] = &EventCaches.back();
318+
std::list<ur_event_handle_t> *deviceEventCache =
319+
new std::list<ur_event_handle_t>;
320+
EventCaches.push_back(deviceEventCache);
321+
(*EventCachesMap)[Device] = deviceEventCache;
296322
}
297323
return (*EventCachesMap)[Device];
298324
} else {
299-
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
325+
return WithProfiling ? EventCaches[2] : EventCaches[3];
300326
}
301327
}
302328
}

source/adapters/level_zero/queue.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,17 +1267,28 @@ ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
12671267
}
12681268

12691269
ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) {
1270+
// Adding 2 initial global caches for provided scope:
1271+
// Host Scope, Device Scope.
1272+
if (EventCaches.empty()) {
1273+
for (int i = 0; i < 2; i++) {
1274+
std::list<ur_event_handle_t> *deviceEventCache =
1275+
new std::list<ur_event_handle_t>;
1276+
EventCaches.push_back(deviceEventCache);
1277+
}
1278+
}
12701279
if (!Event->IsMultiDevice && Event->UrQueue) {
12711280
auto Device = Event->UrQueue->Device;
12721281
auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0]
12731282
: &EventCachesDeviceMap[1];
12741283
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
1275-
EventCaches.emplace_back();
1276-
(*EventCachesMap)[Device] = &EventCaches.back();
1284+
std::list<ur_event_handle_t> *deviceEventCache =
1285+
new std::list<ur_event_handle_t>;
1286+
EventCaches.push_back(deviceEventCache);
1287+
(*EventCachesMap)[Device] = deviceEventCache;
12771288
}
12781289
(*EventCachesMap)[Device]->emplace_back(Event);
12791290
} else {
1280-
auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1];
1291+
auto Cache = Event->isHostVisible() ? EventCaches[0] : EventCaches[1];
12811292
Cache->emplace_back(Event);
12821293
}
12831294
return UR_RESULT_SUCCESS;
@@ -1301,9 +1312,13 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
13011312
if (!UrQueue->RefCount.decrementAndTest())
13021313
return UR_RESULT_SUCCESS;
13031314

1304-
for (auto &Cache : UrQueue->EventCaches)
1305-
for (auto &Event : Cache)
1315+
for (auto Cache : UrQueue->EventCaches) {
1316+
for (auto Event : *Cache) {
13061317
UR_CALL(urEventReleaseInternal(Event));
1318+
}
1319+
Cache->clear();
1320+
delete Cache;
1321+
}
13071322

13081323
if (UrQueue->OwnZeCommandQueue) {
13091324
for (auto &QueueMap :
@@ -1460,6 +1475,16 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
14601475
bool HostVisible) {
14611476
std::list<ur_event_handle_t> *Cache;
14621477

1478+
// Adding 2 initial global caches for provided scope:
1479+
// Host Scope, Device Scope.
1480+
if (EventCaches.empty()) {
1481+
for (int i = 0; i < 2; i++) {
1482+
std::list<ur_event_handle_t> *deviceEventCache =
1483+
new std::list<ur_event_handle_t>;
1484+
EventCaches.push_back(deviceEventCache);
1485+
}
1486+
}
1487+
14631488
if (!IsMultiDevice) {
14641489
auto Device = this->Device;
14651490
Cache = HostVisible ? EventCachesDeviceMap[0][Device]
@@ -1468,7 +1493,7 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
14681493
return nullptr;
14691494
}
14701495
} else {
1471-
Cache = HostVisible ? &EventCaches[0] : &EventCaches[1];
1496+
Cache = HostVisible ? EventCaches[0] : EventCaches[1];
14721497
}
14731498

14741499
// If we don't have any events, return nullptr.

source/adapters/level_zero/queue.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ struct ur_queue_handle_t_ : _ur_object {
342342
// requested type of event. Each list contains events which can be reused
343343
// inside all command lists in the queue as described in the 2-event model.
344344
// Leftover events in the cache are released at the queue destruction.
345-
std::vector<std::list<ur_event_handle_t>> EventCaches{2};
345+
std::vector<std::list<ur_event_handle_t> *> EventCaches;
346346
std::vector<
347347
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
348348
EventCachesDeviceMap{2};

0 commit comments

Comments
 (0)