Skip to content

Commit 78e09f7

Browse files
authored
Merge pull request #1392 from pbalcer/event-cache-different-fix
[L0] Event cache different fix
2 parents e0393a4 + 8a5d6d3 commit 78e09f7

File tree

4 files changed

+46
-94
lines changed

4 files changed

+46
-94
lines changed

source/adapters/level_zero/context.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -395,29 +395,27 @@ ur_result_t ur_context_handle_t_::finalize() {
395395

396396
if (!DisableEventsCaching) {
397397
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
398-
for (auto EventCache : EventCaches) {
399-
for (auto Event : *EventCache) {
398+
for (auto &EventCache : EventCaches) {
399+
for (auto &Event : EventCache) {
400400
auto ZeResult = ZE_CALL_NOCHECK(zeEventDestroy, (Event->ZeEvent));
401401
// Gracefully handle the case that L0 was already unloaded.
402402
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
403403
return ze2urResult(ZeResult);
404404
delete Event;
405405
}
406-
EventCache->clear();
407-
delete EventCache;
406+
EventCache.clear();
408407
}
409408
}
410409
{
411410
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
412-
for (auto ZePoolCache : ZeEventPoolCache) {
413-
for (auto ZePool : *ZePoolCache) {
411+
for (auto &ZePoolCache : ZeEventPoolCache) {
412+
for (auto &ZePool : ZePoolCache) {
414413
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
415414
// Gracefully handle the case that L0 was already unloaded.
416415
if (ZeResult && ZeResult != ZE_RESULT_ERROR_UNINITIALIZED)
417416
return ze2urResult(ZeResult);
418417
}
419-
ZePoolCache->clear();
420-
delete ZePoolCache;
418+
ZePoolCache.clear();
421419
}
422420
}
423421

source/adapters/level_zero/context.hpp

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,8 @@ struct ur_context_handle_t_ : _ur_object {
141141
// head.
142142
//
143143
// Cache of event pools to which host-visible events are added.
144-
std::vector<std::list<ze_event_pool_handle_t> *> ZeEventPoolCache;
145-
std::vector<std::unordered_map<ze_device_handle_t,
146-
std::list<ze_event_pool_handle_t> *>>
144+
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
145+
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
147146
ZeEventPoolCacheDeviceMap{4};
148147

149148
// This map will be used to determine if a pool is full or not
@@ -165,9 +164,9 @@ struct ur_context_handle_t_ : _ur_object {
165164
ur_mutex EventCacheMutex;
166165

167166
// Caches for events.
168-
std::vector<std::list<ur_event_handle_t> *> EventCaches;
169-
std::vector<
170-
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
167+
using EventCache = std::vector<std::list<ur_event_handle_t>>;
168+
EventCache EventCaches{4};
169+
std::vector<std::unordered_map<ur_device_handle_t, size_t>>
171170
EventCachesDeviceMap{4};
172171

173172
// Initialize the PI context.
@@ -205,46 +204,36 @@ struct ur_context_handle_t_ : _ur_object {
205204
// Add ur_event_handle_t to cache.
206205
void addEventToContextCache(ur_event_handle_t);
207206

208-
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling,
209-
ze_device_handle_t ZeDevice) {
210-
// Adding 4 initial global caches for provided scope and profiling modes:
211-
// Host Scope, Device Scope, with Profiling, without Profiling.
212-
if (ZeEventPoolCache.empty()) {
213-
for (int i = 0; i < 4; i++) {
214-
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
215-
new std::list<ze_event_pool_handle_t>;
216-
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
217-
}
218-
}
207+
std::list<ze_event_pool_handle_t> *
208+
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
209+
ze_device_handle_t ZeDevice) {
219210
if (HostVisible) {
220211
if (ZeDevice) {
221212
auto ZeEventPoolCacheMap = WithProfiling
222213
? &ZeEventPoolCacheDeviceMap[0]
223214
: &ZeEventPoolCacheDeviceMap[1];
224215
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
225-
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
226-
new std::list<ze_event_pool_handle_t>;
227-
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
228-
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
216+
ZeEventPoolCache.emplace_back();
217+
ZeEventPoolCacheMap->insert(
218+
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
229219
}
230-
return (*ZeEventPoolCacheMap)[ZeDevice];
220+
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
231221
} else {
232-
return WithProfiling ? ZeEventPoolCache[0] : ZeEventPoolCache[1];
222+
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
233223
}
234224
} else {
235225
if (ZeDevice) {
236226
auto ZeEventPoolCacheMap = WithProfiling
237227
? &ZeEventPoolCacheDeviceMap[2]
238228
: &ZeEventPoolCacheDeviceMap[3];
239229
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
240-
std::list<ze_event_pool_handle_t> *deviceZeEventPoolCache =
241-
new std::list<ze_event_pool_handle_t>;
242-
ZeEventPoolCache.push_back(deviceZeEventPoolCache);
243-
(*ZeEventPoolCacheMap)[ZeDevice] = deviceZeEventPoolCache;
230+
ZeEventPoolCache.emplace_back();
231+
ZeEventPoolCacheMap->insert(
232+
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
244233
}
245-
return (*ZeEventPoolCacheMap)[ZeDevice];
234+
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
246235
} else {
247-
return WithProfiling ? ZeEventPoolCache[2] : ZeEventPoolCache[3];
236+
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
248237
}
249238
}
250239
}
@@ -287,42 +276,31 @@ struct ur_context_handle_t_ : _ur_object {
287276
// Get the cache of events for a provided scope and profiling mode.
288277
auto getEventCache(bool HostVisible, bool WithProfiling,
289278
ur_device_handle_t Device) {
290-
// Adding 4 initial global caches for provided scope and profiling modes:
291-
// Host Scope, Device Scope, with Profiling, without Profiling.
292-
if (EventCaches.empty()) {
293-
for (int i = 0; i < 4; i++) {
294-
std::list<ur_event_handle_t> *deviceEventCache =
295-
new std::list<ur_event_handle_t>;
296-
EventCaches.push_back(deviceEventCache);
297-
}
298-
}
299279
if (HostVisible) {
300280
if (Device) {
301281
auto EventCachesMap =
302282
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
303283
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
304-
std::list<ur_event_handle_t> *deviceEventCache =
305-
new std::list<ur_event_handle_t>;
306-
EventCaches.push_back(deviceEventCache);
307-
(*EventCachesMap)[Device] = deviceEventCache;
284+
EventCaches.emplace_back();
285+
EventCachesMap->insert(
286+
std::make_pair(Device, EventCaches.size() - 1));
308287
}
309-
return (*EventCachesMap)[Device];
288+
return &EventCaches[(*EventCachesMap)[Device]];
310289
} else {
311-
return WithProfiling ? EventCaches[0] : EventCaches[1];
290+
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
312291
}
313292
} else {
314293
if (Device) {
315294
auto EventCachesMap =
316295
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
317296
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
318-
std::list<ur_event_handle_t> *deviceEventCache =
319-
new std::list<ur_event_handle_t>;
320-
EventCaches.push_back(deviceEventCache);
321-
(*EventCachesMap)[Device] = deviceEventCache;
297+
EventCaches.emplace_back();
298+
EventCachesMap->insert(
299+
std::make_pair(Device, EventCaches.size() - 1));
322300
}
323-
return (*EventCachesMap)[Device];
301+
return &EventCaches[(*EventCachesMap)[Device]];
324302
} else {
325-
return WithProfiling ? EventCaches[2] : EventCaches[3];
303+
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
326304
}
327305
}
328306
}

source/adapters/level_zero/queue.cpp

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,28 +1267,17 @@ ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
12671267
}
12681268

12691269
ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) {
1270-
// Adding 2 initial global caches for provided scope:
1271-
// Host Scope, Device Scope.
1272-
if (EventCaches.empty()) {
1273-
for (int i = 0; i < 2; i++) {
1274-
std::list<ur_event_handle_t> *deviceEventCache =
1275-
new std::list<ur_event_handle_t>;
1276-
EventCaches.push_back(deviceEventCache);
1277-
}
1278-
}
12791270
if (!Event->IsMultiDevice && Event->UrQueue) {
12801271
auto Device = Event->UrQueue->Device;
12811272
auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0]
12821273
: &EventCachesDeviceMap[1];
12831274
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
1284-
std::list<ur_event_handle_t> *deviceEventCache =
1285-
new std::list<ur_event_handle_t>;
1286-
EventCaches.push_back(deviceEventCache);
1287-
(*EventCachesMap)[Device] = deviceEventCache;
1275+
EventCaches.emplace_back();
1276+
EventCachesMap->insert(std::make_pair(Device, EventCaches.size() - 1));
12881277
}
1289-
(*EventCachesMap)[Device]->emplace_back(Event);
1278+
EventCaches[EventCachesMap->at(Device)].emplace_back(Event);
12901279
} else {
1291-
auto Cache = Event->isHostVisible() ? EventCaches[0] : EventCaches[1];
1280+
auto Cache = Event->isHostVisible() ? &EventCaches[0] : &EventCaches[1];
12921281
Cache->emplace_back(Event);
12931282
}
12941283
return UR_RESULT_SUCCESS;
@@ -1312,12 +1301,10 @@ ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) {
13121301
if (!UrQueue->RefCount.decrementAndTest())
13131302
return UR_RESULT_SUCCESS;
13141303

1315-
for (auto Cache : UrQueue->EventCaches) {
1316-
for (auto Event : *Cache) {
1304+
for (auto &Cache : UrQueue->EventCaches) {
1305+
for (auto &Event : Cache)
13171306
UR_CALL(urEventReleaseInternal(Event));
1318-
}
1319-
Cache->clear();
1320-
delete Cache;
1307+
Cache.clear();
13211308
}
13221309

13231310
if (UrQueue->OwnZeCommandQueue) {
@@ -1475,25 +1462,15 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
14751462
bool HostVisible) {
14761463
std::list<ur_event_handle_t> *Cache;
14771464

1478-
// Adding 2 initial global caches for provided scope:
1479-
// Host Scope, Device Scope.
1480-
if (EventCaches.empty()) {
1481-
for (int i = 0; i < 2; i++) {
1482-
std::list<ur_event_handle_t> *deviceEventCache =
1483-
new std::list<ur_event_handle_t>;
1484-
EventCaches.push_back(deviceEventCache);
1485-
}
1486-
}
1487-
14881465
if (!IsMultiDevice) {
14891466
auto Device = this->Device;
1490-
Cache = HostVisible ? EventCachesDeviceMap[0][Device]
1491-
: EventCachesDeviceMap[1][Device];
1467+
Cache = HostVisible ? &EventCaches[EventCachesDeviceMap[0][Device]]
1468+
: &EventCaches[EventCachesDeviceMap[1][Device]];
14921469
if (!Cache) {
14931470
return nullptr;
14941471
}
14951472
} else {
1496-
Cache = HostVisible ? EventCaches[0] : EventCaches[1];
1473+
Cache = HostVisible ? &EventCaches[0] : &EventCaches[1];
14971474
}
14981475

14991476
// If we don't have any events, return nullptr.

source/adapters/level_zero/queue.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,8 @@ struct ur_queue_handle_t_ : _ur_object {
342342
// requested type of event. Each list contains events which can be reused
343343
// inside all command lists in the queue as described in the 2-event model.
344344
// Leftover events in the cache are released at the queue destruction.
345-
std::vector<std::list<ur_event_handle_t> *> EventCaches;
346-
std::vector<
347-
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
345+
std::vector<std::list<ur_event_handle_t>> EventCaches{2};
346+
std::vector<std::unordered_map<ur_device_handle_t, size_t>>
348347
EventCachesDeviceMap{2};
349348

350349
// adjust the queue's batch size, knowing that the current command list

0 commit comments

Comments
 (0)