Skip to content

Commit 77187f6

Browse files
authored
Merge pull request #1698 from winstonzhang-intel/counter-based-2
[L0] Phase 2 of Counter-Based Event Implementation
2 parents f0246bd + ad11182 commit 77187f6

File tree

4 files changed

+88
-33
lines changed

4 files changed

+88
-33
lines changed

source/adapters/level_zero/context.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -560,9 +560,12 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
560560

561561
ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
562562
bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
563-
bool CounterBasedEventEnabled) {
563+
bool CounterBasedEventEnabled, bool UsingImmCmdList) {
564564
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
565565
auto Cache = getEventCache(HostVisible, WithProfiling, Device);
566+
if (CounterBasedEventEnabled) {
567+
Cache = getCounterBasedEventCache(WithProfiling, UsingImmCmdList, Device);
568+
}
566569
if (Cache->empty())
567570
return nullptr;
568571

@@ -585,9 +588,16 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
585588
Device = Event->UrQueue->Device;
586589
}
587590

588-
auto Cache = getEventCache(Event->isHostVisible(),
589-
Event->isProfilingEnabled(), Device);
590-
Cache->emplace_back(Event);
591+
if (Event->CounterBasedEventsEnabled) {
592+
auto Cache = getCounterBasedEventCache(
593+
Event->isProfilingEnabled(),
594+
!(Event->UrQueue) || (Event->UrQueue)->UsingImmCmdLists, Device);
595+
Cache->emplace_back(Event);
596+
} else {
597+
auto Cache = getEventCache(Event->isHostVisible(),
598+
Event->isProfilingEnabled(), Device);
599+
Cache->emplace_back(Event);
600+
}
591601
}
592602

593603
ur_result_t

source/adapters/level_zero/context.hpp

Lines changed: 71 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ struct ur_context_handle_t_ : _ur_object {
3939
: ZeContext{ZeContext}, Devices{Devs, Devs + NumDevices},
4040
NumDevices{NumDevices} {
4141
OwnNativeHandle = OwnZeContext;
42+
for (const auto &Device : Devices) {
43+
for (int i = 0; i < EventCacheTypeCount; i++) {
44+
EventCaches.emplace_back();
45+
EventCachesDeviceMap[i].insert(
46+
std::make_pair(Device, EventCaches.size() - 1));
47+
}
48+
}
4249
}
4350

4451
ur_context_handle_t_(ze_context_handle_t ZeContext) : ZeContext{ZeContext} {}
@@ -147,9 +154,10 @@ struct ur_context_handle_t_ : _ur_object {
147154
// head.
148155
//
149156
// Cache of event pools to which host-visible events are added.
150-
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{12};
157+
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{
158+
ZeEventPoolCacheTypeCount * 2};
151159
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
152-
ZeEventPoolCacheDeviceMap{12};
160+
ZeEventPoolCacheDeviceMap{ZeEventPoolCacheTypeCount * 2};
153161

154162
// This map will be used to determine if a pool is full or not
155163
// by storing number of empty slots available in the pool.
@@ -171,9 +179,9 @@ struct ur_context_handle_t_ : _ur_object {
171179

172180
// Caches for events.
173181
using EventCache = std::vector<std::list<ur_event_handle_t>>;
174-
EventCache EventCaches{4};
182+
EventCache EventCaches{EventCacheTypeCount};
175183
std::vector<std::unordered_map<ur_device_handle_t, size_t>>
176-
EventCachesDeviceMap{4};
184+
EventCachesDeviceMap{EventCacheTypeCount};
177185

178186
// Initialize the PI context.
179187
ur_result_t initialize();
@@ -211,25 +219,39 @@ struct ur_context_handle_t_ : _ur_object {
211219
ur_event_handle_t getEventFromContextCache(bool HostVisible,
212220
bool WithProfiling,
213221
ur_device_handle_t Device,
214-
bool CounterBasedEventEnabled);
222+
bool CounterBasedEventEnabled,
223+
bool UsingImmCmdList);
215224

216225
// Add ur_event_handle_t to cache.
217226
void addEventToContextCache(ur_event_handle_t);
218227

219-
enum EventPoolCacheType {
228+
enum ZeEventPoolCacheType {
220229
HostVisibleCacheType,
221230
HostInvisibleCacheType,
222231
HostVisibleCounterBasedRegularCacheType,
223232
HostInvisibleCounterBasedRegularCacheType,
224233
HostVisibleCounterBasedImmediateCacheType,
225-
HostInvisibleCounterBasedImmediateCacheType
234+
HostInvisibleCounterBasedImmediateCacheType,
235+
ZeEventPoolCacheTypeCount
236+
};
237+
238+
enum EventCacheType {
239+
HostVisibleProfilingCacheType,
240+
HostVisibleRegularCacheType,
241+
HostInvisibleProfilingCacheType,
242+
HostInvisibleRegularCacheType,
243+
CounterBasedImmediateCacheType,
244+
CounterBasedRegularCacheType,
245+
CounterBasedImmediateProfilingCacheType,
246+
CounterBasedRegularProfilingCacheType,
247+
EventCacheTypeCount
226248
};
227249

228250
std::list<ze_event_pool_handle_t> *
229251
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
230252
bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
231253
ze_device_handle_t ZeDevice) {
232-
EventPoolCacheType CacheType;
254+
ZeEventPoolCacheType CacheType;
233255

234256
calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
235257
UsingImmediateCmdList, CacheType);
@@ -252,7 +274,7 @@ struct ur_context_handle_t_ : _ur_object {
252274
ur_result_t calculateCacheIndex(bool HostVisible,
253275
bool CounterBasedEventEnabled,
254276
bool UsingImmediateCmdList,
255-
EventPoolCacheType &CacheType) {
277+
ZeEventPoolCacheType &CacheType) {
256278
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
257279
CacheType = HostVisibleCounterBasedRegularCacheType;
258280
} else if (CounterBasedEventEnabled && !HostVisible &&
@@ -316,34 +338,57 @@ struct ur_context_handle_t_ : _ur_object {
316338
if (HostVisible) {
317339
if (Device) {
318340
auto EventCachesMap =
319-
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
320-
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
321-
EventCaches.emplace_back();
322-
EventCachesMap->insert(
323-
std::make_pair(Device, EventCaches.size() - 1));
324-
}
341+
WithProfiling ? &EventCachesDeviceMap[HostVisibleProfilingCacheType]
342+
: &EventCachesDeviceMap[HostVisibleRegularCacheType];
343+
return &EventCaches[(*EventCachesMap)[Device]];
344+
} else {
345+
return WithProfiling ? &EventCaches[HostVisibleProfilingCacheType]
346+
: &EventCaches[HostVisibleRegularCacheType];
347+
}
348+
} else {
349+
if (Device) {
350+
auto EventCachesMap =
351+
WithProfiling
352+
? &EventCachesDeviceMap[HostInvisibleProfilingCacheType]
353+
: &EventCachesDeviceMap[HostInvisibleRegularCacheType];
354+
return &EventCaches[(*EventCachesMap)[Device]];
355+
} else {
356+
return WithProfiling ? &EventCaches[HostInvisibleProfilingCacheType]
357+
: &EventCaches[HostInvisibleRegularCacheType];
358+
}
359+
}
360+
};
361+
auto getCounterBasedEventCache(bool WithProfiling, bool UsingImmediateCmdList,
362+
ur_device_handle_t Device) {
363+
if (UsingImmediateCmdList) {
364+
if (Device) {
365+
auto EventCachesMap =
366+
WithProfiling
367+
? &EventCachesDeviceMap[CounterBasedImmediateProfilingCacheType]
368+
: &EventCachesDeviceMap[CounterBasedImmediateCacheType];
325369
return &EventCaches[(*EventCachesMap)[Device]];
326370
} else {
327-
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
371+
return WithProfiling
372+
? &EventCaches[CounterBasedImmediateProfilingCacheType]
373+
: &EventCaches[CounterBasedImmediateCacheType];
328374
}
329375
} else {
330376
if (Device) {
331377
auto EventCachesMap =
332-
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
333-
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
334-
EventCaches.emplace_back();
335-
EventCachesMap->insert(
336-
std::make_pair(Device, EventCaches.size() - 1));
337-
}
378+
WithProfiling
379+
? &EventCachesDeviceMap[CounterBasedRegularProfilingCacheType]
380+
: &EventCachesDeviceMap[CounterBasedRegularCacheType];
338381
return &EventCaches[(*EventCachesMap)[Device]];
339382
} else {
340-
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
383+
return WithProfiling
384+
? &EventCaches[CounterBasedRegularProfilingCacheType]
385+
: &EventCaches[CounterBasedRegularCacheType];
341386
}
342387
}
343388
}
344389
};
345390

346-
// Helper function to release the context, a caller must lock the platform-level
347-
// mutex guarding the container with contexts because the context can be removed
348-
// from the list of tracked contexts.
391+
// Helper function to release the context, a caller must lock the
392+
// platform-level mutex guarding the container with contexts because the
393+
// context can be removed from the list of tracked contexts.
349394
ur_result_t ContextReleaseHelper(ur_context_handle_t Context);

source/adapters/level_zero/event.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,7 +835,6 @@ urEventRelease(ur_event_handle_t Event ///< [in] handle of the event object
835835
) {
836836
Event->RefCountExternal--;
837837
UR_CALL(urEventReleaseInternal(Event));
838-
839838
return UR_RESULT_SUCCESS;
840839
}
841840

@@ -1257,7 +1256,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
12571256
}
12581257

12591258
if (auto CachedEvent = Context->getEventFromContextCache(
1260-
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled)) {
1259+
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled,
1260+
UsingImmediateCommandlists)) {
12611261
*RetEvent = CachedEvent;
12621262
return UR_RESULT_SUCCESS;
12631263
}

source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,7 @@ ur_queue_handle_t_::ur_queue_handle_t_(
11871187
return std::atoi(UrRet) != 0;
11881188
}();
11891189
this->CounterBasedEventsEnabled =
1190-
UsingImmCmdLists && isInOrderQueue() && Device->useDriverInOrderLists() &&
1190+
isInOrderQueue() && Device->useDriverInOrderLists() &&
11911191
useDriverCounterBasedEvents &&
11921192
Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound;
11931193
}

0 commit comments

Comments
 (0)