@@ -32,6 +32,8 @@ extern "C" {
32
32
// Forward declarations.
33
33
static pi_result EventRelease (pi_event Event, pi_queue LockedQueue);
34
34
static pi_result QueueRelease (pi_queue Queue, pi_queue LockedQueue);
35
+ static pi_result EventCreate (pi_context Context, bool HostVisible,
36
+ pi_event *RetEvent);
35
37
}
36
38
37
39
namespace {
@@ -186,12 +188,31 @@ static void zePrint(const char *Format, ...) {
186
188
}
187
189
}
188
190
189
- // Controls whether device-scope events are used.
190
- static const bool ZeAllHostVisibleEvents = [] {
191
+ // Controls whether device-scope events are used, and how.
192
+ static const enum EventsScope {
193
+ // All events are created host-visible (the default mode)
194
+ AllHostVisible,
195
+ // All events are created with device-scope; only when the
196
+ // host waits on them or queries their status is a proxy
197
+ // host-visible event created and set to signal after the
198
+ // original event signals.
199
+ OnDemandHostVisibleProxy,
200
+ // All events are created with device-scope; only when a
201
+ // batch of commands is submitted for execution is a final
202
+ // command appended to that batch to signal host-visible
203
+ // completion of every command in the batch.
204
+ LastCommandInBatchHostVisible
205
+ } EventsScope = [] {
191
206
const auto DeviceEventsStr =
192
207
std::getenv (" SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS" );
193
- bool result = (DeviceEventsStr ? (std::atoi (DeviceEventsStr) == 0 ) : true );
194
- return result;
208
+
209
+ switch (DeviceEventsStr ? std::atoi (DeviceEventsStr) : 0 ) {
210
+ case 1 :
211
+ return OnDemandHostVisibleProxy;
212
+ case 2 :
213
+ return LastCommandInBatchHostVisible;
214
+ }
215
+ return AllHostVisible;
195
216
}();
196
217
197
218
// Maximum number of events that can be present in an event ZePool is captured
@@ -415,14 +436,11 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
415
436
ze_event_pool_flag_t ZePoolFlag = {};
416
437
std::list<ze_event_pool_handle_t > *ZePoolCache;
417
438
418
- if (ZeAllHostVisibleEvents) {
419
- ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
420
- ZePoolCache = &ZeEventPoolCache;
421
- } else if (HostVisible) {
439
+ if (HostVisible) {
422
440
ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
423
441
ZePoolCache = &ZeHostVisibleEventPoolCache;
424
442
} else {
425
- ZePoolCache = &ZeEventPoolCache ;
443
+ ZePoolCache = &ZeDeviceScopeEventPoolCache ;
426
444
}
427
445
428
446
// Remove full pool from the cache.
@@ -468,30 +486,24 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) {
468
486
return PI_SUCCESS;
469
487
}
470
488
489
+ std::list<ze_event_pool_handle_t > *ZePoolCache;
490
+ if (Event->IsHostVisible ()) {
491
+ ZePoolCache = &ZeHostVisibleEventPoolCache;
492
+ } else {
493
+ ZePoolCache = &ZeDeviceScopeEventPoolCache;
494
+ }
495
+
471
496
// Put the empty pool to the cache of the pools.
472
497
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
473
498
if (NumEventsUnreleasedInEventPool[Event->ZeEventPool ] == 0 )
474
499
die (" Invalid event release: event pool doesn't have unreleased events" );
475
500
if (--NumEventsUnreleasedInEventPool[Event->ZeEventPool ] == 0 ) {
476
- if (ZeEventPoolCache. front () != Event->ZeEventPool ) {
477
- ZeEventPoolCache. push_back (Event->ZeEventPool );
501
+ if (ZePoolCache-> front () != Event->ZeEventPool ) {
502
+ ZePoolCache-> push_back (Event->ZeEventPool );
478
503
}
479
504
NumEventsAvailableInEventPool[Event->ZeEventPool ] = MaxNumEventsPerPool;
480
505
}
481
506
482
- if (Event->ZeHostVisibleEventPool ) {
483
- if (NumEventsUnreleasedInEventPool[Event->ZeHostVisibleEventPool ] == 0 )
484
- die (" Invalid host visible event release: host visible event pool doesn't "
485
- " have unreleased events" );
486
- if (--NumEventsUnreleasedInEventPool[Event->ZeHostVisibleEventPool ] == 0 ) {
487
- if (ZeHostVisibleEventPoolCache.front () !=
488
- Event->ZeHostVisibleEventPool ) {
489
- ZeHostVisibleEventPoolCache.push_back (Event->ZeHostVisibleEventPool );
490
- }
491
- NumEventsAvailableInEventPool[Event->ZeHostVisibleEventPool ] =
492
- MaxNumEventsPerPool;
493
- }
494
- }
495
507
return PI_SUCCESS;
496
508
}
497
509
@@ -788,12 +800,12 @@ pi_result _pi_context::finalize() {
788
800
// For example, event pool caches would be still alive.
789
801
{
790
802
std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
791
- for (auto &ZePool : ZeEventPoolCache )
803
+ for (auto &ZePool : ZeDeviceScopeEventPoolCache )
792
804
ZE_CALL (zeEventPoolDestroy, (ZePool));
793
805
for (auto &ZePool : ZeHostVisibleEventPoolCache)
794
806
ZE_CALL (zeEventPoolDestroy, (ZePool));
795
807
796
- ZeEventPoolCache .clear ();
808
+ ZeDeviceScopeEventPoolCache .clear ();
797
809
ZeHostVisibleEventPoolCache.clear ();
798
810
}
799
811
@@ -1321,6 +1333,39 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
1321
1333
KernelsToBeSubmitted.clear ();
1322
1334
}
1323
1335
1336
+ // In this mode all inner-batch events have device visibility only,
1337
+ // and we want the last command in the batch to signal a host-visible
1338
+ // event that anybody waiting for any event in the batch will
1339
+ // really be using.
1340
+ //
1341
+ if (EventsScope == LastCommandInBatchHostVisible) {
1342
+ // Create a "proxy" host-visible event.
1343
+ //
1344
+ pi_event HostVisibleEvent;
1345
+ PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
1346
+
1347
+ // Update each command's event in the command-list to "see" this
1348
+ // proxy event as a host-visible counterpart.
1349
+ for (auto &Event : CommandList->second .EventList ) {
1350
+ Event->HostVisibleEvent = HostVisibleEvent;
1351
+ PI_CALL (piEventRetain (HostVisibleEvent));
1352
+ }
1353
+
1354
+ // Decrement the reference count by 1 so all the remaining references
1355
+ // are from the other commands in this batch. This host-visible event
1356
+ // will be destroyed after all events in the batch are gone.
1357
+ PI_CALL (piEventRelease (HostVisibleEvent));
1358
+ // Indicate no cleanup is needed for this PI event as it is special.
1359
+ HostVisibleEvent->CleanedUp = true ;
1360
+
1361
+ // Finally set to signal the host-visible event at the end of the
1362
+ // command-list.
1363
+ // TODO: see if we need a barrier here (or explicit wait for all events in
1364
+ // the batch).
1365
+ ZE_CALL (zeCommandListAppendSignalEvent,
1366
+ (CommandList->first , HostVisibleEvent->ZeEvent ));
1367
+ }
1368
+
1324
1369
// Close the command list and have it ready for dispatch.
1325
1370
ZE_CALL (zeCommandListClose, (CommandList->first ));
1326
1371
// Offload command list to the GPU for asynchronous execution
@@ -1504,9 +1549,10 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList(
1504
1549
auto ZeEvent = EventList[I]->ZeEvent ;
1505
1550
1506
1551
// Poll of the host-visible events.
1507
- auto ZeEventHostVisible = EventList[I]->getHostVisibleEvent ();
1508
- if (FilterEventWaitList && ZeEventHostVisible) {
1509
- auto Res = ZE_CALL_NOCHECK (zeEventQueryStatus, (ZeEventHostVisible));
1552
+ auto HostVisibleEvent = EventList[I]->HostVisibleEvent ;
1553
+ if (FilterEventWaitList && HostVisibleEvent) {
1554
+ auto Res =
1555
+ ZE_CALL_NOCHECK (zeEventQueryStatus, (HostVisibleEvent->ZeEvent ));
1510
1556
if (Res == ZE_RESULT_SUCCESS) {
1511
1557
// Event has already completed, don't put it into the list
1512
1558
continue ;
@@ -1792,8 +1838,11 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms,
1792
1838
if (NumPlatforms)
1793
1839
*NumPlatforms = PiPlatformsCache->size ();
1794
1840
1795
- zePrint (" Using %s events\n " ,
1796
- ZeAllHostVisibleEvents ? " all host-visible" : " device-only" );
1841
+ zePrint (" Using events scope: %s\n " ,
1842
+ EventsScope == AllHostVisible ? " all host-visible"
1843
+ : EventsScope == OnDemandHostVisibleProxy
1844
+ ? " on demand host-visible proxy"
1845
+ : " only last command in a batch is host-visible" );
1797
1846
return PI_SUCCESS;
1798
1847
}
1799
1848
@@ -4724,45 +4773,16 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
4724
4773
//
4725
4774
// Events
4726
4775
//
4727
- ze_event_handle_t _pi_event::getHostVisibleEvent () const {
4728
- if (ZeAllHostVisibleEvents) {
4729
- return ZeEvent;
4730
- } else if (ZeHostVisibleEvent) {
4731
- return ZeHostVisibleEvent;
4732
- } else {
4733
- return nullptr ;
4734
- }
4735
- }
4736
-
4737
4776
pi_result
4738
- _pi_event::getOrCreateHostVisibleEvent (ze_event_handle_t &HostVisibleEvent ) {
4777
+ _pi_event::getOrCreateHostVisibleEvent (ze_event_handle_t &ZeHostVisibleEvent ) {
4739
4778
4740
- if (ZeAllHostVisibleEvents) {
4741
- HostVisibleEvent = ZeEvent;
4742
- } else if (ZeHostVisibleEvent) {
4743
- HostVisibleEvent = ZeHostVisibleEvent;
4744
- } else {
4745
- size_t Index;
4746
- ze_event_pool_handle_t ZeEventPool = {};
4747
- if (auto Res =
4748
- Context->getFreeSlotInExistingOrNewPool (ZeEventPool, Index, true ))
4749
- return Res;
4779
+ if (!HostVisibleEvent) {
4780
+ if (EventsScope != OnDemandHostVisibleProxy)
4781
+ die (" getOrCreateHostVisibleEvent: missing host-visible event" );
4750
4782
4751
- // Create a "proxy" host-visible event.
4752
- //
4753
- // TODO: consider creating just single host-visible proxy event to
4754
- // represent multiple device-scope events. E.g. have a host-visible
4755
- // event at the end of each command-list to represent device-scope
4756
- // events from every command in that command-list.
4757
- //
4758
- ZeStruct<ze_event_desc_t > ZeEventDesc;
4759
- ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
4760
- ZeEventDesc.wait = 0 ;
4761
- ZeEventDesc.index = Index;
4762
-
4763
- ZE_CALL (zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeHostVisibleEvent));
4764
- ZeHostVisibleEventPool = ZeEventPool;
4765
- HostVisibleEvent = ZeHostVisibleEvent;
4783
+ // Create a "proxy" host-visible event on demand.
4784
+ PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
4785
+ HostVisibleEvent->CleanedUp = true ;
4766
4786
4767
4787
// Submit the command(s) signalling the proxy event to the queue.
4768
4788
// We have to first submit a wait for the device-only event for which this
@@ -4783,36 +4803,41 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent) {
4783
4803
ZE_CALL (zeCommandListAppendWaitOnEvents,
4784
4804
(CommandList->first , 1 , &ZeEvent));
4785
4805
ZE_CALL (zeCommandListAppendSignalEvent,
4786
- (CommandList->first , ZeHostVisibleEvent ));
4806
+ (CommandList->first , HostVisibleEvent-> ZeEvent ));
4787
4807
4788
4808
if (auto Res = Queue->executeCommandList (CommandList, false , OkToBatch))
4789
4809
return Res;
4790
4810
}
4791
4811
}
4812
+
4813
+ ZeHostVisibleEvent = HostVisibleEvent->ZeEvent ;
4792
4814
return PI_SUCCESS;
4793
4815
}
4794
4816
4795
- pi_result piEventCreate (pi_context Context, pi_event *RetEvent) {
4817
+ static pi_result EventCreate (pi_context Context, bool HostVisible,
4818
+ pi_event *RetEvent) {
4796
4819
size_t Index = 0 ;
4797
4820
ze_event_pool_handle_t ZeEventPool = {};
4798
- if (auto Res = Context->getFreeSlotInExistingOrNewPool (ZeEventPool, Index))
4821
+ if (auto Res = Context->getFreeSlotInExistingOrNewPool (ZeEventPool, Index,
4822
+ HostVisible))
4799
4823
return Res;
4800
4824
4801
4825
ze_event_handle_t ZeEvent;
4802
4826
ZeStruct<ze_event_desc_t > ZeEventDesc;
4803
4827
ZeEventDesc.index = Index;
4804
4828
ZeEventDesc.wait = 0 ;
4805
- //
4806
- // Set the scope to "device" for every event. This is sufficient for global
4807
- // device access and peer device access. If needed to be waited on the host
4808
- // we are doing special handling, see piEventsWait.
4809
- //
4810
- // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
4811
- // used in some circumstances.
4812
- //
4813
- if (ZeAllHostVisibleEvents) {
4829
+
4830
+ if (HostVisible) {
4814
4831
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
4815
4832
} else {
4833
+ //
4834
+ // Set the scope to "device" for every event. This is sufficient for global
4835
+ // device access and peer device access. If needed to be seen on the host
4836
+ // we are doing special handling, see EventsScope options.
4837
+ //
4838
+ // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
4839
+ // used in some circumstances.
4840
+ //
4816
4841
ZeEventDesc.signal = 0 ;
4817
4842
}
4818
4843
@@ -4828,9 +4853,17 @@ pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
4828
4853
} catch (...) {
4829
4854
return PI_ERROR_UNKNOWN;
4830
4855
}
4856
+
4857
+ if (HostVisible)
4858
+ (*RetEvent)->HostVisibleEvent = *RetEvent;
4859
+
4831
4860
return PI_SUCCESS;
4832
4861
}
4833
4862
4863
+ pi_result piEventCreate (pi_context Context, pi_event *RetEvent) {
4864
+ return EventCreate (Context, EventsScope == AllHostVisible, RetEvent);
4865
+ }
4866
+
4834
4867
pi_result piEventGetInfo (pi_event Event, pi_event_info ParamName,
4835
4868
size_t ParamValueSize, void *ParamValue,
4836
4869
size_t *ParamValueSizeRet) {
@@ -4860,10 +4893,11 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName,
4860
4893
// Make sure that we query a host-visible event only.
4861
4894
// If one wasn't yet created then don't create it here as well, and
4862
4895
// just conservatively return that event is not yet completed.
4863
- auto ZeHostVisibleEvent = Event->getHostVisibleEvent () ;
4864
- if (ZeHostVisibleEvent ) {
4896
+ auto HostVisibleEvent = Event->HostVisibleEvent ;
4897
+ if (HostVisibleEvent ) {
4865
4898
ze_result_t ZeResult;
4866
- ZeResult = ZE_CALL_NOCHECK (zeEventQueryStatus, (ZeHostVisibleEvent));
4899
+ ZeResult =
4900
+ ZE_CALL_NOCHECK (zeEventQueryStatus, (HostVisibleEvent->ZeEvent ));
4867
4901
if (ZeResult == ZE_RESULT_SUCCESS) {
4868
4902
return getInfo (ParamValueSize, ParamValue, ParamValueSizeRet,
4869
4903
pi_int32{CL_COMPLETE}); // Untie from OpenCL
@@ -5072,15 +5106,17 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
5072
5106
if (NumEvents && !EventList) {
5073
5107
return PI_INVALID_EVENT;
5074
5108
}
5075
- // Make sure to add all host-visible "proxy" event signals if needed.
5076
- // This ensures that all signalling commands are submitted below and
5077
- // thus proxy events can be waited without a deadlock.
5078
- //
5079
- for (uint32_t I = 0 ; I < NumEvents; I++) {
5080
- ze_event_handle_t ZeHostVisibleEvent;
5081
- if (auto Res =
5082
- EventList[I]->getOrCreateHostVisibleEvent (ZeHostVisibleEvent))
5083
- return Res;
5109
+ if (EventsScope == OnDemandHostVisibleProxy) {
5110
+ // Make sure to add all host-visible "proxy" event signals if needed.
5111
+ // This ensures that all signalling commands are submitted below and
5112
+ // thus proxy events can be waited without a deadlock.
5113
+ //
5114
+ for (uint32_t I = 0 ; I < NumEvents; I++) {
5115
+ ze_event_handle_t ZeHostVisibleEvent;
5116
+ if (auto Res =
5117
+ EventList[I]->getOrCreateHostVisibleEvent (ZeHostVisibleEvent))
5118
+ return Res;
5119
+ }
5084
5120
}
5085
5121
// Submit dependent open command lists for execution, if any
5086
5122
for (uint32_t I = 0 ; I < NumEvents; I++) {
@@ -5096,10 +5132,11 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) {
5096
5132
}
5097
5133
}
5098
5134
for (uint32_t I = 0 ; I < NumEvents; I++) {
5099
- ze_event_handle_t ZeEvent = EventList[I]->getHostVisibleEvent () ;
5100
- if (!ZeEvent )
5135
+ auto HostVisibleEvent = EventList[I]->HostVisibleEvent ;
5136
+ if (!HostVisibleEvent )
5101
5137
die (" The host-visible proxy event missing" );
5102
5138
5139
+ ze_event_handle_t ZeEvent = HostVisibleEvent->ZeEvent ;
5103
5140
zePrint (" ZeEvent = %#lx\n " , pi_cast<std::uintptr_t >(ZeEvent));
5104
5141
ZE_CALL (zeHostSynchronize, (ZeEvent));
5105
5142
@@ -5159,8 +5196,12 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) {
5159
5196
if (Event->OwnZeEvent ) {
5160
5197
ZE_CALL (zeEventDestroy, (Event->ZeEvent ));
5161
5198
}
5162
- if (Event->ZeHostVisibleEvent ) {
5163
- ZE_CALL (zeEventDestroy, (Event->ZeHostVisibleEvent ));
5199
+ // It is possible that the host-visible event was never created.
5200
+ // If it was, and it is a different event from this one (not the
5201
+ // event itself), release the reference held on it.
5202
+ if (Event->HostVisibleEvent && Event->HostVisibleEvent != Event) {
5203
+ // Decrement ref-count of the host-visible proxy event.
5204
+ PI_CALL (piEventRelease (Event->HostVisibleEvent ));
5164
5205
}
5165
5206
5166
5207
auto Context = Event->Context ;
0 commit comments