Skip to content

Commit ed92c4c

Browse files
[SYCL] lazily set context on default constructed events (#6296)
According to the spec, sycl::event() constructed via empty constructor should be initialized with the default context. But simply doing so in that constructor is too expensive - many of those events are created temporarily, never needing the context. So instead, we are adding the context lazily, when it might be needed. The performance impact of both approaches has been measured, the simple direct setting of the context on all empty constructed events is deleterious whereas the method here, setting it lazily, has no appreciable degradation on performance. There are a couple places in the existing codebase where the lack of a context was used as a proxy indicating event state. This has had to change. Also, the setContextImpl routine was not only setting the context, but setting the atomic state to incomplete. This is no longer desirable. I've moved the setting of that state to its own call on the impl, and the appropriate calls now invoke it.
1 parent 2baf1de commit ed92c4c

File tree

6 files changed

+80
-39
lines changed

6 files changed

+80
-39
lines changed

sycl/source/detail/event_impl.cpp

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <CL/sycl/context.hpp>
10+
#include <CL/sycl/device_selector.hpp>
1011
#include <detail/event_impl.hpp>
1112
#include <detail/event_info.hpp>
1213
#include <detail/plugin.hpp>
@@ -31,11 +32,26 @@ namespace detail {
3132
extern xpti::trace_event_data_t *GSYCLGraphEvent;
3233
#endif
3334

34-
// Threat all devices that don't support interoperability as host devices to
35-
// avoid attempts to call method get on such events.
36-
bool event_impl::is_host() const { return MHostEvent || !MOpenCLInterop; }
35+
// If we do not yet have a context, use the default one.
36+
void event_impl::ensureContextInitialized() {
37+
if (MIsContextInitialized)
38+
return;
39+
40+
const device &SyclDevice = default_selector().select_device();
41+
this->setContextImpl(
42+
detail::queue_impl::getDefaultOrNew(detail::getSyclObjImpl(SyclDevice)));
43+
}
3744

38-
cl_event event_impl::get() const {
45+
bool event_impl::is_host() {
46+
// We'll need a context before we can answer is_host question.
47+
// setting it may adjust the values of MHostEvent and MOpenCLInterop
48+
ensureContextInitialized();
49+
// Treat all devices that don't support interoperability as host devices to
50+
// avoid attempts to call method get on such events.
51+
return MHostEvent || !MOpenCLInterop;
52+
}
53+
54+
cl_event event_impl::get() {
3955
if (!MOpenCLInterop) {
4056
throw invalid_object_error(
4157
"This instance of event doesn't support OpenCL interoperability.",
@@ -91,25 +107,32 @@ void event_impl::setComplete() {
91107
const RT::PiEvent &event_impl::getHandleRef() const { return MEvent; }
92108
RT::PiEvent &event_impl::getHandleRef() { return MEvent; }
93109

94-
const ContextImplPtr &event_impl::getContextImpl() { return MContext; }
110+
const ContextImplPtr &event_impl::getContextImpl() {
111+
ensureContextInitialized();
112+
return MContext;
113+
}
114+
115+
const plugin &event_impl::getPlugin() {
116+
ensureContextInitialized();
117+
return MContext->getPlugin();
118+
}
95119

96-
const plugin &event_impl::getPlugin() const { return MContext->getPlugin(); }
120+
void event_impl::setStateIncomplete() { MState = HES_NotComplete; }
97121

98122
void event_impl::setContextImpl(const ContextImplPtr &Context) {
99123
MHostEvent = Context->is_host();
100124
MOpenCLInterop = !MHostEvent;
101125
MContext = Context;
102-
103-
MState = HES_NotComplete;
126+
MIsContextInitialized = true;
104127
}
105128

106129
event_impl::event_impl(HostEventState State)
107130
: MIsInitialized(false), MIsFlushed(true), MState(State) {}
108131

109132
event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)
110-
: MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)),
111-
MOpenCLInterop(true), MHostEvent(false), MIsFlushed(true),
112-
MState(HES_Complete) {
133+
: MIsContextInitialized(true), MEvent(Event),
134+
MContext(detail::getSyclObjImpl(SyclContext)), MOpenCLInterop(true),
135+
MHostEvent(false), MIsFlushed(true), MState(HES_Complete) {
113136

114137
if (MContext->is_host()) {
115138
throw cl::sycl::invalid_parameter_error(
@@ -133,6 +156,8 @@ event_impl::event_impl(RT::PiEvent Event, const context &SyclContext)
133156
event_impl::event_impl(const QueueImplPtr &Queue)
134157
: MQueue{Queue}, MIsProfilingEnabled{Queue->is_host() ||
135158
Queue->MIsProfilingEnabled} {
159+
this->setContextImpl(Queue->getContextImplPtr());
160+
136161
if (Queue->is_host()) {
137162
MState.store(HES_NotComplete);
138163

@@ -262,7 +287,7 @@ void event_impl::checkProfilingPreconditions() const {
262287

263288
template <>
264289
uint64_t
265-
event_impl::get_profiling_info<info::event_profiling::command_submit>() const {
290+
event_impl::get_profiling_info<info::event_profiling::command_submit>() {
266291
checkProfilingPreconditions();
267292
if (!MHostEvent) {
268293
if (MEvent)
@@ -279,7 +304,7 @@ event_impl::get_profiling_info<info::event_profiling::command_submit>() const {
279304

280305
template <>
281306
uint64_t
282-
event_impl::get_profiling_info<info::event_profiling::command_start>() const {
307+
event_impl::get_profiling_info<info::event_profiling::command_start>() {
283308
checkProfilingPreconditions();
284309
if (!MHostEvent) {
285310
if (MEvent)
@@ -295,8 +320,7 @@ event_impl::get_profiling_info<info::event_profiling::command_start>() const {
295320
}
296321

297322
template <>
298-
uint64_t
299-
event_impl::get_profiling_info<info::event_profiling::command_end>() const {
323+
uint64_t event_impl::get_profiling_info<info::event_profiling::command_end>() {
300324
checkProfilingPreconditions();
301325
if (!MHostEvent) {
302326
if (MEvent)
@@ -310,8 +334,7 @@ event_impl::get_profiling_info<info::event_profiling::command_end>() const {
310334
return MHostProfilingInfo->getEndTime();
311335
}
312336

313-
template <>
314-
uint32_t event_impl::get_info<info::event::reference_count>() const {
337+
template <> uint32_t event_impl::get_info<info::event::reference_count>() {
315338
if (!MHostEvent && MEvent) {
316339
return get_event_info<info::event::reference_count>::get(
317340
this->getHandleRef(), this->getPlugin());
@@ -321,7 +344,7 @@ uint32_t event_impl::get_info<info::event::reference_count>() const {
321344

322345
template <>
323346
info::event_command_status
324-
event_impl::get_info<info::event::command_execution_status>() const {
347+
event_impl::get_info<info::event::command_execution_status>() {
325348
if (MState == HES_Discarded)
326349
return info::event_command_status::ext_oneapi_unknown;
327350

@@ -344,13 +367,9 @@ void HostProfilingInfo::start() { StartTime = getTimestamp(); }
344367

345368
void HostProfilingInfo::end() { EndTime = getTimestamp(); }
346369

347-
pi_native_handle event_impl::getNative() const {
348-
if (!MContext) {
349-
static context SyclContext;
350-
MContext = getSyclObjImpl(SyclContext);
351-
MHostEvent = MContext->is_host();
352-
MOpenCLInterop = !MHostEvent;
353-
}
370+
pi_native_handle event_impl::getNative() {
371+
ensureContextInitialized();
372+
354373
auto Plugin = getPlugin();
355374
if (!MIsInitialized) {
356375
MIsInitialized = true;

sycl/source/detail/event_impl.hpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,12 @@ class event_impl {
6060
/// host device to avoid attempts to call method get on such events.
6161
//
6262
/// \return true if this event is a SYCL host event.
63-
bool is_host() const;
63+
bool is_host();
6464

6565
/// Returns a valid OpenCL event interoperability handle.
6666
///
6767
/// \return a valid instance of OpenCL cl_event.
68-
cl_event get() const;
68+
cl_event get();
6969

7070
/// Waits for the event.
7171
///
@@ -103,13 +103,13 @@ class event_impl {
103103
/// \return depends on template parameter.
104104
template <info::event_profiling param>
105105
typename info::param_traits<info::event_profiling, param>::return_type
106-
get_profiling_info() const;
106+
get_profiling_info();
107107

108108
/// Queries this SYCL event for information.
109109
///
110110
/// \return depends on the information being requested.
111111
template <info::event param>
112-
typename info::param_traits<info::event, param>::return_type get_info() const;
112+
typename info::param_traits<info::event, param>::return_type get_info();
113113

114114
~event_impl();
115115

@@ -137,7 +137,7 @@ class event_impl {
137137

138138
/// \return the Plugin associated with the context of this event.
139139
/// Should be called when this is not a Host Event.
140-
const plugin &getPlugin() const;
140+
const plugin &getPlugin();
141141

142142
/// Associate event with the context.
143143
///
@@ -147,6 +147,9 @@ class event_impl {
147147
/// @param Context is a shared pointer to an instance of valid context_impl.
148148
void setContextImpl(const ContextImplPtr &Context);
149149

150+
/// Clear the event state
151+
void setStateIncomplete();
152+
150153
/// Returns command that is associated with the event.
151154
///
152155
/// Scheduler mutex must be locked in read mode when this is called.
@@ -169,7 +172,7 @@ class event_impl {
169172
/// Gets the native handle of the SYCL event.
170173
///
171174
/// \return a native handle.
172-
pi_native_handle getNative() const;
175+
pi_native_handle getNative();
173176

174177
/// Returns vector of event dependencies.
175178
///
@@ -220,11 +223,15 @@ class event_impl {
220223
void instrumentationEpilog(void *TelementryEvent, const std::string &Name,
221224
int32_t StreamID, uint64_t IId) const;
222225
void checkProfilingPreconditions() const;
223-
mutable bool MIsInitialized = true;
224-
mutable RT::PiEvent MEvent = nullptr;
225-
mutable ContextImplPtr MContext;
226-
mutable bool MOpenCLInterop = false;
227-
mutable bool MHostEvent = true;
226+
// Events constructed without a context will lazily use the default context
227+
// when needed.
228+
void ensureContextInitialized();
229+
bool MIsInitialized = true;
230+
bool MIsContextInitialized = false;
231+
RT::PiEvent MEvent = nullptr;
232+
ContextImplPtr MContext;
233+
bool MOpenCLInterop = false;
234+
bool MHostEvent = true;
228235
std::unique_ptr<HostProfilingInfo> MHostProfilingInfo;
229236
void *MCommand = nullptr;
230237
std::weak_ptr<queue_impl> MQueue;
@@ -251,6 +258,10 @@ class event_impl {
251258

252259
std::mutex MMutex;
253260
std::condition_variable cv;
261+
262+
friend std::vector<RT::PiEvent>
263+
getOrWaitEvents(std::vector<cl::sycl::event> DepEvents,
264+
std::shared_ptr<cl::sycl::detail::context_impl> Context);
254265
};
255266

256267
} // namespace detail

sycl/source/detail/helpers.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ std::vector<RT::PiEvent> getOrWaitEvents(std::vector<cl::sycl::event> DepEvents,
2323
std::vector<RT::PiEvent> Events;
2424
for (auto SyclEvent : DepEvents) {
2525
auto SyclEventImplPtr = detail::getSyclObjImpl(SyclEvent);
26+
// throwaway events created with default constructor will not have a context
27+
// (which is set lazily) calling is_host(), getContextImpl() would set that
28+
// context, which we wish to avoid as it is expensive.
29+
if (SyclEventImplPtr->MIsContextInitialized == false) {
30+
continue;
31+
}
2632
if (SyclEventImplPtr->is_host() ||
2733
SyclEventImplPtr->getContextImpl() != Context) {
2834
SyclEventImplPtr->waitInternal();

sycl/source/detail/queue_impl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ prepareUSMEvent(const std::shared_ptr<detail::queue_impl> &QueueImpl,
4949
auto EventImpl = std::make_shared<detail::event_impl>(QueueImpl);
5050
EventImpl->getHandleRef() = NativeEvent;
5151
EventImpl->setContextImpl(detail::getSyclObjImpl(QueueImpl->get_context()));
52+
EventImpl->setStateIncomplete();
5253
return detail::createSyclObjFromImpl<event>(EventImpl);
5354
}
5455

sycl/source/detail/scheduler/commands.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ Command::Command(CommandType Type, QueueImplPtr Queue)
399399
MSubmittedQueue = MQueue;
400400
MEvent->setCommand(this);
401401
MEvent->setContextImpl(MQueue->getContextImplPtr());
402+
MEvent->setStateIncomplete();
402403
MEnqueueStatus = EnqueueResultT::SyclEnqueueReady;
403404

404405
#ifdef XPTI_ENABLE_INSTRUMENTATION
@@ -1091,6 +1092,7 @@ pi_int32 ReleaseCommand::enqueueImp() {
10911092

10921093
EventImplPtr UnmapEventImpl(new event_impl(Queue));
10931094
UnmapEventImpl->setContextImpl(Queue->getContextImplPtr());
1095+
UnmapEventImpl->setStateIncomplete();
10941096
RT::PiEvent &UnmapEvent = UnmapEventImpl->getHandleRef();
10951097

10961098
void *Src = CurAllocaIsHost
@@ -1293,9 +1295,9 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
12931295
MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)),
12941296
MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)),
12951297
MDstAllocaCmd(DstAllocaCmd) {
1296-
if (!MSrcQueue->is_host())
1298+
if (!MSrcQueue->is_host()) {
12971299
MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
1298-
1300+
}
12991301
emitInstrumentationDataProxy();
13001302
}
13011303

@@ -1475,8 +1477,9 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq,
14751477
: Command(CommandType::COPY_MEMORY, std::move(DstQueue)),
14761478
MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)),
14771479
MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) {
1478-
if (!MSrcQueue->is_host())
1480+
if (!MSrcQueue->is_host()) {
14791481
MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
1482+
}
14801483

14811484
emitInstrumentationDataProxy();
14821485
}

sycl/source/handler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ event handler::finalize() {
285285
} else {
286286
NewEvent = std::make_shared<detail::event_impl>(MQueue);
287287
NewEvent->setContextImpl(MQueue->getContextImplPtr());
288+
NewEvent->setStateIncomplete();
288289
OutEvent = &NewEvent->getHandleRef();
289290

290291
if (PI_SUCCESS != EnqueueKernel())

0 commit comments

Comments
 (0)