Skip to content

Commit b7607f0

Browse files
[SYCL] Fix the barrier dependency for OOO profiling tags (#16112)
This commit fixes an issue where the barrier before the timestamp enqueued for the profiling tag in an out-of-order queue did not prevent future work from being enqueued prior to the start/end of the profiling tag. --------- Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
1 parent 5f65de4 commit b7607f0

File tree

2 files changed

+97
-8
lines changed

2 files changed

+97
-8
lines changed

sycl/source/detail/scheduler/commands.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3441,25 +3441,47 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
34413441
case CGType::ProfilingTag: {
34423442
assert(MQueue && "Profiling tag requires a valid queue");
34433443
const auto &Adapter = MQueue->getAdapter();
3444+
3445+
bool IsInOrderQueue = MQueue->isInOrder();
3446+
ur_event_handle_t *TimestampDeps = nullptr;
3447+
size_t NumTimestampDeps = 0;
3448+
3449+
// If the queue is not in-order, the implementation will need to first
3450+
// insert a marker event that the timestamp waits for.
3451+
ur_event_handle_t PreTimestampMarkerEvent{};
3452+
if (!IsInOrderQueue) {
3453+
// FIXME: urEnqueueEventsWait on the L0 adapter requires a double-release.
3454+
// Use that instead once it has been fixed.
3455+
// See https://github.com/oneapi-src/unified-runtime/issues/2347.
3456+
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
3457+
MQueue->getHandleRef(),
3458+
/*num_events_in_wait_list=*/0,
3459+
/*event_wait_list=*/nullptr, &PreTimestampMarkerEvent);
3460+
TimestampDeps = &PreTimestampMarkerEvent;
3461+
NumTimestampDeps = 1;
3462+
}
3463+
3464+
Adapter->call<UrApiKind::urEnqueueTimestampRecordingExp>(
3465+
MQueue->getHandleRef(),
3466+
/*blocking=*/false, NumTimestampDeps, TimestampDeps, Event);
3467+
34443468
// If the queue is not in-order, we need to insert a barrier. This barrier
34453469
// does not need output events as it will implicitly enforce the following
34463470
// enqueue is blocked until it finishes.
3447-
if (!MQueue->isInOrder()) {
3471+
if (!IsInOrderQueue) {
3472+
// We also need to release the timestamp event from the marker.
3473+
Adapter->call<UrApiKind::urEventRelease>(PreTimestampMarkerEvent);
34483474
// FIXME: Due to a bug in the L0 UR adapter, we will leak events if we do
34493475
// not pass an output event to the UR call. Once that is fixed,
34503476
// this immediately-deleted event can be removed.
3451-
ur_event_handle_t PreTimestampBarrierEvent{};
3477+
ur_event_handle_t PostTimestampBarrierEvent{};
34523478
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
34533479
MQueue->getHandleRef(),
34543480
/*num_events_in_wait_list=*/0,
3455-
/*event_wait_list=*/nullptr, &PreTimestampBarrierEvent);
3456-
Adapter->call<UrApiKind::urEventRelease>(PreTimestampBarrierEvent);
3481+
/*event_wait_list=*/nullptr, &PostTimestampBarrierEvent);
3482+
Adapter->call<UrApiKind::urEventRelease>(PostTimestampBarrierEvent);
34573483
}
34583484

3459-
Adapter->call<UrApiKind::urEnqueueTimestampRecordingExp>(
3460-
MQueue->getHandleRef(),
3461-
/*blocking=*/false,
3462-
/*num_events_in_wait_list=*/0, /*event_wait_list=*/nullptr, Event);
34633485
if (Event)
34643486
MEvent->setHandle(*Event);
34653487
return UR_RESULT_SUCCESS;

sycl/unittests/Extensions/ProfilingTag.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) {
6666
"urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp);
6767
mock::getCallbacks().set_after_callback("urEventGetProfilingInfo",
6868
&after_urEventGetProfilingInfo);
69+
mock::getCallbacks().set_after_callback(
70+
"urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier);
6971

7072
sycl::context Ctx{sycl::platform()};
7173
sycl::queue Queue{Ctx, sycl::default_selector_v};
@@ -75,6 +77,39 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedDefaultQueue) {
7577

7678
sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue);
7779
ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp);
80+
// TODO: We expect two barriers for now, while marker events leak. Adjust when
81+
// addressed.
82+
ASSERT_EQ(size_t{2}, counter_urEnqueueEventsWaitWithBarrier);
83+
84+
E.get_profiling_info<sycl::info::event_profiling::command_start>();
85+
ASSERT_TRUE(LatestProfilingQuery.has_value());
86+
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START);
87+
88+
E.get_profiling_info<sycl::info::event_profiling::command_end>();
89+
ASSERT_TRUE(LatestProfilingQuery.has_value());
90+
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END);
91+
}
92+
93+
TEST_F(ProfilingTagTest, ProfilingTagSupportedInOrderQueue) {
94+
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
95+
&after_urDeviceGetInfo<true>);
96+
mock::getCallbacks().set_after_callback(
97+
"urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp);
98+
mock::getCallbacks().set_after_callback("urEventGetProfilingInfo",
99+
&after_urEventGetProfilingInfo);
100+
mock::getCallbacks().set_after_callback(
101+
"urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier);
102+
103+
sycl::context Ctx{sycl::platform()};
104+
sycl::queue Queue{
105+
Ctx, sycl::default_selector_v, {sycl::property::queue::in_order()}};
106+
sycl::device Dev = Queue.get_device();
107+
108+
ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag));
109+
110+
sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue);
111+
ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp);
112+
ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier);
78113

79114
E.get_profiling_info<sycl::info::event_profiling::command_start>();
80115
ASSERT_TRUE(LatestProfilingQuery.has_value());
@@ -113,6 +148,38 @@ TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingQueue) {
113148
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END);
114149
}
115150

151+
TEST_F(ProfilingTagTest, ProfilingTagSupportedProfilingInOrderQueue) {
152+
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
153+
&after_urDeviceGetInfo<true>);
154+
mock::getCallbacks().set_after_callback(
155+
"urEnqueueTimestampRecordingExp", &after_urEnqueueTimestampRecordingExp);
156+
mock::getCallbacks().set_after_callback("urEventGetProfilingInfo",
157+
&after_urEventGetProfilingInfo);
158+
mock::getCallbacks().set_after_callback(
159+
"urEnqueueEventsWaitWithBarrier", &after_urEnqueueEventsWaitWithBarrier);
160+
161+
sycl::context Ctx{sycl::platform()};
162+
sycl::queue Queue{Ctx,
163+
sycl::default_selector_v,
164+
{sycl::property::queue::enable_profiling(),
165+
sycl::property::queue::in_order()}};
166+
sycl::device Dev = Queue.get_device();
167+
168+
ASSERT_TRUE(Dev.has(sycl::aspect::ext_oneapi_queue_profiling_tag));
169+
170+
sycl::event E = sycl::ext::oneapi::experimental::submit_profiling_tag(Queue);
171+
ASSERT_EQ(size_t{1}, counter_urEnqueueTimestampRecordingExp);
172+
ASSERT_EQ(size_t{0}, counter_urEnqueueEventsWaitWithBarrier);
173+
174+
E.get_profiling_info<sycl::info::event_profiling::command_start>();
175+
ASSERT_TRUE(LatestProfilingQuery.has_value());
176+
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_START);
177+
178+
E.get_profiling_info<sycl::info::event_profiling::command_end>();
179+
ASSERT_TRUE(LatestProfilingQuery.has_value());
180+
ASSERT_EQ(*LatestProfilingQuery, UR_PROFILING_INFO_COMMAND_END);
181+
}
182+
116183
TEST_F(ProfilingTagTest, ProfilingTagFallbackDefaultQueue) {
117184
mock::getCallbacks().set_after_callback("urDeviceGetInfo",
118185
&after_urDeviceGetInfo<false>);

0 commit comments

Comments
 (0)