Skip to content

Commit bd24254

Browse files
igchor authored and pbalcer committed
[L0 v2] fix enqueueEventsWaitWithBarrier
Use an actual barrier when profiling is enabled to ensure we get proper profiling info.
1 parent fb2d615 commit bd24254

File tree

3 files changed

+87
-2
lines changed

3 files changed

+87
-2
lines changed

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,14 +282,46 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait(
282282
return UR_RESULT_SUCCESS;
283283
}
284284

285+
// Enqueues an explicit barrier (zeCommandListAppendBarrier) on this queue's
// command list. The barrier waits on the events in phEventWaitList and, when
// an output event is requested, signals it on completion.
//
// numEventsInWaitList - number of entries in phEventWaitList.
// phEventWaitList     - events the barrier has to wait for.
// phEvent             - optional output event representing the barrier.
//
// Returns UR_RESULT_SUCCESS when the barrier was appended (or when there is
// nothing to do). Level Zero failures are reported through ZE2UR_CALL.
ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierImpl(
    uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
    ur_event_handle_t *phEvent) {
  TRACK_SCOPE_LATENCY(
      "ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier");

  std::scoped_lock<ur_shared_mutex> lock(this->Mutex);

  if (!numEventsInWaitList && !phEvent) {
    // nop
    return UR_RESULT_SUCCESS;
  }

  auto signalEvent =
      getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER);
  auto [pWaitEvents, numWaitEvents] =
      getWaitListView(phEventWaitList, numEventsInWaitList);

  // A caller may pass a wait list without asking for an output event, in
  // which case there may be no signal event to dereference (NOTE(review):
  // confirm getSignalEvent's behavior for a null phEvent). The signal event
  // of zeCommandListAppendBarrier is optional, so pass a null handle then.
  auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr;

  ZE2UR_CALL(zeCommandListAppendBarrier,
             (handler.commandList.get(), zeSignalEvent, numWaitEvents,
              pWaitEvents));

  return UR_RESULT_SUCCESS;
}
309+
285310
// Dispatches a wait-with-barrier request: a real barrier is appended when the
// queue was created with UR_QUEUE_FLAG_PROFILING_ENABLE, otherwise the request
// is forwarded to enqueueEventsWait.
ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrier(
286311
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
287312
ur_event_handle_t *phEvent) {
288313
// For in-order queue we don't need a real barrier, just wait for
289314
// requested events in potentially different queues and add a "barrier"
290315
// event signal because it is already guaranteed that previous commands
291-
// in this queue are completed when the signal is started.
292-
return enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent);
316+
// in this queue are completed when the signal is started. However, a real
317+
// barrier is needed when profiling is enabled so that the returned event
318+
// carries proper profiling info: see zeCommandListAppendWaitOnEvents
319+
if ((flags & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0) {
320+
return enqueueEventsWaitWithBarrierImpl(numEventsInWaitList,
321+
phEventWaitList, phEvent);
322+
} else {
323+
return enqueueEventsWait(numEventsInWaitList, phEventWaitList, phEvent);
324+
}
293325
}
294326

295327
ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierExt(

source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ {
7777
const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent,
7878
ur_command_t commandType);
7979

80+
// Appends an explicit barrier (zeCommandListAppendBarrier) that waits on
// phEventWaitList and signals the event returned through phEvent. Used by
// enqueueEventsWaitWithBarrier when the queue has profiling enabled.
ur_result_t
81+
enqueueEventsWaitWithBarrierImpl(uint32_t numEventsInWaitList,
82+
const ur_event_handle_t *phEventWaitList,
83+
ur_event_handle_t *phEvent);
84+
8085
public:
8186
ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t,
8287
const ur_queue_properties_t *);

test/conformance/event/urEventGetProfilingInfo.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,51 @@ TEST_P(urEventGetProfilingInfoNegativeTest, InvalidValue) {
121121
}
122122

123123
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventGetProfilingInfoNegativeTest);
124+
125+
struct urEventGetProfilingInfoForWaitWithBarrier : uur::urProfilingQueueTest {
126+
void SetUp() override {
127+
UUR_RETURN_ON_FATAL_FAILURE(urProfilingQueueTest::SetUp());
128+
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_WRITE_ONLY, size,
129+
nullptr, &buffer));
130+
131+
input.assign(count, 42);
132+
ur_event_handle_t membuf_event = nullptr;
133+
ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, false, 0, size,
134+
input.data(), 0, nullptr,
135+
&membuf_event));
136+
137+
ASSERT_SUCCESS(
138+
urEnqueueEventsWaitWithBarrier(queue, 1, &membuf_event, &event));
139+
ASSERT_SUCCESS(urQueueFinish(queue));
140+
}
141+
142+
void TearDown() override {
143+
UUR_RETURN_ON_FATAL_FAILURE(urProfilingQueueTest::TearDown());
144+
}
145+
146+
const size_t count = 1024;
147+
const size_t size = sizeof(uint32_t) * count;
148+
ur_mem_handle_t buffer = nullptr;
149+
ur_event_handle_t event = nullptr;
150+
std::vector<uint32_t> input;
151+
};
152+
153+
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEventGetProfilingInfoForWaitWithBarrier);
154+
155+
// Verifies that the event returned by urEnqueueEventsWaitWithBarrier reports
// non-zero start and end timestamps, and that the end is later than the start.
TEST_P(urEventGetProfilingInfoForWaitWithBarrier, Success) {
    // Profiling timestamps are 64-bit values; query them directly into
    // correctly typed storage instead of reinterpreting a byte buffer.
    uint64_t start_timing = 0;
    ASSERT_SUCCESS(urEventGetProfilingInfo(
        event, UR_PROFILING_INFO_COMMAND_START, sizeof(start_timing),
        &start_timing, nullptr));
    ASSERT_NE(start_timing, 0u);

    uint64_t end_timing = 0;
    ASSERT_SUCCESS(urEventGetProfilingInfo(event, UR_PROFILING_INFO_COMMAND_END,
                                           sizeof(end_timing), &end_timing,
                                           nullptr));
    ASSERT_NE(end_timing, 0u);

    ASSERT_GT(end_timing, start_timing);
}

0 commit comments

Comments
 (0)