Skip to content

Commit d76742e

Browse files
author
Hugh Delaney
committed
Use ScopedCommandList to get thread local CL
Same as the CUDA implementation. This means that any CommandList obtained through urQueueGetNativeHandle will be the same CommandList that is synchronized with before the interop func call.
1 parent 8020612 commit d76742e

File tree

4 files changed

+37
-1
lines changed

4 files changed

+37
-1
lines changed

source/adapters/level_zero/device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
877877
}
878878
case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: {
879879
// L0 supports enqueueing native work through urEnqueueNativeCommandExp
880-
return ReturnValue(static_cast<ur_bool_t>(false));
880+
return ReturnValue(static_cast<ur_bool_t>(true));
881881
}
882882

883883
case UR_DEVICE_INFO_ESIMD_SUPPORT: {

source/adapters/level_zero/enqueue_native.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp(
4343
// support is added
4444
UR_CALL(Queue->Context->getAvailableCommandList(
4545
Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventList));
46+
ScopedCommandList Active{Queue, CommandList->first};
4647

4748
// TODO: do we need to create a unique command type for this?
4849
ze_event_handle_t ZeEvent = nullptr;

source/adapters/level_zero/queue.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,15 @@ ur_result_t ur_queue_handle_legacy_t_::queueGetNativeHandle(
705705
) {
706706
auto Queue = this;
707707

708+
// Needed for EnqueueNativeCommandExp, so that the native queue 'got' in the
709+
// interop func is the same as the native queue used to manage dependencies
710+
// before the interop func invocation
711+
if (Queue->getThreadLocalCommandList() != ze_command_list_handle_t{0}) {
712+
auto ZeCmdList = ur_cast<ze_command_list_handle_t *>(NativeQueue);
713+
*ZeCmdList = Queue->getThreadLocalCommandList();
714+
return UR_RESULT_SUCCESS;
715+
}
716+
708717
// Lock automatically releases when this goes out of scope.
709718
std::shared_lock<ur_shared_mutex> lock(Queue->Mutex);
710719

source/adapters/level_zero/queue.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,12 @@ struct ur_queue_handle_legacy_t_ : _ur_object, public ur_queue_handle_t_ {
423423
uint32_t, const ur_event_handle_t *,
424424
ur_event_handle_t *) override;
425425

426+
// Thread local command list will be used if ScopedCommandList is active
427+
static ze_command_list_handle_t &getThreadLocalCommandList() {
  // One slot per thread, initialised to the null handle. A mutable reference
  // is handed back so callers can both inspect and overwrite the slot.
  thread_local ze_command_list_handle_t PerThreadList{0};
  return PerThreadList;
}
431+
426432
using queue_type = ur_device_handle_t_::queue_group_info_t::type;
427433
// PI queue is in general a one to many mapping to L0 native queues.
428434
struct ur_queue_group_t {
@@ -941,3 +947,23 @@ ur_result_t setSignalEvent(ur_queue_handle_legacy_t Queue, bool UseCopyEngine,
941947
ur_result_t CleanupEventListFromResetCmdList(
942948
std::vector<ur_event_handle_t> &EventListToCleanup,
943949
bool QueueLocked = false);
950+
951+
// RAII object to make hQueue command list getter methods all return the same
952+
// command list within the lifetime of this object.
953+
//
954+
// This is useful for urEnqueueNativeCommandExp where we want guarantees that
955+
// the user submitted native calls will be dispatched to a known command list,
956+
// which must be "got" within the user submitted function.
957+
class ScopedCommandList {
958+
ur_queue_handle_legacy_t hQueue;
959+
960+
public:
961+
ScopedCommandList(ur_queue_handle_legacy_t hQueue,
962+
ze_command_list_handle_t CommandList)
963+
: hQueue{hQueue} {
964+
hQueue->getThreadLocalCommandList() = CommandList;
965+
}
966+
~ScopedCommandList() {
967+
hQueue->getThreadLocalCommandList() = ze_command_list_handle_t{0};
968+
}
969+
};

0 commit comments

Comments
 (0)