From aa04ce16b3a06e0f11ff2602d7c5f46c1a3fbd6f Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 28 Jan 2025 15:18:15 +0000 Subject: [PATCH] Add Native Command support to command-buffer Enables support for using native commands inside a command-buffer. --- include/ur_api.h | 96 ++++++++++++++ include/ur_api_funcs.def | 2 + include/ur_ddi.h | 16 +++ include/ur_print.h | 22 ++++ include/ur_print.hpp | 81 ++++++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 37 ++++++ scripts/core/exp-command-buffer.yml | 65 ++++++++++ scripts/core/registry.yml | 6 + source/adapters/cuda/command_buffer.cpp | 46 +++++++ source/adapters/cuda/ur_interface_loader.cpp | 2 + source/adapters/hip/command_buffer.cpp | 45 +++++++ source/adapters/hip/ur_interface_loader.cpp | 3 +- source/adapters/level_zero/command_buffer.cpp | 39 ++++++ .../level_zero/ur_interface_loader.cpp | 4 + .../level_zero/ur_interface_loader.hpp | 10 ++ source/adapters/level_zero/v2/api.cpp | 15 +++ source/adapters/mock/ur_mockddi.cpp | 117 ++++++++++++++++++ source/adapters/native_cpu/command_buffer.cpp | 15 +++ .../native_cpu/ur_interface_loader.cpp | 2 + source/adapters/opencl/command_buffer.cpp | 41 ++++++ source/adapters/opencl/common.hpp | 9 ++ .../adapters/opencl/extension_functions.def | 1 + .../adapters/opencl/ur_interface_loader.cpp | 2 + .../loader/layers/sanitizer/asan/asan_ddi.cpp | 2 + source/loader/layers/tracing/ur_trcddi.cpp | 104 ++++++++++++++++ source/loader/layers/validation/ur_valddi.cpp | 83 +++++++++++++ source/loader/loader.def.in | 4 + source/loader/loader.map.in | 4 + source/loader/ur_ldrddi.cpp | 91 ++++++++++++++ source/loader/ur_libapi.cpp | 83 +++++++++++++ source/loader/ur_print.cpp | 16 +++ source/ur_api.cpp | 68 ++++++++++ 32 files changed, 1130 insertions(+), 1 deletion(-) diff --git a/include/ur_api.h b/include/ur_api.h index 764391527f..e8b4c46c2a 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -429,6 +429,10 @@ typedef enum ur_function_t { 
UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, /// Enumerator for ::urPhysicalMemGetInfo UR_FUNCTION_PHYSICAL_MEM_GET_INFO = 249, + /// Enumerator for ::urCommandBufferAppendNativeCommandExp + UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP = 250, + /// Enumerator for ::urCommandBufferGetNativeHandleExp + UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP = 252, /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -10992,6 +10996,51 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( /// [out][optional] Handle to this command. ur_exp_command_buffer_command_handle_t *phCommand); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function adding work through the native API to be executed +/// immediately. +typedef void (*ur_exp_command_buffer_native_command_function_t)( + /// [in][out] pointer to data to be passed to callback + void *pUserData); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append nodes through a native backend API +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnNativeCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. 
+ ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] child command-buffer added as a graph node; CUDA/HIP only + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. + uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command. + ur_exp_command_buffer_sync_point_t *pSyncPoint); + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -11218,6 +11267,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( /// [out][optional] bytes returned in command-buffer property size_t *pPropSizeRet); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Return platform native command-buffer handle. +/// +/// @details +/// - Retrieved native handle can be used for direct interaction with the +/// native platform driver. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phNativeCommandBuffer` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the adapter has no underlying equivalent handle. +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer.
+ ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. + ur_native_handle_t *phNativeCommandBuffer); + #if !defined(__GNUC__) #pragma endregion #endif @@ -14174,6 +14247,20 @@ typedef struct ur_command_buffer_append_usm_advise_exp_params_t { ur_exp_command_buffer_command_handle_t **pphCommand; } ur_command_buffer_append_usm_advise_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferAppendNativeCommandExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_append_native_command_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + ur_exp_command_buffer_native_command_function_t *ppfnNativeCommand; + void **ppData; + ur_exp_command_buffer_handle_t *phChildCommandBuffer; + uint32_t *pnumSyncPointsInWaitList; + const ur_exp_command_buffer_sync_point_t **ppSyncPointWaitList; + ur_exp_command_buffer_sync_point_t **ppSyncPoint; +} ur_command_buffer_append_native_command_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferEnqueueExp /// @details Each entry is a pointer to the parameter passed to the function; @@ -14227,6 +14314,15 @@ typedef struct ur_command_buffer_get_info_exp_params_t { size_t **ppPropSizeRet; } ur_command_buffer_get_info_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urCommandBufferGetNativeHandleExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_command_buffer_get_native_handle_exp_params_t { + ur_exp_command_buffer_handle_t *phCommandBuffer; + ur_native_handle_t 
**pphNativeCommandBuffer; +} ur_command_buffer_get_native_handle_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index 8c25dde67f..76a9f2a2d7 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -181,11 +181,13 @@ _UR_API(urCommandBufferAppendMemBufferReadRectExp) _UR_API(urCommandBufferAppendMemBufferFillExp) _UR_API(urCommandBufferAppendUSMPrefetchExp) _UR_API(urCommandBufferAppendUSMAdviseExp) +_UR_API(urCommandBufferAppendNativeCommandExp) _UR_API(urCommandBufferEnqueueExp) _UR_API(urCommandBufferUpdateKernelLaunchExp) _UR_API(urCommandBufferUpdateSignalEventExp) _UR_API(urCommandBufferUpdateWaitEventsExp) _UR_API(urCommandBufferGetInfoExp) +_UR_API(urCommandBufferGetNativeHandleExp) _UR_API(urUsmP2PEnablePeerAccessExp) _UR_API(urUsmP2PDisablePeerAccessExp) _UR_API(urUsmP2PPeerAccessGetInfoExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index c64aaa8d46..d513bea215 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1590,6 +1590,15 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendUSMAdviseExp_t)( const ur_event_handle_t *, ur_exp_command_buffer_sync_point_t *, ur_event_handle_t *, ur_exp_command_buffer_command_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferAppendNativeCommandExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferAppendNativeCommandExp_t)( + ur_exp_command_buffer_handle_t, + ur_exp_command_buffer_native_command_function_t, void *, + ur_exp_command_buffer_handle_t, uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief 
Function-pointer for urCommandBufferEnqueueExp typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferEnqueueExp_t)( @@ -1619,6 +1628,11 @@ typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferGetInfoExp_t)( ur_exp_command_buffer_handle_t, ur_exp_command_buffer_info_t, size_t, void *, size_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urCommandBufferGetNativeHandleExp +typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferGetNativeHandleExp_t)( + ur_exp_command_buffer_handle_t, ur_native_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Table of CommandBufferExp functions pointers typedef struct ur_command_buffer_exp_dditable_t { @@ -1639,11 +1653,13 @@ typedef struct ur_command_buffer_exp_dditable_t { ur_pfnCommandBufferAppendMemBufferFillExp_t pfnAppendMemBufferFillExp; ur_pfnCommandBufferAppendUSMPrefetchExp_t pfnAppendUSMPrefetchExp; ur_pfnCommandBufferAppendUSMAdviseExp_t pfnAppendUSMAdviseExp; + ur_pfnCommandBufferAppendNativeCommandExp_t pfnAppendNativeCommandExp; ur_pfnCommandBufferEnqueueExp_t pfnEnqueueExp; ur_pfnCommandBufferUpdateKernelLaunchExp_t pfnUpdateKernelLaunchExp; ur_pfnCommandBufferUpdateSignalEventExp_t pfnUpdateSignalEventExp; ur_pfnCommandBufferUpdateWaitEventsExp_t pfnUpdateWaitEventsExp; ur_pfnCommandBufferGetInfoExp_t pfnGetInfoExp; + ur_pfnCommandBufferGetNativeHandleExp_t pfnGetNativeHandleExp; } ur_command_buffer_exp_dditable_t; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/ur_print.h b/include/ur_print.h index f58133bb8a..d223268374 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -3161,6 +3161,17 @@ urPrintCommandBufferAppendUsmAdviseExpParams( const struct ur_command_buffer_append_usm_advise_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// 
+/// @brief Print ur_command_buffer_append_native_command_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL +urPrintCommandBufferAppendNativeCommandExpParams( + const struct ur_command_buffer_append_native_command_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_command_buffer_enqueue_exp_params_t struct /// @returns @@ -3214,6 +3225,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferGetInfoExpParams( const struct ur_command_buffer_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_command_buffer_get_native_handle_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL +urPrintCommandBufferGetNativeHandleExpParams( + const struct ur_command_buffer_get_native_handle_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_usm_p2p_enable_peer_access_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 5c5f573477..f5ee722a19 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -1177,6 +1177,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_PHYSICAL_MEM_GET_INFO: os << "UR_FUNCTION_PHYSICAL_MEM_GET_INFO"; break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP: + os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP"; + break; + case UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP: + os << 
"UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -18635,6 +18641,52 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the +/// ur_command_buffer_append_native_command_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_append_native_command_exp_params_t *params) { + + os << ".hCommandBuffer = "; + + ur::details::printPtr(os, *(params->phCommandBuffer)); + + os << ", "; + os << ".pfnNativeCommand = "; + + os << reinterpret_cast(*(params->ppfnNativeCommand)); + + os << ", "; + os << ".pData = "; + + ur::details::printPtr(os, *(params->ppData)); + + os << ", "; + os << ".hChildCommandBuffer = "; + + ur::details::printPtr(os, *(params->phChildCommandBuffer)); + + os << ", "; + os << ".numSyncPointsInWaitList = "; + + os << *(params->pnumSyncPointsInWaitList); + + os << ", "; + os << ".pSyncPointWaitList = "; + + ur::details::printPtr(os, *(params->ppSyncPointWaitList)); + + os << ", "; + os << ".pSyncPoint = "; + + ur::details::printPtr(os, *(params->ppSyncPoint)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_command_buffer_enqueue_exp_params_t type /// @returns @@ -18797,6 +18849,27 @@ operator<<(std::ostream &os, return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the +/// ur_command_buffer_get_native_handle_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream & +operator<<(std::ostream &os, [[maybe_unused]] const struct + ur_command_buffer_get_native_handle_exp_params_t *params) { + + os << ".hCommandBuffer = "; + + ur::details::printPtr(os, *(params->phCommandBuffer)); + + os << ", "; + os 
<< ".phNativeCommandBuffer = "; + + ur::details::printPtr(os, *(params->pphNativeCommandBuffer)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t /// type @@ -20064,6 +20137,10 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, os << (const struct ur_command_buffer_append_usm_advise_exp_params_t *) params; } break; + case UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP: { + os << (const struct ur_command_buffer_append_native_command_exp_params_t *) + params; + } break; case UR_FUNCTION_COMMAND_BUFFER_ENQUEUE_EXP: { os << (const struct ur_command_buffer_enqueue_exp_params_t *)params; } break; @@ -20082,6 +20159,10 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, case UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP: { os << (const struct ur_command_buffer_get_info_exp_params_t *)params; } break; + case UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP: { + os << (const struct ur_command_buffer_get_native_handle_exp_params_t *) + params; + } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 1a4925e83f..f7ad0d391d 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -119,6 +119,7 @@ Currently only the following commands are supported: * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp +* ${x}CommandBufferAppendNativeCommandExp It is planned to eventually support any command type from the Core API which can actually be appended to the equivalent adapter native constructs. @@ -209,6 +210,38 @@ command-buffer, before the code path returns to user code for the user to enqueue the second command-buffer. 
Resulting in the first command-buffer's wait node completing too early for the intended overall executing ordering. +Native Commands +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +The command-buffer interface enables user interop with native backend APIs. +Through ${x}CommandBufferAppendNativeCommandExp the user can immediately invoke +some native API calls that add commands to the command-buffer in a way that the +UR is aware of. In doing so, the UR adapter can respect the dependencies of the +native commands with the other UR command-buffer commands. + +In order for UR to guarantee correct synchronization of commands enqueued +within the native API through the function passed to +${x}CommandBufferAppendNativeCommandExp, the ${x}_exp_command_buffer_handle_t +arguments must only use the native command-buffer accessed through +${x}CommandBufferGetNativeHandleExp. Use of a native command-buffer that is +not a native command-buffer returned by ${x}CommandBufferGetNativeHandleExp +results in undefined behavior. + +The ${x}_exp_command_buffer_handle_t ``hChildCommandBuffer`` parameter to +${x}CommandBufferAppendNativeCommandExp is used by the CUDA & HIP adapters +to implement this feature, but is ignored by Level-Zero and OpenCL. This +represents a child graph node that will be added to the parent graph, with +the child graph node expressing the sync-point dependencies and returned +sync point. This child graph object will be packed into the ``void* pData`` +argument that will be given to the user in the ``pfnNativeCommand`` callback +for adding the native nodes to the command-buffer. + +Level-Zero & OpenCL backends use barrier nodes to enforce the dependencies +on the user added nodes, rather than using an append child graph API. As a +result the native command-buffer object for ``hCommandBuffer`` should +be packed into ``void* pData``, as the adapters will ignore the +``hChildCommandBuffer`` parameter. 
+ Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -485,11 +518,13 @@ Functions * ${x}CommandBufferAppendMemBufferFillExp * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp +* ${x}CommandBufferAppendNativeCommandExp * ${x}CommandBufferEnqueueExp * ${x}CommandBufferUpdateKernelLaunchExp * ${x}CommandBufferUpdateSignalEventExp * ${x}CommandBufferUpdateWaitEventsExp * ${x}CommandBufferGetInfoExp +* ${x}CommandBufferGetNativeHandleExp Changelog -------------------------------------------------------------------------------- @@ -514,6 +549,8 @@ Changelog +-----------+-------------------------------------------------------+ | 1.7 | Remove command handle reference counting and querying | +-----------+-------------------------------------------------------+ +| 1.8 | Support native commands. | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 11ea144d53..20f6a637f1 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -1130,6 +1130,52 @@ returns: - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES --- #-------------------------------------------------------------------------- +type: fptr_typedef +desc: "Function adding work through the native API to be executed immediately." 
+name: $x_exp_command_buffer_native_command_function_t +return: void +params: + - type: void* + name: pUserData + desc: "[in][out] pointer to data to be passed to callback" +--- #-------------------------------------------------------------------------- +type: function +desc: "Append nodes through a native backend API" +class: $xCommandBuffer +name: AppendNativeCommandExp +params: + - type: $x_exp_command_buffer_handle_t + name: hCommandBuffer + desc: "[in] handle of the command-buffer object." + - type: $x_exp_command_buffer_native_command_function_t + desc: "[in] function calling the native underlying API, to be executed immediately." + name: pfnNativeCommand + - type: void* + name: pData + desc: "[in][optional] data used by pfnNativeCommand" + - type: $x_exp_command_buffer_handle_t + name: hChildCommandBuffer + desc: "[in][optional] child command-buffer added as a graph node; CUDA/HIP only" + - type: uint32_t + name: numSyncPointsInWaitList + desc: "[in] The number of sync points in the provided dependency list." + - type: "const $x_exp_command_buffer_sync_point_t*" + name: pSyncPointWaitList + desc: "[in][optional] A list of sync points that this command depends on. + May be ignored if command-buffer is in-order." + - type: "$x_exp_command_buffer_sync_point_t*" + name: pSyncPoint + desc: "[out][optional] sync point associated with this command." +returns: + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP: + - "`pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0`" + - "`pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0`" + - $X_RESULT_ERROR_INVALID_MEM_OBJECT + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- type: function desc: "Submit a command-buffer for execution on a queue."
class: $xCommandBuffer @@ -1287,3 +1333,22 @@ returns: - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP - $X_RESULT_ERROR_OUT_OF_RESOURCES - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY +--- #-------------------------------------------------------------------------- +type: function +desc: "Return platform native command-buffer handle." +class: $xCommandBuffer +name: GetNativeHandleExp +details: + - "Retrieved native handle can be used for direct interaction with the native platform driver." +params: + - type: "$x_exp_command_buffer_handle_t" + name: hCommandBuffer + desc: | + [in] handle of the command-buffer. + - type: $x_native_handle_t* + name: phNativeCommandBuffer + desc: | + [out] a pointer to the native handle of the command-buffer. +returns: + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If the adapter has no underlying equivalent handle." diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index c774642482..4ca389d276 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -601,6 +601,12 @@ etors: - name: PHYSICAL_MEM_GET_INFO desc: Enumerator for $xPhysicalMemGetInfo value: '249' +- name: COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP + desc: Enumerator for $xCommandBufferAppendNativeCommandExp + value: '250' +- name: COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP + desc: Enumerator for $xCommandBufferGetNativeHandleExp + value: '252' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 0d274e6c15..88893d730b 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -1467,3 +1467,49 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + void *pData, 
ur_exp_command_buffer_handle_t hChildCommandBuffer, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + try { + // Call user-defined function immediately + pfnNativeCommand(pData); + + // Cuda graph node returned by interop_handle::getNativeGraph() in callback + CUgraph ChildGraph = hChildCommandBuffer->CudaGraph; + + // Dependencies of node + std::vector<CUgraphNode> DepsList; + UR_CHECK_ERROR(getNodesFromSyncPoints( + hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + + // Add user defined node to graph as a subgraph + CUgraphNode GraphNode; + UR_CHECK_ERROR( + cuGraphAddChildGraphNode(&GraphNode, hCommandBuffer->CudaGraph, + DepsList.data(), DepsList.size(), ChildGraph)); + auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + if (pSyncPoint) { + *pSyncPoint = SyncPoint; + } + + return UR_RESULT_SUCCESS; + } catch (ur_result_t Err) { + return Err; + } catch (CUresult CuErr) { + return mapErrorUR(CuErr); + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_native_handle_t *phNativeCommandBuffer) { + + *phNativeCommandBuffer = + reinterpret_cast<ur_native_handle_t>(hCommandBuffer->CudaGraph); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index d701c93d44..c080f0ffb8 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -301,6 +301,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; + pDdiTable->pfnAppendNativeCommandExp = urCommandBufferAppendNativeCommandExp; + pDdiTable->pfnGetNativeHandleExp =
urCommandBufferGetNativeHandleExp; return retVal; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 4c65e61dab..79c88c4fe2 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -1100,3 +1100,48 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + void *pData, ur_exp_command_buffer_handle_t hChildCommandBuffer, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + try { + // Call user-defined function immediately + pfnNativeCommand(pData); + + // HIP graph node returned by interop_handle::getNativeGraph() in callback + hipGraph_t ChildGraph = hChildCommandBuffer->HIPGraph; + + // Dependencies of node + std::vector<hipGraphNode_t> DepsList; + UR_CHECK_ERROR(getNodesFromSyncPoints( + hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList)); + + // Add user defined node to graph as a subgraph + hipGraphNode_t GraphNode; + UR_CHECK_ERROR(hipGraphAddChildGraphNode( + &GraphNode, hCommandBuffer->HIPGraph, DepsList.data(), DepsList.size(), + ChildGraph)); + auto SyncPoint = hCommandBuffer->addSyncPoint(GraphNode); + if (pSyncPoint) { + *pSyncPoint = SyncPoint; + } + + return UR_RESULT_SUCCESS; + } catch (ur_result_t Err) { + return Err; + } catch (...)
{ + return UR_RESULT_ERROR_UNKNOWN; + } +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_native_handle_t *phNativeCommandBuffer) { + *phNativeCommandBuffer = + reinterpret_cast<ur_native_handle_t>(hCommandBuffer->HIPGraph); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 77e9aa3a40..1ede6afd94 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -298,7 +298,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; - + pDdiTable->pfnAppendNativeCommandExp = urCommandBufferAppendNativeCommandExp; + pDdiTable->pfnGetNativeHandleExp = urCommandBufferGetNativeHandleExp; return retVal; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 879ee0f1cc..496efac474 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -2199,4 +2199,43 @@ urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +ur_result_t urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + void *pData, ur_exp_command_buffer_handle_t, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + + // TODO - copy command-list?
+ + std::vector<ze_event_handle_t> ZeEventList; + ze_event_handle_t ZeLaunchEvent = nullptr; + UR_CALL(createSyncPointAndGetZeEvents( + UR_COMMAND_ENQUEUE_NATIVE_EXP, hCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, true, pSyncPoint, ZeEventList, ZeLaunchEvent)); + + ZE2UR_CALL(zeCommandListAppendBarrier, + (hCommandBuffer->ZeComputeCommandList, nullptr, ZeEventList.size(), + getPointerFromVector(ZeEventList))); + + // Call user-defined function immediately + pfnNativeCommand(pData); + + // Barrier on all commands after user defined commands. + ZE2UR_CALL(zeCommandListAppendBarrier, + (hCommandBuffer->ZeComputeCommandList, ZeLaunchEvent, 0, nullptr)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t +urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_native_handle_t *phNativeCommandBuffer) { + // TODO - copy command-list? + ze_command_list_handle_t ZeCommandList = hCommandBuffer->ZeComputeCommandList; + *phNativeCommandBuffer = reinterpret_cast<ur_native_handle_t>(ZeCommandList); + return UR_RESULT_SUCCESS; +} } // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index c237581016..180a279683 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -127,6 +127,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( ur::level_zero::urCommandBufferAppendUSMPrefetchExp; pDdiTable->pfnAppendUSMAdviseExp = ur::level_zero::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendNativeCommandExp = + ur::level_zero::urCommandBufferAppendNativeCommandExp; pDdiTable->pfnEnqueueExp = ur::level_zero::urCommandBufferEnqueueExp; pDdiTable->pfnUpdateKernelLaunchExp = ur::level_zero::urCommandBufferUpdateKernelLaunchExp; @@ -135,6 +137,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnUpdateWaitEventsExp =
ur::level_zero::urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnGetInfoExp = ur::level_zero::urCommandBufferGetInfoExp; + pDdiTable->pfnGetNativeHandleExp = + ur::level_zero::urCommandBufferGetNativeHandleExp; return result; } diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 8803b86b07..659a55c82d 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -659,6 +659,13 @@ ur_result_t urCommandBufferAppendUSMAdviseExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, ur_exp_command_buffer_command_handle_t *phCommand); +ur_result_t urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + void *pData, ur_exp_command_buffer_handle_t hChildCommandBuffer, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint); ur_result_t urCommandBufferEnqueueExp( ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -678,6 +685,9 @@ urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, ur_exp_command_buffer_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet); +ur_result_t +urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_native_handle_t *phNativeCommandBuffer); ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index 88fde2cfac..d9065d9744 100644 --- 
a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -396,4 +396,19 @@ ur_result_t urCommandBufferUpdateWaitEventsExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } +ur_result_t urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t, + ur_exp_command_buffer_native_command_function_t, void *, + ur_exp_command_buffer_handle_t, uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *) { + logger::error("{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +ur_result_t urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t, + ur_native_handle_t *) { + logger::error("{} function not implemented!", __FUNCTION__); + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} } // namespace ur::level_zero diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index c255dc0bcf..afe5c91c92 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -9747,6 +9747,68 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendNativeCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] TODO + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. 
+ uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command. + ur_exp_command_buffer_sync_point_t *pSyncPoint) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_command_buffer_append_native_command_exp_params_t params = { + &hCommandBuffer, &pfnNativeCommand, &pData, + &hChildCommandBuffer, &numSyncPointsInWaitList, &pSyncPointWaitList, + &pSyncPoint}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback( + "urCommandBufferAppendNativeCommandExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urCommandBufferAppendNativeCommandExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback( + "urCommandBufferAppendNativeCommandExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -10011,6 +10073,56 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetNativeHandleExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. + ur_native_handle_t *phNativeCommandBuffer) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_command_buffer_get_native_handle_exp_params_t params = { + &hCommandBuffer, &phNativeCommandBuffer}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback( + "urCommandBufferGetNativeHandleExp")); + if (beforeCallback) { + result = beforeCallback(&params); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urCommandBufferGetNativeHandleExp")); + if (replaceCallback) { + result = replaceCallback(&params); + } else { + + *phNativeCommandBuffer = + reinterpret_cast(hCommandBuffer); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback( + "urCommandBufferGetNativeHandleExp")); + if (afterCallback) { + return afterCallback(&params); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -11046,6 +11158,9 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = driver::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendNativeCommandExp = + driver::urCommandBufferAppendNativeCommandExp; + pDdiTable->pfnEnqueueExp = driver::urCommandBufferEnqueueExp; pDdiTable->pfnUpdateKernelLaunchExp = @@ -11059,6 +11174,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnGetInfoExp = driver::urCommandBufferGetInfoExp; + pDdiTable->pfnGetNativeHandleExp = driver::urCommandBufferGetNativeHandleExp; + return result; } catch (...) { return exceptionToResult(std::current_exception()); diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index b49974b3c0..407ddae6ff 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -203,3 +203,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCommandGetInfoExp( ur_exp_command_buffer_command_info_t, size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t, + ur_exp_command_buffer_native_command_function_t, void *, + ur_exp_command_buffer_handle_t, uint32_t, + const ur_exp_command_buffer_sync_point_t *, + ur_exp_command_buffer_sync_point_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + ur_exp_command_buffer_handle_t, ur_native_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git 
a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index d94380e405..05ecc1dfb7 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -291,6 +291,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; + pDdiTable->pfnAppendNativeCommandExp = urCommandBufferAppendNativeCommandExp; + pDdiTable->pfnGetNativeHandleExp = urCommandBufferGetNativeHandleExp; return retVal; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index d78ef0121b..17c9973101 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -672,3 +672,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( return UR_RESULT_ERROR_INVALID_ENUMERATION; } + +UR_APIEXPORT +ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + void *pData, ur_exp_command_buffer_handle_t, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_ext::clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR = + nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, + cl_ext::ExtFuncPtrCache->clCommandBarrierWithWaitListKHRCache, + cl_ext::CommandBarrierWithWaitListName, + &clCommandBarrierWithWaitListKHR)); + + CL_RETURN_ON_FAILURE(clCommandBarrierWithWaitListKHR( + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, + numSyncPointsInWaitList, 
pSyncPointWaitList, nullptr, nullptr)); + + // Call user-defined function immediately + pfnNativeCommand(pData); + + // Barrier on all commands after user defined commands. + CL_RETURN_ON_FAILURE(clCommandBarrierWithWaitListKHR( + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, 0, nullptr, pSyncPoint, + nullptr)); + + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL +urCommandBufferGetNativeHandleExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_native_handle_t *phNativeCommandBuffer) { + *phNativeCommandBuffer = + reinterpret_cast(hCommandBuffer->CLCommandBuffer); + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 6857220dc2..ed3f44260d 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -215,6 +215,8 @@ CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR"; CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR"; CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR"; CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR"; +CONSTFIX char CommandBarrierWithWaitListName[] = + "clCommandBarrierWithWaitListKHR"; CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR"; CONSTFIX char GetCommandBufferInfoName[] = "clGetCommandBufferInfoKHR"; CONSTFIX char UpdateMutableCommandsName[] = "clUpdateMutableCommandsKHR"; @@ -303,6 +305,13 @@ using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( const cl_sync_point_khr *sync_point_wait_list, cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle); +using clCommandBarrierWithWaitListKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)( + cl_command_buffer_khr command_buffer, cl_command_queue command_queue, + const cl_command_properties_khr *properties, + cl_uint num_sync_points_in_wait_list, + const cl_sync_point_khr *sync_point_wait_list, + cl_sync_point_khr *sync_point, cl_mutable_command_khr 
*mutable_handle); + using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues, cl_command_buffer_khr command_buffer, diff --git a/source/adapters/opencl/extension_functions.def b/source/adapters/opencl/extension_functions.def index 3f5e3ea917..ecef132bcf 100644 --- a/source/adapters/opencl/extension_functions.def +++ b/source/adapters/opencl/extension_functions.def @@ -21,6 +21,7 @@ CL_EXTENSION_FUNC(clCommandNDRangeKernelKHR) CL_EXTENSION_FUNC(clCommandCopyBufferKHR) CL_EXTENSION_FUNC(clCommandCopyBufferRectKHR) CL_EXTENSION_FUNC(clCommandFillBufferKHR) +CL_EXTENSION_FUNC(clCommandBarrierWithWaitListKHR) CL_EXTENSION_FUNC(clEnqueueCommandBufferKHR) CL_EXTENSION_FUNC(clGetCommandBufferInfoKHR) CL_EXTENSION_FUNC(clUpdateMutableCommandsKHR) diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 44ba406e00..baa42dc3a3 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -310,6 +310,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnGetInfoExp = urCommandBufferGetInfoExp; pDdiTable->pfnUpdateWaitEventsExp = urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnUpdateSignalEventExp = urCommandBufferUpdateSignalEventExp; + pDdiTable->pfnAppendNativeCommandExp = urCommandBufferAppendNativeCommandExp; + pDdiTable->pfnGetNativeHandleExp = urCommandBufferGetNativeHandleExp; return retVal; } diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp index c11e6a77b1..5e4118884e 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -2005,6 +2005,8 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( SET_UNSUPPORTED(pDdiTable->pfnUpdateSignalEventExp); SET_UNSUPPORTED(pDdiTable->pfnUpdateWaitEventsExp); 
SET_UNSUPPORTED(pDdiTable->pfnGetInfoExp); + SET_UNSUPPORTED(pDdiTable->pfnAppendNativeCommandExp); + SET_UNSUPPORTED(pDdiTable->pfnGetNativeHandleExp); #undef SET_UNSUPPORTED diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f53e7c1c4d..caf63b7de0 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -8126,6 +8126,62 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendNativeCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] TODO + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. + uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command. 
+ ur_exp_command_buffer_sync_point_t *pSyncPoint) { + auto pfnAppendNativeCommandExp = + getContext()->urDdiTable.CommandBufferExp.pfnAppendNativeCommandExp; + + if (nullptr == pfnAppendNativeCommandExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_command_buffer_append_native_command_exp_params_t params = { + &hCommandBuffer, &pfnNativeCommand, &pData, + &hChildCommandBuffer, &numSyncPointsInWaitList, &pSyncPointWaitList, + &pSyncPoint}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP, + "urCommandBufferAppendNativeCommandExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urCommandBufferAppendNativeCommandExp\n"); + + ur_result_t result = pfnAppendNativeCommandExp( + hCommandBuffer, pfnNativeCommand, pData, hChildCommandBuffer, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP, + "urCommandBufferAppendNativeCommandExp", &params, + &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_COMMAND_BUFFER_APPEND_NATIVE_COMMAND_EXP, + &params); + logger.info(" <--- urCommandBufferAppendNativeCommandExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -8347,6 +8403,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetNativeHandleExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer. 
+ ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. + ur_native_handle_t *phNativeCommandBuffer) { + auto pfnGetNativeHandleExp = + getContext()->urDdiTable.CommandBufferExp.pfnGetNativeHandleExp; + + if (nullptr == pfnGetNativeHandleExp) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + + ur_command_buffer_get_native_handle_exp_params_t params = { + &hCommandBuffer, &phNativeCommandBuffer}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP, + "urCommandBufferGetNativeHandleExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urCommandBufferGetNativeHandleExp\n"); + + ur_result_t result = + pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer); + + getContext()->notify_end(UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP, + "urCommandBufferGetNativeHandleExp", &params, + &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_COMMAND_BUFFER_GET_NATIVE_HANDLE_EXP, &params); + logger.info(" <--- urCommandBufferGetNativeHandleExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -9318,6 +9414,10 @@ __urdlllocal ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_tracing_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnAppendNativeCommandExp = pDdiTable->pfnAppendNativeCommandExp; + pDdiTable->pfnAppendNativeCommandExp = + ur_tracing_layer::urCommandBufferAppendNativeCommandExp; + dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_tracing_layer::urCommandBufferEnqueueExp; @@ -9336,6 +9436,10 @@ __urdlllocal 
ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; pDdiTable->pfnGetInfoExp = ur_tracing_layer::urCommandBufferGetInfoExp; + dditable.pfnGetNativeHandleExp = pDdiTable->pfnGetNativeHandleExp; + pDdiTable->pfnGetNativeHandleExp = + ur_tracing_layer::urCommandBufferGetNativeHandleExp; + return result; } /////////////////////////////////////////////////////////////////////////////// diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 75603b3e89..fb87487ebd 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8772,6 +8772,53 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendNativeCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] TODO + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. + uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command. 
+ ur_exp_command_buffer_sync_point_t *pSyncPoint) { + auto pfnAppendNativeCommandExp = + getContext()->urDdiTable.CommandBufferExp.pfnAppendNativeCommandExp; + + if (nullptr == pfnAppendNativeCommandExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hCommandBuffer) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (NULL == pfnNativeCommand) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + + if (pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0) + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + + if (pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0) + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + } + + ur_result_t result = pfnAppendNativeCommandExp( + hCommandBuffer, pfnNativeCommand, pData, hChildCommandBuffer, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -8975,6 +9022,34 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetNativeHandleExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. 
+ ur_native_handle_t *phNativeCommandBuffer) { + auto pfnGetNativeHandleExp = + getContext()->urDdiTable.CommandBufferExp.pfnGetNativeHandleExp; + + if (nullptr == pfnGetNativeHandleExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hCommandBuffer) + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + + if (NULL == phNativeCommandBuffer) + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + ur_result_t result = + pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -9971,6 +10046,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnAppendUSMAdviseExp = ur_validation_layer::urCommandBufferAppendUSMAdviseExp; + dditable.pfnAppendNativeCommandExp = pDdiTable->pfnAppendNativeCommandExp; + pDdiTable->pfnAppendNativeCommandExp = + ur_validation_layer::urCommandBufferAppendNativeCommandExp; + dditable.pfnEnqueueExp = pDdiTable->pfnEnqueueExp; pDdiTable->pfnEnqueueExp = ur_validation_layer::urCommandBufferEnqueueExp; @@ -9989,6 +10068,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( dditable.pfnGetInfoExp = pDdiTable->pfnGetInfoExp; pDdiTable->pfnGetInfoExp = ur_validation_layer::urCommandBufferGetInfoExp; + dditable.pfnGetNativeHandleExp = pDdiTable->pfnGetNativeHandleExp; + pDdiTable->pfnGetNativeHandleExp = + ur_validation_layer::urCommandBufferGetNativeHandleExp; + return result; } diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index 1425c602d6..0febbb569a 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -31,6 +31,7 @@ EXPORTS urCommandBufferAppendMemBufferReadRectExp urCommandBufferAppendMemBufferWriteExp 
urCommandBufferAppendMemBufferWriteRectExp + urCommandBufferAppendNativeCommandExp urCommandBufferAppendUSMAdviseExp urCommandBufferAppendUSMFillExp urCommandBufferAppendUSMMemcpyExp @@ -39,6 +40,7 @@ EXPORTS urCommandBufferEnqueueExp urCommandBufferFinalizeExp urCommandBufferGetInfoExp + urCommandBufferGetNativeHandleExp urCommandBufferReleaseExp urCommandBufferRetainExp urCommandBufferUpdateKernelLaunchExp @@ -209,6 +211,7 @@ EXPORTS urPrintCommandBufferAppendMemBufferReadRectExpParams urPrintCommandBufferAppendMemBufferWriteExpParams urPrintCommandBufferAppendMemBufferWriteRectExpParams + urPrintCommandBufferAppendNativeCommandExpParams urPrintCommandBufferAppendUsmAdviseExpParams urPrintCommandBufferAppendUsmFillExpParams urPrintCommandBufferAppendUsmMemcpyExpParams @@ -217,6 +220,7 @@ EXPORTS urPrintCommandBufferEnqueueExpParams urPrintCommandBufferFinalizeExpParams urPrintCommandBufferGetInfoExpParams + urPrintCommandBufferGetNativeHandleExpParams urPrintCommandBufferReleaseExpParams urPrintCommandBufferRetainExpParams urPrintCommandBufferUpdateKernelLaunchExpParams diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index ebb413c985..9300d8b699 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -31,6 +31,7 @@ urCommandBufferAppendMemBufferReadRectExp; urCommandBufferAppendMemBufferWriteExp; urCommandBufferAppendMemBufferWriteRectExp; + urCommandBufferAppendNativeCommandExp; urCommandBufferAppendUSMAdviseExp; urCommandBufferAppendUSMFillExp; urCommandBufferAppendUSMMemcpyExp; @@ -39,6 +40,7 @@ urCommandBufferEnqueueExp; urCommandBufferFinalizeExp; urCommandBufferGetInfoExp; + urCommandBufferGetNativeHandleExp; urCommandBufferReleaseExp; urCommandBufferRetainExp; urCommandBufferUpdateKernelLaunchExp; @@ -209,6 +211,7 @@ urPrintCommandBufferAppendMemBufferReadRectExpParams; urPrintCommandBufferAppendMemBufferWriteExpParams; urPrintCommandBufferAppendMemBufferWriteRectExpParams; + 
urPrintCommandBufferAppendNativeCommandExpParams; urPrintCommandBufferAppendUsmAdviseExpParams; urPrintCommandBufferAppendUsmFillExpParams; urPrintCommandBufferAppendUsmMemcpyExpParams; @@ -217,6 +220,7 @@ urPrintCommandBufferEnqueueExpParams; urPrintCommandBufferFinalizeExpParams; urPrintCommandBufferGetInfoExpParams; + urPrintCommandBufferGetNativeHandleExpParams; urPrintCommandBufferReleaseExpParams; urPrintCommandBufferRetainExpParams; urPrintCommandBufferUpdateKernelLaunchExpParams; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index fc8585f9e4..2440ccfae8 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -8307,6 +8307,59 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferAppendNativeCommandExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] TODO + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. + uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command. 
+ ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnAppendNativeCommandExp = + dditable->ur.CommandBufferExp.pfnAppendNativeCommandExp; + if (nullptr == pfnAppendNativeCommandExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // convert loader handle to platform handle + hChildCommandBuffer = + (hChildCommandBuffer) + ? reinterpret_cast( + hChildCommandBuffer) + ->handle + : nullptr; + + // forward to device-platform + result = pfnAppendNativeCommandExp( + hCommandBuffer, pfnNativeCommand, pData, hChildCommandBuffer, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urCommandBufferEnqueueExp __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( @@ -8556,6 +8609,40 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urCommandBufferGetNativeHandleExp +__urdlllocal ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. 
+ ur_native_handle_t *phNativeCommandBuffer) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = + reinterpret_cast(hCommandBuffer) + ->dditable; + auto pfnGetNativeHandleExp = + dditable->ur.CommandBufferExp.pfnGetNativeHandleExp; + if (nullptr == pfnGetNativeHandleExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + // convert loader handle to platform handle + hCommandBuffer = + reinterpret_cast(hCommandBuffer) + ->handle; + + // forward to device-platform + result = pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer); + + if (UR_RESULT_SUCCESS != result) + return result; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -9482,6 +9569,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( ur_loader::urCommandBufferAppendUSMPrefetchExp; pDdiTable->pfnAppendUSMAdviseExp = ur_loader::urCommandBufferAppendUSMAdviseExp; + pDdiTable->pfnAppendNativeCommandExp = + ur_loader::urCommandBufferAppendNativeCommandExp; pDdiTable->pfnEnqueueExp = ur_loader::urCommandBufferEnqueueExp; pDdiTable->pfnUpdateKernelLaunchExp = ur_loader::urCommandBufferUpdateKernelLaunchExp; @@ -9490,6 +9579,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( pDdiTable->pfnUpdateWaitEventsExp = ur_loader::urCommandBufferUpdateWaitEventsExp; pDdiTable->pfnGetInfoExp = ur_loader::urCommandBufferGetInfoExp; + pDdiTable->pfnGetNativeHandleExp = + ur_loader::urCommandBufferGetNativeHandleExp; } else { // return pointers directly to platform's DDIs *pDdiTable = ur_loader::getContext() diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index ca06bf670c..fa63d86edb 100644 --- a/source/loader/ur_libapi.cpp +++ 
b/source/loader/ur_libapi.cpp @@ -8692,6 +8692,56 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append nodes through a native backend API +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnNativeCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] child command-buffer passed to the adapter; may be NULL + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. + uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command.
+ ur_exp_command_buffer_sync_point_t *pSyncPoint) try { + auto pfnAppendNativeCommandExp = + ur_lib::getContext() + ->urDdiTable.CommandBufferExp.pfnAppendNativeCommandExp; + if (nullptr == pfnAppendNativeCommandExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnAppendNativeCommandExp(hCommandBuffer, pfnNativeCommand, pData, + hChildCommandBuffer, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -8966,6 +9016,39 @@ ur_result_t UR_APICALL urCommandBufferGetInfoExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Return platform native command-buffer handle. +/// +/// @details +/// - Retrieved native handle can be used for direct interaction with the +/// native platform driver. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phNativeCommandBuffer` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the adapter has no underlying equivalent handle. +ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. + ur_native_handle_t *phNativeCommandBuffer) try { + auto pfnGetNativeHandleExp = + ur_lib::getContext()->urDdiTable.CommandBufferExp.pfnGetNativeHandleExp; + if (nullptr == pfnGetNativeHandleExp) + return UR_RESULT_ERROR_UNINITIALIZED; + + return pfnGetNativeHandleExp(hCommandBuffer, phNativeCommandBuffer); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index d75272a1ae..de71fafa9c 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1446,6 +1446,14 @@ ur_result_t urPrintCommandBufferAppendUsmAdviseExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintCommandBufferAppendNativeCommandExpParams( + const struct ur_command_buffer_append_native_command_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintCommandBufferEnqueueExpParams( const struct ur_command_buffer_enqueue_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1486,6 +1494,14 @@ ur_result_t urPrintCommandBufferGetInfoExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintCommandBufferGetNativeHandleExpParams( + const struct ur_command_buffer_get_native_handle_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintContextCreateParams(const struct ur_context_create_params_t *params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 0afeaa7d26..796216d35a 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -7593,6 +7593,47 @@ ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Append nodes through a native backend API +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - 
::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pfnNativeCommand` +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP +/// + `pSyncPointWaitList == NULL && numSyncPointsInWaitList > 0` +/// + `pSyncPointWaitList != NULL && numSyncPointsInWaitList == 0` +/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( + /// [in] handle of the command-buffer object. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [in] function calling the native underlying API, to be executed + /// immediately. + ur_exp_command_buffer_native_command_function_t pfnNativeCommand, + /// [in][optional] data used by pfnNativeCommand + void *pData, + /// [in][optional] child command-buffer passed to the adapter; may be NULL + ur_exp_command_buffer_handle_t hChildCommandBuffer, + /// [in] The number of sync points in the provided dependency list. + uint32_t numSyncPointsInWaitList, + /// [in][optional] A list of sync points that this command depends on. May + /// be ignored if command-buffer is in-order. + const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, + /// [out][optional] sync point associated with this command. + ur_exp_command_buffer_sync_point_t *pSyncPoint) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Submit a command-buffer for execution on a queue. /// @@ -7834,6 +7875,33 @@ ur_result_t UR_APICALL urCommandBufferGetInfoExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Return platform native command-buffer handle.
+/// +/// @details +/// - Retrieved native handle can be used for direct interaction with the +/// native platform driver. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hCommandBuffer` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == phNativeCommandBuffer` +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If the adapter has no underlying equivalent handle. +ur_result_t UR_APICALL urCommandBufferGetNativeHandleExp( + /// [in] handle of the command-buffer. + ur_exp_command_buffer_handle_t hCommandBuffer, + /// [out] a pointer to the native handle of the command-buffer. + ur_native_handle_t *phNativeCommandBuffer) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel ///