From ae677c7fd5ba7887ce819270ad5e5d1e57a9d749 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Mon, 30 Sep 2024 10:06:52 +0100 Subject: [PATCH 1/6] Add initial spec for async alloc entry points First basic work in progress spec. --- include/ur_api.h | 219 ++++++++++- include/ur_api_funcs.def | 4 + include/ur_ddi.h | 50 +++ include/ur_print.h | 48 +++ include/ur_print.hpp | 364 ++++++++++++++++++ scripts/core/EXP-ASYNC-ALLOC.rst | 78 ++++ scripts/core/exp-async-alloc.yml | 216 +++++++++++ scripts/core/registry.yml | 12 + .../level_zero/ur_interface_loader.cpp | 4 + .../level_zero/ur_interface_loader.hpp | 20 + source/adapters/level_zero/v2/queue_api.cpp | 35 ++ source/adapters/level_zero/v2/queue_api.hpp | 18 + source/adapters/mock/ur_mockddi.cpp | 250 ++++++++++++ source/common/stype_map_helpers.def | 2 + source/loader/layers/tracing/ur_trcddi.cpp | 222 +++++++++++ source/loader/layers/validation/ur_valddi.cpp | 266 ++++++++++++- source/loader/loader.def.in | 10 + source/loader/loader.map.in | 10 + source/loader/ur_ldrddi.cpp | 273 +++++++++++++ source/loader/ur_libapi.cpp | 177 ++++++++- source/loader/ur_print.cpp | 49 +++ source/ur_api.cpp | 142 ++++++- tools/urinfo/urinfo.hpp | 3 + 23 files changed, 2468 insertions(+), 4 deletions(-) create mode 100644 scripts/core/EXP-ASYNC-ALLOC.rst create mode 100644 scripts/core/exp-async-alloc.yml diff --git a/include/ur_api.h b/include/ur_api.h index 8731d78c00..5913c158ed 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -230,6 +230,10 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_UPDATE_SIGNAL_EVENT_EXP = 243, ///< Enumerator for ::urCommandBufferUpdateSignalEventExp UR_FUNCTION_COMMAND_BUFFER_UPDATE_WAIT_EVENTS_EXP = 244, ///< Enumerator for ::urCommandBufferUpdateWaitEventsExp UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, ///< Enumerator for ::urBindlessImagesMapExternalLinearMemoryExp + UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP = 246, ///< Enumerator for 
::urEnqueueUSMDeviceAllocExp + UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP = 247, ///< Enumerator for ::urEnqueueUSMSharedAllocExp + UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP = 248, ///< Enumerator for ::urEnqueueUSMHostAllocExp + UR_FUNCTION_ENQUEUE_USM_FREE_EXP = 249, ///< Enumerator for ::urEnqueueUSMFreeExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -288,6 +292,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES = 0x2006, ///< ::ur_exp_sampler_cubemap_properties_t UR_STRUCTURE_TYPE_EXP_IMAGE_COPY_REGION = 0x2007, ///< ::ur_exp_image_copy_region_t UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES = 0x3000, ///< ::ur_exp_enqueue_native_command_properties_t + UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES = 0x3001, ///< ::ur_exp_async_usm_alloc_properties_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1697,6 +1702,8 @@ typedef enum ur_device_info_t { ///< backed 2D sampled image data. UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native ///< work + UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports asynchronous USM + ///< allocations /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1722,7 +1729,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5768,6 +5775,10 @@ typedef enum ur_command_t { UR_COMMAND_EXTERNAL_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp UR_COMMAND_ENQUEUE_NATIVE_EXP = 0x2004, ///< Event created by ::urEnqueueNativeCommandExp + UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP = 0x2008, ///< Event created by ::urEnqueueUSMDeviceAllocExp + UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP = 0x2010, ///< Event created by ::urEnqueueUSMSharedAllocExp + UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP = 0x2011, ///< Event created by ::urEnqueueUSMHostAllocExp + UR_COMMAND_ENQUEUE_USM_FREE_EXP = 0x2012, ///< Event created by ::urEnqueueUSMFreeExp /// @cond UR_COMMAND_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -7395,6 +7406,154 @@ urEnqueueWriteHostPipe( ///< an element of the phEventWaitList array. ); +#if !defined(__GNUC__) +#pragma endregion +#endif +// Intel 'oneAPI' Unified Runtime Experimental API for asynchronous allocations +#if !defined(__GNUC__) +#pragma region async_alloc_(experimental) +#endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Async alloc flags +typedef uint32_t ur_exp_async_usm_alloc_flags_t; +typedef enum ur_exp_async_usm_alloc_flag_t { + UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD = UR_BIT(0), ///< reserved for future use. 
+ /// @cond + UR_EXP_ASYNC_USM_ALLOC_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_async_usm_alloc_flag_t; +/// @brief Bit Mask for validating ur_exp_async_usm_alloc_flags_t +#define UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK 0xfffffffe + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Async alloc properties +typedef struct ur_exp_async_usm_alloc_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_exp_async_usm_alloc_flags_t flags; ///< [in] async alloc flags + +} ur_exp_async_usm_alloc_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t *pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< 
events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the async alloc +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t *pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the async alloc +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t *pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the async alloc +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async free +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +UR_APIEXPORT ur_result_t UR_APICALL +urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the async alloc +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -11423,6 +11582,64 @@ typedef struct ur_enqueue_kernel_launch_custom_exp_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_kernel_launch_custom_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMDeviceAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_device_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_async_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_device_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMSharedAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_shared_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_async_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_shared_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMHostAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct 
ur_enqueue_usm_host_alloc_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + const size_t *psize; + const ur_exp_async_usm_alloc_properties_t **ppProperties; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + void ***pppMem; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_host_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEnqueueUSMFreeExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_enqueue_usm_free_exp_params_t { + ur_queue_handle_t *phQueue; + ur_usm_pool_handle_t *ppPool; + void **ppMem; + uint32_t *pnumEventsInWaitList; + const ur_event_handle_t **pphEventWaitList; + ur_event_handle_t **pphEvent; +} ur_enqueue_usm_free_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index a7ca4d88a0..8da700c84d 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -127,6 +127,10 @@ _UR_API(urEnqueueDeviceGlobalVariableRead) _UR_API(urEnqueueReadHostPipe) _UR_API(urEnqueueWriteHostPipe) _UR_API(urEnqueueKernelLaunchCustomExp) +_UR_API(urEnqueueUSMDeviceAllocExp) +_UR_API(urEnqueueUSMSharedAllocExp) +_UR_API(urEnqueueUSMHostAllocExp) +_UR_API(urEnqueueUSMFreeExp) _UR_API(urEnqueueCooperativeKernelLaunchExp) _UR_API(urEnqueueTimestampRecordingExp) _UR_API(urEnqueueNativeCommandExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index b4d6f2bade..514e4e8f5a 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1462,6 +1462,52 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueKernelLaunchCustomExp_t)( const ur_event_handle_t *, 
ur_event_handle_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMDeviceAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMDeviceAllocExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + const size_t, + const ur_exp_async_usm_alloc_properties_t *, + uint32_t, + const ur_event_handle_t *, + void **, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMSharedAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMSharedAllocExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + const size_t, + const ur_exp_async_usm_alloc_properties_t *, + uint32_t, + const ur_event_handle_t *, + void **, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMHostAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMHostAllocExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + const size_t, + const ur_exp_async_usm_alloc_properties_t *, + uint32_t, + const ur_event_handle_t *, + void **, + ur_event_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEnqueueUSMFreeExp +typedef ur_result_t(UR_APICALL *ur_pfnEnqueueUSMFreeExp_t)( + ur_queue_handle_t, + ur_usm_pool_handle_t, + void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEnqueueCooperativeKernelLaunchExp typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)( @@ -1501,6 +1547,10 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueNativeCommandExp_t)( /// @brief Table of EnqueueExp functions pointers typedef struct ur_enqueue_exp_dditable_t { ur_pfnEnqueueKernelLaunchCustomExp_t pfnKernelLaunchCustomExp; + 
ur_pfnEnqueueUSMDeviceAllocExp_t pfnUSMDeviceAllocExp; + ur_pfnEnqueueUSMSharedAllocExp_t pfnUSMSharedAllocExp; + ur_pfnEnqueueUSMHostAllocExp_t pfnUSMHostAllocExp; + ur_pfnEnqueueUSMFreeExp_t pfnUSMFreeExp; ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp; ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp; ur_pfnEnqueueNativeCommandExp_t pfnNativeCommandExp; diff --git a/include/ur_print.h b/include/ur_print.h index 1dd874e5a5..ebe25669f7 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -874,6 +874,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_async_usm_alloc_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpAsyncUsmAllocFlags(enum ur_exp_async_usm_alloc_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_async_usm_alloc_properties_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpAsyncUsmAllocProperties(const struct ur_exp_async_usm_alloc_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_image_copy_flag_t enum /// @returns @@ -2034,6 +2050,38 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL 
urPrintEnqueueKernelLaunchCustomExpParams(const struct ur_enqueue_kernel_launch_custom_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_device_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmDeviceAllocExpParams(const struct ur_enqueue_usm_device_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_shared_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmSharedAllocExpParams(const struct ur_enqueue_usm_shared_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_host_alloc_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmHostAllocExpParams(const struct ur_enqueue_usm_host_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_enqueue_usm_free_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueUsmFreeExpParams(const struct ur_enqueue_usm_free_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + 
/////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_enqueue_cooperative_kernel_launch_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index a443e04f2f..79802306d7 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -194,6 +194,9 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); + template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); @@ -325,6 +328,8 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_async_usm_alloc_flag_t value); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_async_usm_alloc_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value); @@ -954,6 +959,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP: os << "UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP"; break; + case UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP: + os << 
"UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP"; + break; + case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: + os << "UR_FUNCTION_ENQUEUE_USM_FREE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -1113,6 +1130,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES: os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_NATIVE_COMMAND_PROPERTIES"; break; + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES"; + break; default: os << "unknown enumerator"; break; @@ -1374,6 +1394,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { const ur_exp_enqueue_native_command_properties_t *pstruct = (const ur_exp_enqueue_native_command_properties_t *)ptr; printPtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_EXP_ENQUEUE_USM_ALLOC_PROPERTIES: { + const ur_exp_async_usm_alloc_properties_t *pstruct = (const ur_exp_async_usm_alloc_properties_t *)ptr; + printPtr(os, pstruct); + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -2643,6 +2668,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP: os << "UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP"; break; + case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP: + os << "UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4425,6 +4453,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown 
enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -8965,6 +9005,18 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) { case UR_COMMAND_ENQUEUE_NATIVE_EXP: os << "UR_COMMAND_ENQUEUE_NATIVE_EXP"; break; + case UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP: + os << "UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP"; + break; + case UR_COMMAND_ENQUEUE_USM_FREE_EXP: + os << "UR_COMMAND_ENQUEUE_USM_FREE_EXP"; + break; default: os << "unknown enumerator"; break; @@ -9376,6 +9428,77 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_async_usm_alloc_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_async_usm_alloc_flag_t value) { + switch (value) { + case UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD: + os << "UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_async_usm_alloc_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD) == (uint32_t)UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD) { + val ^= (uint32_t)UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_EXP_ASYNC_USM_ALLOC_FLAG_TBD; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // 
namespace ur::details +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_async_usm_alloc_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_async_usm_alloc_properties_t params) { + os << "(struct ur_exp_async_usm_alloc_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".flags = "; + + ur::details::printFlag(os, + (params.flags)); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_image_copy_flag_t type /// @returns /// std::ostream & @@ -14723,6 +14846,235 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_device_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_device_alloc_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".ppMem = "; + + 
ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_shared_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_shared_alloc_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".pProperties = "; + + ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_host_alloc_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_host_alloc_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".pProperties = "; + + 
ur::details::printPtr(os, + *(params->ppProperties)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_enqueue_usm_free_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_usm_free_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); + + os << ", "; + os << ".pPool = "; + + ur::details::printPtr(os, + *(params->ppPool)); + + os << ", "; + os << ".pMem = "; + + ur::details::printPtr(os, + *(params->ppMem)); + + os << ", "; + os << ".numEventsInWaitList = "; + + os << *(params->pnumEventsInWaitList); + + os << ", "; + os << ".phEventWaitList = {"; + for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + + os << ", "; + os << ".phEvent = "; + + ur::details::printPtr(os, + *(params->pphEvent)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_enqueue_cooperative_kernel_launch_exp_params_t type /// @returns @@ -18410,6 +18762,18 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_ENQUEUE_KERNEL_LAUNCH_CUSTOM_EXP: { 
os << (const struct ur_enqueue_kernel_launch_custom_exp_params_t *)params; } break; + case UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_device_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_shared_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP: { + os << (const struct ur_enqueue_usm_host_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: { + os << (const struct ur_enqueue_usm_free_exp_params_t *)params; + } break; case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: { os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-ASYNC-ALLOC.rst b/scripts/core/EXP-ASYNC-ALLOC.rst new file mode 100644 index 0000000000..bc71c3f0a9 --- /dev/null +++ b/scripts/core/EXP-ASYNC-ALLOC.rst @@ -0,0 +1,78 @@ +<% + OneApi=tags['$OneApi'] + x=tags['$x'] + X=x.upper() +%> + +.. _experimental-async-allocations: + +================================================================================ +Async Allocation Functions +================================================================================ + +.. warning:: + + Experimental features: + + * May be replaced, updated, or removed at any time. + * Do not require maintaining API/ABI stability of their own additions over + time. + * Do not require conformance testing of their own additions. + + +Motivation +-------------------------------------------------------------------------------- + +Asynchronous allocations can allow queues to allocate and free memory between +UR command enqueues without forcing synchronization points in the asynchronous +command DAG associated with a queue. This can allow applications to compose +memory allocation and command execution asynchronously, which can improve +performance. 
+ +API +-------------------------------------------------------------------------------- + +Enums +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* ${x}_device_info_t + * ${X}_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP +* ${x}_command_t + * ${X}_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP + * ${X}_COMMAND_ENQUEUE_USM_FREE_EXP +* ${x}_exp_async_usm_alloc_flags_t + +Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +${x}_exp_async_usm_alloc_properties_t + +Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* ${x}EnqueueUSMDeviceAllocExp +* ${x}EnqueueUSMSharedAllocExp +* ${x}EnqueueUSMHostAllocExp +* ${x}EnqueueUSMFreeExp + +Changelog +-------------------------------------------------------------------------------- + ++-----------+---------------------------+ +| Revision | Changes | ++===========+===========================+ +| 1.0 | Initial Draft | ++-----------+---------------------------+ + +Support +-------------------------------------------------------------------------------- + +Adapters which support this experimental feature *must* return true for the new +``${X}_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP`` device info query. + + +Contributors +-------------------------------------------------------------------------------- + +* Hugh Delaney `hugh.delaney@codeplay.com <hugh.delaney@codeplay.com>`_ diff --git a/scripts/core/exp-async-alloc.yml b/scripts/core/exp-async-alloc.yml new file mode 100644 index 0000000000..b5935b644e --- /dev/null +++ b/scripts/core/exp-async-alloc.yml @@ -0,0 +1,216 @@ +# +# Copyright (C) 2024 Intel Corporation +# +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# See YaML.md for syntax definition +# +--- #-------------------------------------------------------------------------- +type: header +desc: "Intel $OneApi Unified Runtime Experimental API for asynchronous allocations" +ordinal: "99" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +typed_etors: true +desc: "Extension enums to $x_device_info_t to support async allocations." +name: $x_device_info_t +etors: + - name: ASYNC_USM_ALLOCATIONS_EXP + value: "0x2021" + desc: "[$x_bool_t] returns true if the device supports enqueueing of native work" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Command Type experimental enumerations." +name: $x_command_t +etors: + - name: ENQUEUE_USM_DEVICE_ALLOC_EXP + value: "0x2008" + desc: Event created by $xEnqueueDeviceAllocExp + - name: ENQUEUE_USM_SHARED_ALLOC_EXP + value: "0x2010" + desc: Event created by $xEnqueueSharedAllocExp + - name: ENQUEUE_USM_HOST_ALLOC_EXP + value: "0x2011" + desc: Event created by $xEnqueueHostAllocExp + - name: ENQUEUE_USM_FREE_EXP + value: "0x2012" + desc: Event created by $xEnqueueFreeExp + +--- #-------------------------------------------------------------------------- +type: enum +desc: "Async alloc properties" +name: $x_exp_async_usm_alloc_flags_t +etors: + - name: TBD + desc: "reserved for future use." 
+ +--- #-------------------------------------------------------------------------- +type: struct +desc: "Async alloc properties" +name: $x_exp_async_usm_alloc_properties_t +base: $x_base_properties_t +members: + - type: $x_exp_async_usm_alloc_flags_t + name: flags + desc: "[in] async alloc flags" + +--- #-------------------------------------------------------------------------- +type: enum +extend: true +desc: "Structure type experimental enumerations" +name: $x_structure_type_t +etors: + - name: EXP_ENQUEUE_USM_ALLOC_PROPERTIES + desc: $x_exp_async_usm_alloc_properties_t + value: "0x3001" + +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an async device allocation" +class: $xEnqueue +name: USMDeviceAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] USM pool descriptor" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_async_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue async alloc properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the async alloc" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an async shared allocation" +class: $xEnqueue +name: USMSharedAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] USM pool descriptor" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_async_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue async alloc properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the async alloc" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an async host allocation" +class: $xEnqueue +name: USMHostAllocExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] USM pool descriptor" + name: pPool + - type: const size_t + desc: "[in] minimum size in bytes of the USM memory object to be allocated" + name: size + - type: const $x_exp_async_usm_alloc_properties_t* + name: pProperties + desc: "[in][optional] pointer to the enqueue async alloc properties" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: void** + name: ppMem + desc: "[out] pointer to USM memory object" + - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the async alloc" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Enqueue an async host allocation" +class: $xEnqueue +name: USMFreeExp +params: + - type: $x_queue_handle_t + name: hQueue + desc: "[in] handle of the queue object" + - type: $x_usm_pool_handle_t + desc: "[in][optional] USM pool descriptor" + name: pPool + - type: void* + name: pMem + desc: "[in] pointer to USM memory object" + - type: uint32_t + name: numEventsInWaitList + desc: "[in] size of the event wait list" + - type: const $x_event_handle_t* + name: phEventWaitList + desc: | + [in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the kernel execution. + If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ - type: $x_event_handle_t* + name: phEvent + desc: "[out][optional] return an event object that identifies the async alloc" +returns: + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_NULL_POINTER + - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index f4ba983bfc..9566b81b7f 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -604,6 +604,18 @@ etors: - name: BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP desc: Enumerator for $xBindlessImagesMapExternalLinearMemoryExp value: '245' +- name: ENQUEUE_USM_DEVICE_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMDeviceAllocExp + value: '246' +- name: ENQUEUE_USM_SHARED_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMSharedAllocExp + value: '247' +- name: ENQUEUE_USM_HOST_ALLOC_EXP + desc: Enumerator for $xEnqueueUSMHostAllocExp + value: '248' +- name: ENQUEUE_USM_FREE_EXP + desc: Enumerator for $xEnqueueUSMFreeExp + value: '249' --- type: enum desc: Defines structure types diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 1c2f68c07c..921168d0f2 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -213,6 +213,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur::level_zero::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = ur::level_zero::urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = ur::level_zero::urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur::level_zero::urEnqueueUSMHostAllocExp; + pDdiTable->pfnUSMFreeExp = ur::level_zero::urEnqueueUSMFreeExp; pDdiTable->pfnCooperativeKernelLaunchExp = ur::level_zero::urEnqueueCooperativeKernelLaunchExp; pDdiTable->pfnTimestampRecordingExp = diff --git 
a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 540eab7292..315b087df6 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -461,6 +461,26 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent); +ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, + ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index ea2e931bfe..f63fd0940d 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -271,6 +271,41 @@ ur_result_t urEnqueueWriteHostPipe(ur_queue_handle_t hQueue, size, numEventsInWaitList, 
phEventWaitList, phEvent); } +ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMDeviceAllocExp(pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, + ppMem, phEvent); +} +ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMSharedAllocExp(pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, + ppMem, phEvent); +} +ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMHostAllocExp(pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, + ppMem, phEvent); +} +ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, + ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return hQueue->enqueueUSMFreeExp(pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); +} ur_result_t urBindlessImagesImageCopyExp( ur_queue_handle_t hQueue, const void *pSrc, void *pDst, const ur_image_desc_t *pSrcImageDesc, const ur_image_desc_t *pDstImageDesc, diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index bc01596d2b..b20185a55e 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ 
b/source/adapters/level_zero/v2/queue_api.hpp @@ -122,6 +122,24 @@ struct ur_queue_handle_t_ { bool, void *, size_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMDeviceAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMSharedAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, + uint32_t, const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t + enqueueUSMHostAllocExp(ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, + ur_event_handle_t *) = 0; + virtual ur_result_t enqueueUSMFreeExp(ur_usm_pool_handle_t, void *, uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) = 0; virtual ur_result_t bindlessImagesImageCopyExp( const void *, void *, const ur_image_desc_t *, const ur_image_desc_t *, const ur_image_format_t *, const ur_image_format_t *, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 1e2b788683..2428d89f0a 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -7099,6 +7099,248 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the 
enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_device_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMDeviceAllocExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urEnqueueUSMDeviceAllocExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMDeviceAllocExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_shared_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMSharedAllocExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback( + "urEnqueueUSMSharedAllocExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + 
mock::getCallbacks().get_after_callback("urEnqueueUSMSharedAllocExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_host_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMHostAllocExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEnqueueUSMHostAllocExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMHostAllocExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_enqueue_usm_free_exp_params_t params = { + &hQueue, &pPool, &pMem, &numEventsInWaitList, + &phEventWaitList, &phEvent}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urEnqueueUSMFreeExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urEnqueueUSMFreeExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + // optional output handle + if (phEvent) { + *phEvent = mock::createDummyHandle(); + } + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urEnqueueUSMFreeExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -11020,6 +11262,14 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = driver::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = driver::urEnqueueUSMDeviceAllocExp; + + pDdiTable->pfnUSMSharedAllocExp = driver::urEnqueueUSMSharedAllocExp; + + pDdiTable->pfnUSMHostAllocExp = driver::urEnqueueUSMHostAllocExp; + + pDdiTable->pfnUSMFreeExp = driver::urEnqueueUSMFreeExp; + pDdiTable->pfnCooperativeKernelLaunchExp = driver::urEnqueueCooperativeKernelLaunchExp; diff --git a/source/common/stype_map_helpers.def b/source/common/stype_map_helpers.def index c938ca6b95..0a29010b03 100644 --- a/source/common/stype_map_helpers.def +++ b/source/common/stype_map_helpers.def @@ -99,4 +99,6 @@ template <> struct stype_map : stype_map_impl {}; template <> struct stype_map : stype_map_impl {}; +template <> +struct stype_map : stype_map_impl {}; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index d6f23eab9a..d65254c0a2 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6026,6 +6026,214 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + 
pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + auto pfnUSMDeviceAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_usm_device_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, + "urEnqueueUSMDeviceAllocExp", ¶ms); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMDeviceAllocExp\n"); + + ur_result_t result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, + "urEnqueueUSMDeviceAllocExp", ¶ms, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_DEVICE_ALLOC_EXP, ¶ms); + logger.info(" <--- urEnqueueUSMDeviceAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + 
ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + auto pfnUSMSharedAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_enqueue_usm_shared_alloc_exp_params_t params = { + &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList, + &phEventWaitList, &ppMem, &phEvent}; + uint64_t instance = + getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, + "urEnqueueUSMSharedAllocExp", ¶ms); + + auto &logger = getContext()->logger; + logger.info(" ---> urEnqueueUSMSharedAllocExp\n"); + + ur_result_t result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, + "urEnqueueUSMSharedAllocExp", ¶ms, &result, + instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP, ¶ms); + logger.info(" <--- urEnqueueUSMSharedAllocExp({}) -> {};\n", + args_str.str(), result); + } + + return result; +} + 
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urEnqueueUSMHostAllocExp
+///
+/// @details
+///     - Tracing-layer wrapper: reads the next entry point from
+///       `getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp`, brackets
+///       the forwarded call with `notify_begin` / `notify_end`, and logs the
+///       call through the context logger.
+///     - NOTE(review): the `phEventWaitList` description below mentions
+///       "kernel execution" although this entry point enqueues an
+///       allocation; it looks inherited from the kernel-launch entry points.
+///       Confirm against the spec before rewording.
+///
+/// @returns
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE if the table entry is null
+///     - otherwise the result of the forwarded call, unmodified
+__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp(
+    ur_queue_handle_t hQueue, ///< [in] handle of the queue object
+    ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor
+    const size_t
+        size, ///< [in] minimum size in bytes of the USM memory object to be allocated
+    const ur_exp_async_usm_alloc_properties_t *
+        pProperties, ///< [in][optional] pointer to the enqueue async alloc properties
+    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t *
+        phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
+    ///< events that must be complete before the kernel execution.
+    ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+    void **ppMem, ///< [out] pointer to USM memory object
+    ur_event_handle_t *
+        phEvent ///< [out][optional] return an event object that identifies the async alloc
+) {
+    auto pfnUSMHostAllocExp =
+        getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp;
+
+    // Nothing to forward to: report the feature as unsupported.
+    if (nullptr == pfnUSMHostAllocExp) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    // The params struct holds the address of every argument; the same struct
+    // is handed to both notifications and to the argument printer.
+    ur_enqueue_usm_host_alloc_exp_params_t params = {
+        &hQueue, &pPool, &size, &pProperties, &numEventsInWaitList,
+        &phEventWaitList, &ppMem, &phEvent};
+    uint64_t instance =
+        getContext()->notify_begin(UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP,
+                                   "urEnqueueUSMHostAllocExp", &params);
+
+    auto &logger = getContext()->logger;
+    logger.info(" ---> urEnqueueUSMHostAllocExp\n");
+
+    ur_result_t result = pfnUSMHostAllocExp(hQueue, pPool, size, pProperties,
+                                            numEventsInWaitList,
+                                            phEventWaitList, ppMem, phEvent);
+
+    // `instance` pairs this end notification with the begin notification.
+    getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP,
+                             "urEnqueueUSMHostAllocExp", &params, &result,
+                             instance);
+
+    // Only pay for formatting the arguments when the exit line will be logged.
+    if (logger.getLevel() <= logger::Level::INFO) {
+        std::ostringstream args_str;
+        ur::extras::printFunctionParams(
+            args_str, UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP, &params);
+        logger.info(" <--- urEnqueueUSMHostAllocExp({}) -> {};\n",
+                    args_str.str(), result);
+    }
+
+    return result;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+/// @brief Intercept function for urEnqueueUSMFreeExp
+///
+/// @details
+///     - Tracing-layer wrapper: reads the next entry point from
+///       `getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp`, brackets the
+///       forwarded call with `notify_begin` / `notify_end`, and logs the call
+///       through the context logger.
+///     - NOTE(review): the `phEvent` description below says "async alloc" and
+///       the `phEventWaitList` description says "kernel execution", although
+///       this entry point enqueues a free; both look copied from other entry
+///       points. Confirm against the spec before rewording.
+///
+/// @returns
+///     - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE if the table entry is null
+///     - otherwise the result of the forwarded call, unmodified
+__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp(
+    ur_queue_handle_t hQueue, ///< [in] handle of the queue object
+    ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor
+    void *pMem, ///< [in] pointer to USM memory object
+    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+    const ur_event_handle_t *
+        phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
+    ///< events that must be complete before the kernel execution.
+    ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
+    ur_event_handle_t *
+        phEvent ///< [out][optional] return an event object that identifies the async alloc
+) {
+    auto pfnUSMFreeExp = getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp;
+
+    // Nothing to forward to: report the feature as unsupported.
+    if (nullptr == pfnUSMFreeExp) {
+        return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+    }
+
+    // The params struct holds the address of every argument; the same struct
+    // is handed to both notifications and to the argument printer.
+    ur_enqueue_usm_free_exp_params_t params = {
+        &hQueue, &pPool, &pMem, &numEventsInWaitList,
+        &phEventWaitList, &phEvent};
+    uint64_t instance = getContext()->notify_begin(
+        UR_FUNCTION_ENQUEUE_USM_FREE_EXP, "urEnqueueUSMFreeExp", &params);
+
+    auto &logger = getContext()->logger;
+    logger.info(" ---> urEnqueueUSMFreeExp\n");
+
+    ur_result_t result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList,
+                                       phEventWaitList, phEvent);
+
+    // `instance` pairs this end notification with the begin notification.
+    getContext()->notify_end(UR_FUNCTION_ENQUEUE_USM_FREE_EXP,
+                             "urEnqueueUSMFreeExp", &params, &result, instance);
+
+    // Only pay for formatting the arguments when the exit line will be logged.
+    if (logger.getLevel() <= logger::Level::INFO) {
+        std::ostringstream args_str;
+        ur::extras::printFunctionParams(
+            args_str, UR_FUNCTION_ENQUEUE_USM_FREE_EXP, &params);
+        logger.info(" <--- urEnqueueUSMFreeExp({}) -> {};\n", args_str.str(),
+                    result);
+    }
+
+    return result;
+}
+
/////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -9635,6 +9843,20 @@ __urdlllocal ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur_tracing_layer::urEnqueueKernelLaunchCustomExp; + dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_tracing_layer::urEnqueueUSMDeviceAllocExp; + + dditable.pfnUSMSharedAllocExp = pDdiTable->pfnUSMSharedAllocExp; + pDdiTable->pfnUSMSharedAllocExp = + ur_tracing_layer::urEnqueueUSMSharedAllocExp; + + dditable.pfnUSMHostAllocExp = pDdiTable->pfnUSMHostAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_tracing_layer::urEnqueueUSMHostAllocExp; + + dditable.pfnUSMFreeExp = pDdiTable->pfnUSMFreeExp; + pDdiTable->pfnUSMFreeExp = ur_tracing_layer::urEnqueueUSMFreeExp; + dditable.pfnCooperativeKernelLaunchExp = pDdiTable->pfnCooperativeKernelLaunchExp; pDdiTable->pfnCooperativeKernelLaunchExp = diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ddf40de35f..2d3382cf86 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -514,7 +514,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName) { + if (UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -6866,6 +6866,255 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + 
ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + auto pfnUSMDeviceAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == ppMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL != pProperties && + UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + 
phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + auto pfnUSMSharedAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == ppMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL != pProperties && + UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + 
getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + auto pfnUSMHostAllocExp = + getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == ppMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL != pProperties && + UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitList, ppMem, phEvent); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< 
events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + auto pfnUSMFreeExp = getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hQueue) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pMem) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (phEventWaitList != NULL && numEventsInWaitList > 0) { + for (uint32_t i = 0; i < numEventsInWaitList; ++i) { + if (phEventWaitList[i] == NULL) { + return UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST; + } + } + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hQueue)) { + getContext()->refCountContext->logInvalidReference(hQueue); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(pPool)) { + getContext()->refCountContext->logInvalidReference(pPool); + } + + ur_result_t result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -10678,6 +10927,21 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnKernelLaunchCustomExp = ur_validation_layer::urEnqueueKernelLaunchCustomExp; + dditable.pfnUSMDeviceAllocExp = pDdiTable->pfnUSMDeviceAllocExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_validation_layer::urEnqueueUSMDeviceAllocExp; + + dditable.pfnUSMSharedAllocExp = pDdiTable->pfnUSMSharedAllocExp; + pDdiTable->pfnUSMSharedAllocExp = + ur_validation_layer::urEnqueueUSMSharedAllocExp; + 
+ dditable.pfnUSMHostAllocExp = pDdiTable->pfnUSMHostAllocExp; + pDdiTable->pfnUSMHostAllocExp = + ur_validation_layer::urEnqueueUSMHostAllocExp; + + dditable.pfnUSMFreeExp = pDdiTable->pfnUSMFreeExp; + pDdiTable->pfnUSMFreeExp = ur_validation_layer::urEnqueueUSMFreeExp; + dditable.pfnCooperativeKernelLaunchExp = pDdiTable->pfnCooperativeKernelLaunchExp; pDdiTable->pfnCooperativeKernelLaunchExp = diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index c34bde6fd2..da0bacfba7 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -87,11 +87,15 @@ EXPORTS urEnqueueReadHostPipe urEnqueueTimestampRecordingExp urEnqueueUSMAdvise + urEnqueueUSMDeviceAllocExp urEnqueueUSMFill urEnqueueUSMFill2D + urEnqueueUSMFreeExp + urEnqueueUSMHostAllocExp urEnqueueUSMMemcpy urEnqueueUSMMemcpy2D urEnqueueUSMPrefetch + urEnqueueUSMSharedAllocExp urEnqueueWriteHostPipe urEventCreateWithNativeHandle urEventGetInfo @@ -285,11 +289,15 @@ EXPORTS urPrintEnqueueReadHostPipeParams urPrintEnqueueTimestampRecordingExpParams urPrintEnqueueUsmAdviseParams + urPrintEnqueueUsmDeviceAllocExpParams urPrintEnqueueUsmFillParams urPrintEnqueueUsmFill_2dParams + urPrintEnqueueUsmFreeExpParams + urPrintEnqueueUsmHostAllocExpParams urPrintEnqueueUsmMemcpyParams urPrintEnqueueUsmMemcpy_2dParams urPrintEnqueueUsmPrefetchParams + urPrintEnqueueUsmSharedAllocExpParams urPrintEnqueueWriteHostPipeParams urPrintEventCreateWithNativeHandleParams urPrintEventGetInfoParams @@ -303,6 +311,8 @@ EXPORTS urPrintEventStatus urPrintEventWaitParams urPrintExecutionInfo + urPrintExpAsyncUsmAllocFlags + urPrintExpAsyncUsmAllocProperties urPrintExpCommandBufferCommandInfo urPrintExpCommandBufferDesc urPrintExpCommandBufferInfo diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 8333ee2fa4..535e329234 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -87,11 +87,15 @@ urEnqueueReadHostPipe; urEnqueueTimestampRecordingExp; 
urEnqueueUSMAdvise; + urEnqueueUSMDeviceAllocExp; urEnqueueUSMFill; urEnqueueUSMFill2D; + urEnqueueUSMFreeExp; + urEnqueueUSMHostAllocExp; urEnqueueUSMMemcpy; urEnqueueUSMMemcpy2D; urEnqueueUSMPrefetch; + urEnqueueUSMSharedAllocExp; urEnqueueWriteHostPipe; urEventCreateWithNativeHandle; urEventGetInfo; @@ -285,11 +289,15 @@ urPrintEnqueueReadHostPipeParams; urPrintEnqueueTimestampRecordingExpParams; urPrintEnqueueUsmAdviseParams; + urPrintEnqueueUsmDeviceAllocExpParams; urPrintEnqueueUsmFillParams; urPrintEnqueueUsmFill_2dParams; + urPrintEnqueueUsmFreeExpParams; + urPrintEnqueueUsmHostAllocExpParams; urPrintEnqueueUsmMemcpyParams; urPrintEnqueueUsmMemcpy_2dParams; urPrintEnqueueUsmPrefetchParams; + urPrintEnqueueUsmSharedAllocExpParams; urPrintEnqueueWriteHostPipeParams; urPrintEventCreateWithNativeHandleParams; urPrintEventGetInfoParams; @@ -303,6 +311,8 @@ urPrintEventStatus; urPrintEventWaitParams; urPrintExecutionInfo; + urPrintExpAsyncUsmAllocFlags; + urPrintExpAsyncUsmAllocProperties; urPrintExpCommandBufferCommandInfo; urPrintExpCommandBufferDesc; urPrintExpCommandBufferInfo; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index e86bada0a0..966fd802ae 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -6117,6 +6117,273 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMDeviceAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the 
event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMDeviceAllocExp = dditable->ur.EnqueueExp.pfnUSMDeviceAllocExp; + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. 
+ if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMSharedAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMSharedAllocExp = dditable->ur.EnqueueExp.pfnUSMSharedAllocExp; + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? 
reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. + if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMHostAllocExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMHostAllocExp = dditable->ur.EnqueueExp.pfnUSMHostAllocExp; + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, + phEventWaitListLocal.data(), ppMem, phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. 
+ if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueUSMFreeExp +__urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hQueue)->dditable; + auto pfnUSMFreeExp = dditable->ur.EnqueueExp.pfnUSMFreeExp; + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hQueue = reinterpret_cast(hQueue)->handle; + + // convert loader handle to platform handle + pPool = (pPool) ? 
reinterpret_cast(pPool)->handle + : nullptr; + + // convert loader handles to platform handles + auto phEventWaitListLocal = + std::vector(numEventsInWaitList); + for (size_t i = 0; i < numEventsInWaitList; ++i) { + phEventWaitListLocal[i] = + reinterpret_cast(phEventWaitList[i])->handle; + } + + // forward to device-platform + result = pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitListLocal.data(), phEvent); + + // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. + if (UR_RESULT_SUCCESS != result && + UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) { + return result; + } + try { + // convert platform handle to loader handle + if (nullptr != phEvent) { + *phEvent = reinterpret_cast( + context->factories.ur_event_factory.getInstance(*phEvent, + dditable)); + } + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -9762,6 +10029,12 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( // return pointers to loader's DDIs pDdiTable->pfnKernelLaunchCustomExp = ur_loader::urEnqueueKernelLaunchCustomExp; + pDdiTable->pfnUSMDeviceAllocExp = + ur_loader::urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = + ur_loader::urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = ur_loader::urEnqueueUSMHostAllocExp; + pDdiTable->pfnUSMFreeExp = ur_loader::urEnqueueUSMFreeExp; pDdiTable->pfnCooperativeKernelLaunchExp = ur_loader::urEnqueueCooperativeKernelLaunchExp; pDdiTable->pfnTimestampRecordingExp = diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index a77c5916b1..2f2ddd7154 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -868,7 +868,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( 
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. /// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -6535,6 +6535,181 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + auto pfnUSMDeviceAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMDeviceAllocExp; + if (nullptr == pfnUSMDeviceAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMDeviceAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + auto pfnUSMSharedAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMSharedAllocExp; + if (nullptr == pfnUSMSharedAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMSharedAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + auto pfnUSMHostAllocExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMHostAllocExp; + if (nullptr == pfnUSMHostAllocExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMHostAllocExp(hQueue, pPool, size, pProperties, + numEventsInWaitList, phEventWaitList, ppMem, + phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async free +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc + ) try { + auto pfnUSMFreeExp = + ur_lib::getContext()->urDdiTable.EnqueueExp.pfnUSMFreeExp; + if (nullptr == pfnUSMFreeExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnUSMFreeExp(hQueue, pPool, pMem, numEventsInWaitList, + phEventWaitList, phEvent); +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 3a14d9a9de..2e8554b709 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -879,6 +879,23 @@ ur_result_t urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t +urPrintExpAsyncUsmAllocFlags(enum ur_exp_async_usm_alloc_flag_t value, + char *buffer, const size_t buff_size, + size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintExpAsyncUsmAllocProperties( + const struct ur_exp_async_usm_alloc_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size) { @@ -1746,6 +1763,38 @@ ur_result_t urPrintEnqueueKernelLaunchCustomExpParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintEnqueueUsmDeviceAllocExpParams( + const struct ur_enqueue_usm_device_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmSharedAllocExpParams( + const 
struct ur_enqueue_usm_shared_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmHostAllocExpParams( + const struct ur_enqueue_usm_host_alloc_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + +ur_result_t urPrintEnqueueUsmFreeExpParams( + const struct ur_enqueue_usm_free_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintEnqueueCooperativeKernelLaunchExpParams( const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index e375d496f8..d5f2229b3b 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -769,7 +769,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -5583,6 +5583,146 @@ ur_result_t UR_APICALL urEnqueueWriteHostPipe( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async device allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async shared allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async host allocation +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `NULL != pProperties && ::UR_EXP_ASYNC_USM_ALLOC_FLAGS_MASK & pProperties->flags` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == ppMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + const size_t + size, ///< [in] minimum size in bytes of the USM memory object to be allocated + const ur_exp_async_usm_alloc_properties_t * + pProperties, ///< [in][optional] pointer to the enqueue async alloc properties + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ void **ppMem, ///< [out] pointer to USM memory object + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Enqueue an async free +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hQueue` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pMem` +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_usm_pool_handle_t pPool, ///< [in][optional] USM pool descriptor + void *pMem, ///< [in] pointer to USM memory object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating no wait events. 
+ ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies the async alloc +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index e4e0cdb696..0853716856 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -416,5 +416,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, + UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_EXP); } } // namespace urinfo From ce7bf016d8eb93131cb52a7916b2fba7590fc8e2 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Tue, 8 Oct 2024 18:59:29 +0100 Subject: [PATCH 2/6] Add ur_usm_pool_flags_t entry Add an entry so the user can specify if the native USM pool should be used. --- include/ur_api.h | 7 ++++--- include/ur_print.hpp | 13 +++++++++++++ scripts/core/exp-async-alloc.yml | 10 ++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 5913c158ed..bbf4e6bd07 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -3350,15 +3350,16 @@ typedef enum ur_usm_device_mem_flag_t { /// @brief USM memory property flags typedef uint32_t ur_usm_pool_flags_t; typedef enum ur_usm_pool_flag_t { - UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK = UR_BIT(0), ///< All coarse-grain allocations (allocations from the driver) will be - ///< zero-initialized. + UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK = UR_BIT(0), ///< All coarse-grain allocations (allocations from the driver) will be + ///< zero-initialized. 
+ UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP = UR_BIT(1), ///< Use the native memory pool API /// @cond UR_USM_POOL_FLAG_FORCE_UINT32 = 0x7fffffff /// @endcond } ur_usm_pool_flag_t; /// @brief Bit Mask for validating ur_usm_pool_flags_t -#define UR_USM_POOL_FLAGS_MASK 0xfffffffe +#define UR_USM_POOL_FLAGS_MASK 0xfffffffc /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocation type diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 79802306d7..b7ef33a635 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -6520,6 +6520,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_usm_pool_flag_t value) case UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK: os << "UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK"; break; + case UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP: + os << "UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP"; + break; default: os << "unknown enumerator"; break; @@ -6544,6 +6547,16 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag } os << UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK; } + + if ((val & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) == (uint32_t)UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { + val ^= (uint32_t)UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP; + } if (val != 0) { std::bitset<32> bits(val); if (!first) { diff --git a/scripts/core/exp-async-alloc.yml b/scripts/core/exp-async-alloc.yml index b5935b644e..0ce5ab813e 100644 --- a/scripts/core/exp-async-alloc.yml +++ b/scripts/core/exp-async-alloc.yml @@ -23,6 +23,16 @@ etors: value: "0x2021" desc: "[$x_bool_t] returns true if the device supports enqueueing of native work" +--- #-------------------------------------------------------------------------- +type: enum +extend: true +name: $x_usm_pool_flags_t +desc: "USM memory property flag for creating a native USM pool" +etors: + - name: 
USE_NATIVE_MEMORY_POOL_EXP + value: "$X_BIT(1)" + desc: "Use the native memory pool API" + --- #-------------------------------------------------------------------------- type: enum extend: true From 92ffce58c5facd68e3bbdaa6cb59f8493959fa78 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 10 Oct 2024 10:10:19 +0100 Subject: [PATCH 3/6] WIP --- source/adapters/cuda/CMakeLists.txt | 1 + source/adapters/cuda/async_alloc.cpp | 107 +++++++++++++++++++++ source/adapters/cuda/usm.cpp | 73 ++++++++++++-- source/adapters/cuda/usm.hpp | 8 ++ source/adapters/hip/CMakeLists.txt | 1 + source/adapters/hip/async_alloc.cpp | 40 ++++++++ source/adapters/level_zero/CMakeLists.txt | 1 + source/adapters/level_zero/async_alloc.cpp | 40 ++++++++ source/adapters/native_cpu/CMakeLists.txt | 1 + source/adapters/native_cpu/async_alloc.cpp | 40 ++++++++ source/adapters/opencl/CMakeLists.txt | 1 + source/adapters/opencl/async_alloc.cpp | 40 ++++++++ 12 files changed, 346 insertions(+), 7 deletions(-) create mode 100644 source/adapters/cuda/async_alloc.cpp create mode 100644 source/adapters/hip/async_alloc.cpp create mode 100644 source/adapters/level_zero/async_alloc.cpp create mode 100644 source/adapters/native_cpu/async_alloc.cpp create mode 100644 source/adapters/opencl/async_alloc.cpp diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index b6b153a5d8..ed69dc0996 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -10,6 +10,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp diff --git a/source/adapters/cuda/async_alloc.cpp b/source/adapters/cuda/async_alloc.cpp new file mode 100644 index 0000000000..6dade0803d --- 
/dev/null +++ b/source/adapters/cuda/async_alloc.cpp @@ -0,0 +1,107 @@ +//===--------- async_alloc.cpp - CUDA Adapter -----------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include "context.hpp" +#include "enqueue.hpp" +#include "event.hpp" +#include "queue.hpp" +#include "usm.hpp" + +UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, void **ppMem, + ur_event_handle_t *phEvent) { + try { + std::unique_ptr RetImplEvent{nullptr}; + + ScopedContext Active(hQueue->getDevice()); + uint32_t StreamToken; + ur_stream_guard_ Guard; + CUstream CuStream = hQueue->getNextComputeStream( + numEventsInWaitList, phEventWaitList, Guard, &StreamToken); + + UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, + phEventWaitList)); + + if (phEvent) { + RetImplEvent = + std::unique_ptr(ur_event_handle_t_::makeNative( + UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken)); + UR_CHECK_ERROR(RetImplEvent->start()); + } + + if (pPool) { + assert(pPool->usesCudaPool()); + + } else { + UR_CHECK_ERROR(cuMemAllocAsync(reinterpret_cast(ppMem), + size, CuStream)); + } + + if (phEvent) { + UR_CHECK_ERROR(RetImplEvent->record()); + *phEvent = RetImplEvent.release(); + } + + } catch (ur_result_t Err) { + return Err; + } + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + 
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t, + ur_usm_pool_handle_t, void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +/* + +UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *pProperties, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + void **ppMem, ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, void *pMem, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} +*/ diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 8a6ac41b08..3a8b2d8e65 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -379,6 +379,67 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) : Context{Context} { + if (PoolDesc->flags & 
UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { + // TODO: this should only use the host + } + const void *pNext = PoolDesc->pNext; + while (pNext != nullptr) { + const ur_base_desc_t *BaseDesc = static_cast(pNext); + switch (BaseDesc->stype) { + case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { + const ur_usm_pool_limits_desc_t *Limits = + reinterpret_cast(BaseDesc); + for (auto &config : DisjointPoolConfigs.Configs) { + config.MaxPoolableSize = Limits->maxPoolableSize; + config.SlabMinSize = Limits->minDriverAllocSize; + } + break; + } + default: { + throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); + } + } + pNext = BaseDesc->pNext; + } + + auto MemProvider = + umf::memoryProviderMakeUnique(Context, nullptr) + .second; + + HostMemPool = + umf::poolMakeUniqueFromOps( + umfDisjointPoolOps(), std::move(MemProvider), + &this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]) + .second; + + for (const auto &Device : Context->getDevices()) { + MemProvider = + umf::memoryProviderMakeUnique(Context, Device) + .second; + DeviceMemPool = umf::poolMakeUniqueFromOps( + umfDisjointPoolOps(), std::move(MemProvider), + &this->DisjointPoolConfigs + .Configs[usm::DisjointPoolMemType::Device]) + .second; + MemProvider = + umf::memoryProviderMakeUnique(Context, Device) + .second; + SharedMemPool = umf::poolMakeUniqueFromOps( + umfDisjointPoolOps(), std::move(MemProvider), + &this->DisjointPoolConfigs + .Configs[usm::DisjointPoolMemType::Shared]) + .second; + Context->addPool(this); + } +} + +ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, + ur_device_handle_t Device, + ur_usm_pool_desc_t *PoolDesc) + : Context{Context} { + if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { + // TODO: this should only use the host + } const void *pNext = PoolDesc->pNext; while (pNext != nullptr) { const ur_base_desc_t *BaseDesc = static_cast(pNext); @@ -443,7 +504,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( 
ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool ) { // Without pool tracking we can't free pool allocations. -#ifdef UMF_ENABLE_POOL_TRACKING +#ifndef UMF_ENABLE_POOL_TRACKING + // We don't need UMF to use native mem pools + if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP)) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +#endif if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -454,12 +519,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( return Ex.getError(); } return UR_RESULT_SUCCESS; -#else - std::ignore = Context; - std::ignore = PoolDesc; - std::ignore = Pool; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -#endif } UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( diff --git a/source/adapters/cuda/usm.hpp b/source/adapters/cuda/usm.hpp index 7c6a2ea666..956c2a7359 100644 --- a/source/adapters/cuda/usm.hpp +++ b/source/adapters/cuda/usm.hpp @@ -15,6 +15,8 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); +// A ur_usm_pool_handle_t can represent different types of memory pools. It may +// sit on top of a UMF pool or a CUmemoryPool, but not both. 
struct ur_usm_pool_handle_t_ { std::atomic_uint32_t RefCount = 1; @@ -27,6 +29,8 @@ struct ur_usm_pool_handle_t_ { umf::pool_unique_handle_t SharedMemPool; umf::pool_unique_handle_t HostMemPool; + CUmemoryPool CUmemPool{0}; + ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc); @@ -37,6 +41,10 @@ struct ur_usm_pool_handle_t_ { uint32_t getReferenceCount() const noexcept { return RefCount; } bool hasUMFPool(umf_memory_pool_t *umf_pool); + + // To be used if ur_usm_pool_handle_t represents a CUmemoryPool + bool usesCudaPool() const { return CUmemPool != CUmemoryPool{0}; }; + CUmemoryPool getCudaPool() { return CUmemPool; }; }; // Exception type to pass allocation errors diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 9113d7b1ca..4ec5bfdc23 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -59,6 +59,7 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp diff --git a/source/adapters/hip/async_alloc.cpp b/source/adapters/hip/async_alloc.cpp new file mode 100644 index 0000000000..bf7bcb10bd --- /dev/null +++ b/source/adapters/hip/async_alloc.cpp @@ -0,0 +1,40 @@ +//===--------- async_alloc.cpp - HIP Adapter ------------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions.
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t, + ur_usm_pool_handle_t, void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 5e6d0ce18e..ff12067e3a 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -13,6 +13,7 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp diff --git a/source/adapters/level_zero/async_alloc.cpp b/source/adapters/level_zero/async_alloc.cpp new file mode 100644 index 0000000000..bf7bcb10bd --- /dev/null +++ 
b/source/adapters/level_zero/async_alloc.cpp @@ -0,0 +1,40 @@ +//===--------- async_alloc.cpp - Level Zero Adapter -----------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t, + ur_usm_pool_handle_t, void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/native_cpu/CMakeLists.txt b/source/adapters/native_cpu/CMakeLists.txt index 56cfc577d8..12447b2c4e 100644 --- a/source/adapters/native_cpu/CMakeLists.txt +++ b/source/adapters/native_cpu/CMakeLists.txt @@ -10,6 +10,7 @@ set(TARGET_NAME ur_adapter_native_cpu) add_ur_adapter(${TARGET_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp diff --git a/source/adapters/native_cpu/async_alloc.cpp b/source/adapters/native_cpu/async_alloc.cpp new file mode 100644 index 0000000000..bf7bcb10bd --- /dev/null +++ b/source/adapters/native_cpu/async_alloc.cpp @@ -0,0 +1,40 @@ +//===--------- async_alloc.cpp - Native CPU Adapter -----------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t, + ur_usm_pool_handle_t, void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index 8bf67239b0..56b72c372e 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -16,6 +16,7 @@
add_ur_adapter(${TARGET_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp ${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/async_alloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/common.hpp diff --git a/source/adapters/opencl/async_alloc.cpp b/source/adapters/opencl/async_alloc.cpp new file mode 100644 index 0000000000..bf7bcb10bd --- /dev/null +++ b/source/adapters/opencl/async_alloc.cpp @@ -0,0 +1,40 @@ +//===--------- async_alloc.cpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( + ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, + const ur_exp_async_usm_alloc_properties_t *, uint32_t, + const ur_event_handle_t *, void **, ur_event_handle_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} + +UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t, + ur_usm_pool_handle_t, void *, + uint32_t, + const ur_event_handle_t *, + ur_event_handle_t *) { + return
UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} From 68960371761854257b66f1148b2c068aff379577 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 19 Dec 2024 10:35:24 +0000 Subject: [PATCH 4/6] Fix compilation --- source/adapters/cuda/usm.cpp | 107 ++++++++++++++--------------------- source/adapters/cuda/usm.hpp | 5 ++ 2 files changed, 46 insertions(+), 66 deletions(-) diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 0ad48de375..e717d17f29 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -379,67 +379,6 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) : Context{Context} { - if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { - // TODO: this should only use the host - } - const void *pNext = PoolDesc->pNext; - while (pNext != nullptr) { - const ur_base_desc_t *BaseDesc = static_cast(pNext); - switch (BaseDesc->stype) { - case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { - const ur_usm_pool_limits_desc_t *Limits = - reinterpret_cast(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; - } - break; - } - default: { - throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); - } - } - pNext = BaseDesc->pNext; - } - - auto MemProvider = - umf::memoryProviderMakeUnique(Context, nullptr) - .second; - - HostMemPool = - umf::poolMakeUniqueFromOps( - umfDisjointPoolOps(), std::move(MemProvider), - &this->DisjointPoolConfigs.Configs[usm::DisjointPoolMemType::Host]) - .second; - - for (const auto &Device : Context->getDevices()) { - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - DeviceMemPool = umf::poolMakeUniqueFromOps( - umfDisjointPoolOps(), std::move(MemProvider), - &this->DisjointPoolConfigs - 
.Configs[usm::DisjointPoolMemType::Device]) - .second; - MemProvider = - umf::memoryProviderMakeUnique(Context, Device) - .second; - SharedMemPool = umf::poolMakeUniqueFromOps( - umfDisjointPoolOps(), std::move(MemProvider), - &this->DisjointPoolConfigs - .Configs[usm::DisjointPoolMemType::Shared]) - .second; - Context->addPool(this); - } -} - -ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, - ur_device_handle_t Device, - ur_usm_pool_desc_t *PoolDesc) - : Context{Context} { - if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { - // TODO: this should only use the host - } const void *pNext = PoolDesc->pNext; while (pNext != nullptr) { const ur_base_desc_t *BaseDesc = static_cast(pNext); @@ -494,6 +433,18 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, } } +ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, + ur_device_handle_t Device, + ur_usm_pool_desc_t *PoolDesc) + : Context{Context}, Device{Device} { + if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP)) + throw; + + // TODO: what flags should be used here. Moreover what flags should have + // UR counterparts? + UR_CHECK_ERROR(cuMemPoolCreate(&CUmemPool, 0)); +} + bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { return DeviceMemPool.get() == umf_pool || SharedMemPool.get() == umf_pool || HostMemPool.get() == umf_pool; @@ -507,11 +458,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool ) { // Without pool tracking we can't free pool allocations. 
-#ifndef UMF_ENABLE_POOL_TRACKING - // We don't need UMF to use native mem pools - if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP)) - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -#endif +#ifdef UMF_ENABLE_POOL_TRACKING if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -526,6 +473,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( return UR_RESULT_ERROR_UNKNOWN; } return UR_RESULT_SUCCESS; +#else + std::ignore = Context; + std::ignore = PoolDesc; + std::ignore = Pool; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +#endif +} + +UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t Context, ///< [in] handle of the context object + ur_device_handle_t Device, ///< [in] handle of the device object + ur_usm_pool_desc_t *PoolDesc, ///< [in] pointer to USM pool descriptor. + ///< Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool +) { + // This entry point only supports native mem pools + if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP)) + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + try { + *Pool = reinterpret_cast( + new ur_usm_pool_handle_t_(Context, Device, PoolDesc)); + } catch (ur_result_t err) { + return err; + } catch (...) 
{ + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolRetain( diff --git a/source/adapters/cuda/usm.hpp b/source/adapters/cuda/usm.hpp index 956c2a7359..e154b3439b 100644 --- a/source/adapters/cuda/usm.hpp +++ b/source/adapters/cuda/usm.hpp @@ -21,6 +21,7 @@ struct ur_usm_pool_handle_t_ { std::atomic_uint32_t RefCount = 1; ur_context_handle_t Context = nullptr; + ur_device_handle_t Device = nullptr; usm::DisjointPoolAllConfigs DisjointPoolConfigs = usm::DisjointPoolAllConfigs(); @@ -34,6 +35,10 @@ struct ur_usm_pool_handle_t_ { ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc); + // TODO: do we need the context param? + ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device, + ur_usm_pool_desc_t *PoolDesc); + uint32_t incrementReferenceCount() noexcept { return ++RefCount; } uint32_t decrementReferenceCount() noexcept { return --RefCount; } From 4d1388da7e07f43af465e47dfdeb8ec333fa38d4 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Fri, 3 Jan 2025 12:26:25 +0000 Subject: [PATCH 5/6] Use cuda specific mem pool options Link some UR mem pool flags up to their CUDA equivalents. There are a lot of outstanding TODOs for flags whose mapping/purpose is unknown. 
--- source/adapters/cuda/async_alloc.cpp | 4 ++- source/adapters/cuda/usm.cpp | 43 +++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/source/adapters/cuda/async_alloc.cpp b/source/adapters/cuda/async_alloc.cpp index 6dade0803d..1cc48a80d2 100644 --- a/source/adapters/cuda/async_alloc.cpp +++ b/source/adapters/cuda/async_alloc.cpp @@ -42,7 +42,9 @@ UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( if (pPool) { assert(pPool->usesCudaPool()); - + UR_CHECK_ERROR( + cuMemAllocFromPoolAsync(reinterpret_cast(ppMem), size, + pPool->getCudaPool(), CuStream)); } else { UR_CHECK_ERROR(cuMemAllocAsync(reinterpret_cast(ppMem), size, CuStream)); diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index e717d17f29..2849981125 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -440,9 +440,50 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP)) throw; + CUmemPoolProps MemPoolProps; + + const void *pNext = PoolDesc->pNext; + while (pNext != nullptr) { + const ur_base_desc_t *BaseDesc = static_cast(pNext); + switch (BaseDesc->stype) { + case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { + const ur_usm_pool_limits_desc_t *Limits = + reinterpret_cast(BaseDesc); + MemPoolProps.maxSize = Limits->maxPoolableSize; + std::ignore = Limits->minDriverAllocSize; // FIXME: We don't do anything + // with this. Can we/do we need + // to do something with this? + break; + } + default: { + throw UsmAllocationException(UR_RESULT_ERROR_INVALID_ARGUMENT); + } + } + pNext = BaseDesc->pNext; + } + // TODO: what flags should be used here. Moreover what flags should have // UR counterparts? - UR_CHECK_ERROR(cuMemPoolCreate(&CUmemPool, 0)); + MemPoolProps.allocType = + CU_MEM_ALLOCATION_TYPE_PINNED; // Is this valid? CUDA docs say: + // + // "This allocation type is 'pinned', i.e. 
+ // cannot migrate from its current + // location while the application is + // actively using it" + // + // Alternatives are *_INVALID (default) and + // *_MAX. + MemPoolProps.location.id = Device->getIndex(); // Docs are not clear on what + // this id is for. I am + // assuming it is used for + // device id. I have made a + // forum post here: + // https://forums.developer.nvidia.com/t/incomplete-description-in-cumemlocation-v1-struct-reference/318701 + MemPoolProps.location.type = + CU_MEM_LOCATION_TYPE_DEVICE; // Alternatives are: + // HOST, HOST_NUMA and HOST_NUMA_CURRENT + UR_CHECK_ERROR(cuMemPoolCreate(&CUmemPool, &MemPoolProps)); } bool ur_usm_pool_handle_t_::hasUMFPool(umf_memory_pool_t *umf_pool) { From 919c5de879e3ab40c9f81dfe289f95cefdf7b7a6 Mon Sep 17 00:00:00 2001 From: Hugh Delaney Date: Thu, 9 Jan 2025 12:23:52 +0000 Subject: [PATCH 6/6] Add tests and new urUSMPoolCreateExp protoype Also some small fixes here and there in the CUDA adapter. --- include/ur_api.h | 311 +++--- include/ur_api_funcs.def | 25 +- include/ur_ddi.h | 333 ++++--- include/ur_print.h | 128 +-- include/ur_print.hpp | 926 +++++++++--------- scripts/core/exp-async-alloc.yml | 28 + scripts/core/registry.yml | 3 + source/adapters/cuda/async_alloc.cpp | 182 ++-- source/adapters/cuda/ur_interface_loader.cpp | 6 + source/adapters/cuda/usm.cpp | 80 +- source/adapters/cuda/usm.hpp | 2 + .../level_zero/ur_interface_loader.cpp | 1 + .../level_zero/ur_interface_loader.hpp | 4 + source/adapters/mock/ur_mockddi.cpp | 51 + source/loader/layers/tracing/ur_trcddi.cpp | 43 + source/loader/layers/validation/ur_valddi.cpp | 60 ++ source/loader/loader.def.in | 2 + source/loader/loader.map.in | 2 + source/loader/ur_ldrddi.cpp | 47 + source/loader/ur_libapi.cpp | 43 + source/loader/ur_print.cpp | 8 + source/ur_api.cpp | 36 + test/conformance/CMakeLists.txt | 1 + .../exp_async_alloc/CMakeLists.txt | 9 + .../exp_async_alloc/async_alloc.cpp | 75 ++ 25 files changed, 1486 insertions(+), 920 deletions(-) 
create mode 100644 test/conformance/exp_async_alloc/CMakeLists.txt create mode 100644 test/conformance/exp_async_alloc/async_alloc.cpp diff --git a/include/ur_api.h b/include/ur_api.h index 5fb3337a40..2038f6e1d0 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -238,6 +238,7 @@ typedef enum ur_function_t { UR_FUNCTION_ENQUEUE_USM_SHARED_ALLOC_EXP = 251, ///< Enumerator for ::urEnqueueUSMSharedAllocExp UR_FUNCTION_ENQUEUE_USM_HOST_ALLOC_EXP = 252, ///< Enumerator for ::urEnqueueUSMHostAllocExp UR_FUNCTION_ENQUEUE_USM_FREE_EXP = 253, ///< Enumerator for ::urEnqueueUSMFreeExp + UR_FUNCTION_USM_POOL_CREATE_EXP = 254, ///< Enumerator for ::urUSMPoolCreateExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -7667,6 +7668,39 @@ urEnqueueUSMFreeExp( ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies the async alloc ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Create USM memory pool with desired properties. +/// +/// @details +/// - Create a memory pool associated with a single device. +/// - See also ::urUSMPoolCreate and ::ur_usm_pool_limits_desc_t.
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPoolDesc` +/// + `NULL == ppPool` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_POOL_FLAGS_MASK & pPoolDesc->flags` +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT +UR_APIEXPORT ur_result_t UR_APICALL +urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t *pPoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -12119,6 +12153,150 @@ typedef struct ur_enqueue_native_command_exp_params_t { ur_event_handle_t **pphEvent; } ur_enqueue_native_command_exp_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMHostAlloc +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_host_alloc_params_t { + ur_context_handle_t *phContext; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *psize; + void ***pppMem; +} ur_usm_host_alloc_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMDeviceAlloc +/// @details Each entry is a pointer to the parameter passed to the function; 
+/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_device_alloc_params_t { + ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *psize; + void ***pppMem; +} ur_usm_device_alloc_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMSharedAlloc +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_shared_alloc_params_t { + ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *psize; + void ***pppMem; +} ur_usm_shared_alloc_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMFree +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_free_params_t { + ur_context_handle_t *phContext; + void **ppMem; +} ur_usm_free_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMGetMemAllocInfo +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_get_mem_alloc_info_params_t { + ur_context_handle_t *phContext; + const void **ppMem; + ur_usm_alloc_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_usm_get_mem_alloc_info_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolCreate +/// @details Each entry is a pointer to the parameter passed to the function; 
+/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_create_params_t { + ur_context_handle_t *phContext; + ur_usm_pool_desc_t **ppPoolDesc; + ur_usm_pool_handle_t **pppPool; +} ur_usm_pool_create_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolRetain +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_retain_params_t { + ur_usm_pool_handle_t *ppPool; +} ur_usm_pool_retain_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolRelease +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_release_params_t { + ur_usm_pool_handle_t *ppPool; +} ur_usm_pool_release_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolGetInfo +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_get_info_params_t { + ur_usm_pool_handle_t *phPool; + ur_usm_pool_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_usm_pool_get_info_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPoolCreateExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pool_create_exp_params_t { + ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + ur_usm_pool_desc_t **ppPoolDesc; + 
ur_usm_pool_handle_t **pppPool; +} ur_usm_pool_create_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMPitchedAllocExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_pitched_alloc_exp_params_t { + ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; + const ur_usm_desc_t **ppUSMDesc; + ur_usm_pool_handle_t *ppool; + size_t *pwidthInBytes; + size_t *pheight; + size_t *pelementSizeBytes; + void ***pppMem; + size_t **ppResultPitch; +} ur_usm_pitched_alloc_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMImportExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_import_exp_params_t { + ur_context_handle_t *phContext; + void **ppMem; + size_t *psize; +} ur_usm_import_exp_params_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urUSMReleaseExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_usm_release_exp_params_t { + ur_context_handle_t *phContext; + void **ppMem; +} ur_usm_release_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp /// @details Each entry is a pointer to the parameter passed to the function; @@ -12340,139 +12518,6 @@ typedef struct ur_bindless_images_signal_external_semaphore_exp_params_t { ur_event_handle_t **pphEvent; } ur_bindless_images_signal_external_semaphore_exp_params_t; 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMHostAlloc -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_host_alloc_params_t { - ur_context_handle_t *phContext; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *psize; - void ***pppMem; -} ur_usm_host_alloc_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMDeviceAlloc -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_device_alloc_params_t { - ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *psize; - void ***pppMem; -} ur_usm_device_alloc_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMSharedAlloc -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_shared_alloc_params_t { - ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *psize; - void ***pppMem; -} ur_usm_shared_alloc_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMFree -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_free_params_t { - ur_context_handle_t *phContext; - void **ppMem; -} ur_usm_free_params_t; - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMGetMemAllocInfo -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_get_mem_alloc_info_params_t { - ur_context_handle_t *phContext; - const void **ppMem; - ur_usm_alloc_info_t *ppropName; - size_t *ppropSize; - void **ppPropValue; - size_t **ppPropSizeRet; -} ur_usm_get_mem_alloc_info_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolCreate -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_create_params_t { - ur_context_handle_t *phContext; - ur_usm_pool_desc_t **ppPoolDesc; - ur_usm_pool_handle_t **pppPool; -} ur_usm_pool_create_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolRetain -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_retain_params_t { - ur_usm_pool_handle_t *ppPool; -} ur_usm_pool_retain_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolRelease -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_release_params_t { - ur_usm_pool_handle_t *ppPool; -} ur_usm_pool_release_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPoolGetInfo -/// @details Each entry is a pointer to the parameter passed to 
the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pool_get_info_params_t { - ur_usm_pool_handle_t *phPool; - ur_usm_pool_info_t *ppropName; - size_t *ppropSize; - void **ppPropValue; - size_t **ppPropSizeRet; -} ur_usm_pool_get_info_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMPitchedAllocExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_pitched_alloc_exp_params_t { - ur_context_handle_t *phContext; - ur_device_handle_t *phDevice; - const ur_usm_desc_t **ppUSMDesc; - ur_usm_pool_handle_t *ppool; - size_t *pwidthInBytes; - size_t *pheight; - size_t *pelementSizeBytes; - void ***pppMem; - size_t **ppResultPitch; -} ur_usm_pitched_alloc_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMImportExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_import_exp_params_t { - ur_context_handle_t *phContext; - void **ppMem; - size_t *psize; -} ur_usm_import_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urUSMReleaseExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_usm_release_exp_params_t { - ur_context_handle_t *phContext; - void **ppMem; -} ur_usm_release_exp_params_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urCommandBufferCreateExp /// @details Each entry is a pointer to the parameter passed to the function; diff 
--git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index c421151ef3..0531e124b9 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -136,6 +136,19 @@ _UR_API(urEnqueueUSMFreeExp) _UR_API(urEnqueueCooperativeKernelLaunchExp) _UR_API(urEnqueueTimestampRecordingExp) _UR_API(urEnqueueNativeCommandExp) +_UR_API(urUSMHostAlloc) +_UR_API(urUSMDeviceAlloc) +_UR_API(urUSMSharedAlloc) +_UR_API(urUSMFree) +_UR_API(urUSMGetMemAllocInfo) +_UR_API(urUSMPoolCreate) +_UR_API(urUSMPoolRetain) +_UR_API(urUSMPoolRelease) +_UR_API(urUSMPoolGetInfo) +_UR_API(urUSMPoolCreateExp) +_UR_API(urUSMPitchedAllocExp) +_UR_API(urUSMImportExp) +_UR_API(urUSMReleaseExp) _UR_API(urBindlessImagesUnsampledImageHandleDestroyExp) _UR_API(urBindlessImagesSampledImageHandleDestroyExp) _UR_API(urBindlessImagesImageAllocateExp) @@ -154,18 +167,6 @@ _UR_API(urBindlessImagesImportExternalSemaphoreExp) _UR_API(urBindlessImagesReleaseExternalSemaphoreExp) _UR_API(urBindlessImagesWaitExternalSemaphoreExp) _UR_API(urBindlessImagesSignalExternalSemaphoreExp) -_UR_API(urUSMHostAlloc) -_UR_API(urUSMDeviceAlloc) -_UR_API(urUSMSharedAlloc) -_UR_API(urUSMFree) -_UR_API(urUSMGetMemAllocInfo) -_UR_API(urUSMPoolCreate) -_UR_API(urUSMPoolRetain) -_UR_API(urUSMPoolRelease) -_UR_API(urUSMPoolGetInfo) -_UR_API(urUSMPitchedAllocExp) -_UR_API(urUSMImportExp) -_UR_API(urUSMReleaseExp) _UR_API(urCommandBufferCreateExp) _UR_API(urCommandBufferRetainExp) _UR_API(urCommandBufferReleaseExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 5fd4e3b855..a664a443a3 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1600,6 +1600,176 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEnqueueExpProcAddrTable_t)( ur_api_version_t, ur_enqueue_exp_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMHostAlloc +typedef ur_result_t(UR_APICALL *ur_pfnUSMHostAlloc_t)( + ur_context_handle_t, + const ur_usm_desc_t *, + 
ur_usm_pool_handle_t, + size_t, + void **); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMDeviceAlloc +typedef ur_result_t(UR_APICALL *ur_pfnUSMDeviceAlloc_t)( + ur_context_handle_t, + ur_device_handle_t, + const ur_usm_desc_t *, + ur_usm_pool_handle_t, + size_t, + void **); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMSharedAlloc +typedef ur_result_t(UR_APICALL *ur_pfnUSMSharedAlloc_t)( + ur_context_handle_t, + ur_device_handle_t, + const ur_usm_desc_t *, + ur_usm_pool_handle_t, + size_t, + void **); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMFree +typedef ur_result_t(UR_APICALL *ur_pfnUSMFree_t)( + ur_context_handle_t, + void *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMGetMemAllocInfo +typedef ur_result_t(UR_APICALL *ur_pfnUSMGetMemAllocInfo_t)( + ur_context_handle_t, + const void *, + ur_usm_alloc_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolCreate +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolCreate_t)( + ur_context_handle_t, + ur_usm_pool_desc_t *, + ur_usm_pool_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolRetain +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRetain_t)( + ur_usm_pool_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolRelease +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRelease_t)( + ur_usm_pool_handle_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolGetInfo +typedef 
ur_result_t(UR_APICALL *ur_pfnUSMPoolGetInfo_t)( + ur_usm_pool_handle_t, + ur_usm_pool_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of USM functions pointers +typedef struct ur_usm_dditable_t { + ur_pfnUSMHostAlloc_t pfnHostAlloc; + ur_pfnUSMDeviceAlloc_t pfnDeviceAlloc; + ur_pfnUSMSharedAlloc_t pfnSharedAlloc; + ur_pfnUSMFree_t pfnFree; + ur_pfnUSMGetMemAllocInfo_t pfnGetMemAllocInfo; + ur_pfnUSMPoolCreate_t pfnPoolCreate; + ur_pfnUSMPoolRetain_t pfnPoolRetain; + ur_pfnUSMPoolRelease_t pfnPoolRelease; + ur_pfnUSMPoolGetInfo_t pfnPoolGetInfo; +} ur_usm_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's USM table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetUSMProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_usm_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetUSMProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetUSMProcAddrTable_t)( + ur_api_version_t, + ur_usm_dditable_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPoolCreateExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolCreateExp_t)( + ur_context_handle_t, + ur_device_handle_t, + ur_usm_pool_desc_t *, + ur_usm_pool_handle_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMPitchedAllocExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMPitchedAllocExp_t)( + ur_context_handle_t, 
+ ur_device_handle_t, + const ur_usm_desc_t *, + ur_usm_pool_handle_t, + size_t, + size_t, + size_t, + void **, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMImportExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMImportExp_t)( + ur_context_handle_t, + void *, + size_t); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urUSMReleaseExp +typedef ur_result_t(UR_APICALL *ur_pfnUSMReleaseExp_t)( + ur_context_handle_t, + void *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of USMExp functions pointers +typedef struct ur_usm_exp_dditable_t { + ur_pfnUSMPoolCreateExp_t pfnPoolCreateExp; + ur_pfnUSMPitchedAllocExp_t pfnPitchedAllocExp; + ur_pfnUSMImportExp_t pfnImportExp; + ur_pfnUSMReleaseExp_t pfnReleaseExp; +} ur_usm_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's USMExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetUSMExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_usm_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetUSMExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetUSMExpProcAddrTable_t)( + ur_api_version_t, + ur_usm_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urBindlessImagesUnsampledImageHandleDestroyExp typedef ur_result_t(UR_APICALL 
*ur_pfnBindlessImagesUnsampledImageHandleDestroyExp_t)( @@ -1811,167 +1981,6 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetBindlessImagesExpProcAddrTable_t)( ur_api_version_t, ur_bindless_images_exp_dditable_t *); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMHostAlloc -typedef ur_result_t(UR_APICALL *ur_pfnUSMHostAlloc_t)( - ur_context_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, - void **); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMDeviceAlloc -typedef ur_result_t(UR_APICALL *ur_pfnUSMDeviceAlloc_t)( - ur_context_handle_t, - ur_device_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, - void **); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMSharedAlloc -typedef ur_result_t(UR_APICALL *ur_pfnUSMSharedAlloc_t)( - ur_context_handle_t, - ur_device_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, - void **); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMFree -typedef ur_result_t(UR_APICALL *ur_pfnUSMFree_t)( - ur_context_handle_t, - void *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMGetMemAllocInfo -typedef ur_result_t(UR_APICALL *ur_pfnUSMGetMemAllocInfo_t)( - ur_context_handle_t, - const void *, - ur_usm_alloc_info_t, - size_t, - void *, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPoolCreate -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolCreate_t)( - ur_context_handle_t, - ur_usm_pool_desc_t *, - ur_usm_pool_handle_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for 
urUSMPoolRetain -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRetain_t)( - ur_usm_pool_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPoolRelease -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolRelease_t)( - ur_usm_pool_handle_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPoolGetInfo -typedef ur_result_t(UR_APICALL *ur_pfnUSMPoolGetInfo_t)( - ur_usm_pool_handle_t, - ur_usm_pool_info_t, - size_t, - void *, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of USM functions pointers -typedef struct ur_usm_dditable_t { - ur_pfnUSMHostAlloc_t pfnHostAlloc; - ur_pfnUSMDeviceAlloc_t pfnDeviceAlloc; - ur_pfnUSMSharedAlloc_t pfnSharedAlloc; - ur_pfnUSMFree_t pfnFree; - ur_pfnUSMGetMemAllocInfo_t pfnGetMemAllocInfo; - ur_pfnUSMPoolCreate_t pfnPoolCreate; - ur_pfnUSMPoolRetain_t pfnPoolRetain; - ur_pfnUSMPoolRelease_t pfnPoolRelease; - ur_pfnUSMPoolGetInfo_t pfnPoolGetInfo; -} ur_usm_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's USM table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL -urGetUSMProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_usm_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers -); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetUSMProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetUSMProcAddrTable_t)( - ur_api_version_t, - ur_usm_dditable_t *); - 
-/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMPitchedAllocExp -typedef ur_result_t(UR_APICALL *ur_pfnUSMPitchedAllocExp_t)( - ur_context_handle_t, - ur_device_handle_t, - const ur_usm_desc_t *, - ur_usm_pool_handle_t, - size_t, - size_t, - size_t, - void **, - size_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMImportExp -typedef ur_result_t(UR_APICALL *ur_pfnUSMImportExp_t)( - ur_context_handle_t, - void *, - size_t); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urUSMReleaseExp -typedef ur_result_t(UR_APICALL *ur_pfnUSMReleaseExp_t)( - ur_context_handle_t, - void *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of USMExp functions pointers -typedef struct ur_usm_exp_dditable_t { - ur_pfnUSMPitchedAllocExp_t pfnPitchedAllocExp; - ur_pfnUSMImportExp_t pfnImportExp; - ur_pfnUSMReleaseExp_t pfnReleaseExp; -} ur_usm_exp_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's USMExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL -urGetUSMExpProcAddrTable( - ur_api_version_t version, ///< [in] API version requested - ur_usm_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers -); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetUSMExpProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetUSMExpProcAddrTable_t)( - ur_api_version_t, - ur_usm_exp_dditable_t *); - 
/////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urCommandBufferCreateExp typedef ur_result_t(UR_APICALL *ur_pfnCommandBufferCreateExp_t)( @@ -2638,9 +2647,9 @@ typedef struct ur_dditable_t { ur_global_dditable_t Global; ur_enqueue_dditable_t Enqueue; ur_enqueue_exp_dditable_t EnqueueExp; - ur_bindless_images_exp_dditable_t BindlessImagesExp; ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; + ur_bindless_images_exp_dditable_t BindlessImagesExp; ur_command_buffer_exp_dditable_t CommandBufferExp; ur_tensor_map_exp_dditable_t TensorMapExp; ur_usm_p2p_exp_dditable_t UsmP2PExp; diff --git a/include/ur_print.h b/include/ur_print.h index 9cf185775b..7d21822023 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -2195,244 +2195,252 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(co UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueNativeCommandExpParams(const struct ur_enqueue_native_command_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct +/// @brief Print ur_usm_host_alloc_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesUnsampledImageHandleDestroyExpParams(const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmHostAllocParams(const struct ur_usm_host_alloc_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_sampled_image_handle_destroy_exp_params_t struct +/// @brief Print 
ur_usm_device_alloc_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesSampledImageHandleDestroyExpParams(const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmDeviceAllocParams(const struct ur_usm_device_alloc_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_image_allocate_exp_params_t struct +/// @brief Print ur_usm_shared_alloc_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageAllocateExpParams(const struct ur_bindless_images_image_allocate_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmSharedAllocParams(const struct ur_usm_shared_alloc_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_image_free_exp_params_t struct +/// @brief Print ur_usm_free_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageFreeExpParams(const struct ur_bindless_images_image_free_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmFreeParams(const struct ur_usm_free_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print 
ur_bindless_images_unsampled_image_create_exp_params_t struct +/// @brief Print ur_usm_get_mem_alloc_info_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesUnsampledImageCreateExpParams(const struct ur_bindless_images_unsampled_image_create_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmGetMemAllocInfoParams(const struct ur_usm_get_mem_alloc_info_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_sampled_image_create_exp_params_t struct +/// @brief Print ur_usm_pool_create_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesSampledImageCreateExpParams(const struct ur_bindless_images_sampled_image_create_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolCreateParams(const struct ur_usm_pool_create_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_image_copy_exp_params_t struct +/// @brief Print ur_usm_pool_retain_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageCopyExpParams(const struct ur_bindless_images_image_copy_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolRetainParams(const struct ur_usm_pool_retain_params_t *params, char *buffer, const size_t buff_size, 
size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_image_get_info_exp_params_t struct +/// @brief Print ur_usm_pool_release_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageGetInfoExpParams(const struct ur_bindless_images_image_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolReleaseParams(const struct ur_usm_pool_release_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_mipmap_get_level_exp_params_t struct +/// @brief Print ur_usm_pool_get_info_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMipmapGetLevelExpParams(const struct ur_bindless_images_mipmap_get_level_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolGetInfoParams(const struct ur_usm_pool_get_info_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_mipmap_free_exp_params_t struct +/// @brief Print ur_usm_pool_create_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMipmapFreeExpParams(const struct ur_bindless_images_mipmap_free_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolCreateExpParams(const struct 
ur_usm_pool_create_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_import_external_memory_exp_params_t struct +/// @brief Print ur_usm_pitched_alloc_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImportExternalMemoryExpParams(const struct ur_bindless_images_import_external_memory_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPitchedAllocExpParams(const struct ur_usm_pitched_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_map_external_array_exp_params_t struct +/// @brief Print ur_usm_import_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMapExternalArrayExpParams(const struct ur_bindless_images_map_external_array_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmImportExpParams(const struct ur_usm_import_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_map_external_linear_memory_exp_params_t struct +/// @brief Print ur_usm_release_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMapExternalLinearMemoryExpParams(const struct 
ur_bindless_images_map_external_linear_memory_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmReleaseExpParams(const struct ur_usm_release_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_release_external_memory_exp_params_t struct +/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesReleaseExternalMemoryExpParams(const struct ur_bindless_images_release_external_memory_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesUnsampledImageHandleDestroyExpParams(const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_import_external_semaphore_exp_params_t struct +/// @brief Print ur_bindless_images_sampled_image_handle_destroy_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImportExternalSemaphoreExpParams(const struct ur_bindless_images_import_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesSampledImageHandleDestroyExpParams(const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); 
/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_release_external_semaphore_exp_params_t struct +/// @brief Print ur_bindless_images_image_allocate_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesReleaseExternalSemaphoreExpParams(const struct ur_bindless_images_release_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageAllocateExpParams(const struct ur_bindless_images_image_allocate_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_wait_external_semaphore_exp_params_t struct +/// @brief Print ur_bindless_images_image_free_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesWaitExternalSemaphoreExpParams(const struct ur_bindless_images_wait_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageFreeExpParams(const struct ur_bindless_images_image_free_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_bindless_images_signal_external_semaphore_exp_params_t struct +/// @brief Print ur_bindless_images_unsampled_image_create_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesSignalExternalSemaphoreExpParams(const struct 
ur_bindless_images_signal_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesUnsampledImageCreateExpParams(const struct ur_bindless_images_unsampled_image_create_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_host_alloc_params_t struct +/// @brief Print ur_bindless_images_sampled_image_create_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmHostAllocParams(const struct ur_usm_host_alloc_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesSampledImageCreateExpParams(const struct ur_bindless_images_sampled_image_create_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_device_alloc_params_t struct +/// @brief Print ur_bindless_images_image_copy_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmDeviceAllocParams(const struct ur_usm_device_alloc_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageCopyExpParams(const struct ur_bindless_images_image_copy_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_shared_alloc_params_t struct +/// @brief Print ur_bindless_images_image_get_info_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - 
::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmSharedAllocParams(const struct ur_usm_shared_alloc_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImageGetInfoExpParams(const struct ur_bindless_images_image_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_free_params_t struct +/// @brief Print ur_bindless_images_mipmap_get_level_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmFreeParams(const struct ur_usm_free_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMipmapGetLevelExpParams(const struct ur_bindless_images_mipmap_get_level_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_get_mem_alloc_info_params_t struct +/// @brief Print ur_bindless_images_mipmap_free_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmGetMemAllocInfoParams(const struct ur_usm_get_mem_alloc_info_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMipmapFreeExpParams(const struct ur_bindless_images_mipmap_free_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_create_params_t struct +/// @brief Print 
ur_bindless_images_import_external_memory_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolCreateParams(const struct ur_usm_pool_create_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImportExternalMemoryExpParams(const struct ur_bindless_images_import_external_memory_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_retain_params_t struct +/// @brief Print ur_bindless_images_map_external_array_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolRetainParams(const struct ur_usm_pool_retain_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMapExternalArrayExpParams(const struct ur_bindless_images_map_external_array_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_release_params_t struct +/// @brief Print ur_bindless_images_map_external_linear_memory_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolReleaseParams(const struct ur_usm_pool_release_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesMapExternalLinearMemoryExpParams(const struct ur_bindless_images_map_external_linear_memory_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); 
/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pool_get_info_params_t struct +/// @brief Print ur_bindless_images_release_external_memory_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPoolGetInfoParams(const struct ur_usm_pool_get_info_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesReleaseExternalMemoryExpParams(const struct ur_bindless_images_release_external_memory_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_pitched_alloc_exp_params_t struct +/// @brief Print ur_bindless_images_import_external_semaphore_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmPitchedAllocExpParams(const struct ur_usm_pitched_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesImportExternalSemaphoreExpParams(const struct ur_bindless_images_import_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_import_exp_params_t struct +/// @brief Print ur_bindless_images_release_external_semaphore_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmImportExpParams(const struct ur_usm_import_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL 
urPrintBindlessImagesReleaseExternalSemaphoreExpParams(const struct ur_bindless_images_release_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_usm_release_exp_params_t struct +/// @brief Print ur_bindless_images_wait_external_semaphore_exp_params_t struct /// @returns /// - ::UR_RESULT_SUCCESS /// - ::UR_RESULT_ERROR_INVALID_SIZE /// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmReleaseExpParams(const struct ur_usm_release_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesWaitExternalSemaphoreExpParams(const struct ur_bindless_images_wait_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_bindless_images_signal_external_semaphore_exp_params_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintBindlessImagesSignalExternalSemaphoreExpParams(const struct ur_bindless_images_signal_external_semaphore_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_command_buffer_create_exp_params_t struct diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 94a0404f14..776bf1a920 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -1018,6 +1018,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ENQUEUE_USM_FREE_EXP: os << "UR_FUNCTION_ENQUEUE_USM_FREE_EXP"; break; + case UR_FUNCTION_USM_POOL_CREATE_EXP: + os << "UR_FUNCTION_USM_POOL_CREATE_EXP"; + break; default: os << 
"unknown enumerator"; break; @@ -16498,10 +16501,10 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type +/// @brief Print operator for the ur_usm_host_alloc_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_host_alloc_params_t *params) { os << ".hContext = "; @@ -16509,25 +16512,36 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pUSMDesc = "; ur::details::printPtr(os, - *(params->phDevice)); + *(params->ppUSMDesc)); os << ", "; - os << ".hImage = "; + os << ".pool = "; - ur::details::printPtr(os, reinterpret_cast( - *(params->phImage))); + ur::details::printPtr(os, + *(params->ppool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_sampled_image_handle_destroy_exp_params_t type +/// @brief Print operator for the ur_usm_device_alloc_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_device_alloc_params_t *params) { os << ".hContext = "; @@ -16541,19 +16555,36 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const 
struct *(params->phDevice)); os << ", "; - os << ".hImage = "; + os << ".pUSMDesc = "; - ur::details::printPtr(os, reinterpret_cast( - *(params->phImage))); + ur::details::printPtr(os, + *(params->ppUSMDesc)); + + os << ", "; + os << ".pool = "; + + ur::details::printPtr(os, + *(params->ppool)); + + os << ", "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".ppMem = "; + + ur::details::printPtr(os, + *(params->pppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_allocate_exp_params_t type +/// @brief Print operator for the ur_usm_shared_alloc_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_allocate_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_shared_alloc_params_t *params) { os << ".hContext = "; @@ -16567,31 +16598,36 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".pImageFormat = "; + os << ".pUSMDesc = "; ur::details::printPtr(os, - *(params->ppImageFormat)); + *(params->ppUSMDesc)); os << ", "; - os << ".pImageDesc = "; + os << ".pool = "; ur::details::printPtr(os, - *(params->ppImageDesc)); + *(params->ppool)); os << ", "; - os << ".phImageMem = "; + os << ".size = "; + + os << *(params->psize); + + os << ", "; + os << ".ppMem = "; ur::details::printPtr(os, - *(params->pphImageMem)); + *(params->pppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_free_exp_params_t type +/// @brief Print operator for the ur_usm_free_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_bindless_images_image_free_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_free_params_t *params) { os << ".hContext = "; @@ -16599,25 +16635,19 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pMem = "; ur::details::printPtr(os, - *(params->phDevice)); - - os << ", "; - os << ".hImageMem = "; - - ur::details::printPtr(os, reinterpret_cast( - *(params->phImageMem))); + *(params->ppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_unsampled_image_create_exp_params_t type +/// @brief Print operator for the ur_usm_get_mem_alloc_info_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_unsampled_image_create_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_get_mem_alloc_info_params_t *params) { os << ".hContext = "; @@ -16625,43 +16655,39 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pMem = "; ur::details::printPtr(os, - *(params->phDevice)); + *(params->ppMem)); os << ", "; - os << ".hImageMem = "; + os << ".propName = "; - ur::details::printPtr(os, reinterpret_cast( - *(params->phImageMem))); + os << *(params->ppropName); os << ", "; - os << ".pImageFormat = "; + os << ".propSize = "; - ur::details::printPtr(os, - *(params->ppImageFormat)); + os << *(params->ppropSize); os << ", "; - os << ".pImageDesc = "; - - ur::details::printPtr(os, - *(params->ppImageDesc)); + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; - os << ".phImage = "; + os << ".pPropSizeRet 
= "; ur::details::printPtr(os, - *(params->pphImage)); + *(params->ppPropSizeRet)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_sampled_image_create_exp_params_t type +/// @brief Print operator for the ur_usm_pool_create_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_sampled_image_create_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_create_params_t *params) { os << ".hContext = "; @@ -16669,138 +16695,119 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pPoolDesc = "; ur::details::printPtr(os, - *(params->phDevice)); + *(params->ppPoolDesc)); os << ", "; - os << ".hImageMem = "; + os << ".ppPool = "; - ur::details::printPtr(os, reinterpret_cast( - *(params->phImageMem))); + ur::details::printPtr(os, + *(params->pppPool)); - os << ", "; - os << ".pImageFormat = "; + return os; +} - ur::details::printPtr(os, - *(params->ppImageFormat)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_retain_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_retain_params_t *params) { - os << ", "; - os << ".pImageDesc = "; + os << ".pPool = "; ur::details::printPtr(os, - *(params->ppImageDesc)); + *(params->ppPool)); - os << ", "; - os << ".hSampler = "; + return os; +} - ur::details::printPtr(os, - *(params->phSampler)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_release_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream 
&os, [[maybe_unused]] const struct ur_usm_pool_release_params_t *params) { - os << ", "; - os << ".phImage = "; + os << ".pPool = "; ur::details::printPtr(os, - *(params->pphImage)); + *(params->ppPool)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_copy_exp_params_t type +/// @brief Print operator for the ur_usm_pool_get_info_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_copy_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_get_info_params_t *params) { - os << ".hQueue = "; + os << ".hPool = "; ur::details::printPtr(os, - *(params->phQueue)); + *(params->phPool)); os << ", "; - os << ".pSrc = "; + os << ".propName = "; - ur::details::printPtr(os, - *(params->ppSrc)); + os << *(params->ppropName); os << ", "; - os << ".pDst = "; + os << ".propSize = "; - ur::details::printPtr(os, - *(params->ppDst)); + os << *(params->ppropSize); os << ", "; - os << ".pSrcImageDesc = "; - - ur::details::printPtr(os, - *(params->ppSrcImageDesc)); + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); os << ", "; - os << ".pDstImageDesc = "; + os << ".pPropSizeRet = "; ur::details::printPtr(os, - *(params->ppDstImageDesc)); + *(params->ppPropSizeRet)); - os << ", "; - os << ".pSrcImageFormat = "; + return os; +} - ur::details::printPtr(os, - *(params->ppSrcImageFormat)); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_pool_create_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_create_exp_params_t *params) { - os << ", "; - os << ".pDstImageFormat = "; + os << 
".hContext = "; ur::details::printPtr(os, - *(params->ppDstImageFormat)); + *(params->phContext)); os << ", "; - os << ".pCopyRegion = "; + os << ".hDevice = "; ur::details::printPtr(os, - *(params->ppCopyRegion)); - - os << ", "; - os << ".imageCopyFlags = "; - - ur::details::printFlag(os, - *(params->pimageCopyFlags)); - - os << ", "; - os << ".numEventsInWaitList = "; - - os << *(params->pnumEventsInWaitList); + *(params->phDevice)); os << ", "; - os << ".phEventWaitList = "; - ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); - if (*(params->pphEventWaitList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pPoolDesc = "; - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); - } - os << "}"; - } + ur::details::printPtr(os, + *(params->ppPoolDesc)); os << ", "; - os << ".phEvent = "; + os << ".ppPool = "; ur::details::printPtr(os, - *(params->pphEvent)); + *(params->pppPool)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_image_get_info_exp_params_t type +/// @brief Print operator for the ur_usm_pitched_alloc_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_get_info_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pitched_alloc_exp_params_t *params) { os << ".hContext = "; @@ -16808,36 +16815,58 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hImageMem = "; + os << ".hDevice = "; - ur::details::printPtr(os, reinterpret_cast( - *(params->phImageMem))); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".propName = "; + os << ".pUSMDesc = "; - os << *(params->ppropName); + 
ur::details::printPtr(os, + *(params->ppUSMDesc)); os << ", "; - os << ".pPropValue = "; + os << ".pool = "; ur::details::printPtr(os, - *(params->ppPropValue)); + *(params->ppool)); os << ", "; - os << ".pPropSizeRet = "; + os << ".widthInBytes = "; + + os << *(params->pwidthInBytes); + + os << ", "; + os << ".height = "; + + os << *(params->pheight); + + os << ", "; + os << ".elementSizeBytes = "; + + os << *(params->pelementSizeBytes); + + os << ", "; + os << ".ppMem = "; ur::details::printPtr(os, - *(params->ppPropSizeRet)); + *(params->pppMem)); + + os << ", "; + os << ".pResultPitch = "; + + ur::details::printPtr(os, + *(params->ppResultPitch)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_mipmap_get_level_exp_params_t type +/// @brief Print operator for the ur_usm_import_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_mipmap_get_level_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_import_exp_params_t *params) { os << ".hContext = "; @@ -16845,36 +16874,44 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".hDevice = "; + os << ".pMem = "; ur::details::printPtr(os, - *(params->phDevice)); + *(params->ppMem)); os << ", "; - os << ".hImageMem = "; + os << ".size = "; - ur::details::printPtr(os, reinterpret_cast( - *(params->phImageMem))); + os << *(params->psize); - os << ", "; - os << ".mipmapLevel = "; + return os; +} - os << *(params->pmipmapLevel); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_usm_release_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct 
ur_usm_release_exp_params_t *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, + *(params->phContext)); os << ", "; - os << ".phImageMem = "; + os << ".pMem = "; ur::details::printPtr(os, - *(params->pphImageMem)); + *(params->ppMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_mipmap_free_exp_params_t type +/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_mipmap_free_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *params) { os << ".hContext = "; @@ -16888,19 +16925,19 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".hMem = "; + os << ".hImage = "; ur::details::printPtr(os, reinterpret_cast( - *(params->phMem))); + *(params->phImage))); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_import_external_memory_exp_params_t type +/// @brief Print operator for the ur_bindless_images_sampled_image_handle_destroy_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_external_memory_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_sampled_image_handle_destroy_exp_params_t *params) { os << ".hContext = "; @@ -16914,35 +16951,19 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".size = "; - - os << *(params->psize); - - os << ", "; - 
os << ".memHandleType = "; - - os << *(params->pmemHandleType); - - os << ", "; - os << ".pExternalMemDesc = "; - - ur::details::printPtr(os, - *(params->ppExternalMemDesc)); - - os << ", "; - os << ".phExternalMem = "; + os << ".hImage = "; - ur::details::printPtr(os, - *(params->pphExternalMem)); + ur::details::printPtr(os, reinterpret_cast( + *(params->phImage))); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_map_external_array_exp_params_t type +/// @brief Print operator for the ur_bindless_images_image_allocate_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_map_external_array_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_allocate_exp_params_t *params) { os << ".hContext = "; @@ -16967,12 +16988,6 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur::details::printPtr(os, *(params->ppImageDesc)); - os << ", "; - os << ".hExternalMem = "; - - ur::details::printPtr(os, - *(params->phExternalMem)); - os << ", "; os << ".phImageMem = "; @@ -16983,10 +16998,10 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_map_external_linear_memory_exp_params_t type +/// @brief Print operator for the ur_bindless_images_image_free_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_map_external_linear_memory_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_free_exp_params_t *params) { os << ".hContext = "; @@ -17000,35 
+17015,19 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".offset = "; - - os << *(params->poffset); - - os << ", "; - os << ".size = "; - - os << *(params->psize); - - os << ", "; - os << ".hExternalMem = "; - - ur::details::printPtr(os, - *(params->phExternalMem)); - - os << ", "; - os << ".ppRetMem = "; + os << ".hImageMem = "; - ur::details::printPtr(os, - *(params->pppRetMem)); + ur::details::printPtr(os, reinterpret_cast( + *(params->phImageMem))); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_release_external_memory_exp_params_t type +/// @brief Print operator for the ur_bindless_images_unsampled_image_create_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_release_external_memory_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_unsampled_image_create_exp_params_t *params) { os << ".hContext = "; @@ -17042,19 +17041,37 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".hExternalMem = "; + os << ".hImageMem = "; + + ur::details::printPtr(os, reinterpret_cast( + *(params->phImageMem))); + + os << ", "; + os << ".pImageFormat = "; ur::details::printPtr(os, - *(params->phExternalMem)); + *(params->ppImageFormat)); + + os << ", "; + os << ".pImageDesc = "; + + ur::details::printPtr(os, + *(params->ppImageDesc)); + + os << ", "; + os << ".phImage = "; + + ur::details::printPtr(os, + *(params->pphImage)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_import_external_semaphore_exp_params_t type +/// @brief Print operator for the 
ur_bindless_images_sampled_image_create_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_external_semaphore_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_sampled_image_create_exp_params_t *params) { os << ".hContext = "; @@ -17068,56 +17085,43 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".semHandleType = "; + os << ".hImageMem = "; - os << *(params->psemHandleType); + ur::details::printPtr(os, reinterpret_cast( + *(params->phImageMem))); os << ", "; - os << ".pExternalSemaphoreDesc = "; + os << ".pImageFormat = "; ur::details::printPtr(os, - *(params->ppExternalSemaphoreDesc)); + *(params->ppImageFormat)); os << ", "; - os << ".phExternalSemaphore = "; - - ur::details::printPtr(os, - *(params->pphExternalSemaphore)); - - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_release_external_semaphore_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_release_external_semaphore_exp_params_t *params) { - - os << ".hContext = "; + os << ".pImageDesc = "; ur::details::printPtr(os, - *(params->phContext)); + *(params->ppImageDesc)); os << ", "; - os << ".hDevice = "; + os << ".hSampler = "; ur::details::printPtr(os, - *(params->phDevice)); + *(params->phSampler)); os << ", "; - os << ".hExternalSemaphore = "; + os << ".phImage = "; ur::details::printPtr(os, - *(params->phExternalSemaphore)); + *(params->pphImage)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_wait_external_semaphore_exp_params_t type +/// @brief 
Print operator for the ur_bindless_images_image_copy_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_wait_external_semaphore_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_copy_exp_params_t *params) { os << ".hQueue = "; @@ -17125,77 +17129,52 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phQueue)); os << ", "; - os << ".hSemaphore = "; + os << ".pSrc = "; ur::details::printPtr(os, - *(params->phSemaphore)); - - os << ", "; - os << ".hasWaitValue = "; - - os << *(params->phasWaitValue); - - os << ", "; - os << ".waitValue = "; - - os << *(params->pwaitValue); + *(params->ppSrc)); os << ", "; - os << ".numEventsInWaitList = "; + os << ".pDst = "; - os << *(params->pnumEventsInWaitList); + ur::details::printPtr(os, + *(params->ppDst)); os << ", "; - os << ".phEventWaitList = "; - ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); - if (*(params->pphEventWaitList) != NULL) { - os << " {"; - for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { - if (i != 0) { - os << ", "; - } + os << ".pSrcImageDesc = "; - ur::details::printPtr(os, - (*(params->pphEventWaitList))[i]); - } - os << "}"; - } + ur::details::printPtr(os, + *(params->ppSrcImageDesc)); os << ", "; - os << ".phEvent = "; + os << ".pDstImageDesc = "; ur::details::printPtr(os, - *(params->pphEvent)); - - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_bindless_images_signal_external_semaphore_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_signal_external_semaphore_exp_params_t *params) { + *(params->ppDstImageDesc)); - os << ".hQueue = "; + os << ", "; + os << 
".pSrcImageFormat = "; ur::details::printPtr(os, - *(params->phQueue)); + *(params->ppSrcImageFormat)); os << ", "; - os << ".hSemaphore = "; + os << ".pDstImageFormat = "; ur::details::printPtr(os, - *(params->phSemaphore)); + *(params->ppDstImageFormat)); os << ", "; - os << ".hasSignalValue = "; + os << ".pCopyRegion = "; - os << *(params->phasSignalValue); + ur::details::printPtr(os, + *(params->ppCopyRegion)); os << ", "; - os << ".signalValue = "; + os << ".imageCopyFlags = "; - os << *(params->psignalValue); + ur::details::printFlag(os, + *(params->pimageCopyFlags)); os << ", "; os << ".numEventsInWaitList = "; @@ -17228,10 +17207,10 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_host_alloc_params_t type +/// @brief Print operator for the ur_bindless_images_image_get_info_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_host_alloc_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_image_get_info_exp_params_t *params) { os << ".hContext = "; @@ -17239,36 +17218,36 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".pUSMDesc = "; + os << ".hImageMem = "; - ur::details::printPtr(os, - *(params->ppUSMDesc)); + ur::details::printPtr(os, reinterpret_cast( + *(params->phImageMem))); os << ", "; - os << ".pool = "; + os << ".propName = "; - ur::details::printPtr(os, - *(params->ppool)); + os << *(params->ppropName); os << ", "; - os << ".size = "; + os << ".pPropValue = "; - os << *(params->psize); + ur::details::printPtr(os, + *(params->ppPropValue)); os << ", "; - os << ".ppMem = "; + os << ".pPropSizeRet = "; ur::details::printPtr(os, - *(params->pppMem)); + 
*(params->ppPropSizeRet)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_device_alloc_params_t type +/// @brief Print operator for the ur_bindless_images_mipmap_get_level_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_device_alloc_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_mipmap_get_level_exp_params_t *params) { os << ".hContext = "; @@ -17282,36 +17261,30 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".pUSMDesc = "; - - ur::details::printPtr(os, - *(params->ppUSMDesc)); - - os << ", "; - os << ".pool = "; + os << ".hImageMem = "; - ur::details::printPtr(os, - *(params->ppool)); + ur::details::printPtr(os, reinterpret_cast( + *(params->phImageMem))); os << ", "; - os << ".size = "; + os << ".mipmapLevel = "; - os << *(params->psize); + os << *(params->pmipmapLevel); os << ", "; - os << ".ppMem = "; + os << ".phImageMem = "; ur::details::printPtr(os, - *(params->pppMem)); + *(params->pphImageMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_shared_alloc_params_t type +/// @brief Print operator for the ur_bindless_images_mipmap_free_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_shared_alloc_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_mipmap_free_exp_params_t *params) { os << ".hContext = "; @@ -17325,16 +17298,30 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".pUSMDesc = "; + os << ".hMem = "; + + 
ur::details::printPtr(os, reinterpret_cast( + *(params->phMem))); + + return os; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_import_external_memory_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_external_memory_exp_params_t *params) { + + os << ".hContext = "; ur::details::printPtr(os, - *(params->ppUSMDesc)); + *(params->phContext)); os << ", "; - os << ".pool = "; + os << ".hDevice = "; ur::details::printPtr(os, - *(params->ppool)); + *(params->phDevice)); os << ", "; os << ".size = "; @@ -17342,39 +17329,30 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct os << *(params->psize); os << ", "; - os << ".ppMem = "; - - ur::details::printPtr(os, - *(params->pppMem)); - - return os; -} + os << ".memHandleType = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_free_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_free_params_t *params) { + os << *(params->pmemHandleType); - os << ".hContext = "; + os << ", "; + os << ".pExternalMemDesc = "; ur::details::printPtr(os, - *(params->phContext)); + *(params->ppExternalMemDesc)); os << ", "; - os << ".pMem = "; + os << ".phExternalMem = "; ur::details::printPtr(os, - *(params->ppMem)); + *(params->pphExternalMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_get_mem_alloc_info_params_t type +/// @brief Print operator for the ur_bindless_images_map_external_array_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_get_mem_alloc_info_params_t *params) { 
+inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_map_external_array_exp_params_t *params) { os << ".hContext = "; @@ -17382,127 +17360,148 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phContext)); os << ", "; - os << ".pMem = "; + os << ".hDevice = "; ur::details::printPtr(os, - *(params->ppMem)); + *(params->phDevice)); os << ", "; - os << ".propName = "; + os << ".pImageFormat = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->ppImageFormat)); os << ", "; - os << ".propSize = "; + os << ".pImageDesc = "; - os << *(params->ppropSize); + ur::details::printPtr(os, + *(params->ppImageDesc)); os << ", "; - os << ".pPropValue = "; - ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + os << ".hExternalMem = "; + + ur::details::printPtr(os, + *(params->phExternalMem)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phImageMem = "; ur::details::printPtr(os, - *(params->ppPropSizeRet)); + *(params->pphImageMem)); return os; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_create_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_create_params_t *params) { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_map_external_linear_memory_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_map_external_linear_memory_exp_params_t *params) { + + os << ".hContext = "; + + ur::details::printPtr(os, + *(params->phContext)); + + os << ", "; + os << ".hDevice = "; + + ur::details::printPtr(os, + *(params->phDevice)); + + os << ", "; + os << ".offset = "; + + os << 
*(params->poffset); - os << ".hContext = "; + os << ", "; + os << ".size = "; - ur::details::printPtr(os, - *(params->phContext)); + os << *(params->psize); os << ", "; - os << ".pPoolDesc = "; + os << ".hExternalMem = "; ur::details::printPtr(os, - *(params->ppPoolDesc)); + *(params->phExternalMem)); os << ", "; - os << ".ppPool = "; + os << ".ppRetMem = "; ur::details::printPtr(os, - *(params->pppPool)); + *(params->pppRetMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_retain_params_t type +/// @brief Print operator for the ur_bindless_images_release_external_memory_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_retain_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_release_external_memory_exp_params_t *params) { - os << ".pPool = "; + os << ".hContext = "; ur::details::printPtr(os, - *(params->ppPool)); + *(params->phContext)); - return os; -} + os << ", "; + os << ".hDevice = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_release_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pool_release_params_t *params) { + ur::details::printPtr(os, + *(params->phDevice)); - os << ".pPool = "; + os << ", "; + os << ".hExternalMem = "; ur::details::printPtr(os, - *(params->ppPool)); + *(params->phExternalMem)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pool_get_info_params_t type +/// @brief Print operator for the ur_bindless_images_import_external_semaphore_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream 
&os, [[maybe_unused]] const struct ur_usm_pool_get_info_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_import_external_semaphore_exp_params_t *params) { - os << ".hPool = "; + os << ".hContext = "; ur::details::printPtr(os, - *(params->phPool)); + *(params->phContext)); os << ", "; - os << ".propName = "; + os << ".hDevice = "; - os << *(params->ppropName); + ur::details::printPtr(os, + *(params->phDevice)); os << ", "; - os << ".propSize = "; + os << ".semHandleType = "; - os << *(params->ppropSize); + os << *(params->psemHandleType); os << ", "; - os << ".pPropValue = "; - ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + os << ".pExternalSemaphoreDesc = "; + + ur::details::printPtr(os, + *(params->ppExternalSemaphoreDesc)); os << ", "; - os << ".pPropSizeRet = "; + os << ".phExternalSemaphore = "; ur::details::printPtr(os, - *(params->ppPropSizeRet)); + *(params->pphExternalSemaphore)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_pitched_alloc_exp_params_t type +/// @brief Print operator for the ur_bindless_images_release_external_semaphore_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_pitched_alloc_exp_params_t *params) { +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_release_external_semaphore_exp_params_t *params) { os << ".hContext = "; @@ -17516,88 +17515,124 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct *(params->phDevice)); os << ", "; - os << ".pUSMDesc = "; + os << ".hExternalSemaphore = "; ur::details::printPtr(os, - *(params->ppUSMDesc)); + *(params->phExternalSemaphore)); + + return os; +} + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_bindless_images_wait_external_semaphore_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_wait_external_semaphore_exp_params_t *params) { + + os << ".hQueue = "; + + ur::details::printPtr(os, + *(params->phQueue)); os << ", "; - os << ".pool = "; + os << ".hSemaphore = "; ur::details::printPtr(os, - *(params->ppool)); + *(params->phSemaphore)); os << ", "; - os << ".widthInBytes = "; + os << ".hasWaitValue = "; - os << *(params->pwidthInBytes); + os << *(params->phasWaitValue); os << ", "; - os << ".height = "; + os << ".waitValue = "; - os << *(params->pheight); + os << *(params->pwaitValue); os << ", "; - os << ".elementSizeBytes = "; + os << ".numEventsInWaitList = "; - os << *(params->pelementSizeBytes); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".ppMem = "; + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } - ur::details::printPtr(os, - *(params->pppMem)); + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + } os << ", "; - os << ".pResultPitch = "; + os << ".phEvent = "; ur::details::printPtr(os, - *(params->ppResultPitch)); + *(params->pphEvent)); return os; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_import_exp_params_t type +/// @brief Print operator for the ur_bindless_images_signal_external_semaphore_exp_params_t type /// @returns /// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_import_exp_params_t *params) { +inline std::ostream 
&operator<<(std::ostream &os, [[maybe_unused]] const struct ur_bindless_images_signal_external_semaphore_exp_params_t *params) { - os << ".hContext = "; + os << ".hQueue = "; ur::details::printPtr(os, - *(params->phContext)); + *(params->phQueue)); os << ", "; - os << ".pMem = "; + os << ".hSemaphore = "; ur::details::printPtr(os, - *(params->ppMem)); + *(params->phSemaphore)); os << ", "; - os << ".size = "; + os << ".hasSignalValue = "; - os << *(params->psize); + os << *(params->phasSignalValue); - return os; -} + os << ", "; + os << ".signalValue = "; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_usm_release_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_usm_release_exp_params_t *params) { + os << *(params->psignalValue); - os << ".hContext = "; + os << ", "; + os << ".numEventsInWaitList = "; - ur::details::printPtr(os, - *(params->phContext)); + os << *(params->pnumEventsInWaitList); os << ", "; - os << ".pMem = "; + os << ".phEventWaitList = "; + ur::details::printPtr(os, reinterpret_cast(*(params->pphEventWaitList))); + if (*(params->pphEventWaitList) != NULL) { + os << " {"; + for (size_t i = 0; i < *params->pnumEventsInWaitList; ++i) { + if (i != 0) { + os << ", "; + } + + ur::details::printPtr(os, + (*(params->pphEventWaitList))[i]); + } + os << "}"; + } + + os << ", "; + os << ".phEvent = "; ur::details::printPtr(os, - *(params->ppMem)); + *(params->pphEvent)); return os; } @@ -20301,6 +20336,45 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_ENQUEUE_NATIVE_COMMAND_EXP: { os << (const struct ur_enqueue_native_command_exp_params_t *)params; } break; + case UR_FUNCTION_USM_HOST_ALLOC: { + os << (const struct ur_usm_host_alloc_params_t *)params; + } break; + case UR_FUNCTION_USM_DEVICE_ALLOC: { + os << (const struct ur_usm_device_alloc_params_t 
*)params; + } break; + case UR_FUNCTION_USM_SHARED_ALLOC: { + os << (const struct ur_usm_shared_alloc_params_t *)params; + } break; + case UR_FUNCTION_USM_FREE: { + os << (const struct ur_usm_free_params_t *)params; + } break; + case UR_FUNCTION_USM_GET_MEM_ALLOC_INFO: { + os << (const struct ur_usm_get_mem_alloc_info_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_CREATE: { + os << (const struct ur_usm_pool_create_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_RETAIN: { + os << (const struct ur_usm_pool_retain_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_RELEASE: { + os << (const struct ur_usm_pool_release_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_GET_INFO: { + os << (const struct ur_usm_pool_get_info_params_t *)params; + } break; + case UR_FUNCTION_USM_POOL_CREATE_EXP: { + os << (const struct ur_usm_pool_create_exp_params_t *)params; + } break; + case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: { + os << (const struct ur_usm_pitched_alloc_exp_params_t *)params; + } break; + case UR_FUNCTION_USM_IMPORT_EXP: { + os << (const struct ur_usm_import_exp_params_t *)params; + } break; + case UR_FUNCTION_USM_RELEASE_EXP: { + os << (const struct ur_usm_release_exp_params_t *)params; + } break; case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: { os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params; } break; @@ -20355,42 +20429,6 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP: { os << (const struct ur_bindless_images_signal_external_semaphore_exp_params_t *)params; } break; - case UR_FUNCTION_USM_HOST_ALLOC: { - os << (const struct ur_usm_host_alloc_params_t *)params; - } break; - case UR_FUNCTION_USM_DEVICE_ALLOC: { - os << (const struct ur_usm_device_alloc_params_t *)params; - } break; - case UR_FUNCTION_USM_SHARED_ALLOC: { - os << (const struct ur_usm_shared_alloc_params_t 
*)params; - } break; - case UR_FUNCTION_USM_FREE: { - os << (const struct ur_usm_free_params_t *)params; - } break; - case UR_FUNCTION_USM_GET_MEM_ALLOC_INFO: { - os << (const struct ur_usm_get_mem_alloc_info_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_CREATE: { - os << (const struct ur_usm_pool_create_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_RETAIN: { - os << (const struct ur_usm_pool_retain_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_RELEASE: { - os << (const struct ur_usm_pool_release_params_t *)params; - } break; - case UR_FUNCTION_USM_POOL_GET_INFO: { - os << (const struct ur_usm_pool_get_info_params_t *)params; - } break; - case UR_FUNCTION_USM_PITCHED_ALLOC_EXP: { - os << (const struct ur_usm_pitched_alloc_exp_params_t *)params; - } break; - case UR_FUNCTION_USM_IMPORT_EXP: { - os << (const struct ur_usm_import_exp_params_t *)params; - } break; - case UR_FUNCTION_USM_RELEASE_EXP: { - os << (const struct ur_usm_release_exp_params_t *)params; - } break; case UR_FUNCTION_COMMAND_BUFFER_CREATE_EXP: { os << (const struct ur_command_buffer_create_exp_params_t *)params; } break; diff --git a/scripts/core/exp-async-alloc.yml b/scripts/core/exp-async-alloc.yml index 3fd24ce587..39c129580f 100644 --- a/scripts/core/exp-async-alloc.yml +++ b/scripts/core/exp-async-alloc.yml @@ -224,3 +224,31 @@ returns: - $X_RESULT_ERROR_INVALID_NULL_HANDLE - $X_RESULT_ERROR_INVALID_NULL_POINTER - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST +--- #-------------------------------------------------------------------------- +type: function +desc: "Create USM memory pool with desired properties." +class: $xUSM +name: PoolCreateExp +ordinal: "0" +details: + - "Create a memory pool associated with a single device." + - "See also $xUSMPoolCreate and $x_usm_pool_limits_desc_t."
+params: + - type: $x_context_handle_t + name: hContext + desc: "[in] handle of the context object" + - type: $x_device_handle_t + name: hDevice + desc: "[in] handle of the device object" + - type: $x_usm_pool_desc_t* + name: pPoolDesc + desc: "[in] pointer to USM pool descriptor. Can be chained with $x_usm_pool_limits_desc_t" + - type: $x_usm_pool_handle_t* + name: ppPool + desc: "[out] pointer to USM memory pool" +returns: + - $X_RESULT_ERROR_INVALID_NULL_HANDLE + - $X_RESULT_ERROR_INVALID_VALUE + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: + - "If any device associated with `hContext` reports `false` for $X_DEVICE_INFO_USM_POOL_SUPPORT" diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index abab44668e..ad4ec2a51b 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -628,6 +628,9 @@ etors: - name: ENQUEUE_USM_FREE_EXP desc: Enumerator for $xEnqueueUSMFreeExp value: '253' +- name: USM_POOL_CREATE_EXP + desc: Enumerator for $xUSMPoolCreateExp + value: '254' --- type: enum desc: Defines structure types diff --git a/source/adapters/cuda/async_alloc.cpp b/source/adapters/cuda/async_alloc.cpp index 1cc48a80d2..ea02bdcf0c 100644 --- a/source/adapters/cuda/async_alloc.cpp +++ b/source/adapters/cuda/async_alloc.cpp @@ -16,94 +16,132 @@ #include "queue.hpp" #include "usm.hpp" -UR_APIEXPORT ur_result_t urEnqueueUSMDeviceAllocExp( +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMDeviceAllocExp( ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, const ur_exp_async_usm_alloc_properties_t *, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, void **ppMem, - ur_event_handle_t *phEvent) { - try { - std::unique_ptr RetImplEvent{nullptr}; - - ScopedContext Active(hQueue->getDevice()); - uint32_t StreamToken; - ur_stream_guard_ Guard; - CUstream CuStream = hQueue->getNextComputeStream( - numEventsInWaitList, phEventWaitList, Guard, &StreamToken); - - 
UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, - phEventWaitList)); - - if (phEvent) { - RetImplEvent = - std::unique_ptr(ur_event_handle_t_::makeNative( - UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken)); - UR_CHECK_ERROR(RetImplEvent->start()); - } - - if (pPool) { - assert(pPool->usesCudaPool()); - UR_CHECK_ERROR( - cuMemAllocFromPoolAsync(reinterpret_cast(ppMem), size, - pPool->getCudaPool(), CuStream)); - } else { - UR_CHECK_ERROR(cuMemAllocAsync(reinterpret_cast(ppMem), - size, CuStream)); - } - - if (phEvent) { - UR_CHECK_ERROR(RetImplEvent->record()); - *phEvent = RetImplEvent.release(); - } - - } catch (ur_result_t Err) { - return Err; + ur_event_handle_t *phEvent) try { + std::unique_ptr RetImplEvent{nullptr}; + + ScopedContext Active(hQueue->getDevice()); + uint32_t StreamToken; + ur_stream_guard_ Guard; + CUstream CuStream = hQueue->getNextComputeStream( + numEventsInWaitList, phEventWaitList, Guard, &StreamToken); + + UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, + phEventWaitList)); + + if (phEvent) { + RetImplEvent = + std::unique_ptr(ur_event_handle_t_::makeNative( + UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken)); + UR_CHECK_ERROR(RetImplEvent->start()); + } + + if (pPool) { + assert(pPool->usesCudaPool()); + UR_CHECK_ERROR( + cuMemAllocFromPoolAsync(reinterpret_cast(ppMem), size, + pPool->getCudaPool(), CuStream)); + } else { + UR_CHECK_ERROR(cuMemAllocAsync(reinterpret_cast(ppMem), size, + CuStream)); } + + if (phEvent) { + UR_CHECK_ERROR(RetImplEvent->record()); + *phEvent = RetImplEvent.release(); + } + return UR_RESULT_SUCCESS; +} catch (ur_result_t Err) { + return Err; } -UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( - ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, - const ur_exp_async_usm_alloc_properties_t *, uint32_t, - const ur_event_handle_t *, void **, ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +UR_APIEXPORT ur_result_t 
UR_APICALL urEnqueueUSMHostAllocExp( + ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, + const ur_exp_async_usm_alloc_properties_t *, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, void **ppMem, + ur_event_handle_t *phEvent) try { + std::unique_ptr RetImplEvent{nullptr}; + + ScopedContext Active(hQueue->getDevice()); + uint32_t StreamToken; + ur_stream_guard_ Guard; + CUstream CuStream = hQueue->getNextComputeStream( + numEventsInWaitList, phEventWaitList, Guard, &StreamToken); + + UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, + phEventWaitList)); + + if (phEvent) { + RetImplEvent = + std::unique_ptr(ur_event_handle_t_::makeNative( + UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken)); + UR_CHECK_ERROR(RetImplEvent->start()); + } + + if (pPool) { + assert(pPool->usesCudaPool()); + assert(pPool->usesCudaHostPool() && "Memory pool does not use" + " host mem pooling"); + UR_CHECK_ERROR( + cuMemAllocFromPoolAsync(reinterpret_cast(ppMem), size, + pPool->getCudaPool(), CuStream)); + } else { + // Can only use host async allocations if pool is created in advance + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + if (phEvent) { + UR_CHECK_ERROR(RetImplEvent->record()); + *phEvent = RetImplEvent.release(); + } + + return UR_RESULT_SUCCESS; +} catch (ur_result_t Err) { + return Err; } -UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( +// CUDA doesn't support memory pools with CUDA managed mem +UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMSharedAllocExp( ur_queue_handle_t, ur_usm_pool_handle_t, const size_t, const ur_exp_async_usm_alloc_properties_t *, uint32_t, const ur_event_handle_t *, void **, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t, - ur_usm_pool_handle_t, void *, - uint32_t, - const ur_event_handle_t *, - ur_event_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -/* 
+UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFreeExp( + ur_queue_handle_t hQueue, [[maybe_unused]] ur_usm_pool_handle_t pPool, + void *pMem, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) try { + std::unique_ptr RetImplEvent{nullptr}; -UR_APIEXPORT ur_result_t urEnqueueUSMSharedAllocExp( - ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, - const ur_exp_async_usm_alloc_properties_t *pProperties, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - void **ppMem, ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} + ScopedContext Active(hQueue->getDevice()); + uint32_t StreamToken; + ur_stream_guard_ Guard; + CUstream CuStream = hQueue->getNextComputeStream( + numEventsInWaitList, phEventWaitList, Guard, &StreamToken); -UR_APIEXPORT ur_result_t urEnqueueUSMHostAllocExp( - ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, const size_t size, - const ur_exp_async_usm_alloc_properties_t *pProperties, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - void **ppMem, ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} + UR_CHECK_ERROR(enqueueEventsWait(hQueue, CuStream, numEventsInWaitList, + phEventWaitList)); -UR_APIEXPORT ur_result_t urEnqueueUSMFreeExp( - ur_queue_handle_t hQueue, ur_usm_pool_handle_t pPool, void *pMem, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + if (phEvent) { + RetImplEvent = + std::unique_ptr(ur_event_handle_t_::makeNative( + UR_COMMAND_KERNEL_LAUNCH, hQueue, CuStream, StreamToken)); + UR_CHECK_ERROR(RetImplEvent->start()); + } + + UR_CHECK_ERROR(cuMemFreeAsync(reinterpret_cast(pMem), CuStream)); + + if (phEvent) { + UR_CHECK_ERROR(RetImplEvent->record()); + *phEvent = RetImplEvent.release(); + } + + return UR_RESULT_SUCCESS; +} catch (ur_result_t Err) { + 
return Err; } -*/ diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index ad0d775be0..3f791e07e7 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -242,6 +242,7 @@ urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { pDdiTable->pfnGetMemAllocInfo = urUSMGetMemAllocInfo; pDdiTable->pfnHostAlloc = urUSMHostAlloc; pDdiTable->pfnPoolCreate = urUSMPoolCreate; + pDdiTable->pfnPoolCreate = urUSMPoolCreate; pDdiTable->pfnPoolRetain = urUSMPoolRetain; pDdiTable->pfnPoolRelease = urUSMPoolRelease; pDdiTable->pfnPoolGetInfo = urUSMPoolGetInfo; @@ -364,6 +365,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( return result; } pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; + pDdiTable->pfnPoolCreateExp = urUSMPoolCreateExp; return UR_RESULT_SUCCESS; } @@ -418,6 +420,10 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( pDdiTable->pfnTimestampRecordingExp = urEnqueueTimestampRecordingExp; pDdiTable->pfnKernelLaunchCustomExp = urEnqueueKernelLaunchCustomExp; pDdiTable->pfnNativeCommandExp = urEnqueueNativeCommandExp; + pDdiTable->pfnUSMDeviceAllocExp = urEnqueueUSMDeviceAllocExp; + pDdiTable->pfnUSMSharedAllocExp = urEnqueueUSMSharedAllocExp; + pDdiTable->pfnUSMHostAllocExp = urEnqueueUSMHostAllocExp; + pDdiTable->pfnUSMFreeExp = urEnqueueUSMFreeExp; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 2849981125..a41aacadc5 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -379,6 +379,7 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) : Context{Context} { + CUmemPoolProps MemPoolProps{}; // Used if native mem pools are used const void *pNext = PoolDesc->pNext; while (pNext != 
nullptr) { const ur_base_desc_t *BaseDesc = static_cast(pNext); @@ -386,9 +387,13 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, case UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC: { const ur_usm_pool_limits_desc_t *Limits = reinterpret_cast(BaseDesc); - for (auto &config : DisjointPoolConfigs.Configs) { - config.MaxPoolableSize = Limits->maxPoolableSize; - config.SlabMinSize = Limits->minDriverAllocSize; + if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { + MemPoolProps.maxSize = Limits->maxPoolableSize; + } else { + for (auto &config : DisjointPoolConfigs.Configs) { + config.MaxPoolableSize = Limits->maxPoolableSize; + config.SlabMinSize = Limits->minDriverAllocSize; + } } break; } @@ -399,6 +404,14 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, pNext = BaseDesc->pNext; } + if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP) { + MemPoolProps.allocType = CU_MEM_ALLOCATION_TYPE_PINNED; + MemPoolProps.location.type = CU_MEM_LOCATION_TYPE_HOST; // Alternatives are: + UR_CHECK_ERROR(cuMemPoolCreate(&CUmemPool, &MemPoolProps)); + CUHostMemPool = true; + return; + } + auto MemProvider = umf::memoryProviderMakeUnique(Context, nullptr) .second; @@ -440,7 +453,7 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, if (!(PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP)) throw; - CUmemPoolProps MemPoolProps; + CUmemPoolProps MemPoolProps{}; const void *pNext = PoolDesc->pNext; while (pNext != nullptr) { @@ -464,22 +477,11 @@ ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, // TODO: what flags should be used here. Moreover what flags should have // UR counterparts? - MemPoolProps.allocType = - CU_MEM_ALLOCATION_TYPE_PINNED; // Is this valid? CUDA docs say: - // - // "This allocation type is 'pinned', i.e. 
- // cannot migrate from its current - // location while the application is - // actively using it" - // - // Alternatives are *_INVALID (default) and - // *_MAX. - MemPoolProps.location.id = Device->getIndex(); // Docs are not clear on what - // this id is for. I am - // assuming it is used for - // device id. I have made a - // forum post here: - // https://forums.developer.nvidia.com/t/incomplete-description-in-cumemlocation-v1-struct-reference/318701 + MemPoolProps.allocType = CU_MEM_ALLOCATION_TYPE_PINNED; + MemPoolProps.location.id = + Device + ->getIndex(); // Clarification of what id means here: + // https://forums.developer.nvidia.com/t/incomplete-description-in-cumemlocation-v1-struct-reference/318701 MemPoolProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; // Alternatives are: // HOST, HOST_NUMA and HOST_NUMA_CURRENT @@ -498,28 +500,32 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreate( ///< ::ur_usm_pool_limits_desc_t ur_usm_pool_handle_t *Pool ///< [out] pointer to USM memory pool ) { - // Without pool tracking we can't free pool allocations. -#ifdef UMF_ENABLE_POOL_TRACKING if (PoolDesc->flags & UR_USM_POOL_FLAG_ZERO_INITIALIZE_BLOCK) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - try { - *Pool = reinterpret_cast( - new ur_usm_pool_handle_t_(Context, PoolDesc)); - } catch (const UsmAllocationException &Ex) { - return Ex.getError(); - } catch (umf_result_t e) { - return umf::umf2urResult(e); - } catch (...) { - return UR_RESULT_ERROR_UNKNOWN; - } - return UR_RESULT_SUCCESS; + // Host mem pool using CUDA entrypoint + if (PoolDesc->flags & UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP || + // Without pool tracking we can't free pool allocations. 
+#ifdef UMF_ENABLE_POOL_TRACKING + // UMF mem pool + true #else - std::ignore = Context; - std::ignore = PoolDesc; - std::ignore = Pool; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + false #endif + ) { + try { + *Pool = reinterpret_cast( + new ur_usm_pool_handle_t_(Context, PoolDesc)); + } catch (const UsmAllocationException &Ex) { + return Ex.getError(); + } catch (umf_result_t e) { + return umf::umf2urResult(e); + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + return UR_RESULT_SUCCESS; + } + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urUSMPoolCreateExp( diff --git a/source/adapters/cuda/usm.hpp b/source/adapters/cuda/usm.hpp index e154b3439b..2b97dc9d7f 100644 --- a/source/adapters/cuda/usm.hpp +++ b/source/adapters/cuda/usm.hpp @@ -31,6 +31,7 @@ struct ur_usm_pool_handle_t_ { umf::pool_unique_handle_t HostMemPool; CUmemoryPool CUmemPool{0}; + bool CUHostMemPool = false; ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc); @@ -49,6 +50,7 @@ struct ur_usm_pool_handle_t_ { // To be used if ur_usm_pool_handle_t represents a CUmemoryPool bool usesCudaPool() const { return CUmemPool != CUmemoryPool{0}; }; + bool usesCudaHostPool() const { return CUHostMemPool; }; CUmemoryPool getCudaPool() { return CUmemPool; }; }; diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index ea0a16d562..77157e2c38 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -468,6 +468,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( return result; } + pDdiTable->pfnPoolCreateExp = ur::level_zero::urUSMPoolCreateExp; pDdiTable->pfnPitchedAllocExp = ur::level_zero::urUSMPitchedAllocExp; pDdiTable->pfnImportExp = ur::level_zero::urUSMImportExp; pDdiTable->pfnReleaseExp = ur::level_zero::urUSMReleaseExp; diff --git a/source/adapters/level_zero/ur_interface_loader.hpp 
b/source/adapters/level_zero/ur_interface_loader.hpp index 9188abeff6..e6d120ec08 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -485,6 +485,10 @@ ur_result_t urEnqueueUSMFreeExp(ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); +ur_result_t urUSMPoolCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + ur_usm_pool_desc_t *pPoolDesc, + ur_usm_pool_handle_t *ppPool); ur_result_t urUSMPitchedAllocExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_usm_desc_t *pUSMDesc, diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 81893aeadb..35735c768b 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -7402,6 +7402,55 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMPoolCreateExp +__urdlllocal ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t * + pPoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + ur_usm_pool_create_exp_params_t params = {&hContext, &hDevice, &pPoolDesc, + &ppPool}; + + auto beforeCallback = reinterpret_cast( + mock::getCallbacks().get_before_callback("urUSMPoolCreateExp")); + if (beforeCallback) { + result = beforeCallback(¶ms); + if (result != UR_RESULT_SUCCESS) { + return result; + } + } + + auto replaceCallback = reinterpret_cast( + mock::getCallbacks().get_replace_callback("urUSMPoolCreateExp")); + if (replaceCallback) { + result = replaceCallback(¶ms); + } else { + + *ppPool = mock::createDummyHandle(); + result = UR_RESULT_SUCCESS; + } + + if (result != UR_RESULT_SUCCESS) { + return result; + } + + auto afterCallback = reinterpret_cast( + mock::getCallbacks().get_after_callback("urUSMPoolCreateExp")); + if (afterCallback) { + return afterCallback(¶ms); + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -12141,6 +12190,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; + pDdiTable->pfnPoolCreateExp = driver::urUSMPoolCreateExp; + pDdiTable->pfnPitchedAllocExp = driver::urUSMPitchedAllocExp; pDdiTable->pfnImportExp = driver::urUSMImportExp; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 5fa3c65fa0..47fc03763b 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6290,6 +6290,46 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMPoolCreateExp +__urdlllocal ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t * + pPoolDesc, ///< [in] pointer to USM pool descriptor. 
Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool +) { + auto pfnPoolCreateExp = getContext()->urDdiTable.USMExp.pfnPoolCreateExp; + + if (nullptr == pfnPoolCreateExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_usm_pool_create_exp_params_t params = {&hContext, &hDevice, &pPoolDesc, + &ppPool}; + uint64_t instance = getContext()->notify_begin( + UR_FUNCTION_USM_POOL_CREATE_EXP, "urUSMPoolCreateExp", &params); + + auto &logger = getContext()->logger; + logger.info(" ---> urUSMPoolCreateExp\n"); + + ur_result_t result = pfnPoolCreateExp(hContext, hDevice, pPoolDesc, ppPool); + + getContext()->notify_end(UR_FUNCTION_USM_POOL_CREATE_EXP, + "urUSMPoolCreateExp", &params, &result, instance); + + if (logger.getLevel() <= logger::Level::INFO) { + std::ostringstream args_str; + ur::extras::printFunctionParams( + args_str, UR_FUNCTION_USM_POOL_CREATE_EXP, &params); + logger.info(" <--- urUSMPoolCreateExp({}) -> {};\n", args_str.str(), + result); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -10835,6 +10875,9 @@ __urdlllocal ur_result_t UR_APICALL urGetUSMExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; + dditable.pfnPoolCreateExp = pDdiTable->pfnPoolCreateExp; + pDdiTable->pfnPoolCreateExp = ur_tracing_layer::urUSMPoolCreateExp; + dditable.pfnPitchedAllocExp = pDdiTable->pfnPitchedAllocExp; pDdiTable->pfnPitchedAllocExp = ur_tracing_layer::urUSMPitchedAllocExp; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 8706aaebdd..d130f3ecb2 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7176,6 +7176,63 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( return result; } 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMPoolCreateExp +__urdlllocal ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t * + pPoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool +) { + auto pfnPoolCreateExp = getContext()->urDdiTable.USMExp.pfnPoolCreateExp; + + if (nullptr == pfnPoolCreateExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (getContext()->enableParameterValidation) { + if (NULL == hContext) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == hDevice) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (NULL == pPoolDesc) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (NULL == ppPool) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_USM_POOL_FLAGS_MASK & pPoolDesc->flags) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hContext)) { + getContext()->refCountContext->logInvalidReference(hContext); + } + + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hDevice)) { + getContext()->refCountContext->logInvalidReference(hDevice); + } + + ur_result_t result = pfnPoolCreateExp(hContext, hDevice, pPoolDesc, ppPool); + + if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS) { + getContext()->refCountContext->createRefCount(*ppPool); + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -12018,6 +12075,9 @@ UR_DLLEXPORT 
ur_result_t UR_APICALL urGetUSMExpProcAddrTable( ur_result_t result = UR_RESULT_SUCCESS; + dditable.pfnPoolCreateExp = pDdiTable->pfnPoolCreateExp; + pDdiTable->pfnPoolCreateExp = ur_validation_layer::urUSMPoolCreateExp; + dditable.pfnPitchedAllocExp = pDdiTable->pfnPitchedAllocExp; pDdiTable->pfnPitchedAllocExp = ur_validation_layer::urUSMPitchedAllocExp; diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index de9636bd68..9322d27bea 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -504,6 +504,7 @@ EXPORTS urPrintUsmP2pEnablePeerAccessExpParams urPrintUsmP2pPeerAccessGetInfoExpParams urPrintUsmPitchedAllocExpParams + urPrintUsmPoolCreateExpParams urPrintUsmPoolCreateParams urPrintUsmPoolDesc urPrintUsmPoolFlags @@ -565,6 +566,7 @@ EXPORTS urUSMImportExp urUSMPitchedAllocExp urUSMPoolCreate + urUSMPoolCreateExp urUSMPoolGetInfo urUSMPoolRelease urUSMPoolRetain diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 5fdc2c5687..8b45b2875d 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -504,6 +504,7 @@ urPrintUsmP2pEnablePeerAccessExpParams; urPrintUsmP2pPeerAccessGetInfoExpParams; urPrintUsmPitchedAllocExpParams; + urPrintUsmPoolCreateExpParams; urPrintUsmPoolCreateParams; urPrintUsmPoolDesc; urPrintUsmPoolFlags; @@ -565,6 +566,7 @@ urUSMImportExp; urUSMPitchedAllocExp; urUSMPoolCreate; + urUSMPoolCreateExp; urUSMPoolGetInfo; urUSMPoolRelease; urUSMPoolRetain; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 92eb77fb92..828ade2859 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -6478,6 +6478,52 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFreeExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMPoolCreateExp +__urdlllocal ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of 
the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t * + pPoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool +) { + ur_result_t result = UR_RESULT_SUCCESS; + + [[maybe_unused]] auto context = getContext(); + + // extract platform's function pointer table + auto dditable = reinterpret_cast(hContext)->dditable; + auto pfnPoolCreateExp = dditable->ur.USMExp.pfnPoolCreateExp; + if (nullptr == pfnPoolCreateExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hContext = reinterpret_cast(hContext)->handle; + + // convert loader handle to platform handle + hDevice = reinterpret_cast(hDevice)->handle; + + // forward to device-platform + result = pfnPoolCreateExp(hContext, hDevice, pPoolDesc, ppPool); + + if (UR_RESULT_SUCCESS != result) { + return result; + } + + try { + // convert platform handle to loader handle + *ppPool = reinterpret_cast( + context->factories.ur_usm_pool_factory.getInstance(*ppPool, + dditable)); + } catch (std::bad_alloc &) { + result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urUSMPitchedAllocExp __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( @@ -11232,6 +11278,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( if (ur_loader::getContext()->platforms.size() != 1 || ur_loader::getContext()->forceIntercept) { // return pointers to loader's DDIs + pDdiTable->pfnPoolCreateExp = ur_loader::urUSMPoolCreateExp; pDdiTable->pfnPitchedAllocExp = ur_loader::urUSMPitchedAllocExp; pDdiTable->pfnImportExp = ur_loader::urUSMImportExp; pDdiTable->pfnReleaseExp = ur_loader::urUSMReleaseExp; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 
8a84a0abba..df582af577 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -6773,6 +6773,49 @@ ur_result_t UR_APICALL urEnqueueUSMFreeExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Create USM memory pool with desired properties. +/// +/// @details +/// - Create a memory pool associated with a single device. +/// - See also ::urUSMPoolCreate and ::ur_usm_pool_limits_desc_t. +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPoolDesc` +/// + `NULL == ppPool` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_POOL_FLAGS_MASK & pPoolDesc->flags` +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT +ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t * + pPoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool + ) try { + auto pfnPoolCreateExp = + ur_lib::getContext()->urDdiTable.USMExp.pfnPoolCreateExp; + if (nullptr == pfnPoolCreateExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnPoolCreateExp(hContext, hDevice, pPoolDesc, ppPool); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 0c3e5a3d13..c1b9ce05b6 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -2711,6 +2711,14 @@ urPrintUsmPoolGetInfoParams(const struct ur_usm_pool_get_info_params_t *params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintUsmPoolCreateExpParams( + const struct ur_usm_pool_create_exp_params_t *params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintUsmPitchedAllocExpParams( const struct ur_usm_pitched_alloc_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 2e42c4b24c..9894e2f7ce 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -5779,6 +5779,42 @@ ur_result_t UR_APICALL urEnqueueUSMFreeExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Create USM memory pool with desired properties. +/// +/// @details +/// - Create a memory pool associated with a single device. +/// - See also ::urUSMPoolCreate and ::ur_usm_pool_limits_desc_t. 
+/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hContext` +/// + `NULL == hDevice` +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// + `NULL == pPoolDesc` +/// + `NULL == ppPool` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_USM_POOL_FLAGS_MASK & pPoolDesc->flags` +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE +/// + If any device associated with `hContext` reports `false` for ::UR_DEVICE_INFO_USM_POOL_SUPPORT +ur_result_t UR_APICALL urUSMPoolCreateExp( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + ur_usm_pool_desc_t * + pPoolDesc, ///< [in] pointer to USM pool descriptor. Can be chained with + ///< ::ur_usm_pool_limits_desc_t + ur_usm_pool_handle_t *ppPool ///< [out] pointer to USM memory pool +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief USM allocate pitched memory /// diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt index e71a829964..b9b12d855c 100644 --- a/test/conformance/CMakeLists.txt +++ b/test/conformance/CMakeLists.txt @@ -123,6 +123,7 @@ set(TEST_SUBDIRECTORIES_DPCXX "program" "enqueue" "integration" + "exp_async_alloc" "exp_command_buffer" "exp_enqueue_native" "exp_usm_p2p" diff --git a/test/conformance/exp_async_alloc/CMakeLists.txt b/test/conformance/exp_async_alloc/CMakeLists.txt new file mode 100644 index 0000000000..50195644a6 --- /dev/null +++ b/test/conformance/exp_async_alloc/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright (C) 2025 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+add_conformance_test_with_kernels_environment(
+    exp_async_alloc
+    async_alloc.cpp
+)
diff --git a/test/conformance/exp_async_alloc/async_alloc.cpp b/test/conformance/exp_async_alloc/async_alloc.cpp
new file mode 100644
index 0000000000..a9fd695400
--- /dev/null
+++ b/test/conformance/exp_async_alloc/async_alloc.cpp
@@ -0,0 +1,75 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include
+#include
+
+using T = uint32_t;
+
+struct urAsyncAllocTest : uur::urQueueTest { // fixture for the urEnqueueUSM*AllocExp / urEnqueueUSMFreeExp tests
+    void SetUp() override {
+        UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp());
+        host_vec = std::vector(global_size, 0);
+        ASSERT_EQ(host_vec.size(), global_size);
+    }
+    void TearDown() override {
+        if (pool) { // release the pool before the base fixture tears down the context it was created from
+            EXPECT_SUCCESS(urUSMPoolRelease(pool));
+        }
+        UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::TearDown());
+    }
+    static constexpr T val = 42;
+    static constexpr uint32_t global_size = 1e7;
+    std::vector host_vec;
+    void *dev_ptr = nullptr; // written by the urEnqueueUSM*AllocExp calls in the tests
+    static constexpr size_t allocation_size = sizeof(val) * global_size;
+    static constexpr size_t pool_size = 400;
+    static constexpr ur_usm_pool_limits_desc_t limits_desc{
+        UR_STRUCTURE_TYPE_USM_POOL_LIMITS_DESC, nullptr, pool_size, 1};
+    ur_usm_pool_handle_t pool{0}; // non-null only when a test created a pool; released in TearDown
+};
+
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urAsyncAllocTest);
+
+#define SUCCESS_TEST(ALLOC_TYPE)                                               \
+    TEST_P(urAsyncAllocTest, SuccessNoPool##ALLOC_TYPE) {                      \
+        ur_event_handle_t alloc_ev = nullptr;                                  \
+        ASSERT_SUCCESS(urEnqueueUSM##ALLOC_TYPE##AllocExp(                     \
+            queue, nullptr, 1, nullptr, 0, nullptr, &dev_ptr, &alloc_ev));     \
+        ASSERT_SUCCESS(urEnqueueUSMFreeExp(queue, nullptr, dev_ptr, 1,         \
+                                           &alloc_ev, nullptr));               \
+        ASSERT_SUCCESS(urEventRelease(alloc_ev));                              \
+    }
+
+SUCCESS_TEST(Device)
+SUCCESS_TEST(Shared)
+SUCCESS_TEST(Host)
+
+TEST_P(urAsyncAllocTest, SuccessWithPoolDevice) {
+    ur_event_handle_t alloc_ev = nullptr;
+    ur_usm_pool_desc_t pool_desc{UR_STRUCTURE_TYPE_USM_POOL_DESC, &limits_desc,
+                                 UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP};
+    // Use device specific mem pool creation
+    ASSERT_SUCCESS(urUSMPoolCreateExp(context, device, &pool_desc, &pool));
+    ASSERT_SUCCESS(urEnqueueUSMDeviceAllocExp(queue, pool, 1, nullptr, 0,
+                                              nullptr, &dev_ptr, &alloc_ev));
+    ASSERT_SUCCESS(
+        urEnqueueUSMFreeExp(queue, pool, dev_ptr, 1, &alloc_ev, nullptr));
+    ASSERT_SUCCESS(urEventRelease(alloc_ev)); // the out-event is owned by the test
+}
+
+#define SUCCESS_TEST_POOL(ALLOC_TYPE)                                          \
+    TEST_P(urAsyncAllocTest, SuccessWithPool##ALLOC_TYPE) {                    \
+        ur_event_handle_t alloc_ev = nullptr;                                  \
+        ur_usm_pool_desc_t pool_desc{                                          \
+            UR_STRUCTURE_TYPE_USM_POOL_DESC, &limits_desc,                     \
+            UR_USM_POOL_FLAG_USE_NATIVE_MEMORY_POOL_EXP};                      \
+        ASSERT_SUCCESS(urUSMPoolCreate(context, &pool_desc, &pool));           \
+        ASSERT_SUCCESS(urEnqueueUSM##ALLOC_TYPE##AllocExp(                     \
+            queue, pool, 1, nullptr, 0, nullptr, &dev_ptr, &alloc_ev));        \
+        ASSERT_SUCCESS(                                                        \
+            urEnqueueUSMFreeExp(queue, pool, dev_ptr, 1, &alloc_ev, nullptr)); \
+        ASSERT_SUCCESS(urEventRelease(alloc_ev));                              \
+    }