@@ -207,6 +207,8 @@ typedef enum ur_function_t {
207
207
UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_WRITE_RECT_EXP = 190, ///< Enumerator for ::urCommandBufferAppendMemBufferWriteRectExp
208
208
UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_READ_RECT_EXP = 191, ///< Enumerator for ::urCommandBufferAppendMemBufferReadRectExp
209
209
UR_FUNCTION_COMMAND_BUFFER_APPEND_MEM_BUFFER_FILL_EXP = 192, ///< Enumerator for ::urCommandBufferAppendMemBufferFillExp
210
+ UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 193, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp
211
+ UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 194, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp
210
212
/// @cond
211
213
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
212
214
/// @endcond
@@ -8171,6 +8173,90 @@ urCommandBufferEnqueueExp(
8171
8173
///< command-buffer execution instance.
8172
8174
);
8173
8175
8176
+ #if !defined(__GNUC__)
8177
+ #pragma endregion
8178
+ #endif
8179
+ // Intel 'oneAPI' Unified Runtime Experimental APIs for Cooperative Kernels
8180
+ #if !defined(__GNUC__)
8181
+ #pragma region cooperative kernels(experimental)
8182
+ #endif
8183
+ ///////////////////////////////////////////////////////////////////////////////
8184
+ #ifndef UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP
8185
+ /// @brief Extension string that identifies support for cooperative kernels;
8186
+ /// it is the string returned when querying device extensions.
8187
+ #define UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP "ur_exp_cooperative_kernels"
8188
+ #endif // UR_COOPERATIVE_KERNELS_EXTENSION_STRING_EXP
8189
+
8190
+ ///////////////////////////////////////////////////////////////////////////////
8191
+ /// @brief Enqueue a command to execute a cooperative kernel
8192
+ /// @details Events in phEventWaitList must complete before the kernel executes; phEvent, when provided, identifies this launch.
8193
+ /// @returns
8194
+ /// - ::UR_RESULT_SUCCESS
8195
+ /// - ::UR_RESULT_ERROR_UNINITIALIZED
8196
+ /// - ::UR_RESULT_ERROR_DEVICE_LOST
8197
+ /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
8198
+ /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
8199
+ /// + `NULL == hQueue`
8200
+ /// + `NULL == hKernel`
8201
+ /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
8202
+ /// + `NULL == pGlobalWorkOffset`
8203
+ /// + `NULL == pGlobalWorkSize`
8204
+ /// - ::UR_RESULT_ERROR_INVALID_QUEUE
8205
+ /// - ::UR_RESULT_ERROR_INVALID_KERNEL
8206
+ /// - ::UR_RESULT_ERROR_INVALID_EVENT
8207
+ /// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
8208
+ /// + `phEventWaitList == NULL && numEventsInWaitList > 0`
8209
+ /// + `phEventWaitList != NULL && numEventsInWaitList == 0`
8210
+ /// + If any event object in phEventWaitList is not a valid event.
8211
+ /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION
8212
+ /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE
8213
+ /// - ::UR_RESULT_ERROR_INVALID_VALUE
8214
+ /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
8215
+ /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
8216
+ UR_APIEXPORT ur_result_t UR_APICALL
8217
+ urEnqueueCooperativeKernelLaunchExp(
8218
+ ur_queue_handle_t hQueue, ///< [in] handle of the queue object
8219
+ ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
8220
+ uint32_t workDim, ///< [in] number of dimensions, from 1 to 3, used to specify the global
8221
+ ///< and work-group work-items
8222
+ const size_t *pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the
8223
+ ///< offset used to calculate the global ID of a work-item
8224
+ const size_t *pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the
8225
+ ///< number of global work-items in each dimension that will execute the
8226
+ ///< kernel function
8227
+ const size_t *pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that
8228
+ ///< specify the number of local work-items forming a work-group that will
8229
+ ///< execute the kernel function.
8230
+ ///< If nullptr, the runtime implementation will choose the work-group
8231
+ ///< size.
8232
+ uint32_t numEventsInWaitList, ///< [in] size of the event wait list
8233
+ const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
8234
+ ///< events that must be complete before the kernel execution.
8235
+ ///< If nullptr, numEventsInWaitList must be 0, indicating that there are
8236
+ ///< no events to wait on.
8237
+ ur_event_handle_t *phEvent ///< [out][optional] return an event object that identifies this particular
8238
+ ///< kernel execution instance.
8239
+ );
8240
+
8241
+ ///////////////////////////////////////////////////////////////////////////////
8242
+ /// @brief Query the maximum number of work groups for a cooperative kernel
8243
+ /// @details The result is returned through pGroupCountRet, which must not be NULL.
8244
+ /// @returns
8245
+ /// - ::UR_RESULT_SUCCESS
8246
+ /// - ::UR_RESULT_ERROR_UNINITIALIZED
8247
+ /// - ::UR_RESULT_ERROR_DEVICE_LOST
8248
+ /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
8249
+ /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
8250
+ /// + `NULL == hKernel`
8251
+ /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
8252
+ /// + `NULL == pGroupCountRet`
8253
+ /// - ::UR_RESULT_ERROR_INVALID_KERNEL
8254
+ UR_APIEXPORT ur_result_t UR_APICALL
8255
+ urKernelSuggestMaxCooperativeGroupCountExp(
8256
+ ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object
8257
+ uint32_t *pGroupCountRet ///< [out] pointer to the returned maximum number of work groups
8258
+ );
8259
+
8174
8260
#if !defined(__GNUC__)
8175
8261
#pragma endregion
8176
8262
#endif
@@ -8939,6 +9025,15 @@ typedef struct ur_kernel_set_specialization_constants_params_t {
8939
9025
const ur_specialization_constant_info_t **ppSpecConstants;
8940
9026
} ur_kernel_set_specialization_constants_params_t;
8941
9027
9028
+ ///////////////////////////////////////////////////////////////////////////////
9029
+ /// @brief Function parameters for urKernelSuggestMaxCooperativeGroupCountExp
9030
+ /// @details Each entry is a pointer to the corresponding parameter passed to
9031
+ /// the function, allowing the callback to modify the parameter's value
9032
+ typedef struct ur_kernel_suggest_max_cooperative_group_count_exp_params_t {
9033
+ ur_kernel_handle_t *phKernel; ///< pointer to the hKernel parameter
9034
+ uint32_t **ppGroupCountRet; ///< pointer to the pGroupCountRet parameter
9035
+ } ur_kernel_suggest_max_cooperative_group_count_exp_params_t;
9036
+
8942
9037
///////////////////////////////////////////////////////////////////////////////
8943
9038
/// @brief Function parameters for urSamplerCreate
8944
9039
/// @details Each entry is a pointer to the parameter passed to the function;
@@ -9586,6 +9681,22 @@ typedef struct ur_enqueue_write_host_pipe_params_t {
9586
9681
ur_event_handle_t **pphEvent;
9587
9682
} ur_enqueue_write_host_pipe_params_t;
9588
9683
9684
+ ///////////////////////////////////////////////////////////////////////////////
9685
+ /// @brief Function parameters for urEnqueueCooperativeKernelLaunchExp
9686
+ /// @details Each entry is a pointer to the corresponding parameter passed to
9687
+ /// the function, allowing the callback to modify the parameter's value
9688
+ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
9689
+ ur_queue_handle_t *phQueue; ///< pointer to the hQueue parameter
9690
+ ur_kernel_handle_t *phKernel; ///< pointer to the hKernel parameter
9691
+ uint32_t *pworkDim; ///< pointer to the workDim parameter
9692
+ const size_t **ppGlobalWorkOffset; ///< pointer to the pGlobalWorkOffset parameter
9693
+ const size_t **ppGlobalWorkSize; ///< pointer to the pGlobalWorkSize parameter
9694
+ const size_t **ppLocalWorkSize; ///< pointer to the pLocalWorkSize parameter
9695
+ uint32_t *pnumEventsInWaitList; ///< pointer to the numEventsInWaitList parameter
9696
+ const ur_event_handle_t **pphEventWaitList; ///< pointer to the phEventWaitList parameter
9697
+ ur_event_handle_t **pphEvent; ///< pointer to the phEvent parameter
9698
+ } ur_enqueue_cooperative_kernel_launch_exp_params_t;
9699
+
9589
9700
///////////////////////////////////////////////////////////////////////////////
9590
9701
/// @brief Function parameters for urQueueGetInfo
9591
9702
/// @details Each entry is a pointer to the parameter passed to the function;
0 commit comments