Skip to content

Commit aefc853

Browse files
committed
Merge branch 'main' into simplify-device-global
2 parents 453b8c2 + 6d5d84c commit aefc853

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1073
-362
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ List of options provided by CMake:
140140
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
141141
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
142142
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
143+
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
143144
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
144145
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
145146
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |

include/ur_api.h

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ typedef enum ur_function_t {
222222
UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 220, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp
223223
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
224224
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
225+
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
225226
/// @cond
226227
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
227228
/// @endcond
@@ -1641,6 +1642,7 @@ typedef enum ur_device_info_t {
16411642
///< backed 3D sampled image data.
16421643
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2017, ///< [::ur_bool_t] returns true if the device is capable of fetching
16431644
///< non-USM backed 3D sampled image data.
1645+
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP = 0x2018, ///< [::ur_bool_t] returns true if the device supports timestamp recording
16441646
/// @cond
16451647
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
16461648
/// @endcond
@@ -1666,7 +1668,7 @@ typedef enum ur_device_info_t {
16661668
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
16671669
/// + `NULL == hDevice`
16681670
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
1669-
/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName`
1671+
/// + `::UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP < propName`
16701672
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
16711673
/// + If `propName` is not supported by the adapter.
16721674
/// - ::UR_RESULT_ERROR_INVALID_SIZE
@@ -5618,6 +5620,7 @@ typedef enum ur_command_t {
56185620
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP = 0x1000, ///< Event created by ::urCommandBufferEnqueueExp
56195621
UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP = 0x2000, ///< Event created by ::urBindlessImagesWaitExternalSemaphoreExp
56205622
UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp
5623+
UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp
56215624
/// @cond
56225625
UR_COMMAND_FORCE_UINT32 = 0x7fffffff
56235626
/// @endcond
@@ -8890,6 +8893,46 @@ urKernelSuggestMaxCooperativeGroupCountExp(
88908893
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
88918894
);
88928895

8896+
#if !defined(__GNUC__)
8897+
#pragma endregion
8898+
#endif
8899+
// Intel 'oneAPI' Unified Runtime Experimental APIs for enqueuing timestamp recordings
8900+
#if !defined(__GNUC__)
8901+
#pragma region enqueue timestamp recording(experimental)
8902+
#endif
8903+
///////////////////////////////////////////////////////////////////////////////
8904+
/// @brief Enqueue a command for recording the device timestamp
8905+
///
8906+
/// @returns
8907+
/// - ::UR_RESULT_SUCCESS
8908+
/// - ::UR_RESULT_ERROR_UNINITIALIZED
8909+
/// - ::UR_RESULT_ERROR_DEVICE_LOST
8910+
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
8911+
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
8912+
/// + `NULL == hQueue`
8913+
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
8914+
/// + `NULL == phEvent`
8915+
/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
8916+
UR_APIEXPORT ur_result_t UR_APICALL
8917+
urEnqueueTimestampRecordingExp(
8918+
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
8919+
bool blocking, ///< [in] indicates whether the call to this function should block
8920+
///< until the device timestamp recording command has executed on the
8921+
///< device.
8922+
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
8923+
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
8924+
///< events that must be complete before the timestamp recording command executes.
8925+
///< If nullptr, the numEventsInWaitList must be 0, indicating no wait
8926+
///< events.
8927+
ur_event_handle_t *phEvent ///< [in,out] return an event object that identifies this particular timestamp
8928+
///< recording command instance. Profiling information can be queried
8929+
///< from this event as if `hQueue` had profiling enabled. Querying
8930+
///< `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
8931+
///< reports the timestamp at the time of the call to this function.
8932+
///< Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
8933+
///< reports the timestamp recorded when the command is executed on the device.
8934+
);
8935+
88938936
#if !defined(__GNUC__)
88948937
#pragma endregion
88958938
#endif
@@ -10600,6 +10643,18 @@ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
1060010643
ur_event_handle_t **pphEvent;
1060110644
} ur_enqueue_cooperative_kernel_launch_exp_params_t;
1060210645

10646+
///////////////////////////////////////////////////////////////////////////////
10647+
/// @brief Function parameters for urEnqueueTimestampRecordingExp
10648+
/// @details Each entry is a pointer to the parameter passed to the function;
10649+
/// allowing the callback the ability to modify the parameter's value
10650+
typedef struct ur_enqueue_timestamp_recording_exp_params_t {
10651+
ur_queue_handle_t *phQueue;
10652+
bool *pblocking;
10653+
uint32_t *pnumEventsInWaitList;
10654+
const ur_event_handle_t **pphEventWaitList;
10655+
ur_event_handle_t **pphEvent;
10656+
} ur_enqueue_timestamp_recording_exp_params_t;
10657+
1060310658
///////////////////////////////////////////////////////////////////////////////
1060410659
/// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp
1060510660
/// @details Each entry is a pointer to the parameter passed to the function;

include/ur_ddi.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,10 +1448,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
14481448
const ur_event_handle_t *,
14491449
ur_event_handle_t *);
14501450

1451+
///////////////////////////////////////////////////////////////////////////////
1452+
/// @brief Function-pointer for urEnqueueTimestampRecordingExp
1453+
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
1454+
ur_queue_handle_t,
1455+
bool,
1456+
uint32_t,
1457+
const ur_event_handle_t *,
1458+
ur_event_handle_t *);
1459+
14511460
///////////////////////////////////////////////////////////////////////////////
14521461
/// @brief Table of EnqueueExp functions pointers
14531462
typedef struct ur_enqueue_exp_dditable_t {
14541463
ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp;
1464+
ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp;
14551465
} ur_enqueue_exp_dditable_t;
14561466

14571467
///////////////////////////////////////////////////////////////////////////////

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru
19541954
/// - `buff_size < out_size`
19551955
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCooperativeKernelLaunchExpParams(const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
19561956

1957+
///////////////////////////////////////////////////////////////////////////////
1958+
/// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
1959+
/// @returns
1960+
/// - ::UR_RESULT_SUCCESS
1961+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
1962+
/// - `buff_size < out_size`
1963+
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(const struct ur_enqueue_timestamp_recording_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
1964+
19571965
///////////////////////////////////////////////////////////////////////////////
19581966
/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct
19591967
/// @returns

include/ur_print.hpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
916916
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
917917
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
918918
break;
919+
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
920+
os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
921+
break;
919922
default:
920923
os << "unknown enumerator";
921924
break;
@@ -2571,6 +2574,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25712574
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
25722575
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
25732576
break;
2577+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
2578+
os << "UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP";
2579+
break;
25742580
default:
25752581
os << "unknown enumerator";
25762582
break;
@@ -4280,6 +4286,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
42804286

42814287
os << ")";
42824288
} break;
4289+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
4290+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4291+
if (sizeof(ur_bool_t) > size) {
4292+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4293+
return UR_RESULT_ERROR_INVALID_SIZE;
4294+
}
4295+
os << (const void *)(tptr) << " (";
4296+
4297+
os << *tptr;
4298+
4299+
os << ")";
4300+
} break;
42834301
default:
42844302
os << "unknown enumerator";
42854303
return UR_RESULT_ERROR_INVALID_ENUMERATION;
@@ -8788,6 +8806,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) {
87888806
case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP:
87898807
os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP";
87908808
break;
8809+
case UR_COMMAND_TIMESTAMP_RECORDING_EXP:
8810+
os << "UR_COMMAND_TIMESTAMP_RECORDING_EXP";
8811+
break;
87918812
default:
87928813
os << "unknown enumerator";
87938814
break;
@@ -14104,6 +14125,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1410414125
return os;
1410514126
}
1410614127

14128+
///////////////////////////////////////////////////////////////////////////////
14129+
/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t type
14130+
/// @returns
14131+
/// std::ostream &
14132+
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) {
14133+
14134+
os << ".hQueue = ";
14135+
14136+
ur::details::printPtr(os,
14137+
*(params->phQueue));
14138+
14139+
os << ", ";
14140+
os << ".blocking = ";
14141+
14142+
os << *(params->pblocking);
14143+
14144+
os << ", ";
14145+
os << ".numEventsInWaitList = ";
14146+
14147+
os << *(params->pnumEventsInWaitList);
14148+
14149+
os << ", ";
14150+
os << ".phEventWaitList = {";
14151+
for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) {
14152+
if (i != 0) {
14153+
os << ", ";
14154+
}
14155+
14156+
ur::details::printPtr(os,
14157+
(*(params->pphEventWaitList))[i]);
14158+
}
14159+
os << "}";
14160+
14161+
os << ", ";
14162+
os << ".phEvent = ";
14163+
14164+
ur::details::printPtr(os,
14165+
*(params->pphEvent));
14166+
14167+
return os;
14168+
}
14169+
1410714170
///////////////////////////////////////////////////////////////////////////////
1410814171
/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type
1410914172
/// @returns
@@ -17126,6 +17189,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
1712617189
case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
1712717190
os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params;
1712817191
} break;
17192+
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
17193+
os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
17194+
} break;
1712917195
case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: {
1713017196
os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params;
1713117197
} break;
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<%
2+
OneApi=tags['$OneApi']
3+
x=tags['$x']
4+
X=x.upper()
5+
%>
6+
7+
.. _experimental-enqueue-timestamp-recording:
8+
9+
================================================================================
10+
Enqueue Timestamp Recording
11+
================================================================================
12+
13+
.. warning::
14+
15+
Experimental features:
16+
17+
* May be replaced, updated, or removed at any time.
18+
* Do not require maintaining API/ABI stability of their own additions over
19+
time.
20+
* Do not require conformance testing of their own additions.
21+
22+
23+
Motivation
24+
--------------------------------------------------------------------------------
25+
Currently, the only way to get timestamp information is through enabling
26+
profiling on a queue and retrieving the information from events coming from
27+
commands submitted to it. However, not all systems give full control of the
28+
queue construction to the programmer wanting the profiling information. To amend
29+
this, this extension adds the ability to enqueue a timestamp recording on any
30+
queue, with or without profiling enabled. The resulting event can in turn be queried for
31+
the usual profiling information.
32+
33+
34+
API
35+
--------------------------------------------------------------------------------
36+
37+
Enums
38+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39+
40+
* ${x}_device_info_t
41+
* ${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP
42+
43+
* ${x}_command_t
44+
* ${X}_COMMAND_TIMESTAMP_RECORDING_EXP
45+
46+
Functions
47+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48+
* ${x}EnqueueTimestampRecordingExp
49+
50+
Changelog
51+
--------------------------------------------------------------------------------
52+
53+
+-----------+------------------------+
54+
| Revision | Changes |
55+
+===========+========================+
56+
| 1.0 | Initial Draft |
57+
+-----------+------------------------+
58+
59+
60+
Support
61+
--------------------------------------------------------------------------------
62+
63+
Adapters which support this experimental feature *must* return true for the new
64+
`${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` device info query.
65+
66+
67+
Contributors
68+
--------------------------------------------------------------------------------
69+
70+
* Steffen Larsen `steffen.larsen@intel.com <steffen.larsen@intel.com>`_
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#
2+
# Copyright (C) 2024 Intel Corporation
3+
#
4+
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
5+
# See LICENSE.TXT
6+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
#
8+
# See YaML.md for syntax definition
9+
#
10+
--- #--------------------------------------------------------------------------
11+
type: header
12+
desc: "Intel $OneApi Unified Runtime Experimental APIs for enqueuing timestamp recordings"
13+
ordinal: "99"
14+
--- #--------------------------------------------------------------------------
15+
type: enum
16+
extend: true
17+
typed_etors: true
18+
desc: "Extension enums to $x_device_info_t to support timestamp recordings."
19+
name: $x_device_info_t
20+
etors:
21+
- name: TIMESTAMP_RECORDING_SUPPORT_EXP
22+
value: "0x2018"
23+
desc: "[$x_bool_t] returns true if the device supports timestamp recording"
24+
--- #--------------------------------------------------------------------------
25+
type: enum
26+
extend: true
27+
desc: "Command Type experimental enumerations."
28+
name: $x_command_t
29+
etors:
30+
- name: TIMESTAMP_RECORDING_EXP
31+
value: "0x2002"
32+
desc: Event created by $xEnqueueTimestampRecordingExp
33+
--- #--------------------------------------------------------------------------
34+
type: function
35+
desc: "Enqueue a command for recording the device timestamp"
36+
class: $xEnqueue
37+
name: TimestampRecordingExp
38+
params:
39+
- type: $x_queue_handle_t
40+
name: hQueue
41+
desc: "[in] handle of the queue object"
42+
- type: bool
43+
name: blocking
44+
desc: |
45+
[in] indicates whether the call to this function should block
46+
until the device timestamp recording command has executed on the
47+
device.
48+
- type: uint32_t
49+
name: numEventsInWaitList
50+
desc: "[in] size of the event wait list"
51+
- type: "const $x_event_handle_t*"
52+
name: phEventWaitList
53+
desc: |
54+
[in][optional][range(0, numEventsInWaitList)] pointer to a list of events that must be complete before the timestamp recording command executes.
55+
If nullptr, the numEventsInWaitList must be 0, indicating no wait events.
56+
- type: $x_event_handle_t*
57+
name: phEvent
58+
desc: |
59+
[in,out] return an event object that identifies this particular timestamp recording command instance. Profiling information can be queried
60+
from this event as if `hQueue` had profiling enabled. Querying `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
61+
reports the timestamp at the time of the call to this function. Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
62+
reports the timestamp recorded when the command is executed on the device.
63+
returns:
64+
- $X_RESULT_ERROR_INVALID_NULL_HANDLE
65+
- $X_RESULT_ERROR_INVALID_NULL_POINTER
66+
- $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST

scripts/core/registry.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,9 @@ etors:
580580
- name: COMMAND_BUFFER_COMMAND_GET_INFO_EXP
581581
desc: Enumerator for $xCommandBufferCommandGetInfoExp
582582
value: '222'
583+
- name: ENQUEUE_TIMESTAMP_RECORDING_EXP
584+
desc: Enumerator for $xEnqueueTimestampRecordingExp
585+
value: '223'
583586
---
584587
type: enum
585588
desc: Defines structure types

0 commit comments

Comments
 (0)