Skip to content

Commit 2d02c21

Browse files
authored
Merge branch 'main' into fix_usm_allocation
2 parents 74e18f1 + 4c69624 commit 2d02c21

File tree

90 files changed

+2434
-691
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+2434
-691
lines changed

.github/scripts/get_system_info.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ function system_info {
5353
echo "**********/proc/meminfo**********"
5454
cat /proc/meminfo
5555
echo "**********build/bin/urinfo**********"
56-
$(dirname "$(readlink -f "$0")")/../../build/bin/urinfo || true
56+
$(dirname "$(readlink -f "$0")")/../../build/bin/urinfo --no-linear-ids --verbose || true
5757
echo "******OpenCL*******"
5858
# The driver version of OpenCL Graphics is the compute-runtime version
5959
clinfo || echo "OpenCL not installed"

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
55

6-
cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
6+
cmake_minimum_required(VERSION 3.20.0 FATAL_ERROR)
77
project(unified-runtime VERSION 0.9.0)
88

99
include(GNUInstallDirs)

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
## Table of contents
1414

1515
- [Unified Runtime](#unified-runtime)
16-
- [Adapters](#adapters)
1716
- [Table of contents](#table-of-contents)
1817
- [Contents of the repo](#contents-of-the-repo)
1918
- [Integration](#integration)
@@ -29,7 +28,7 @@
2928
- [Adapter naming convention](#adapter-naming-convention)
3029
- [Source code generation](#source-code-generation)
3130
- [Documentation](#documentation)
32-
6. [Release Process](#release-process)
31+
- [Release Process](#release-process)
3332

3433
## Contents of the repo
3534

@@ -88,7 +87,7 @@ for more detailed instructions on the correct setup.
8887

8988
Required packages:
9089
- C++ compiler with C++17 support
91-
- [CMake](https://cmake.org/) >= 3.14.0
90+
- [CMake](https://cmake.org/) >= 3.20.0
9291
- Python v3.6.6 or later
9392

9493
### Windows
@@ -141,6 +140,7 @@ List of options provided by CMake:
141140
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
142141
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
143142
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
143+
| UR_DEVICE_CODE_EXTRACTOR | Path of the `clang-offload-extract` executable from the DPC++ package, required for CTS device binaries | File path | `"${dirname(UR_DPCXX)}/clang-offload-extract"` |
144144
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
145145
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
146146
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |

include/ur_api.h

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ typedef enum ur_function_t {
222222
UR_FUNCTION_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_EXP = 220, ///< Enumerator for ::urCommandBufferUpdateKernelLaunchExp
223223
UR_FUNCTION_COMMAND_BUFFER_GET_INFO_EXP = 221, ///< Enumerator for ::urCommandBufferGetInfoExp
224224
UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP = 222, ///< Enumerator for ::urCommandBufferCommandGetInfoExp
225+
UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP = 223, ///< Enumerator for ::urEnqueueTimestampRecordingExp
225226
/// @cond
226227
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
227228
/// @endcond
@@ -1641,6 +1642,7 @@ typedef enum ur_device_info_t {
16411642
///< backed 3D sampled image data.
16421643
UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP = 0x2017, ///< [::ur_bool_t] returns true if the device is capable of fetching
16431644
///< non-USM backed 3D sampled image data.
1645+
UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP = 0x2018, ///< [::ur_bool_t] returns true if the device supports timestamp recording
16441646
/// @cond
16451647
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
16461648
/// @endcond
@@ -1666,7 +1668,7 @@ typedef enum ur_device_info_t {
16661668
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
16671669
/// + `NULL == hDevice`
16681670
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
1669-
/// + `::UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP < propName`
1671+
/// + `::UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP < propName`
16701672
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
16711673
/// + If `propName` is not supported by the adapter.
16721674
/// - ::UR_RESULT_ERROR_INVALID_SIZE
@@ -5618,6 +5620,7 @@ typedef enum ur_command_t {
56185620
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP = 0x1000, ///< Event created by ::urCommandBufferEnqueueExp
56195621
UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP = 0x2000, ///< Event created by ::urBindlessImagesWaitExternalSemaphoreExp
56205622
UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP = 0x2001, ///< Event created by ::urBindlessImagesSignalExternalSemaphoreExp
5623+
UR_COMMAND_TIMESTAMP_RECORDING_EXP = 0x2002, ///< Event created by ::urEnqueueTimestampRecordingExp
56215624
/// @cond
56225625
UR_COMMAND_FORCE_UINT32 = 0x7fffffff
56235626
/// @endcond
@@ -8890,6 +8893,46 @@ urKernelSuggestMaxCooperativeGroupCountExp(
88908893
uint32_t *pGroupCountRet ///< [out] pointer to maximum number of groups
88918894
);
88928895

8896+
#if !defined(__GNUC__)
8897+
#pragma endregion
8898+
#endif
8899+
// Intel 'oneAPI' Unified Runtime Experimental APIs for enqueuing timestamp recordings
8900+
#if !defined(__GNUC__)
8901+
#pragma region enqueue timestamp recording(experimental)
8902+
#endif
8903+
///////////////////////////////////////////////////////////////////////////////
8904+
/// @brief Enqueue a command for recording the device timestamp
8905+
///
8906+
/// @returns
8907+
/// - ::UR_RESULT_SUCCESS
8908+
/// - ::UR_RESULT_ERROR_UNINITIALIZED
8909+
/// - ::UR_RESULT_ERROR_DEVICE_LOST
8910+
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
8911+
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
8912+
/// + `NULL == hQueue`
8913+
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
8914+
/// + `NULL == phEvent`
8915+
/// - ::UR_RESULT_ERROR_INVALID_EVENT_WAIT_LIST
8916+
UR_APIEXPORT ur_result_t UR_APICALL
8917+
urEnqueueTimestampRecordingExp(
8918+
ur_queue_handle_t hQueue, ///< [in] handle of the queue object
8919+
bool blocking, ///< [in] indicates whether the call to this function should block until
8920+
///< the device timestamp recording command has executed on the
8921+
///< device.
8922+
uint32_t numEventsInWaitList, ///< [in] size of the event wait list
8923+
const ur_event_handle_t *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of
8924+
///< events that must be complete before the timestamp recording command executes.
8925+
///< If nullptr, the numEventsInWaitList must be 0, indicating no wait
8926+
///< events.
8927+
ur_event_handle_t *phEvent ///< [in,out] return an event object that identifies this particular timestamp
8928+
///< recording command instance. Profiling information can be queried
8929+
///< from this event as if `hQueue` had profiling enabled. Querying
8930+
///< `UR_PROFILING_INFO_COMMAND_QUEUED` or `UR_PROFILING_INFO_COMMAND_SUBMIT`
8931+
///< reports the timestamp at the time of the call to this function.
8932+
///< Querying `UR_PROFILING_INFO_COMMAND_START` or `UR_PROFILING_INFO_COMMAND_END`
8933+
///< reports the timestamp recorded when the command is executed on the device.
8934+
);
8935+
88938936
#if !defined(__GNUC__)
88948937
#pragma endregion
88958938
#endif
@@ -10600,6 +10643,18 @@ typedef struct ur_enqueue_cooperative_kernel_launch_exp_params_t {
1060010643
ur_event_handle_t **pphEvent;
1060110644
} ur_enqueue_cooperative_kernel_launch_exp_params_t;
1060210645

10646+
///////////////////////////////////////////////////////////////////////////////
10647+
/// @brief Function parameters for urEnqueueTimestampRecordingExp
10648+
/// @details Each entry is a pointer to the parameter passed to the function;
10649+
/// allowing the callback the ability to modify the parameter's value
10650+
typedef struct ur_enqueue_timestamp_recording_exp_params_t {
10651+
ur_queue_handle_t *phQueue;
10652+
bool *pblocking;
10653+
uint32_t *pnumEventsInWaitList;
10654+
const ur_event_handle_t **pphEventWaitList;
10655+
ur_event_handle_t **pphEvent;
10656+
} ur_enqueue_timestamp_recording_exp_params_t;
10657+
1060310658
///////////////////////////////////////////////////////////////////////////////
1060410659
/// @brief Function parameters for urBindlessImagesUnsampledImageHandleDestroyExp
1060510660
/// @details Each entry is a pointer to the parameter passed to the function;

include/ur_ddi.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,10 +1448,20 @@ typedef ur_result_t(UR_APICALL *ur_pfnEnqueueCooperativeKernelLaunchExp_t)(
14481448
const ur_event_handle_t *,
14491449
ur_event_handle_t *);
14501450

1451+
///////////////////////////////////////////////////////////////////////////////
1452+
/// @brief Function-pointer for urEnqueueTimestampRecordingExp
1453+
typedef ur_result_t(UR_APICALL *ur_pfnEnqueueTimestampRecordingExp_t)(
1454+
ur_queue_handle_t,
1455+
bool,
1456+
uint32_t,
1457+
const ur_event_handle_t *,
1458+
ur_event_handle_t *);
1459+
14511460
///////////////////////////////////////////////////////////////////////////////
14521461
/// @brief Table of EnqueueExp functions pointers
14531462
typedef struct ur_enqueue_exp_dditable_t {
14541463
ur_pfnEnqueueCooperativeKernelLaunchExp_t pfnCooperativeKernelLaunchExp;
1464+
ur_pfnEnqueueTimestampRecordingExp_t pfnTimestampRecordingExp;
14551465
} ur_enqueue_exp_dditable_t;
14561466

14571467
///////////////////////////////////////////////////////////////////////////////

include/ur_print.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueWriteHostPipeParams(const stru
19541954
/// - `buff_size < out_size`
19551955
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueCooperativeKernelLaunchExpParams(const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
19561956

1957+
///////////////////////////////////////////////////////////////////////////////
1958+
/// @brief Print ur_enqueue_timestamp_recording_exp_params_t struct
1959+
/// @returns
1960+
/// - ::UR_RESULT_SUCCESS
1961+
/// - ::UR_RESULT_ERROR_INVALID_SIZE
1962+
/// - `buff_size < out_size`
1963+
UR_APIEXPORT ur_result_t UR_APICALL urPrintEnqueueTimestampRecordingExpParams(const struct ur_enqueue_timestamp_recording_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
1964+
19571965
///////////////////////////////////////////////////////////////////////////////
19581966
/// @brief Print ur_bindless_images_unsampled_image_handle_destroy_exp_params_t struct
19591967
/// @returns

include/ur_print.hpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
916916
case UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP:
917917
os << "UR_FUNCTION_COMMAND_BUFFER_COMMAND_GET_INFO_EXP";
918918
break;
919+
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP:
920+
os << "UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP";
921+
break;
919922
default:
920923
os << "unknown enumerator";
921924
break;
@@ -2571,6 +2574,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25712574
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
25722575
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
25732576
break;
2577+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP:
2578+
os << "UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP";
2579+
break;
25742580
default:
25752581
os << "unknown enumerator";
25762582
break;
@@ -4280,6 +4286,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
42804286

42814287
os << ")";
42824288
} break;
4289+
case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: {
4290+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4291+
if (sizeof(ur_bool_t) > size) {
4292+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4293+
return UR_RESULT_ERROR_INVALID_SIZE;
4294+
}
4295+
os << (const void *)(tptr) << " (";
4296+
4297+
os << *tptr;
4298+
4299+
os << ")";
4300+
} break;
42834301
default:
42844302
os << "unknown enumerator";
42854303
return UR_RESULT_ERROR_INVALID_ENUMERATION;
@@ -8788,6 +8806,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_command_t value) {
87888806
case UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP:
87898807
os << "UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP";
87908808
break;
8809+
case UR_COMMAND_TIMESTAMP_RECORDING_EXP:
8810+
os << "UR_COMMAND_TIMESTAMP_RECORDING_EXP";
8811+
break;
87918812
default:
87928813
os << "unknown enumerator";
87938814
break;
@@ -14104,6 +14125,48 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1410414125
return os;
1410514126
}
1410614127

14128+
///////////////////////////////////////////////////////////////////////////////
14129+
/// @brief Print operator for the ur_enqueue_timestamp_recording_exp_params_t type
14130+
/// @returns
14131+
/// std::ostream &
14132+
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_enqueue_timestamp_recording_exp_params_t *params) {
14133+
14134+
os << ".hQueue = ";
14135+
14136+
ur::details::printPtr(os,
14137+
*(params->phQueue));
14138+
14139+
os << ", ";
14140+
os << ".blocking = ";
14141+
14142+
os << *(params->pblocking);
14143+
14144+
os << ", ";
14145+
os << ".numEventsInWaitList = ";
14146+
14147+
os << *(params->pnumEventsInWaitList);
14148+
14149+
os << ", ";
14150+
os << ".phEventWaitList = {";
14151+
for (size_t i = 0; *(params->pphEventWaitList) != NULL && i < *params->pnumEventsInWaitList; ++i) {
14152+
if (i != 0) {
14153+
os << ", ";
14154+
}
14155+
14156+
ur::details::printPtr(os,
14157+
(*(params->pphEventWaitList))[i]);
14158+
}
14159+
os << "}";
14160+
14161+
os << ", ";
14162+
os << ".phEvent = ";
14163+
14164+
ur::details::printPtr(os,
14165+
*(params->pphEvent));
14166+
14167+
return os;
14168+
}
14169+
1410714170
///////////////////////////////////////////////////////////////////////////////
1410814171
/// @brief Print operator for the ur_bindless_images_unsampled_image_handle_destroy_exp_params_t type
1410914172
/// @returns
@@ -17126,6 +17189,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
1712617189
case UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP: {
1712717190
os << (const struct ur_enqueue_cooperative_kernel_launch_exp_params_t *)params;
1712817191
} break;
17192+
case UR_FUNCTION_ENQUEUE_TIMESTAMP_RECORDING_EXP: {
17193+
os << (const struct ur_enqueue_timestamp_recording_exp_params_t *)params;
17194+
} break;
1712917195
case UR_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP: {
1713017196
os << (const struct ur_bindless_images_unsampled_image_handle_destroy_exp_params_t *)params;
1713117197
} break;
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
<%
2+
OneApi=tags['$OneApi']
3+
x=tags['$x']
4+
X=x.upper()
5+
%>
6+
7+
.. _experimental-enqueue-timestamp-recording:
8+
9+
================================================================================
10+
Enqueue Timestamp Recording
11+
================================================================================
12+
13+
.. warning::
14+
15+
Experimental features:
16+
17+
* May be replaced, updated, or removed at any time.
18+
* Do not require maintaining API/ABI stability of their own additions over
19+
time.
20+
* Do not require conformance testing of their own additions.
21+
22+
23+
Motivation
24+
--------------------------------------------------------------------------------
25+
Currently, the only way to get timestamp information is through enabling
26+
profiling on a queue and retrieving the information from events coming from
27+
commands submitted to it. However, not all systems give full control of the
28+
queue construction to the programmer wanting the profiling information. To amend
29+
this, this extension adds the ability to enqueue a timestamp recording on any
30+
queue, with or without profiling enabled. The resulting event can in turn be queried for
31+
the usual profiling information.
32+
33+
34+
API
35+
--------------------------------------------------------------------------------
36+
37+
Enums
38+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39+
40+
* ${x}_device_info_t
41+
* ${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP
42+
43+
* ${x}_command_t
44+
* ${X}_COMMAND_TIMESTAMP_RECORDING_EXP
45+
46+
Functions
47+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
48+
* ${x}EnqueueTimestampRecordingExp
49+
50+
Changelog
51+
--------------------------------------------------------------------------------
52+
53+
+-----------+------------------------+
54+
| Revision | Changes |
55+
+===========+========================+
56+
| 1.0 | Initial Draft |
57+
+-----------+------------------------+
58+
59+
60+
Support
61+
--------------------------------------------------------------------------------
62+
63+
Adapters which support this experimental feature *must* return true for the new
64+
`${X}_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP` device info query.
65+
66+
67+
Contributors
68+
--------------------------------------------------------------------------------
69+
70+
* Steffen Larsen `steffen.larsen@intel.com <steffen.larsen@intel.com>`_

0 commit comments

Comments
 (0)