Skip to content

Commit 23694f8

Browse files
authored
Merge branch 'main' into fix_usm_allocation
2 parents 47e59a8 + f3fb858 commit 23694f8

36 files changed

+715
-365
lines changed

.github/CODEOWNERS

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,21 @@
22

33
# Level Zero adapter
44
source/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write
5+
test/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write
56

67
# CUDA and HIP adapters
78
source/adapters/cuda @oneapi-src/unified-runtime-cuda-write
9+
test/adapters/cuda @oneapi-src/unified-runtime-cuda-write
810
source/adapters/hip @oneapi-src/unified-runtime-hip-write
11+
test/adapters/hip @oneapi-src/unified-runtime-hip-write
912

1013
# OpenCL adapter
1114
source/adapters/opencl @oneapi-src/unified-runtime-opencl-write
15+
test/adapters/opencl @oneapi-src/unified-runtime-opencl-write
1216

1317
# Native CPU adapter
1418
source/adapters/native_cpu @oneapi-src/unified-runtime-native-cpu-write
19+
test/adapters/native_cpu @oneapi-src/unified-runtime-native-cpu-write
1520

1621
# Command-buffer experimental feature
1722
source/adapters/**/command_buffer.* @oneapi-src/unified-runtime-command-buffer-write
@@ -20,6 +25,7 @@ scripts/core/exp-command-buffer.yml @oneapi-src/unified-runtime-command-buff
2025
test/conformance/exp_command_buffer** @oneapi-src/unified-runtime-command-buffer-write
2126

2227
# Bindless Images experimental feature
23-
scripts/core/EXP-BINDLESS-IMAGES.rst @oneapi-src/unified-runtime-bindless-images-write
24-
scripts/core/exp-bindless-images.yml @oneapi-src/unified-runtime-bindless-images-write
25-
source/adapters/**/image.* @oneapi-src/unified-runtime-bindless-images-write
28+
source/adapters/**/image.* @oneapi-src/unified-runtime-bindless-images-write
29+
scripts/core/EXP-BINDLESS-IMAGES.rst @oneapi-src/unified-runtime-bindless-images-write
30+
scripts/core/exp-bindless-images.yml @oneapi-src/unified-runtime-bindless-images-write
31+
test/conformance/exp_bindless_images** @oneapi-src/unified-runtime-bindless-images-write

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
4848
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
4949
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
5050
set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable")
51+
set(UR_DPCXX_BUILD_FLAGS "" CACHE STRING "Build flags to pass to DPC++ when compiling device programs")
5152
set(UR_SYCL_LIBRARY_DIR "" CACHE PATH
5253
"Path of the SYCL runtime library directory")
5354
set(UR_CONFORMANCE_TARGET_TRIPLES "" CACHE STRING

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ List of options provided by CMake:
140140
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
141141
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
142142
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
143+
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
143144
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
144145
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
145146
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |

include/ur_api.h

Lines changed: 208 additions & 196 deletions
Large diffs are not rendered by default.

include/ur_print.hpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2553,6 +2553,24 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
25532553
case UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP:
25542554
os << "UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP";
25552555
break;
2556+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP:
2557+
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP";
2558+
break;
2559+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP:
2560+
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP";
2561+
break;
2562+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP:
2563+
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP";
2564+
break;
2565+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP:
2566+
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP";
2567+
break;
2568+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP:
2569+
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP";
2570+
break;
2571+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
2572+
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
2573+
break;
25562574
default:
25572575
os << "unknown enumerator";
25582576
break;
@@ -4190,6 +4208,78 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info
41904208

41914209
os << ")";
41924210
} break;
4211+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: {
4212+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4213+
if (sizeof(ur_bool_t) > size) {
4214+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4215+
return UR_RESULT_ERROR_INVALID_SIZE;
4216+
}
4217+
os << (const void *)(tptr) << " (";
4218+
4219+
os << *tptr;
4220+
4221+
os << ")";
4222+
} break;
4223+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: {
4224+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4225+
if (sizeof(ur_bool_t) > size) {
4226+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4227+
return UR_RESULT_ERROR_INVALID_SIZE;
4228+
}
4229+
os << (const void *)(tptr) << " (";
4230+
4231+
os << *tptr;
4232+
4233+
os << ")";
4234+
} break;
4235+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: {
4236+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4237+
if (sizeof(ur_bool_t) > size) {
4238+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4239+
return UR_RESULT_ERROR_INVALID_SIZE;
4240+
}
4241+
os << (const void *)(tptr) << " (";
4242+
4243+
os << *tptr;
4244+
4245+
os << ")";
4246+
} break;
4247+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: {
4248+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4249+
if (sizeof(ur_bool_t) > size) {
4250+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4251+
return UR_RESULT_ERROR_INVALID_SIZE;
4252+
}
4253+
os << (const void *)(tptr) << " (";
4254+
4255+
os << *tptr;
4256+
4257+
os << ")";
4258+
} break;
4259+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: {
4260+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4261+
if (sizeof(ur_bool_t) > size) {
4262+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4263+
return UR_RESULT_ERROR_INVALID_SIZE;
4264+
}
4265+
os << (const void *)(tptr) << " (";
4266+
4267+
os << *tptr;
4268+
4269+
os << ")";
4270+
} break;
4271+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: {
4272+
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
4273+
if (sizeof(ur_bool_t) > size) {
4274+
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
4275+
return UR_RESULT_ERROR_INVALID_SIZE;
4276+
}
4277+
os << (const void *)(tptr) << " (";
4278+
4279+
os << *tptr;
4280+
4281+
os << ")";
4282+
} break;
41934283
default:
41944284
os << "unknown enumerator";
41954285
return UR_RESULT_ERROR_INVALID_ENUMERATION;

scripts/core/EXP-BINDLESS-IMAGES.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,12 @@ Enums
9191
* ${X}_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP
9292
* ${X}_DEVICE_INFO_CUBEMAP_SUPPORT_EXP
9393
* ${X}_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP
94+
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
95+
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
96+
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
97+
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
98+
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
99+
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
94100

95101
* ${x}_command_t
96102
* ${X}_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP
@@ -198,6 +204,8 @@ Changelog
198204
+------------------------------------------------------------------------+
199205
| 10.0 | Added cubemap image type, sampling properties, and device |
200206
| | queries. |
207+
+----------+-------------------------------------------------------------+
208+
| 11.0 | Added device queries for sampled image fetch capabilities. |
201209
+----------+-------------------------------------------------------------+
202210

203211
Contributors

scripts/core/exp-bindless-images.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,24 @@ etors:
9292
- name: CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP
9393
value: "0x2011"
9494
desc: "[$x_bool_t] returns true if the device supports sampling cubemapped images across face boundaries"
95+
- name: BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
96+
value: "0x2012"
97+
desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 1D sampled image data."
98+
- name: BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
99+
value: "0x2013"
100+
desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 1D sampled image data."
101+
- name: BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
102+
value: "0x2014"
103+
desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 2D sampled image data."
104+
- name: BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
105+
value: "0x2015"
106+
desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 2D sampled image data."
107+
- name: BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
108+
value: "0x2016"
109+
desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 3D sampled image data."
110+
- name: BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
111+
value: "0x2017"
112+
desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 3D sampled image data."
95113
--- #--------------------------------------------------------------------------
96114
type: enum
97115
extend: true

source/adapters/cuda/device.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
926926
// CUDA supports cubemap seamless filtering.
927927
return ReturnValue(true);
928928
}
929+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: {
930+
// CUDA does support fetching 1D USM sampled image data.
931+
return ReturnValue(true);
932+
}
933+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: {
934+
// CUDA does not support fetching 1D non-USM sampled image data.
935+
return ReturnValue(false);
936+
}
937+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: {
938+
// CUDA does support fetching 2D USM sampled image data.
939+
return ReturnValue(true);
940+
}
941+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: {
942+
// CUDA does support fetching 2D non-USM sampled image data.
943+
return ReturnValue(true);
944+
}
945+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: {
946+
// CUDA does not support 3D USM sampled textures
947+
return ReturnValue(false);
948+
}
949+
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: {
950+
// CUDA does support fetching 3D non-USM sampled image data.
951+
return ReturnValue(true);
952+
}
929953
case UR_DEVICE_INFO_DEVICE_ID: {
930954
int Value = 0;
931955
UR_CHECK_ERROR(cuDeviceGetAttribute(

source/adapters/level_zero/command_buffer.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,8 +1048,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
10481048
// Create command-list to execute before `CommandListPtr` and will signal
10491049
// when `EventWaitList` dependencies are complete.
10501050
ur_command_list_ptr_t WaitCommandList{};
1051-
UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList,
1052-
false, false));
1051+
UR_CALL(Queue->Context->getAvailableCommandList(
1052+
Queue, WaitCommandList, false, NumEventsInWaitList, EventWaitList,
1053+
false));
10531054

10541055
ZE2UR_CALL(zeCommandListAppendBarrier,
10551056
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
@@ -1086,7 +1087,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
10861087
// Create a command-list to signal RetEvent on completion
10871088
ur_command_list_ptr_t SignalCommandList{};
10881089
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
1089-
false, false));
1090+
false, NumEventsInWaitList,
1091+
EventWaitList, false));
10901092
// Reset the wait-event for the UR command-buffer that is signaled when its
10911093
// submission dependencies have been satisfied.
10921094
ZE2UR_CALL(zeCommandListAppendEventReset,

source/adapters/level_zero/common.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,19 @@ static const uint32_t UrL0QueueSyncNonBlocking = [] {
241241
return L0QueueSyncLockingModeValue;
242242
}();
243243

244+
// Controls whether the L0 Adapter creates signal events for commands on
245+
// integrated GPU devices.
//
// The value is computed by an immediately-invoked lambda that reads the
// UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT environment variable:
//   - variable unset: the value is 1;
//   - variable set: the value is std::atoi() of its text, stored as uint32_t.
// NOTE(review): std::atoi yields 0 for non-numeric text and a negative result
// wraps when converted to uint32_t; presumably only 0 and 1 are intended
// inputs - confirm against the code that consumes this setting.
246+
static const uint32_t UrL0OutOfOrderIntegratedSignalEvent = [] {
247+
const char *UrL0OutOfOrderIntegratedSignalEventEnv =
248+
std::getenv("UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT");
249+
uint32_t UrL0OutOfOrderIntegratedSignalEventValue = 1;
250+
if (UrL0OutOfOrderIntegratedSignalEventEnv) {
251+
UrL0OutOfOrderIntegratedSignalEventValue =
252+
std::atoi(UrL0OutOfOrderIntegratedSignalEventEnv);
253+
}
254+
return UrL0OutOfOrderIntegratedSignalEventValue;
255+
}();
256+
244257
// This class encapsulates actions taken along with a call to Level Zero API.
245258
class ZeCall {
246259
private:

0 commit comments

Comments
 (0)