Skip to content

Commit 8262de6

Browse files
authored
Merge branch 'adapters' into review/yang/urAdapterGet
2 parents 1b9fdc6 + 109ed46 commit 8262de6

File tree

9 files changed

+312
-90
lines changed

9 files changed

+312
-90
lines changed

source/adapters/level_zero/adapter.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ ur_result_t adapterStateTeardown() {
3838
// Print the balance of various create/destroy native calls.
3939
// The idea is to verify if the number of create(+) and destroy(-) calls are
4040
// matched.
41-
if (ZeCallCount && (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) != 0) {
41+
if (ZeCallCount && (UrL0LeaksDebug) != 0) {
4242
// clang-format off
4343
//
4444
// The format of this table is such that each row accounts for a
@@ -79,8 +79,7 @@ ur_result_t adapterStateTeardown() {
7979
//
8080
// clang-format on
8181

82-
fprintf(stderr, "ZE_DEBUG=%d: check balance of create/destroy calls\n",
83-
UR_L0_DEBUG_CALL_COUNT);
82+
fprintf(stderr, "Check balance of create/destroy calls\n");
8483
fprintf(stderr,
8584
"----------------------------------------------------------\n");
8685
for (const auto &Row : CreateDestroySet) {

source/adapters/level_zero/common.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName,
149149
const char *ZeArgs, bool TraceError) {
150150
urPrint("ZE ---> %s%s\n", ZeName, ZeArgs);
151151

152-
if (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) {
152+
if (UrL0LeaksDebug) {
153153
++(*ZeCallCount)[ZeName];
154154
}
155155

source/adapters/level_zero/common.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ enum UrDebugLevel {
187187
UR_L0_DEBUG_NONE = 0x0,
188188
UR_L0_DEBUG_BASIC = 0x1,
189189
UR_L0_DEBUG_VALIDATION = 0x2,
190-
UR_L0_DEBUG_CALL_COUNT = 0x4,
191190
UR_L0_DEBUG_ALL = -1
192191
};
193192

@@ -203,6 +202,13 @@ const int UrL0Debug = [] {
203202
return DebugMode;
204203
}();
205204

205+
// Leak-checking switch for the Level Zero adapter, read once from the
// UR_L0_LEAKS_DEBUG environment variable when this variable is initialized:
//   - unset, or text that does not start with a number: 0 (disabled)
//   - a decimal integer that fits in an int: that integer
//   - a decimal integer that does not fit in an int: 1 (still enabled)
const int UrL0LeaksDebug = [] {
  const char *UrRet = std::getenv("UR_L0_LEAKS_DEBUG");
  if (!UrRet)
    return 0;
  // std::strtol is used instead of std::atoi because atoi has undefined
  // behavior when the parsed value cannot be represented; strtol saturates.
  const long Parsed = std::strtol(UrRet, nullptr, 10);
  const int Narrowed = static_cast<int>(Parsed);
  // If narrowing changed the value it was out of range for int; report a
  // plain "enabled" instead of a wrapped (possibly zero) number.
  return Narrowed == Parsed ? Narrowed : 1;
}();
211+
206212
// Controls Level Zero call serialization to work around the Level Zero driver not being MT
207213
// ready. Recognized values (can be used as a bit mask):
208214
enum {

source/adapters/level_zero/platform.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(
3030
static std::once_flag ZeCallCountInitialized;
3131
try {
3232
std::call_once(ZeCallCountInitialized, []() {
33-
if (UrL0Debug & UR_L0_DEBUG_CALL_COUNT) {
33+
if (UrL0LeaksDebug) {
3434
ZeCallCount = new std::map<std::string, int>;
3535
}
3636
});

source/adapters/opencl/command_buffer.cpp

Lines changed: 199 additions & 81 deletions
Large diffs are not rendered by default.

source/adapters/opencl/command_buffer.hpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,17 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11+
#include <CL/cl_ext.h>
1112
#include <ur/ur.hpp>
1213

13-
/// Stub implementation of command-buffers for OpenCL
14+
// Command-buffer object for the OpenCL adapter: stores the queue, context and
// OpenCL command-buffer handles passed to its constructor.
// NOTE(review): this struct neither retains nor releases the handles it
// stores — confirm where their lifetimes are managed.
struct ur_exp_command_buffer_handle_t_ {
15+
ur_queue_handle_t hInternalQueue;
16+
ur_context_handle_t hContext;
17+
cl_command_buffer_khr CLCommandBuffer;
1418

15-
struct ur_exp_command_buffer_handle_t_ {};
19+
// Copies the three handles into the matching members; no other work is done.
ur_exp_command_buffer_handle_t_(ur_queue_handle_t hQueue,
20+
ur_context_handle_t hContext,
21+
cl_command_buffer_khr CLCommandBuffer)
22+
: hInternalQueue(hQueue), hContext(hContext),
23+
CLCommandBuffer(CLCommandBuffer) {}
24+
};

source/adapters/opencl/common.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ ur_result_t mapCLErrorToUR(cl_int Result) {
7777
return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE;
7878
case CL_INVALID_ARG_INDEX:
7979
return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
80+
case CL_INVALID_COMMAND_BUFFER_KHR:
81+
return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP;
82+
case CL_INVALID_SYNC_POINT_WAIT_LIST_KHR:
83+
return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP;
8084
default:
8185
return UR_RESULT_ERROR_UNKNOWN;
8286
}

source/adapters/opencl/common.hpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,16 @@ CONSTFIX char EnqueueReadGlobalVariableName[] =
192192
// Names of host pipe functions queried from OpenCL
193193
CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL";
194194
CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL";
195+
// Names of command buffer functions queried from OpenCL
196+
CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR";
197+
CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR";
198+
CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR";
199+
CONSTFIX char FinalizeCommandBufferName[] = "clFinalizeCommandBufferKHR";
200+
// NOTE(review): "NRRange" in this identifier looks like a typo for "NDRange"
// (the string value is spelled correctly); renaming needs every user updated.
CONSTFIX char CommandNRRangeKernelName[] = "clCommandNDRangeKernelKHR";
201+
CONSTFIX char CommandCopyBufferName[] = "clCommandCopyBufferKHR";
202+
CONSTFIX char CommandCopyBufferRectName[] = "clCommandCopyBufferRectKHR";
203+
CONSTFIX char CommandFillBufferName[] = "clCommandFillBufferKHR";
204+
CONSTFIX char EnqueueCommandBufferName[] = "clEnqueueCommandBufferKHR";
195205

196206
#undef CONSTFIX
197207

@@ -226,6 +236,58 @@ cl_int(CL_API_CALL *)(cl_command_queue queue, cl_program program,
226236
cl_uint num_events_in_waitlist,
227237
const cl_event *events_waitlist, cl_event *event);
228238

239+
// Function-pointer type named for clCreateCommandBufferKHR: takes a queue
// list, a properties list and an error-code out-pointer, and returns a
// cl_command_buffer_khr.
using clCreateCommandBufferKHR_fn = CL_API_ENTRY cl_command_buffer_khr(
240+
CL_API_CALL *)(cl_uint num_queues, const cl_command_queue *queues,
241+
const cl_command_buffer_properties_khr *properties,
242+
cl_int *errcode_ret);
243+
244+
// Function-pointer type named for clRetainCommandBufferKHR: takes a command
// buffer and returns a cl_int.
using clRetainCommandBufferKHR_fn = CL_API_ENTRY
245+
cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer);
246+
247+
// Function-pointer type named for clReleaseCommandBufferKHR: takes a command
// buffer and returns a cl_int.
using clReleaseCommandBufferKHR_fn = CL_API_ENTRY
248+
cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer);
249+
250+
// Function-pointer type named for clFinalizeCommandBufferKHR: takes a command
// buffer and returns a cl_int.
using clFinalizeCommandBufferKHR_fn = CL_API_ENTRY
251+
cl_int(CL_API_CALL *)(cl_command_buffer_khr command_buffer);
252+
253+
// Function-pointer type named for clCommandNDRangeKernelKHR: takes the command
// buffer, queue, kernel and work sizes plus a sync-point wait list, and
// returns a cl_int.
using clCommandNDRangeKernelKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
254+
cl_command_buffer_khr command_buffer, cl_command_queue command_queue,
255+
const cl_ndrange_kernel_command_properties_khr *properties,
256+
cl_kernel kernel, cl_uint work_dim, const size_t *global_work_offset,
257+
const size_t *global_work_size, const size_t *local_work_size,
258+
cl_uint num_sync_points_in_wait_list,
259+
const cl_sync_point_khr *sync_point_wait_list,
260+
cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle);
261+
262+
// Function-pointer type named for clCommandCopyBufferKHR: takes source and
// destination buffers with offsets and a size, plus a sync-point wait list,
// and returns a cl_int.
using clCommandCopyBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
263+
cl_command_buffer_khr command_buffer, cl_command_queue command_queue,
264+
cl_mem src_buffer, cl_mem dst_buffer, size_t src_offset, size_t dst_offset,
265+
size_t size, cl_uint num_sync_points_in_wait_list,
266+
const cl_sync_point_khr *sync_point_wait_list,
267+
cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle);
268+
269+
// Function-pointer type named for clCommandCopyBufferRectKHR: takes source and
// destination buffers with origins, a region and row/slice pitches, plus a
// sync-point wait list, and returns a cl_int.
using clCommandCopyBufferRectKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
270+
cl_command_buffer_khr command_buffer, cl_command_queue command_queue,
271+
cl_mem src_buffer, cl_mem dst_buffer, const size_t *src_origin,
272+
const size_t *dst_origin, const size_t *region, size_t src_row_pitch,
273+
size_t src_slice_pitch, size_t dst_row_pitch, size_t dst_slice_pitch,
274+
cl_uint num_sync_points_in_wait_list,
275+
const cl_sync_point_khr *sync_point_wait_list,
276+
cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle);
277+
278+
// Function-pointer type named for clCommandFillBufferKHR: takes a buffer, a
// pattern with its size, an offset and a size, plus a sync-point wait list,
// and returns a cl_int.
using clCommandFillBufferKHR_fn = CL_API_ENTRY cl_int(CL_API_CALL *)(
279+
cl_command_buffer_khr command_buffer, cl_command_queue command_queue,
280+
cl_mem buffer, const void *pattern, size_t pattern_size, size_t offset,
281+
size_t size, cl_uint num_sync_points_in_wait_list,
282+
const cl_sync_point_khr *sync_point_wait_list,
283+
cl_sync_point_khr *sync_point, cl_mutable_command_khr *mutable_handle);
284+
285+
// Function-pointer type named for clEnqueueCommandBufferKHR: takes a queue
// list, the command buffer and an event wait list, and returns a cl_int with
// an optional output event.
using clEnqueueCommandBufferKHR_fn = CL_API_ENTRY
286+
cl_int(CL_API_CALL *)(cl_uint num_queues, cl_command_queue *queues,
287+
cl_command_buffer_khr command_buffer,
288+
cl_uint num_events_in_wait_list,
289+
const cl_event *event_wait_list, cl_event *event);
290+
229291
template <typename T> struct FuncPtrCache {
230292
std::map<cl_context, T> Map;
231293
std::mutex Mutex;
@@ -255,6 +317,15 @@ struct ExtFuncPtrCacheT {
255317
FuncPtrCache<clEnqueueWriteHostPipeINTEL_fn> clEnqueueWriteHostPipeINTELCache;
256318
FuncPtrCache<clSetProgramSpecializationConstant_fn>
257319
clSetProgramSpecializationConstantCache;
320+
FuncPtrCache<clCreateCommandBufferKHR_fn> clCreateCommandBufferKHRCache;
321+
FuncPtrCache<clRetainCommandBufferKHR_fn> clRetainCommandBufferKHRCache;
322+
FuncPtrCache<clReleaseCommandBufferKHR_fn> clReleaseCommandBufferKHRCache;
323+
FuncPtrCache<clFinalizeCommandBufferKHR_fn> clFinalizeCommandBufferKHRCache;
324+
FuncPtrCache<clCommandNDRangeKernelKHR_fn> clCommandNDRangeKernelKHRCache;
325+
FuncPtrCache<clCommandCopyBufferKHR_fn> clCommandCopyBufferKHRCache;
326+
FuncPtrCache<clCommandCopyBufferRectKHR_fn> clCommandCopyBufferRectKHRCache;
327+
FuncPtrCache<clCommandFillBufferKHR_fn> clCommandFillBufferKHRCache;
328+
FuncPtrCache<clEnqueueCommandBufferKHR_fn> clEnqueueCommandBufferKHRCache;
258329
};
259330
// A raw pointer is used here since the lifetime of this map has to be tied to
260331
// piTeardown to avoid issues with static destruction order (a user application

source/adapters/opencl/device.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
886886
case UR_DEVICE_INFO_PROFILE:
887887
case UR_DEVICE_INFO_VERSION:
888888
case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION:
889-
case UR_DEVICE_INFO_EXTENSIONS:
890889
case UR_DEVICE_INFO_BUILT_IN_KERNELS:
891890
case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES:
892891
case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL:
@@ -908,6 +907,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
908907

909908
return UR_RESULT_SUCCESS;
910909
}
910+
case UR_DEVICE_INFO_EXTENSIONS: {
911+
cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
912+
size_t ExtSize = 0;
913+
CL_RETURN_ON_FAILURE(
914+
clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize));
915+
916+
std::string ExtStr(ExtSize, '\0');
917+
CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize,
918+
ExtStr.data(), nullptr));
919+
920+
std::string SupportedExtensions(ExtStr.c_str());
921+
if (ExtStr.find("cl_khr_command_buffer") != std::string::npos) {
922+
SupportedExtensions += " ur_exp_command_buffer";
923+
}
924+
return ReturnValue(SupportedExtensions.c_str());
925+
}
911926
/* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU
912927
* EU device-specific information extensions. Some of the queries are
913928
* enabled by cl_intel_device_attribute_query extension, but it's not yet in

0 commit comments

Comments
 (0)