Skip to content

Commit 59eb54a

Browse files
authored
[UR][CL] Use regular malloc where clHostMemAllocINTEL_fn is not supported for USM allocs. (#18119)
Fixes oneapi-src/unified-runtime#1837. The UR OpenCL adapter expects pattern sizes that are a power of 2 for USM allocs, and falls back to using clHostMemAllocINTEL_fn for pattern sizes that are not a power of 2. Some devices/platforms don't support this extension, so we can just use a regular malloc instead.
1 parent aa2c876 commit 59eb54a

File tree

2 files changed

+48
-40
lines changed

2 files changed

+48
-40
lines changed

unified-runtime/source/adapters/opencl/usm.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
293293
// as the largest CL type (double16/long16 - 128 bytes), anything larger or
294294
// not a power of 2, we need to do on the host side and copy it into the
295295
// target allocation.
296-
clHostMemAllocINTEL_fn HostMemAlloc = nullptr;
297-
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clHostMemAllocINTEL_fn>(
298-
CLContext, ur::cl::getAdapter()->fnCache.clHostMemAllocINTELCache,
299-
cl_ext::HostMemAllocName, &HostMemAlloc));
300296

301297
clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr;
302298
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<clEnqueueMemcpyINTEL_fn>(
@@ -308,10 +304,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
308304
CLContext, ur::cl::getAdapter()->fnCache.clMemBlockingFreeINTELCache,
309305
cl_ext::MemBlockingFreeName, &USMFree));
310306

311-
cl_int ClErr = CL_SUCCESS;
312-
auto HostBuffer =
313-
static_cast<uint8_t *>(HostMemAlloc(CLContext, nullptr, size, 0, &ClErr));
314-
CL_RETURN_ON_FAILURE(ClErr);
307+
uint8_t *HostBuffer = new uint8_t[size];
315308

316309
auto *End = HostBuffer + size;
317310
for (auto *Iter = HostBuffer; Iter < End; Iter += patternSize) {
@@ -343,9 +336,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
343336
}
344337

345338
// This self destructs taking the event and allocation with it.
346-
auto Info = new AllocDeleterCallbackInfo(USMFree, CLContext, HostBuffer);
339+
AllocDeleterCallbackInfo *Info =
340+
new AllocDeleterCallbackInfo(CLContext, HostBuffer);
347341

348-
ClErr =
342+
cl_int ClErr =
349343
clSetEventCallback(CopyEvent, CL_COMPLETE,
350344
AllocDeleterCallback<AllocDeleterCallbackInfo>, Info);
351345
if (ClErr != CL_SUCCESS) {
@@ -422,7 +416,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
422416
CLContext, ur::cl::getAdapter()->fnCache.clHostMemAllocINTELCache,
423417
cl_ext::HostMemAllocName, &HostMemAlloc));
424418

425-
auto HostAlloc = HostMemAlloc(CLContext, nullptr, size, 0, &CLErr);
419+
auto HostAlloc = static_cast<uint8_t *>(
420+
HostMemAlloc(CLContext, nullptr, size, 0, &CLErr));
426421
CL_RETURN_ON_FAILURE(CLErr);
427422

428423
// Now that we've successfully allocated we should try to clean it up if we
@@ -489,12 +484,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy(
489484
}
490485

491486
// This self destructs taking the event and allocation with it.
492-
auto DeleterInfo = new AllocDeleterCallbackInfoWithQueue(
487+
auto DeleterInfo = new AllocDeleterCallbackInfoUSMWithQueue(
493488
USMFree, CLContext, HostAlloc, MissingQueue);
494489

495490
CLErr = clSetEventCallback(
496491
HostCopyEvent, CL_COMPLETE,
497-
AllocDeleterCallback<AllocDeleterCallbackInfoWithQueue>, DeleterInfo);
492+
AllocDeleterCallback<AllocDeleterCallbackInfoUSMWithQueue>,
493+
DeleterInfo);
498494

499495
if (CLErr != CL_SUCCESS) {
500496
// We can attempt to recover gracefully by attempting to wait for the

unified-runtime/source/adapters/opencl/usm.hpp

Lines changed: 39 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,44 +17,56 @@
1717
//
1818
// Example usage:
1919
//
20-
// auto Info = new AllocDeleterCallbackInfo(USMFreeFuncPtr, Context,
20+
// auto Info = new AllocDeleterCallbackInfoUSM(USMFreeFuncPtr, Context,
2121
// Allocation); clSetEventCallback(USMOpEvent, CL_COMPLETE,
2222
// AllocDeleterCallback, Info);
23-
struct AllocDeleterCallbackInfo {
24-
AllocDeleterCallbackInfo(clMemBlockingFreeINTEL_fn USMFree,
25-
cl_context CLContext, void *Allocation)
26-
: USMFree(USMFree), CLContext(CLContext), Allocation(Allocation) {
23+
24+
struct AllocDeleterCallbackInfoBase {
25+
AllocDeleterCallbackInfoBase(cl_context Context, uint8_t *Allocation)
26+
: CLContext(Context), Allocation(Allocation) {
2727
clRetainContext(CLContext);
2828
}
29-
~AllocDeleterCallbackInfo() {
30-
USMFree(CLContext, Allocation);
31-
clReleaseContext(CLContext);
32-
}
33-
AllocDeleterCallbackInfo(const AllocDeleterCallbackInfo &) = delete;
34-
AllocDeleterCallbackInfo &
35-
operator=(const AllocDeleterCallbackInfo &) = delete;
3629

37-
clMemBlockingFreeINTEL_fn USMFree;
30+
virtual ~AllocDeleterCallbackInfoBase() { clReleaseContext(CLContext); }
31+
32+
AllocDeleterCallbackInfoBase(const AllocDeleterCallbackInfoBase &) = delete;
33+
AllocDeleterCallbackInfoBase &
34+
operator=(const AllocDeleterCallbackInfoBase &) = delete;
35+
36+
protected:
3837
cl_context CLContext;
39-
void *Allocation;
38+
uint8_t *Allocation;
4039
};
4140

42-
struct AllocDeleterCallbackInfoWithQueue : AllocDeleterCallbackInfo {
43-
AllocDeleterCallbackInfoWithQueue(clMemBlockingFreeINTEL_fn USMFree,
44-
cl_context CLContext, void *Allocation,
45-
cl_command_queue CLQueue)
46-
: AllocDeleterCallbackInfo(USMFree, CLContext, Allocation),
47-
CLQueue(CLQueue) {
48-
clRetainContext(CLContext);
41+
struct AllocDeleterCallbackInfo : AllocDeleterCallbackInfoBase {
42+
AllocDeleterCallbackInfo(cl_context CLContext, uint8_t *Allocation)
43+
: AllocDeleterCallbackInfoBase(CLContext, Allocation) {}
44+
45+
~AllocDeleterCallbackInfo() override { delete[] Allocation; }
46+
};
47+
48+
struct AllocDeleterCallbackInfoUSM : AllocDeleterCallbackInfoBase {
49+
AllocDeleterCallbackInfoUSM(clMemBlockingFreeINTEL_fn USMFree,
50+
cl_context CLContext, uint8_t *Allocation)
51+
: AllocDeleterCallbackInfoBase(CLContext, Allocation), USMFree(USMFree) {}
52+
~AllocDeleterCallbackInfoUSM() override { USMFree(CLContext, Allocation); }
53+
54+
clMemBlockingFreeINTEL_fn USMFree;
55+
};
56+
57+
struct AllocDeleterCallbackInfoUSMWithQueue : AllocDeleterCallbackInfoUSM {
58+
AllocDeleterCallbackInfoUSMWithQueue(clMemBlockingFreeINTEL_fn USMFree,
59+
cl_context CLContext,
60+
uint8_t *Allocation,
61+
cl_command_queue CLQueue)
62+
: AllocDeleterCallbackInfoUSM(USMFree, CLContext, Allocation),
63+
CLQueue(CLQueue) {}
64+
~AllocDeleterCallbackInfoUSMWithQueue() override {
65+
clReleaseCommandQueue(CLQueue);
4966
}
50-
~AllocDeleterCallbackInfoWithQueue() { clReleaseCommandQueue(CLQueue); }
51-
AllocDeleterCallbackInfoWithQueue(const AllocDeleterCallbackInfoWithQueue &) =
52-
delete;
53-
AllocDeleterCallbackInfoWithQueue &
54-
operator=(const AllocDeleterCallbackInfoWithQueue &) = delete;
5567

5668
cl_command_queue CLQueue;
5769
};
5870

5971
template <class T>
60-
void AllocDeleterCallback(cl_event event, cl_int, void *pUserData);
72+
void AllocDeleterCallback(cl_event event, cl_int, uint8_t *pUserData);

0 commit comments

Comments
 (0)