Skip to content

Commit b6b25ee

Browse files
authored
[UR] Add reference counting and ext. fn cache to OpenCL adapters (#17854)
Rather than using atexit handlers to drop the adapter (which causes ordering issues with SYCL's own atexit handlers), `urAdapterRetain` and `urAdapterRelease` now update a reference counter. When the last adapter handle is released, the adapter is destroyed. Since it is now simple to do so, the extension function cache has been moved to be a member of the adapter itself rather than being a separate global.
1 parent fc328cc commit b6b25ee

File tree

11 files changed

+105
-95
lines changed

11 files changed

+105
-95
lines changed

unified-runtime/source/adapters/opencl/adapter.cpp

Lines changed: 33 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818
#include <dlfcn.h>
1919
#endif
2020

21+
// There can only be one OpenCL adapter alive at a time.
22+
// If it is alive (more get/retains than releases called), this is a pointer to
23+
// it.
24+
static ur_adapter_handle_t liveAdapter = nullptr;
25+
2126
ur_adapter_handle_t_::ur_adapter_handle_t_() {
2227
#ifdef _MSC_VER
2328

@@ -42,45 +47,38 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() {
4247
#undef CL_CORE_FUNCTION
4348

4449
#endif // _MSC_VER
50+
assert(!liveAdapter);
51+
liveAdapter = this;
4552
}
4653

47-
static ur_adapter_handle_t adapter = nullptr;
54+
ur_adapter_handle_t_::~ur_adapter_handle_t_() {
55+
assert(liveAdapter == this);
56+
liveAdapter = nullptr;
57+
}
4858

4959
ur_adapter_handle_t ur::cl::getAdapter() {
50-
if (!adapter) {
60+
if (!liveAdapter) {
5161
die("OpenCL adapter used before initalization or after destruction");
5262
}
53-
return adapter;
54-
}
55-
56-
static void globalAdapterShutdown() {
57-
if (cl_ext::ExtFuncPtrCache) {
58-
delete cl_ext::ExtFuncPtrCache;
59-
cl_ext::ExtFuncPtrCache = nullptr;
60-
}
61-
if (adapter) {
62-
delete adapter;
63-
adapter = nullptr;
64-
}
63+
return liveAdapter;
6564
}
6665

6766
UR_APIEXPORT ur_result_t UR_APICALL
6867
urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
6968
uint32_t *pNumAdapters) {
69+
static std::mutex AdapterConstructionMutex{};
70+
7071
if (NumEntries > 0 && phAdapters) {
71-
// Sometimes urAdapterGet may be called after the library has already been torn
72-
// down, we also need to create a temporary handle for it.
73-
if (!adapter) {
74-
adapter = new ur_adapter_handle_t_();
75-
atexit(globalAdapterShutdown);
76-
}
72+
std::lock_guard<std::mutex> Lock{AdapterConstructionMutex};
7773

78-
std::lock_guard<std::mutex> Lock{adapter->Mutex};
79-
if (adapter->RefCount++ == 0) {
80-
cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT();
74+
if (!liveAdapter) {
75+
*phAdapters = new ur_adapter_handle_t_();
76+
} else {
77+
*phAdapters = liveAdapter;
8178
}
8279

83-
*phAdapters = adapter;
80+
auto &adapter = *phAdapters;
81+
adapter->RefCount++;
8482
}
8583

8684
if (pNumAdapters) {
@@ -90,21 +88,16 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters,
9088
return UR_RESULT_SUCCESS;
9189
}
9290

93-
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) {
94-
++adapter->RefCount;
91+
UR_APIEXPORT ur_result_t UR_APICALL
92+
urAdapterRetain(ur_adapter_handle_t hAdapter) {
93+
++hAdapter->RefCount;
9594
return UR_RESULT_SUCCESS;
9695
}
9796

98-
UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) {
99-
// Check first if the adapter is valid pointer
100-
if (adapter) {
101-
std::lock_guard<std::mutex> Lock{adapter->Mutex};
102-
if (--adapter->RefCount == 0) {
103-
if (cl_ext::ExtFuncPtrCache) {
104-
delete cl_ext::ExtFuncPtrCache;
105-
cl_ext::ExtFuncPtrCache = nullptr;
106-
}
107-
}
97+
UR_APIEXPORT ur_result_t UR_APICALL
98+
urAdapterRelease(ur_adapter_handle_t hAdapter) {
99+
if (--hAdapter->RefCount == 0) {
100+
delete hAdapter;
108101
}
109102
return UR_RESULT_SUCCESS;
110103
}
@@ -117,18 +110,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError(
117110
return UR_RESULT_SUCCESS;
118111
}
119112

120-
UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t,
121-
ur_adapter_info_t propName,
122-
size_t propSize,
123-
void *pPropValue,
124-
size_t *pPropSizeRet) {
113+
UR_APIEXPORT ur_result_t UR_APICALL
114+
urAdapterGetInfo(ur_adapter_handle_t hAdapter, ur_adapter_info_t propName,
115+
size_t propSize, void *pPropValue, size_t *pPropSizeRet) {
125116
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
126117

127118
switch (propName) {
128119
case UR_ADAPTER_INFO_BACKEND:
129120
return ReturnValue(UR_ADAPTER_BACKEND_OPENCL);
130121
case UR_ADAPTER_INFO_REFERENCE_COUNT:
131-
return ReturnValue(adapter->RefCount.load());
122+
return ReturnValue(hAdapter->RefCount.load());
132123
case UR_ADAPTER_INFO_VERSION:
133124
return ReturnValue(uint32_t{1});
134125
default:

unified-runtime/source/adapters/opencl/adapter.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,25 @@
77
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
88
//
99
//===----------------------------------------------------------------------===//
10+
#pragma once
11+
1012
#include "device.hpp"
1113
#include "logger/ur_logger.hpp"
1214
#include "platform.hpp"
1315

1416
#include "CL/cl.h"
17+
#include "common.hpp"
1518
#include "logger/ur_logger.hpp"
1619

1720
struct ur_adapter_handle_t_ {
1821
ur_adapter_handle_t_();
22+
~ur_adapter_handle_t_();
23+
24+
ur_adapter_handle_t_(ur_adapter_handle_t_ &) = delete;
1925

2026
std::atomic<uint32_t> RefCount = 0;
21-
std::mutex Mutex;
2227
logger::Logger &log = logger::get_logger("opencl");
28+
cl_ext::ExtFuncPtrCacheT fnCache{};
2329

2430
std::vector<std::unique_ptr<ur_platform_handle_t_>> URPlatforms;
2531
uint32_t NumPlatforms = 0;

unified-runtime/source/adapters/opencl/command_buffer.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010

1111
#include "command_buffer.hpp"
12+
#include "adapter.hpp"
1213
#include "common.hpp"
1314
#include "context.hpp"
1415
#include "event.hpp"
@@ -25,7 +26,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
2526
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
2627
cl_int Res =
2728
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
28-
CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache,
29+
CLContext,
30+
ur::cl::getAdapter()->fnCache.clReleaseCommandBufferKHRCache,
2931
cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR);
3032
assert(Res == CL_SUCCESS);
3133
(void)Res;
@@ -42,7 +44,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
4244
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
4345
UR_RETURN_ON_FAILURE(
4446
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
45-
CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache,
47+
CLContext,
48+
ur::cl::getAdapter()->fnCache.clCreateCommandBufferKHRCache,
4649
cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR));
4750

4851
const bool IsUpdatable = pCommandBufferDesc->isUpdatable;
@@ -116,7 +119,8 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
116119
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
117120
UR_RETURN_ON_FAILURE(
118121
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
119-
CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache,
122+
CLContext,
123+
ur::cl::getAdapter()->fnCache.clFinalizeCommandBufferKHRCache,
120124
cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR));
121125

122126
CL_RETURN_ON_FAILURE(
@@ -148,7 +152,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
148152
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
149153
UR_RETURN_ON_FAILURE(
150154
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
151-
CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache,
155+
CLContext,
156+
ur::cl::getAdapter()->fnCache.clCommandNDRangeKernelKHRCache,
152157
cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR));
153158

154159
cl_mutable_command_khr CommandHandle = nullptr;
@@ -238,7 +243,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
238243
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
239244
UR_RETURN_ON_FAILURE(
240245
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
241-
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
246+
CLContext, ur::cl::getAdapter()->fnCache.clCommandCopyBufferKHRCache,
242247
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));
243248

244249
const bool IsInOrder = hCommandBuffer->IsInOrder;
@@ -280,7 +285,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
280285
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
281286
UR_RETURN_ON_FAILURE(
282287
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
283-
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
288+
CLContext,
289+
ur::cl::getAdapter()->fnCache.clCommandCopyBufferRectKHRCache,
284290
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));
285291

286292
const bool IsInOrder = hCommandBuffer->IsInOrder;
@@ -388,7 +394,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
388394
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
389395
UR_RETURN_ON_FAILURE(
390396
cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
391-
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
397+
CLContext, ur::cl::getAdapter()->fnCache.clCommandFillBufferKHRCache,
392398
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));
393399

394400
const bool IsInOrder = hCommandBuffer->IsInOrder;
@@ -459,7 +465,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp(
459465
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
460466
UR_RETURN_ON_FAILURE(
461467
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
462-
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache,
468+
CLContext,
469+
ur::cl::getAdapter()->fnCache.clEnqueueCommandBufferKHRCache,
463470
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR));
464471

465472
const uint32_t NumberOfQueues = 1;
@@ -618,7 +625,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
618625
cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
619626
UR_RETURN_ON_FAILURE(
620627
cl_ext::getExtFuncFromContext<decltype(clUpdateMutableCommandsKHR)>(
621-
CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache,
628+
CLContext,
629+
ur::cl::getAdapter()->fnCache.clUpdateMutableCommandsKHRCache,
622630
cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR));
623631

624632
std::vector<cl_mutable_dispatch_config_khr> ConfigList(numKernelUpdates);
@@ -754,7 +762,7 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp(
754762
UR_RETURN_ON_FAILURE(
755763
cl_ext::getExtFuncFromContext<decltype(clCommandBarrierWithWaitListKHR)>(
756764
CLContext,
757-
cl_ext::ExtFuncPtrCache->clCommandBarrierWithWaitListKHRCache,
765+
ur::cl::getAdapter()->fnCache.clCommandBarrierWithWaitListKHRCache,
758766
cl_ext::CommandBarrierWithWaitListName,
759767
&clCommandBarrierWithWaitListKHR));
760768

unified-runtime/source/adapters/opencl/common.hpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -349,11 +349,6 @@ struct ExtFuncPtrCacheT {
349349
#undef CL_EXTENSION_FUNC
350350
}
351351
};
352-
// A raw pointer is used here since the lifetime of this map has to be tied to
353-
// piTeardown to avoid issues with static destruction order (a user application
354-
// might have static objects that indirectly access this cache in their
355-
// destructor).
356-
inline ExtFuncPtrCacheT *ExtFuncPtrCache;
357352

358353
// USM helper function to get an extension function pointer
359354
template <typename T>

unified-runtime/source/adapters/opencl/context.cpp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,20 +117,10 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName,
117117

118118
UR_APIEXPORT ur_result_t UR_APICALL
119119
urContextRelease(ur_context_handle_t hContext) {
120-
// If we're reasonably sure this context is about to be destroyed we should
121-
// clear the ext function pointer cache. This isn't foolproof sadly but it
122-
// should drastically reduce the chances of the pathological case described
123-
// in the comments in common.hpp.
124120
static std::mutex contextReleaseMutex;
125-
auto clContext = hContext->CLContext;
126121

127122
std::lock_guard<std::mutex> lock(contextReleaseMutex);
128123
if (hContext->decrementReferenceCount() == 0) {
129-
// ExtFuncPtrCache is destroyed in an atexit() callback, so it doesn't
130-
// necessarily outlive the adapter (or all the contexts).
131-
if (cl_ext::ExtFuncPtrCache) {
132-
cl_ext::ExtFuncPtrCache->clearCache(clContext);
133-
}
134124
delete hContext;
135125
}
136126

unified-runtime/source/adapters/opencl/context.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//===----------------------------------------------------------------------===//
1010
#pragma once
1111

12+
#include "adapter.hpp"
1213
#include "common.hpp"
1314
#include "device.hpp"
1415

@@ -29,6 +30,9 @@ struct ur_context_handle_t_ {
2930
Devices.emplace_back(phDevices[i]);
3031
urDeviceRetain(phDevices[i]);
3132
}
33+
// The context retains a reference to the adapter so it can clear the
34+
// function ptr cache on destruction
35+
urAdapterRetain(ur::cl::getAdapter());
3236
RefCount = 1;
3337
}
3438

@@ -42,6 +46,13 @@ struct ur_context_handle_t_ {
4246
const ur_device_handle_t *phDevices,
4347
ur_context_handle_t &Context);
4448
~ur_context_handle_t_() {
49+
// If we're reasonably sure this context is about to be destroyed we should
50+
// clear the ext function pointer cache. This isn't foolproof sadly but it
51+
// should drastically reduce the chances of the pathological case described
52+
// in the comments in common.hpp.
53+
ur::cl::getAdapter()->fnCache.clearCache(CLContext);
54+
urAdapterRelease(ur::cl::getAdapter());
55+
4556
for (uint32_t i = 0; i < DeviceCount; i++) {
4657
urDeviceRelease(Devices[i]);
4758
}

unified-runtime/source/adapters/opencl/enqueue.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11+
#include "adapter.hpp"
1112
#include "common.hpp"
1213
#include "context.hpp"
1314
#include "event.hpp"
@@ -400,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite(
400401
MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents);
401402
cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr;
402403
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<decltype(F)>(
403-
Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache,
404+
Ctx, ur::cl::getAdapter()->fnCache.clEnqueueWriteGlobalVariableCache,
404405
cl_ext::EnqueueWriteGlobalVariableName, &F));
405406

406407
cl_int Res = F(hQueue->CLQueue, hProgram->CLProgram, name, blockingWrite,
@@ -422,7 +423,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead(
422423
MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents);
423424
cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr;
424425
UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext<decltype(F)>(
425-
Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache,
426+
Ctx, ur::cl::getAdapter()->fnCache.clEnqueueReadGlobalVariableCache,
426427
cl_ext::EnqueueReadGlobalVariableName, &F));
427428

428429
cl_int Res = F(hQueue->CLQueue, hProgram->CLProgram, name, blockingRead,
@@ -446,7 +447,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe(
446447
cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr;
447448
UR_RETURN_ON_FAILURE(
448449
cl_ext::getExtFuncFromContext<cl_ext::clEnqueueReadHostPipeINTEL_fn>(
449-
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache,
450+
CLContext,
451+
ur::cl::getAdapter()->fnCache.clEnqueueReadHostPipeINTELCache,
450452
cl_ext::EnqueueReadHostPipeName, &FuncPtr));
451453

452454
if (FuncPtr) {
@@ -474,7 +476,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe(
474476
cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr;
475477
UR_RETURN_ON_FAILURE(
476478
cl_ext::getExtFuncFromContext<cl_ext::clEnqueueWriteHostPipeINTEL_fn>(
477-
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache,
479+
CLContext,
480+
ur::cl::getAdapter()->fnCache.clEnqueueWriteHostPipeINTELCache,
478481
cl_ext::EnqueueWriteHostPipeName, &FuncPtr));
479482

480483
if (FuncPtr) {

0 commit comments

Comments
 (0)