Skip to content

Commit ca0a0e3

Browse files
authored
Merge branch 'main' into coop_kernel_query
2 parents 39b7262 + f2af85f commit ca0a0e3

File tree

10 files changed

+253
-93
lines changed

10 files changed

+253
-93
lines changed

.github/workflows/build-hw-reusable.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,6 @@ jobs:
8282
tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz -C dpcpp_compiler
8383
8484
- name: Configure CMake
85-
# CFI sanitization seems to fail on our CUDA nodes
86-
# https://github.com/oneapi-src/unified-runtime/issues/2309
8785
run: >
8886
cmake
8987
-B${{github.workspace}}/build
@@ -96,7 +94,6 @@ jobs:
9694
-DUR_BUILD_ADAPTER_${{matrix.adapter.name}}=ON
9795
-DUR_CONFORMANCE_TEST_LOADER=${{ matrix.adapter.other_name != '' && 'ON' || 'OFF' }}
9896
${{ matrix.adapter.other_name != '' && format('-DUR_BUILD_ADAPTER_{0}=ON', matrix.adapter.other_name) || '' }}
99-
-DUR_USE_CFI=${{ matrix.adapter.name == 'CUDA' && 'OFF' || 'ON' }}
10097
-DUR_STATIC_LOADER=${{matrix.adapter.static_Loader}}
10198
-DUR_STATIC_ADAPTER_${{matrix.adapter.name}}=${{matrix.adapter.static_adapter}}
10299
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++

source/adapters/level_zero/adapter.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,14 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
407407

408408
return;
409409
}
410+
// Dynamically load the new L0 SysMan separate-init API and the new EXP APIs
411+
// at runtime. This must be done to avoid attempting to use symbols that do
412+
// not exist in older loader runtimes.
413+
#ifdef _WIN32
414+
GlobalAdapter->processHandle = GetModuleHandle(NULL);
415+
#else
416+
GlobalAdapter->processHandle = nullptr;
417+
#endif
410418

411419
// Check if the user has enabled the default L0 SysMan initialization.
412420
const int UrSysmanZesinitEnable = [] {
@@ -422,13 +430,13 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
422430
GlobalAdapter->getDeviceByUUIdFunctionPtr =
423431
(zes_pfnDriverGetDeviceByUuidExp_t)
424432
ur_loader::LibLoader::getFunctionPtr(
425-
processHandle, "zesDriverGetDeviceByUuidExp");
433+
GlobalAdapter->processHandle, "zesDriverGetDeviceByUuidExp");
426434
GlobalAdapter->getSysManDriversFunctionPtr =
427435
(zes_pfnDriverGet_t)ur_loader::LibLoader::getFunctionPtr(
428-
processHandle, "zesDriverGet");
436+
GlobalAdapter->processHandle, "zesDriverGet");
429437
GlobalAdapter->sysManInitFunctionPtr =
430-
(zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(processHandle,
431-
"zesInit");
438+
(zes_pfnInit_t)ur_loader::LibLoader::getFunctionPtr(
439+
GlobalAdapter->processHandle, "zesInit");
432440
}
433441
if (GlobalAdapter->getDeviceByUUIdFunctionPtr &&
434442
GlobalAdapter->getSysManDriversFunctionPtr &&

source/adapters/level_zero/adapter.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct ur_adapter_handle_t_ {
4545
std::optional<ze_result_t> ZesResult;
4646
ZeCache<Result<PlatformVec>> PlatformCache;
4747
logger::Logger &logger;
48+
HMODULE processHandle = nullptr;
4849
};
4950

5051
extern ur_adapter_handle_t_ *GlobalAdapter;

source/adapters/level_zero/platform.cpp

Lines changed: 72 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ ur_result_t ur_platform_handle_t_::initialize() {
220220
ZE2UR_CALL(zeDriverGetExtensionProperties,
221221
(ZeDriver, &Count, ZeExtensions.data()));
222222

223+
bool MutableCommandListSpecExtensionSupported = false;
223224
for (auto &extension : ZeExtensions) {
224225
// Check if global offset extension is available
225226
if (strncmp(extension.name, ZE_GLOBAL_OFFSET_EXP_NAME,
@@ -244,13 +245,11 @@ ur_result_t ur_platform_handle_t_::initialize() {
244245
ZeDriverEventPoolCountingEventsExtensionFound = true;
245246
}
246247
}
247-
248-
// Check if the ImmediateAppendCommandLists extension is available.
249-
if (strncmp(extension.name, ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME,
250-
strlen(ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_NAME) + 1) == 0) {
251-
if (extension.version ==
252-
ZE_IMMEDIATE_COMMAND_LIST_APPEND_EXP_VERSION_CURRENT) {
253-
zeDriverImmediateCommandListAppendFound = true;
248+
// Check if extension is available for Mutable Command List v1.1.
249+
if (strncmp(extension.name, ZE_MUTABLE_COMMAND_LIST_EXP_NAME,
250+
strlen(ZE_MUTABLE_COMMAND_LIST_EXP_NAME) + 1) == 0) {
251+
if (extension.version == ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1) {
252+
MutableCommandListSpecExtensionSupported = true;
254253
}
255254
}
256255
zeDriverExtensionMap[extension.name] = extension.version;
@@ -289,37 +288,72 @@ ur_result_t ur_platform_handle_t_::initialize() {
289288

290289
// Check if mutable command list extension is supported and initialize
291290
// function pointers.
292-
ZeMutableCmdListExt.Supported |=
293-
(ZE_CALL_NOCHECK(
294-
zeDriverGetExtensionFunctionAddress,
295-
(ZeDriver, "zeCommandListGetNextCommandIdExp",
296-
reinterpret_cast<void **>(
297-
&ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp))) == 0);
298-
299-
ZeMutableCmdListExt.Supported &=
300-
(ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress,
301-
(ZeDriver, "zeCommandListUpdateMutableCommandsExp",
302-
reinterpret_cast<void **>(
303-
&ZeMutableCmdListExt
304-
.zexCommandListUpdateMutableCommandsExp))) ==
305-
0);
306-
307-
ZeMutableCmdListExt.Supported &=
308-
(ZE_CALL_NOCHECK(
309-
zeDriverGetExtensionFunctionAddress,
310-
(ZeDriver, "zeCommandListUpdateMutableCommandSignalEventExp",
311-
reinterpret_cast<void **>(
312-
&ZeMutableCmdListExt
313-
.zexCommandListUpdateMutableCommandSignalEventExp))) == 0);
314-
315-
ZeMutableCmdListExt.Supported &=
316-
(ZE_CALL_NOCHECK(
317-
zeDriverGetExtensionFunctionAddress,
318-
(ZeDriver, "zeCommandListUpdateMutableCommandWaitEventsExp",
319-
reinterpret_cast<void **>(
320-
&ZeMutableCmdListExt
321-
.zexCommandListUpdateMutableCommandWaitEventsExp))) == 0);
322-
291+
// Resolve the four mutable command list entry points and record whether the
// extension can be used. In both paths the first lookup may turn Supported on
// and every following lookup may only turn it off, so Supported ends up true
// only when all four function pointers were resolved. A partially resolved
// table must never be reported as supported, since the missing entries would
// be called through null pointers.
if (MutableCommandListSpecExtensionSupported) {
  // The driver reported ZE_MUTABLE_COMMAND_LIST_EXP_VERSION_1_1: look the
  // symbols up through the loader's getFunctionPtr on the adapter's process
  // handle and treat a null result as "not available".
  ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp =
      (ze_pfnCommandListGetNextCommandIdExp_t)
          ur_loader::LibLoader::getFunctionPtr(
              GlobalAdapter->processHandle,
              "zeCommandListGetNextCommandIdExp");
  ZeMutableCmdListExt.Supported |=
      ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp != nullptr;

  ZeMutableCmdListExt.zexCommandListUpdateMutableCommandsExp =
      (ze_pfnCommandListUpdateMutableCommandsExp_t)
          ur_loader::LibLoader::getFunctionPtr(
              GlobalAdapter->processHandle,
              "zeCommandListUpdateMutableCommandsExp");
  // `&=` (not `|=`): a missing symbol has to clear the flag, exactly as the
  // fallback path below does.
  ZeMutableCmdListExt.Supported &=
      ZeMutableCmdListExt.zexCommandListUpdateMutableCommandsExp != nullptr;

  ZeMutableCmdListExt.zexCommandListUpdateMutableCommandSignalEventExp =
      (ze_pfnCommandListUpdateMutableCommandSignalEventExp_t)
          ur_loader::LibLoader::getFunctionPtr(
              GlobalAdapter->processHandle,
              "zeCommandListUpdateMutableCommandSignalEventExp");
  ZeMutableCmdListExt.Supported &=
      ZeMutableCmdListExt.zexCommandListUpdateMutableCommandSignalEventExp !=
      nullptr;

  ZeMutableCmdListExt.zexCommandListUpdateMutableCommandWaitEventsExp =
      (ze_pfnCommandListUpdateMutableCommandWaitEventsExp_t)
          ur_loader::LibLoader::getFunctionPtr(
              GlobalAdapter->processHandle,
              "zeCommandListUpdateMutableCommandWaitEventsExp");
  ZeMutableCmdListExt.Supported &=
      ZeMutableCmdListExt.zexCommandListUpdateMutableCommandWaitEventsExp !=
      nullptr;
} else {
  // Fallback: ask the driver for each extension function address. A result
  // of 0 from zeDriverGetExtensionFunctionAddress counts as success.
  ZeMutableCmdListExt.Supported |=
      (ZE_CALL_NOCHECK(
           zeDriverGetExtensionFunctionAddress,
           (ZeDriver, "zeCommandListGetNextCommandIdExp",
            reinterpret_cast<void **>(
                &ZeMutableCmdListExt.zexCommandListGetNextCommandIdExp))) ==
       0);

  ZeMutableCmdListExt.Supported &=
      (ZE_CALL_NOCHECK(zeDriverGetExtensionFunctionAddress,
                       (ZeDriver, "zeCommandListUpdateMutableCommandsExp",
                        reinterpret_cast<void **>(
                            &ZeMutableCmdListExt
                                 .zexCommandListUpdateMutableCommandsExp))) ==
       0);

  ZeMutableCmdListExt.Supported &=
      (ZE_CALL_NOCHECK(
           zeDriverGetExtensionFunctionAddress,
           (ZeDriver, "zeCommandListUpdateMutableCommandSignalEventExp",
            reinterpret_cast<void **>(
                &ZeMutableCmdListExt
                     .zexCommandListUpdateMutableCommandSignalEventExp))) ==
       0);

  ZeMutableCmdListExt.Supported &=
      (ZE_CALL_NOCHECK(
           zeDriverGetExtensionFunctionAddress,
           (ZeDriver, "zeCommandListUpdateMutableCommandWaitEventsExp",
            reinterpret_cast<void **>(
                &ZeMutableCmdListExt
                     .zexCommandListUpdateMutableCommandWaitEventsExp))) ==
       0);
}
323357
return UR_RESULT_SUCCESS;
324358
}
325359

source/adapters/level_zero/platform.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "common.hpp"
1313
#include "ur_api.h"
1414
#include "ze_api.h"
15+
#include "ze_ddi.h"
1516
#include "zes_api.h"
1617

1718
struct ur_device_handle_t_;

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ ur_result_t ur_queue_immediate_in_order_t::queueRelease() {
153153
if (!RefCount.decrementAndTest())
154154
return UR_RESULT_SUCCESS;
155155

156+
UR_CALL(queueFinish());
157+
156158
delete this;
157159
return UR_RESULT_SUCCESS;
158160
}

source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices,
5252
return UR_RESULT_SUCCESS;
5353
}
5454

55+
/// Reports whether a kernel was instrumented by the sanitizer.
///
/// Looks up the program that owns \p hKernel, fetches that program's info
/// record from the ASAN interceptor and asks it whether the kernel is
/// instrumented.
///
/// \param hKernel kernel handle to query.
/// \return the record's isKernelInstrumented() answer, or false when the
///         interceptor has no record for the owning program.
bool isInstrumentedKernel(ur_kernel_handle_t hKernel) {
  auto hProgram = GetProgram(hKernel);
  auto PI = getAsanInterceptor()->getProgramInfo(hProgram);
  // NOTE(review): presumably getProgramInfo yields a null handle for programs
  // that never went through registerProgram (only the build/link entry points
  // register them) - confirm. Treat such kernels as not instrumented instead
  // of dereferencing a null record.
  if (!PI) {
    return false;
  }
  return PI->isKernelInstrumented(hKernel);
}
60+
5561
} // namespace
5662

5763
///////////////////////////////////////////////////////////////////////////////
@@ -307,7 +313,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild(
307313

308314
UR_CALL(pfnProgramBuild(hContext, hProgram, pOptions));
309315

310-
UR_CALL(getAsanInterceptor()->registerProgram(hContext, hProgram));
316+
UR_CALL(getAsanInterceptor()->registerProgram(hProgram));
311317

312318
return UR_RESULT_SUCCESS;
313319
}
@@ -331,8 +337,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
331337
getContext()->logger.debug("==== urProgramBuildExp");
332338

333339
UR_CALL(pfnBuildExp(hProgram, numDevices, phDevices, pOptions));
334-
UR_CALL(
335-
getAsanInterceptor()->registerProgram(GetContext(hProgram), hProgram));
340+
UR_CALL(getAsanInterceptor()->registerProgram(hProgram));
336341

337342
return UR_RESULT_SUCCESS;
338343
}
@@ -359,7 +364,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink(
359364

360365
UR_CALL(pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram));
361366

362-
UR_CALL(getAsanInterceptor()->registerProgram(hContext, *phProgram));
367+
UR_CALL(getAsanInterceptor()->registerProgram(*phProgram));
363368

364369
return UR_RESULT_SUCCESS;
365370
}
@@ -390,7 +395,7 @@ ur_result_t UR_APICALL urProgramLinkExp(
390395
UR_CALL(pfnProgramLinkExp(hContext, numDevices, phDevices, count,
391396
phPrograms, pOptions, phProgram));
392397

393-
UR_CALL(getAsanInterceptor()->registerProgram(hContext, *phProgram));
398+
UR_CALL(getAsanInterceptor()->registerProgram(*phProgram));
394399

395400
return UR_RESULT_SUCCESS;
396401
}
@@ -460,7 +465,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
460465

461466
getContext()->logger.debug("==== urEnqueueKernelLaunch");
462467

463-
USMLaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue),
468+
if (!isInstrumentedKernel(hKernel)) {
469+
return pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
470+
pGlobalWorkSize, pLocalWorkSize,
471+
numEventsInWaitList, phEventWaitList, phEvent);
472+
}
473+
474+
USMLaunchInfo LaunchInfo(GetContext(hKernel), GetDevice(hQueue),
464475
pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
465476
workDim);
466477
UR_CALL(LaunchInfo.initialize());
@@ -1351,7 +1362,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreate(
13511362
getContext()->logger.debug("==== urKernelCreate");
13521363

13531364
UR_CALL(pfnCreate(hProgram, pKernelName, phKernel));
1354-
UR_CALL(getAsanInterceptor()->insertKernel(*phKernel));
1365+
if (isInstrumentedKernel(*phKernel)) {
1366+
UR_CALL(getAsanInterceptor()->insertKernel(*phKernel));
1367+
}
13551368

13561369
return UR_RESULT_SUCCESS;
13571370
}
@@ -1372,8 +1385,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelRetain(
13721385
UR_CALL(pfnRetain(hKernel));
13731386

13741387
auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel);
1375-
UR_ASSERT(KernelInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE);
1376-
KernelInfo->RefCount++;
1388+
if (KernelInfo) {
1389+
KernelInfo->RefCount++;
1390+
}
13771391

13781392
return UR_RESULT_SUCCESS;
13791393
}
@@ -1393,9 +1407,10 @@ __urdlllocal ur_result_t urKernelRelease(
13931407
UR_CALL(pfnRelease(hKernel));
13941408

13951409
auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel);
1396-
UR_ASSERT(KernelInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE);
1397-
if (--KernelInfo->RefCount == 0) {
1398-
UR_CALL(getAsanInterceptor()->eraseKernel(hKernel));
1410+
if (KernelInfo) {
1411+
if (--KernelInfo->RefCount == 0) {
1412+
UR_CALL(getAsanInterceptor()->eraseKernel(hKernel));
1413+
}
13991414
}
14001415

14011416
return UR_RESULT_SUCCESS;
@@ -1421,10 +1436,11 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue(
14211436
getContext()->logger.debug("==== urKernelSetArgValue");
14221437

14231438
std::shared_ptr<MemBuffer> MemBuffer;
1439+
std::shared_ptr<KernelInfo> KernelInfo;
14241440
if (argSize == sizeof(ur_mem_handle_t) &&
14251441
(MemBuffer = getAsanInterceptor()->getMemBuffer(
1426-
*ur_cast<const ur_mem_handle_t *>(pArgValue)))) {
1427-
auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel);
1442+
*ur_cast<const ur_mem_handle_t *>(pArgValue))) &&
1443+
(KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel))) {
14281444
std::scoped_lock<ur_shared_mutex> Guard(KernelInfo->Mutex);
14291445
KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer);
14301446
} else {
@@ -1452,8 +1468,10 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj(
14521468

14531469
getContext()->logger.debug("==== urKernelSetArgMemObj");
14541470

1455-
if (auto MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue)) {
1456-
auto KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel);
1471+
std::shared_ptr<MemBuffer> MemBuffer;
1472+
std::shared_ptr<KernelInfo> KernelInfo;
1473+
if ((MemBuffer = getAsanInterceptor()->getMemBuffer(hArgValue)) &&
1474+
(KernelInfo = getAsanInterceptor()->getKernelInfo(hKernel))) {
14571475
std::scoped_lock<ur_shared_mutex> Guard(KernelInfo->Mutex);
14581476
KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer);
14591477
} else {
@@ -1483,8 +1501,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal(
14831501
"==== urKernelSetArgLocal (argIndex={}, argSize={})", argIndex,
14841502
argSize);
14851503

1486-
{
1487-
auto KI = getAsanInterceptor()->getKernelInfo(hKernel);
1504+
if (auto KI = getAsanInterceptor()->getKernelInfo(hKernel)) {
14881505
std::scoped_lock<ur_shared_mutex> Guard(KI->Mutex);
14891506
// TODO: get local variable alignment
14901507
auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal(
@@ -1520,8 +1537,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer(
15201537
"==== urKernelSetArgPointer (argIndex={}, pArgValue={})", argIndex,
15211538
pArgValue);
15221539

1523-
if (getAsanInterceptor()->getOptions().DetectKernelArguments) {
1524-
auto KI = getAsanInterceptor()->getKernelInfo(hKernel);
1540+
std::shared_ptr<KernelInfo> KI;
1541+
if (getAsanInterceptor()->getOptions().DetectKernelArguments &&
1542+
(KI = getAsanInterceptor()->getKernelInfo(hKernel))) {
15251543
std::scoped_lock<ur_shared_mutex> Guard(KI->Mutex);
15261544
KI->PointerArgs[argIndex] = {pArgValue, GetCurrentBacktrace()};
15271545
}

0 commit comments

Comments
 (0)