Skip to content

Commit fae8cc2

Browse files
authored
[DeviceSanitizer] Print log when build/link fail (#17521)
Print logs when ur build/link fail. The main purpose for this patch is for OpenMP program rather than SYCL program since SYCL runtime has already do a good job printing those logs. Also, this PR fix a bug in L0 adapter where the error log does not set when `checkUnresolvedSymbols()` fails in `urProgramLinkExp()`. This bug would prevent the actual ze_log that contains the failure message(missing symbols) being added to the `UrProgram`.
1 parent a07d3fa commit fae8cc2

File tree

6 files changed

+124
-13
lines changed

6 files changed

+124
-13
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// This test is adapted from sycl/test-e2e/KernelAndProgram/build-log.cpp
2+
// REQUIRES: linux, cpu || (gpu && level_zero)
3+
// RUN: %{build} %device_asan_flags -DGPU -o %t_gpu.out
4+
// RUN: %{build} %device_asan_flags -o %t.out
5+
// RUN: %{run} not --crash %if gpu %{ %t_gpu.out %} %else %{ %t.out %} 2>&1 | FileCheck %s
6+
7+
#include <iostream>
8+
#include <sycl/detail/core.hpp>
9+
SYCL_EXTERNAL
10+
void symbol_that_does_not_exist();
11+
12+
void test() {
13+
sycl::queue Queue;
14+
15+
// Submitting this kernel should result in an exception with error code
16+
// `sycl::errc::build` and a message indicating
17+
// "PI_ERROR_BUILD_PROGRAM_FAILURE".
18+
auto Kernel = []() {
19+
#ifdef __SYCL_DEVICE_ONLY__
20+
#ifdef GPU
21+
asm volatile("undefined\n");
22+
#else // GPU
23+
symbol_that_does_not_exist();
24+
#endif // GPU
25+
#endif // __SYCL_DEVICE_ONLY__
26+
};
27+
28+
Queue.submit(
29+
[&](sycl::handler &CGH) { CGH.single_task<class SingleTask>(Kernel); });
30+
}
31+
32+
// CHECK: <SANITIZER>[ERROR]: Printing build log for program
33+
34+
int main() {
35+
test();
36+
return 0;
37+
}

unified-runtime/source/adapters/level_zero/program.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -490,15 +490,16 @@ ur_result_t urProgramLinkExp(
490490
// because the ZeBuildLog tells which symbols are unresolved.
491491
if (ZeResult == ZE_RESULT_SUCCESS) {
492492
ZeResult = checkUnresolvedSymbols(ZeModule, &ZeBuildLog);
493-
if (ZeResult != ZE_RESULT_SUCCESS) {
494-
return ze2urResult(ZeResult);
495-
}
493+
UrResult = ze2urResult(ZeResult);
496494
}
497495
UrProgram->setZeModule(ZeDevice, ZeModule);
498496
UrProgram->setBuildLog(ZeDevice, ZeBuildLog);
499497
UrProgram->setState(ZeDevice, (UrResult == UR_RESULT_SUCCESS)
500498
? ur_program_handle_t_::Exe
501499
: ur_program_handle_t_::Invalid);
500+
if (ZeResult != ZE_RESULT_SUCCESS) {
501+
return UrResult;
502+
}
502503
}
503504
} catch (const std::bad_alloc &) {
504505
return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;

unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild(
322322

323323
getContext()->logger.debug("==== urProgramBuild");
324324

325-
UR_CALL(pfnProgramBuild(hContext, hProgram, pOptions));
325+
auto UrRes = pfnProgramBuild(hContext, hProgram, pOptions);
326+
if (UrRes != UR_RESULT_SUCCESS) {
327+
auto Devices = GetDevices(hContext);
328+
PrintUrBuildLog(hProgram, Devices.data(), Devices.size());
329+
return UrRes;
330+
}
326331

327332
UR_CALL(getAsanInterceptor()->registerProgram(hProgram));
328333

@@ -348,7 +353,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
348353

349354
getContext()->logger.debug("==== urProgramBuildExp");
350355

351-
UR_CALL(pfnBuildExp(hProgram, numDevices, phDevices, pOptions));
356+
auto UrRes = pfnBuildExp(hProgram, numDevices, phDevices, pOptions);
357+
if (UrRes != UR_RESULT_SUCCESS) {
358+
PrintUrBuildLog(hProgram, phDevices, numDevices);
359+
return UrRes;
360+
}
361+
352362
UR_CALL(getAsanInterceptor()->registerProgram(hProgram));
353363

354364
return UR_RESULT_SUCCESS;
@@ -375,7 +385,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink(
375385

376386
getContext()->logger.debug("==== urProgramLink");
377387

378-
UR_CALL(pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram));
388+
auto UrRes = pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram);
389+
if (UrRes != UR_RESULT_SUCCESS) {
390+
auto Devices = GetDevices(hContext);
391+
PrintUrBuildLog(*phProgram, Devices.data(), Devices.size());
392+
return UrRes;
393+
}
379394

380395
UR_CALL(getAsanInterceptor()->insertProgram(*phProgram));
381396
UR_CALL(getAsanInterceptor()->registerProgram(*phProgram));
@@ -408,8 +423,12 @@ ur_result_t UR_APICALL urProgramLinkExp(
408423

409424
getContext()->logger.debug("==== urProgramLinkExp");
410425

411-
UR_CALL(pfnProgramLinkExp(hContext, numDevices, phDevices, count, phPrograms,
412-
pOptions, phProgram));
426+
auto UrRes = pfnProgramLinkExp(hContext, numDevices, phDevices, count,
427+
phPrograms, pOptions, phProgram);
428+
if (UrRes != UR_RESULT_SUCCESS) {
429+
PrintUrBuildLog(*phProgram, phDevices, numDevices);
430+
return UrRes;
431+
}
413432

414433
UR_CALL(getAsanInterceptor()->insertProgram(*phProgram));
415434
UR_CALL(getAsanInterceptor()->registerProgram(*phProgram));

unified-runtime/source/loader/layers/sanitizer/msan/msan_ddi.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,12 @@ ur_result_t urProgramBuild(
265265

266266
getContext()->logger.debug("==== urProgramBuild");
267267

268-
UR_CALL(pfnProgramBuild(hContext, hProgram, pOptions));
268+
auto UrRes = pfnProgramBuild(hContext, hProgram, pOptions);
269+
if (UrRes != UR_RESULT_SUCCESS) {
270+
auto Devices = GetDevices(hContext);
271+
PrintUrBuildLog(hProgram, Devices.data(), Devices.size());
272+
return UrRes;
273+
}
269274

270275
UR_CALL(getMsanInterceptor()->registerProgram(hProgram));
271276

@@ -287,7 +292,12 @@ ur_result_t urProgramBuildExp(
287292

288293
getContext()->logger.debug("==== urProgramBuildExp");
289294

290-
UR_CALL(pfnBuildExp(hProgram, numDevices, phDevices, pOptions));
295+
auto UrRes = pfnBuildExp(hProgram, numDevices, phDevices, pOptions);
296+
if (UrRes != UR_RESULT_SUCCESS) {
297+
PrintUrBuildLog(hProgram, phDevices, numDevices);
298+
return UrRes;
299+
}
300+
291301
UR_CALL(getMsanInterceptor()->registerProgram(hProgram));
292302

293303
return UR_RESULT_SUCCESS;
@@ -310,7 +320,12 @@ ur_result_t urProgramLink(
310320

311321
getContext()->logger.debug("==== urProgramLink");
312322

313-
UR_CALL(pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram));
323+
auto UrRes = pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram);
324+
if (UrRes != UR_RESULT_SUCCESS) {
325+
auto Devices = GetDevices(hContext);
326+
PrintUrBuildLog(*phProgram, Devices.data(), Devices.size());
327+
return UrRes;
328+
}
314329

315330
UR_CALL(getMsanInterceptor()->insertProgram(*phProgram));
316331
UR_CALL(getMsanInterceptor()->registerProgram(*phProgram));
@@ -339,8 +354,12 @@ ur_result_t urProgramLinkExp(
339354

340355
getContext()->logger.debug("==== urProgramLinkExp");
341356

342-
UR_CALL(pfnProgramLinkExp(hContext, numDevices, phDevices, count, phPrograms,
343-
pOptions, phProgram));
357+
auto UrRes = pfnProgramLinkExp(hContext, numDevices, phDevices, count,
358+
phPrograms, pOptions, phProgram);
359+
if (UrRes != UR_RESULT_SUCCESS) {
360+
PrintUrBuildLog(*phProgram, phDevices, numDevices);
361+
return UrRes;
362+
}
344363

345364
UR_CALL(getMsanInterceptor()->insertProgram(*phProgram));
346365
UR_CALL(getMsanInterceptor()->registerProgram(*phProgram));

unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,4 +256,36 @@ ur_result_t EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr,
256256
Queue, Ptr, 1, &Value, Size, NumEvents, EventWaitList, OutEvent);
257257
}
258258

259+
void PrintUrBuildLog(ur_program_handle_t hProgram,
260+
ur_device_handle_t *phDevices, size_t numDevices) {
261+
getContext()->logger.error("Printing build log for program {}",
262+
(void *)hProgram);
263+
for (size_t i = 0; i < numDevices; i++) {
264+
std::vector<char> LogBuf;
265+
size_t LogSize = 0;
266+
auto hDevice = phDevices[i];
267+
268+
auto UrRes = getContext()->urDdiTable.Program.pfnGetBuildInfo(
269+
hProgram, hDevice, UR_PROGRAM_BUILD_INFO_LOG, 0, nullptr, &LogSize);
270+
if (UrRes != UR_RESULT_SUCCESS) {
271+
getContext()->logger.error("For device {}: failed to get build log size.",
272+
(void *)hDevice);
273+
continue;
274+
}
275+
276+
LogBuf.resize(LogSize);
277+
UrRes = getContext()->urDdiTable.Program.pfnGetBuildInfo(
278+
hProgram, hDevice, UR_PROGRAM_BUILD_INFO_LOG, LogSize, LogBuf.data(),
279+
nullptr);
280+
if (UrRes != UR_RESULT_SUCCESS) {
281+
getContext()->logger.error("For device {}: failed to get build log.",
282+
(void *)hDevice);
283+
continue;
284+
}
285+
286+
getContext()->logger.error("For device {}:\n{}", (void *)hDevice,
287+
LogBuf.data());
288+
}
289+
}
290+
259291
} // namespace ur_sanitizer_layer

unified-runtime/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,7 @@ EnqueueUSMBlockingSet(ur_queue_handle_t Queue, void *Ptr, char Value,
6565
const ur_event_handle_t *EventWaitList = nullptr,
6666
ur_event_handle_t *OutEvent = nullptr);
6767

68+
void PrintUrBuildLog(ur_program_handle_t hProgram,
69+
ur_device_handle_t *phDevices, size_t numDevices);
70+
6871
} // namespace ur_sanitizer_layer

0 commit comments

Comments
 (0)