Skip to content

Commit 58ca3a3

Browse files
authored
[DeviceSanitizer] Support out-of-bounds on private memory (#1676)
* support private memory
* fix comment
* enable private by default
* use 24-bit ASAN_PRIVATE_SIZE
* use urKernelGetSuggestedLocalWorkSize
* skip checking kernel if shadow memory allocation failed
* add PrivateShadowOffsetEnd
* change default logger level
* add logger in ManagedQueue
* revert
* revert back
* fix build
* use error
* remove pfnGetSuggestedLocalWorkSize
* fix m_Quarantine
* add comment
* add fallback for urKernelGetSuggestedLocalWorkSize
1 parent 764b75c commit 58ca3a3

File tree

7 files changed

+139
-49
lines changed

7 files changed

+139
-49
lines changed

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 97 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -714,16 +714,21 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
714714
EnqueueWriteGlobal(kSPIR_DeviceType, &DeviceInfo->Type,
715715
sizeof(DeviceInfo->Type));
716716

717-
if (DeviceInfo->Type == DeviceType::CPU) {
718-
break;
719-
}
720-
721717
if (LaunchInfo.LocalWorkSize.empty()) {
722-
LaunchInfo.LocalWorkSize.reserve(3);
723-
// FIXME: This is W/A until urKernelSuggestGroupSize is added
724-
LaunchInfo.LocalWorkSize[0] = 1;
725-
LaunchInfo.LocalWorkSize[1] = 1;
726-
LaunchInfo.LocalWorkSize[2] = 1;
718+
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
719+
auto URes = context.urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
720+
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
721+
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
722+
if (URes != UR_RESULT_SUCCESS) {
723+
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
724+
return URes;
725+
}
726+
// If urKernelGetSuggestedLocalWorkSize is not supported by the driver, we fall back
727+
// to an inefficient implementation: a local work size of 1 in every dimension
728+
for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
729+
LaunchInfo.LocalWorkSize[Dim] = 1;
730+
}
731+
}
727732
}
728733

729734
const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data();
@@ -733,56 +738,109 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
733738
LocalWorkSize[Dim];
734739
}
735740

736-
auto EnqueueAllocateDevice = [Context, &DeviceInfo, Queue,
737-
NumWG](size_t Size, uptr &Ptr) {
741+
auto EnqueueAllocateShadowMemory = [Context, &DeviceInfo,
742+
Queue](size_t Size, uptr &Ptr) {
743+
void *Allocated = nullptr;
738744
auto URes = context.urDdiTable.USM.pfnDeviceAlloc(
739745
Context, DeviceInfo->Handle, nullptr, nullptr, Size,
740-
(void **)&Ptr);
746+
&Allocated);
741747
if (URes != UR_RESULT_SUCCESS) {
742-
context.logger.error(
743-
"Failed to allocate shadow memory for local memory: {}",
744-
URes);
745-
context.logger.error(
746-
"Maybe the number of workgroup ({}) too large", NumWG);
747748
return URes;
748749
}
749-
// Initialize shadow memory of local memory
750-
URes = urEnqueueUSMSet(Queue, (void *)Ptr, 0, Size);
751-
if (URes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
752-
context.logger.error(
753-
"Failed to allocate shadow memory for local memory: {}",
754-
URes);
755-
context.logger.error(
756-
"Maybe the number of workgroup ({}) too large", NumWG);
757-
return URes;
750+
// Initialize shadow memory
751+
URes = urEnqueueUSMSet(Queue, Allocated, 0, Size);
752+
if (URes != UR_RESULT_SUCCESS) {
753+
[[maybe_unused]] auto URes =
754+
context.urDdiTable.USM.pfnFree(Context, Allocated);
755+
assert(URes == UR_RESULT_SUCCESS &&
756+
"urUSMFree failed at allocating shadow memory");
757+
Allocated = nullptr;
758758
}
759+
Ptr = (uptr)Allocated;
759760
return URes;
760761
};
761762

763+
auto LocalMemoryUsage =
764+
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
765+
auto PrivateMemoryUsage =
766+
GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle);
767+
768+
context.logger.info("KernelInfo {} (LocalMemory={}, PrivateMemory={})",
769+
(void *)Kernel, LocalMemoryUsage,
770+
PrivateMemoryUsage);
771+
762772
// Write shadow memory offset for local memory
763773
if (Options().DetectLocals) {
764774
// The CPU doesn't need this
765775
if (DeviceInfo->Type == DeviceType::GPU_PVC) {
766-
size_t LocalMemorySize = GetLocalMemorySize(DeviceInfo->Handle);
767-
size_t LocalShadowMemorySize =
776+
const size_t LocalMemorySize =
777+
GetDeviceLocalMemorySize(DeviceInfo->Handle);
778+
const size_t LocalShadowMemorySize =
768779
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
769780

770781
context.logger.debug(
771-
"LocalMemoryInfo(WorkGroup={}, LocalMemorySize={}, "
782+
"LocalMemory(WorkGroup={}, LocalMemorySize={}, "
772783
"LocalShadowMemorySize={})",
773784
NumWG, LocalMemorySize, LocalShadowMemorySize);
774785

775-
UR_CALL(EnqueueAllocateDevice(
776-
LocalShadowMemorySize, LaunchInfo.Data->LocalShadowOffset));
777-
778-
LaunchInfo.Data->LocalShadowOffsetEnd =
779-
LaunchInfo.Data->LocalShadowOffset + LocalShadowMemorySize -
780-
1;
786+
if (EnqueueAllocateShadowMemory(
787+
LocalShadowMemorySize,
788+
LaunchInfo.Data->LocalShadowOffset) !=
789+
UR_RESULT_SUCCESS) {
790+
context.logger.warning(
791+
"Failed to allocate shadow memory for local "
792+
"memory, maybe the number of workgroup ({}) is too "
793+
"large",
794+
NumWG);
795+
context.logger.warning(
796+
"Skip checking local memory of kernel <{}>",
797+
GetKernelName(Kernel));
798+
} else {
799+
LaunchInfo.Data->LocalShadowOffsetEnd =
800+
LaunchInfo.Data->LocalShadowOffset +
801+
LocalShadowMemorySize - 1;
802+
803+
context.logger.info(
804+
"ShadowMemory(Local, {} - {})",
805+
(void *)LaunchInfo.Data->LocalShadowOffset,
806+
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
807+
}
808+
}
809+
}
781810

782-
context.logger.info(
783-
"ShadowMemory(Local, {} - {})",
784-
(void *)LaunchInfo.Data->LocalShadowOffset,
785-
(void *)LaunchInfo.Data->LocalShadowOffsetEnd);
811+
// Write shadow memory offset for private memory
812+
if (Options().DetectPrivates) {
813+
if (DeviceInfo->Type == DeviceType::CPU) {
814+
LaunchInfo.Data->PrivateShadowOffset = DeviceInfo->ShadowOffset;
815+
} else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
816+
const size_t PrivateShadowMemorySize =
817+
(NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
818+
819+
context.logger.debug("PrivateMemory(WorkGroup={}, "
820+
"PrivateShadowMemorySize={})",
821+
NumWG, PrivateShadowMemorySize);
822+
823+
if (EnqueueAllocateShadowMemory(
824+
PrivateShadowMemorySize,
825+
LaunchInfo.Data->PrivateShadowOffset) !=
826+
UR_RESULT_SUCCESS) {
827+
context.logger.warning(
828+
"Failed to allocate shadow memory for private "
829+
"memory, maybe the number of workgroup ({}) is too "
830+
"large",
831+
NumWG);
832+
context.logger.warning(
833+
"Skip checking private memory of kernel <{}>",
834+
GetKernelName(Kernel));
835+
} else {
836+
LaunchInfo.Data->PrivateShadowOffsetEnd =
837+
LaunchInfo.Data->PrivateShadowOffset +
838+
PrivateShadowMemorySize - 1;
839+
context.logger.info(
840+
"ShadowMemory(Private, {} - {})",
841+
(void *)LaunchInfo.Data->PrivateShadowOffset,
842+
(void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
843+
}
786844
}
787845
}
788846
} while (false);

source/loader/layers/sanitizer/asan_libdevice.hpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,8 @@ struct LocalArgsInfo {
7070
constexpr std::size_t ASAN_MAX_NUM_REPORTS = 10;
7171

7272
struct LaunchInfo {
73-
// Don't move this field, we use it in AddressSanitizerPass
7473
uintptr_t PrivateShadowOffset = 0;
74+
uintptr_t PrivateShadowOffsetEnd = 0;
7575

7676
uintptr_t LocalShadowOffset = 0;
7777
uintptr_t LocalShadowOffsetEnd = 0;
@@ -85,6 +85,10 @@ struct LaunchInfo {
8585
constexpr unsigned ASAN_SHADOW_SCALE = 4;
8686
constexpr unsigned ASAN_SHADOW_GRANULARITY = 1ULL << ASAN_SHADOW_SCALE;
8787

88+
// Based on observation, only the last 24 bits of a private variable's address
89+
// vary
90+
constexpr std::size_t ASAN_PRIVATE_SIZE = 0xffffffULL + 1;
91+
8892
// These magic values are written to shadow for better error
8993
// reporting.
9094
constexpr int kUsmDeviceRedzoneMagic = (char)0x81;

source/loader/layers/sanitizer/asan_options.hpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ struct AsanOptions {
3737
uint64_t MaxRZSize = 2048;
3838
uint32_t MaxQuarantineSizeMB = 0;
3939
bool DetectLocals = true;
40+
bool DetectPrivates = true;
4041

4142
private:
4243
AsanOptions() {
@@ -91,6 +92,7 @@ struct AsanOptions {
9192

9293
SetBoolOption("debug", Debug);
9394
SetBoolOption("detect_locals", DetectLocals);
95+
SetBoolOption("detect_privates", DetectPrivates);
9496

9597
auto KV = OptionsEnvMap->find("quarantine_size_mb");
9698
if (KV != OptionsEnvMap->end()) {

source/loader/layers/sanitizer/ur_sanddi.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -280,9 +280,10 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
280280
UR_CALL(context.interceptor->preLaunchKernel(hKernel, hQueue, LaunchInfo));
281281

282282
ur_event_handle_t hEvent{};
283-
ur_result_t result = pfnKernelLaunch(
284-
hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
285-
pLocalWorkSize, numEventsInWaitList, phEventWaitList, &hEvent);
283+
ur_result_t result =
284+
pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset,
285+
pGlobalWorkSize, LaunchInfo.LocalWorkSize.data(),
286+
numEventsInWaitList, phEventWaitList, &hEvent);
286287

287288
if (result == UR_RESULT_SUCCESS) {
288289
UR_CALL(

source/loader/layers/sanitizer/ur_sanitizer_layer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,8 @@ context_t context;
1818

1919
///////////////////////////////////////////////////////////////////////////////
2020
context_t::context_t()
21-
: logger(logger::create_logger("sanitizer")),
21+
: logger(logger::create_logger("sanitizer", false, false,
22+
logger::Level::WARN)),
2223
interceptor(std::make_unique<SanitizerInterceptor>()) {}
2324

2425
bool context_t::isAvailable() const { return true; }

source/loader/layers/sanitizer/ur_sanitizer_utils.cpp

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ ManagedQueue::ManagedQueue(ur_context_handle_t Context,
1919
ur_device_handle_t Device) {
2020
[[maybe_unused]] auto Result =
2121
context.urDdiTable.Queue.pfnCreate(Context, Device, nullptr, &Handle);
22-
assert(Result == UR_RESULT_SUCCESS);
22+
assert(Result == UR_RESULT_SUCCESS && "Failed to create ManagedQueue");
2323
context.logger.debug(">>> ManagedQueue {}", (void *)Handle);
2424
}
2525

@@ -31,9 +31,9 @@ ManagedQueue::~ManagedQueue() {
3131
if (Result != UR_RESULT_SUCCESS) {
3232
context.logger.error("Failed to finish ManagedQueue: {}", Result);
3333
}
34-
assert(Result == UR_RESULT_SUCCESS);
34+
assert(Result == UR_RESULT_SUCCESS && "Failed to finish ManagedQueue");
3535
Result = context.urDdiTable.Queue.pfnRelease(Handle);
36-
assert(Result == UR_RESULT_SUCCESS);
36+
assert(Result == UR_RESULT_SUCCESS && "Failed to release ManagedQueue");
3737
}
3838

3939
ur_context_handle_t GetContext(ur_queue_handle_t Queue) {
@@ -81,7 +81,7 @@ ur_program_handle_t GetProgram(ur_kernel_handle_t Kernel) {
8181
return Program;
8282
}
8383

84-
size_t GetLocalMemorySize(ur_device_handle_t Device) {
84+
size_t GetDeviceLocalMemorySize(ur_device_handle_t Device) {
8585
size_t LocalMemorySize{};
8686
[[maybe_unused]] auto Result = context.urDdiTable.Device.pfnGetInfo(
8787
Device, UR_DEVICE_INFO_LOCAL_MEM_SIZE, sizeof(LocalMemorySize),
@@ -157,6 +157,26 @@ size_t GetKernelNumArgs(ur_kernel_handle_t Kernel) {
157157
return NumArgs;
158158
}
159159

160+
size_t GetKernelLocalMemorySize(ur_kernel_handle_t Kernel,
161+
ur_device_handle_t Device) {
162+
size_t Size = 0;
163+
[[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetGroupInfo(
164+
Kernel, Device, UR_KERNEL_GROUP_INFO_LOCAL_MEM_SIZE, sizeof(size_t),
165+
&Size, nullptr);
166+
assert(Res == UR_RESULT_SUCCESS);
167+
return Size;
168+
}
169+
170+
size_t GetKernelPrivateMemorySize(ur_kernel_handle_t Kernel,
171+
ur_device_handle_t Device) {
172+
size_t Size = 0;
173+
[[maybe_unused]] auto Res = context.urDdiTable.Kernel.pfnGetGroupInfo(
174+
Kernel, Device, UR_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE, sizeof(size_t),
175+
&Size, nullptr);
176+
assert(Res == UR_RESULT_SUCCESS);
177+
return Size;
178+
}
179+
160180
size_t GetVirtualMemGranularity(ur_context_handle_t Context,
161181
ur_device_handle_t Device) {
162182
size_t Size;

source/loader/layers/sanitizer/ur_sanitizer_utils.hpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,12 +36,16 @@ ur_context_handle_t GetContext(ur_kernel_handle_t Kernel);
3636
ur_device_handle_t GetDevice(ur_queue_handle_t Queue);
3737
DeviceType GetDeviceType(ur_device_handle_t Device);
3838
std::string GetKernelName(ur_kernel_handle_t Kernel);
39-
size_t GetLocalMemorySize(ur_device_handle_t Device);
39+
size_t GetDeviceLocalMemorySize(ur_device_handle_t Device);
4040
ur_program_handle_t GetProgram(ur_kernel_handle_t Kernel);
4141
std::vector<ur_device_handle_t> GetProgramDevices(ur_program_handle_t Program);
4242
ur_device_handle_t GetUSMAllocDevice(ur_context_handle_t Context,
4343
const void *MemPtr);
4444
size_t GetKernelNumArgs(ur_kernel_handle_t Kernel);
45+
size_t GetKernelLocalMemorySize(ur_kernel_handle_t Kernel,
46+
ur_device_handle_t Device);
47+
size_t GetKernelPrivateMemorySize(ur_kernel_handle_t Kernel,
48+
ur_device_handle_t Device);
4549
size_t GetVirtualMemGranularity(ur_context_handle_t Context,
4650
ur_device_handle_t Device);
4751

0 commit comments

Comments
 (0)