Skip to content

[DevMSAN] Support device memory sanitizer for DG2 GPU device #16619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions libdevice/include/sanitizer_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,10 @@ enum ADDRESS_SPACE : uint32_t {

#if defined(__SPIR__) || defined(__SPIRV__)

#if defined(__SYCL_DEVICE_ONLY__)

#define __USE_SPIR_BUILTIN__ 1

#ifndef SYCL_EXTERNAL
#define SYCL_EXTERNAL
#endif // SYCL_EXTERNAL

#else // __SYCL_DEVICE_ONLY__

#define __USE_SPIR_BUILTIN__ 0

#endif // __SYCL_DEVICE_ONLY__

#if __USE_SPIR_BUILTIN__
extern SYCL_EXTERNAL int
__spirv_ocl_printf(const __SYCL_CONSTANT__ char *Format, ...);

Expand All @@ -63,7 +52,6 @@ extern SYCL_EXTERNAL __attribute__((convergent)) void
__spirv_ControlBarrier(uint32_t Execution, uint32_t Memory, uint32_t Semantics);

extern "C" SYCL_EXTERNAL void __devicelib_exit();
#endif // __USE_SPIR_BUILTIN__

__SYCL_GLOBAL__ void *ToGlobal(void *ptr) {
return __spirv_GenericCastToPtrExplicit_ToGlobal(ptr, 5);
Expand Down
23 changes: 23 additions & 0 deletions libdevice/sanitizer/msan_rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,27 @@ inline uptr __msan_get_shadow_cpu(uptr addr) {
return addr ^ 0x500000000000ULL;
}

/// Translate an application address into its MSan shadow address on a DG2 GPU.
///
/// \param addr Application address to translate.
/// \param as   Address space of \p addr. A generic pointer is first passed to
///             ConvertGenericPointer (presumably resolving both \p addr and
///             \p as to a concrete address space -- confirm against the
///             helper's definition).
/// \return The launch info's CleanShadow value when the address is not in the
///         global address space or none of its upper 16 bits are set;
///         otherwise the address shifted into the global shadow range
///         described by GlobalShadowOffset / GlobalShadowOffsetEnd.
inline uptr __msan_get_shadow_dg2(uptr addr, uint32_t as) {
  if (as == ADDRESS_SPACE_GENERIC)
    ConvertGenericPointer(addr, as);

  // Fetch the launch info once; every path below reads from it.
  auto *launch_info = (__SYCL_GLOBAL__ MsanLaunchInfo *)__MsanLaunchInfo.get();

  // Only global addresses with at least one of the upper 16 bits set are
  // mapped to real shadow memory; everything else shares CleanShadow.
  const bool has_high_bits = (addr & 0xffff'0000'0000'0000ULL) != 0;
  if (as != ADDRESS_SPACE_GLOBAL || !has_high_bits)
    return (uptr)launch_info->CleanShadow;

  // Device USM only
  auto shadow_begin = launch_info->GlobalShadowOffset;
  auto shadow_end = launch_info->GlobalShadowOffsetEnd;

  // Addresses below the shadow region are shifted up relative to
  // 0xffff'8000'0000'0000; addresses at or above it are shifted down
  // relative to the top of the 64-bit address space.
  if (addr < shadow_begin)
    return addr + (shadow_begin - 0xffff'8000'0000'0000ULL);
  return addr - (0xffff'ffff'ffff'ffffULL - shadow_end);
}

inline uptr __msan_get_shadow_pvc(uptr addr, uint32_t as) {
if (as == ADDRESS_SPACE_GENERIC) {
ConvertGenericPointer(addr, as);
Expand Down Expand Up @@ -210,6 +231,8 @@ DEVICE_EXTERN_C_NOINLINE uptr __msan_get_shadow(uptr addr, uint32_t as) {
shadow_ptr = __msan_get_shadow_cpu(addr);
} else if (launch_info->DeviceTy == DeviceType::GPU_PVC) {
shadow_ptr = __msan_get_shadow_pvc(addr, as);
} else if (launch_info->DeviceTy == DeviceType::GPU_DG2) {
shadow_ptr = __msan_get_shadow_dg2(addr, as);
} else {
MSAN_DEBUG(__spirv_ocl_printf(__msan_print_unsupport_device_type,
launch_info->DeviceTy));
Expand Down
3 changes: 2 additions & 1 deletion sycl/test-e2e/MemorySanitizer/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# TRACKER: https://github.com/intel/llvm/issues/16184
has_arch_gpu_intel_pvc = any('arch-intel_gpu_pvc' in T for T in config.sycl_dev_features.values())
if not has_arch_gpu_intel_pvc:
has_arch_gpu_intel_dg2 = any('gpu-intel-dg2' in T for T in config.sycl_dev_features.values())
if not has_arch_gpu_intel_pvc and not has_arch_gpu_intel_dg2:
config.unsupported_features += ['gpu']
else:
# TRACKER for PVC + igc-dev: https://github.com/intel/llvm/issues/16401
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ ur_result_t MsanShadowMemoryGPU::Setup() {
// To reserve a very large amount of GPU virtual memory, the pStart param
// should be beyond the SVM range, so that GFX driver will automatically
// switch to reservation on the GPU heap.
const void *StartAddress = (void *)(0x100'0000'0000'0000ULL);
const void *StartAddress = (void *)GetStartAddress();
// TODO: Protect Bad Zone
auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
Context, StartAddress, ShadowSize, (void **)&ShadowBegin);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory {

virtual size_t GetShadowSize() = 0;

virtual uptr GetStartAddress() { return 0; }

private:
ur_result_t EnqueueMapShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size,
std::vector<ur_event_handle_t> &EventWaitList,
Expand Down Expand Up @@ -134,6 +136,8 @@ struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU {
uptr MemToShadow(uptr Ptr) override;

size_t GetShadowSize() override { return 0x8000'0000'0000ULL; }

uptr GetStartAddress() override { return 0x100'0000'0000'0000ULL; }
};

/// Shadow Memory layout of GPU DG2 device
Expand Down
Loading