Skip to content

Commit b863a4b

Browse files
authored
[DeviceSAN] Fix private shadow memory by recording the private base (#18253)
Before this patch, to map a private buffer into shadow memory, we used the lower bits of the private address as the offset: ``` shadow_ptr = private_shadow_base + (addr & (MSAN_PRIVATE_SIZE - 1)); ``` This was based on the assumption that the high bits of the allocated private address are the same for every workitem. However, we recently observed that this is not always true. Therefore, we now record the base address of private memory for each workitem, so that the offset can be computed relative to it: ``` shadow_ptr = private_shadow_base + (addr - private_base[workitem_id]); ``` Misc. - Fix warning in AddressSanitizer - Change the return type of "__msan_get_shadow" to remove the "ptrtoint" instruction
1 parent e06e482 commit b863a4b

File tree

19 files changed

+429
-266
lines changed

19 files changed

+429
-266
lines changed

libdevice/include/sanitizer_defs.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
//===----------------------------------------------------------------------===//
88
#pragma once
99

10+
#include "atomic.hpp"
1011
#include "spir_global_var.hpp"
12+
#include "spirv_vars.h"
1113
#include <cstdint>
1214

1315
using uptr = uintptr_t;
@@ -64,4 +66,23 @@ __SYCL_PRIVATE__ void *ToPrivate(void *ptr) {
6466
return __spirv_GenericCastToPtrExplicit_ToPrivate(ptr, 7);
6567
}
6668

69+
size_t WorkGroupLinearId() {
70+
return __spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
71+
__spirv_BuiltInNumWorkgroups.z +
72+
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
73+
__spirv_BuiltInWorkgroupId.z;
74+
}
75+
76+
// For GPU device, each sub group is a hardware thread
77+
size_t SubGroupLinearId() {
78+
return __spirv_BuiltInGlobalLinearId / __spirv_BuiltInSubgroupSize;
79+
}
80+
81+
/// Issues a control barrier whose execution scope and memory scope are both
/// the sub-group, with sequentially-consistent ordering applied to
/// cross-work-group (global) memory and work-group (local) memory.
///
/// Every work-item of the sub-group has to reach this call for it to act as
/// a synchronisation point, so do not call it from a branch that only some
/// of the sub-group's work-items take.
///
/// Marked `inline` because this definition lives in a header: without it,
/// every translation unit that includes the header would emit its own
/// external definition of the function.
inline void SubGroupBarrier() {
  __spirv_ControlBarrier(__spv::Scope::Subgroup, __spv::Scope::Subgroup,
                         __spv::MemorySemanticsMask::SequentiallyConsistent |
                             __spv::MemorySemanticsMask::CrossWorkgroupMemory |
                             __spv::MemorySemanticsMask::WorkgroupMemory);
}
87+
6788
#endif // __SPIR__ || __SPIRV__

libdevice/sanitizer/asan_rtl.cpp

Lines changed: 86 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,11 @@ static const __SYCL_CONSTANT__ char __newline[] = "\n";
2626
static const __SYCL_CONSTANT__ char __global_shadow_out_of_bound[] =
2727
"[kernel] Global shadow memory out-of-bound (ptr: %p -> %p, base: %p)\n";
2828
static const __SYCL_CONSTANT__ char __local_shadow_out_of_bound[] =
29-
"[kernel] Local shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: "
30-
"%p)\n";
29+
"[kernel] Local shadow memory out-of-bound (ptr: %p -> %p, wid: %llu, "
30+
"base: %p)\n";
3131
static const __SYCL_CONSTANT__ char __private_shadow_out_of_bound[] =
32-
"[kernel] Private shadow memory out-of-bound (ptr: %p -> %p, wg: %d, base: "
33-
"%p)\n";
32+
"[kernel] Private shadow memory out-of-bound (ptr: %p -> %p, wid: %llu, "
33+
"sid: %llu, base: %p)\n";
3434

3535
static const __SYCL_CONSTANT__ char __asan_print_unsupport_device_type[] =
3636
"[kernel] Unsupport device type: %d\n";
@@ -117,54 +117,52 @@ inline uptr MemToShadow_DG2(uptr addr, uint32_t as) {
117117

118118
return shadow_ptr;
119119
} else if (as == ADDRESS_SPACE_LOCAL) { // local
120-
// The size of SLM is 64KB on DG2
121-
constexpr unsigned slm_size = 64 * 1024;
122-
const auto wg_lid =
123-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
124-
__spirv_BuiltInNumWorkgroups.z +
125-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
126-
__spirv_BuiltInWorkgroupId.z;
127-
128120
const auto shadow_offset = launch_info->LocalShadowOffset;
129121
if (shadow_offset == 0) {
130122
return 0;
131123
}
132124

133-
auto shadow_ptr = shadow_offset +
134-
((wg_lid * slm_size) >> ASAN_SHADOW_SCALE) +
125+
// The size of SLM is 64KB on DG2
126+
constexpr unsigned slm_size = 64 * 1024;
127+
const size_t wid = WorkGroupLinearId();
128+
129+
auto shadow_ptr = shadow_offset + ((wid * slm_size) >> ASAN_SHADOW_SCALE) +
135130
((addr & (slm_size - 1)) >> ASAN_SHADOW_SCALE);
136131

137132
ASAN_DEBUG(const auto shadow_offset_end = launch_info->LocalShadowOffsetEnd;
138133
if (shadow_ptr > shadow_offset_end) {
139134
__spirv_ocl_printf(__local_shadow_out_of_bound, addr,
140-
shadow_ptr, wg_lid, (uptr)shadow_offset);
135+
shadow_ptr, wid, (uptr)shadow_offset);
141136
return 0;
142137
});
143138
return shadow_ptr;
144139
} else if (as == ADDRESS_SPACE_PRIVATE) { // private
145-
// work-group linear id
146-
const auto WG_LID =
147-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
148-
__spirv_BuiltInNumWorkgroups.z +
149-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
150-
__spirv_BuiltInWorkgroupId.z;
151-
152140
const auto shadow_offset = launch_info->PrivateShadowOffset;
153141
if (shadow_offset == 0) {
154142
return 0;
155143
}
156144

145+
const auto wid = WorkGroupLinearId();
146+
const size_t sid = SubGroupLinearId();
147+
const uptr private_base = launch_info->PrivateBase[sid];
148+
149+
// FIXME: The recorded private_base may not be the lowest private address;
150+
// ideally there should be a built-in to get this information
151+
if (addr < private_base) {
152+
return 0;
153+
}
154+
157155
uptr shadow_ptr = shadow_offset +
158-
((WG_LID * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) +
159-
((addr & (ASAN_PRIVATE_SIZE - 1)) >> ASAN_SHADOW_SCALE);
156+
((wid * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) +
157+
((addr - private_base) >> ASAN_SHADOW_SCALE);
158+
159+
const auto shadow_offset_end = launch_info->PrivateShadowOffsetEnd;
160+
if (shadow_ptr > shadow_offset_end) {
161+
__spirv_ocl_printf(__private_shadow_out_of_bound, addr, shadow_ptr, wid,
162+
sid, private_base);
163+
return 0;
164+
};
160165

161-
ASAN_DEBUG(const auto shadow_offset_end =
162-
launch_info->PrivateShadowOffsetEnd;
163-
if (shadow_ptr > shadow_offset_end) {
164-
__spirv_ocl_printf(__private_shadow_out_of_bound, addr,
165-
shadow_ptr, WG_LID, (uptr)shadow_offset);
166-
return 0;
167-
});
168166
return shadow_ptr;
169167
}
170168

@@ -196,57 +194,52 @@ inline uptr MemToShadow_PVC(uptr addr, uint32_t as) {
196194
});
197195
return shadow_ptr;
198196
} else if (as == ADDRESS_SPACE_LOCAL) { // local
199-
// The size of SLM is 128KB on PVC
200-
constexpr unsigned SLM_SIZE = 128 * 1024;
201-
// work-group linear id
202-
const auto wg_lid =
203-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
204-
__spirv_BuiltInNumWorkgroups.z +
205-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
206-
__spirv_BuiltInWorkgroupId.z;
207-
208197
const auto shadow_offset = launch_info->LocalShadowOffset;
209-
210198
if (shadow_offset == 0) {
211199
return 0;
212200
}
213201

214-
uptr shadow_ptr = shadow_offset +
215-
((wg_lid * SLM_SIZE) >> ASAN_SHADOW_SCALE) +
202+
// The size of SLM is 128KB on PVC
203+
constexpr unsigned SLM_SIZE = 128 * 1024;
204+
const auto wid = WorkGroupLinearId();
205+
206+
uptr shadow_ptr = shadow_offset + ((wid * SLM_SIZE) >> ASAN_SHADOW_SCALE) +
216207
((addr & (SLM_SIZE - 1)) >> ASAN_SHADOW_SCALE);
217208

218209
ASAN_DEBUG(const auto shadow_offset_end = launch_info->LocalShadowOffsetEnd;
219210
if (shadow_ptr > shadow_offset_end) {
220211
__spirv_ocl_printf(__local_shadow_out_of_bound, addr,
221-
shadow_ptr, wg_lid, (uptr)shadow_offset);
212+
shadow_ptr, wid, (uptr)shadow_offset);
222213
return 0;
223214
});
224215
return shadow_ptr;
225216
} else if (as == ADDRESS_SPACE_PRIVATE) { // private
226-
// work-group linear id
227-
const auto WG_LID =
228-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
229-
__spirv_BuiltInNumWorkgroups.z +
230-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
231-
__spirv_BuiltInWorkgroupId.z;
232-
233217
const auto shadow_offset = launch_info->PrivateShadowOffset;
234-
235218
if (shadow_offset == 0) {
236219
return 0;
237220
}
238221

222+
const size_t wid = WorkGroupLinearId();
223+
const size_t sid = SubGroupLinearId();
224+
const uptr private_base = launch_info->PrivateBase[sid];
225+
226+
// FIXME: The recorded private_base may not be the lowest private address;
227+
// ideally there should be a built-in to get this information
228+
if (addr < private_base) {
229+
return 0;
230+
}
231+
239232
uptr shadow_ptr = shadow_offset +
240-
((WG_LID * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) +
241-
((addr & (ASAN_PRIVATE_SIZE - 1)) >> ASAN_SHADOW_SCALE);
233+
((wid * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE) +
234+
((addr - private_base) >> ASAN_SHADOW_SCALE);
235+
236+
const auto shadow_offset_end = launch_info->PrivateShadowOffsetEnd;
237+
if (shadow_ptr > shadow_offset_end) {
238+
__spirv_ocl_printf(__private_shadow_out_of_bound, addr, shadow_ptr, wid,
239+
sid, private_base);
240+
return 0;
241+
};
242242

243-
ASAN_DEBUG(const auto shadow_offset_end =
244-
launch_info->PrivateShadowOffsetEnd;
245-
if (shadow_ptr > shadow_offset_end) {
246-
__spirv_ocl_printf(__private_shadow_out_of_bound, addr,
247-
shadow_ptr, WG_LID, (uptr)shadow_offset);
248-
return 0;
249-
});
250243
return shadow_ptr;
251244
}
252245

@@ -347,15 +340,9 @@ void __asan_internal_report_save(ErrorType error_type) {
347340
const int Expected = ASAN_REPORT_NONE;
348341
int Desired = ASAN_REPORT_START;
349342

350-
// work-group linear id
351-
const auto WG_LID =
352-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
353-
__spirv_BuiltInNumWorkgroups.z +
354-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
355-
__spirv_BuiltInWorkgroupId.z;
356-
343+
const size_t wid = WorkGroupLinearId();
357344
auto &SanitizerReport = ((__SYCL_GLOBAL__ AsanRuntimeData *)__AsanLaunchInfo)
358-
->Report[WG_LID % ASAN_MAX_NUM_REPORTS];
345+
->Report[wid % ASAN_MAX_NUM_REPORTS];
359346

360347
if (atomicCompareAndSet(
361348
&(((__SYCL_GLOBAL__ AsanRuntimeData *)__AsanLaunchInfo)->ReportFlag),
@@ -383,15 +370,9 @@ void __asan_internal_report_save(
383370
const int Expected = ASAN_REPORT_NONE;
384371
int Desired = ASAN_REPORT_START;
385372

386-
// work-group linear id
387-
const auto WG_LID =
388-
__spirv_BuiltInWorkgroupId.x * __spirv_BuiltInNumWorkgroups.y *
389-
__spirv_BuiltInNumWorkgroups.z +
390-
__spirv_BuiltInWorkgroupId.y * __spirv_BuiltInNumWorkgroups.z +
391-
__spirv_BuiltInWorkgroupId.z;
392-
373+
const size_t wid = WorkGroupLinearId();
393374
auto &SanitizerReport = ((__SYCL_GLOBAL__ AsanRuntimeData *)__AsanLaunchInfo)
394-
->Report[WG_LID % ASAN_MAX_NUM_REPORTS];
375+
->Report[wid % ASAN_MAX_NUM_REPORTS];
395376

396377
if ((is_recover ||
397378
atomicCompareAndSet(
@@ -896,24 +877,43 @@ static __SYCL_CONSTANT__ const char __mem_set_shadow_private[] =
896877

897878
// We outline the function that sets the shadow memory of private memory,
898879
// because allocating that shadow memory may fail in UR
899-
DEVICE_EXTERN_C_NOINLINE void __asan_set_shadow_private(uptr begin, uptr size,
880+
DEVICE_EXTERN_C_NOINLINE void __asan_set_shadow_private(uptr shadow, uptr size,
900881
char val) {
901-
if (!__AsanLaunchInfo)
882+
auto *launch_info = (__SYCL_GLOBAL__ const AsanRuntimeData *)__AsanLaunchInfo;
883+
if (!launch_info || launch_info->PrivateShadowOffset == 0)
902884
return;
903885

904-
ASAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_private_begin));
905-
906-
auto *launch_info = (__SYCL_GLOBAL__ const AsanRuntimeData *)__AsanLaunchInfo;
907-
if (launch_info->PrivateShadowOffset == 0)
886+
// "__asan_mem_to_shadow" may return 0 although "PrivateShadowOffset != 0", in
887+
// this case, "shadow" may be out of range of private shadow
888+
if (shadow < launch_info->PrivateShadowOffset)
908889
return;
909890

910-
ASAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_private, (void *)begin,
911-
(void *)(begin + size), val & 0xFF));
891+
ASAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_private_begin));
892+
893+
ASAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_private, (void *)shadow,
894+
(void *)(shadow + size), val & 0xFF));
912895

913896
for (size_t i = 0; i < size; i++)
914-
((__SYCL_GLOBAL__ u8 *)begin)[i] = val;
897+
((__SYCL_GLOBAL__ u8 *)shadow)[i] = val;
915898

916899
ASAN_DEBUG(__spirv_ocl_printf(__mem_set_shadow_private_end));
917900
}
918901

902+
static __SYCL_CONSTANT__ const char __asan_print_private_base[] =
903+
"[kernel] set_private_base: %llu -> %p\n";
904+
905+
DEVICE_EXTERN_C_NOINLINE void
906+
__asan_set_private_base(__SYCL_PRIVATE__ void *ptr) {
907+
auto launch_info = (__SYCL_GLOBAL__ const AsanRuntimeData *)__AsanLaunchInfo;
908+
if (!launch_info || launch_info->PrivateShadowOffset == 0)
909+
return;
910+
// Only set on the first sub-group item
911+
if (__spirv_BuiltInSubgroupLocalInvocationId != 0)
912+
return;
913+
const size_t sid = SubGroupLinearId();
914+
launch_info->PrivateBase[sid] = (uptr)ptr;
915+
SubGroupBarrier();
916+
ASAN_DEBUG(__spirv_ocl_printf(__asan_print_private_base, sid, ptr));
917+
}
918+
919919
#endif // __SPIR__ || __SPIRV__

0 commit comments

Comments
 (0)