@@ -24,6 +24,7 @@ constexpr int kUsmDeviceRedzoneMagic = (char)0x81;
24
24
// Per-allocation-type magic values. enqueueAllocInfo picks one of these as its
// ShadowByte according to the AllocType stored in the AllocInfo being
// processed (HOST_USM, SHARED_USM, MEM_BUFFER, DEVICE_GLOBAL respectively).
constexpr int kUsmHostRedzoneMagic = (char )0x82 ;
25
25
constexpr int kUsmSharedRedzoneMagic = (char )0x83 ;
26
26
constexpr int kMemBufferRedzoneMagic = (char )0x84 ;
27
+ constexpr int kDeviceGlobalRedZoneMagic = (char )0x85 ;
27
28
28
29
constexpr auto kSPIR_AsanShadowMemoryGlobalStart =
29
30
" __AsanShadowMemoryGlobalStart" ;
@@ -36,6 +37,9 @@ constexpr auto kSPIR_DeviceType = "__DeviceType";
36
37
37
38
constexpr auto kSPIR_DeviceSanitizerReportMem = " __DeviceSanitizerReportMem" ;
38
39
40
// Names passed to pfnDeviceGlobalVariableRead by
// SanitizerInterceptor::registerDeviceGlobals: the first is read into a
// uint64_t count, the second into an array of that many DeviceGlobalInfo
// records.
+ constexpr auto kSPIR_AsanDeviceGlobalCount = " __AsanDeviceGlobalCount" ;
41
+ constexpr auto kSPIR_AsanDeviceGlobalMetadata = " __AsanDeviceGlobalMetadata" ;
42
+
39
43
DeviceSanitizerReport SPIR_DeviceSanitizerReportMem;
40
44
41
45
uptr MemToShadow_CPU (uptr USM_SHADOW_BASE, uptr UPtr) {
@@ -78,6 +82,19 @@ ur_program_handle_t getProgram(ur_kernel_handle_t Kernel) {
78
82
return Program;
79
83
}
80
84
85
+ void getProgramDevices (ur_program_handle_t Program,
86
+ std::vector<ur_device_handle_t > &Devices) {
87
+ size_t PropSize;
88
+ [[maybe_unused]] ur_result_t Result = context.urDdiTable .Program .pfnGetInfo (
89
+ Program, UR_PROGRAM_INFO_DEVICES, 0 , nullptr , &PropSize);
90
+ assert (Result == UR_RESULT_SUCCESS);
91
+
92
+ Devices.resize (PropSize / sizeof (ur_device_handle_t ));
93
+ Result = context.urDdiTable .Program .pfnGetInfo (
94
+ Program, UR_PROGRAM_INFO_DEVICES, PropSize, Devices.data (), nullptr );
95
+ assert (Result == UR_RESULT_SUCCESS);
96
+ }
97
+
81
98
size_t getLocalMemorySize (ur_device_handle_t Device) {
82
99
size_t LocalMemorySize;
83
100
[[maybe_unused]] auto Result = context.urDdiTable .Device .pfnGetInfo (
@@ -124,7 +141,7 @@ SanitizerInterceptor::~SanitizerInterceptor() {
124
141
ur_result_t SanitizerInterceptor::allocateMemory (
125
142
ur_context_handle_t Context, ur_device_handle_t Device,
126
143
const ur_usm_desc_t *Properties, ur_usm_pool_handle_t Pool, size_t Size,
127
- void **ResultPtr, USMMemoryType Type) {
144
+ void **ResultPtr, AllocType Type) {
128
145
auto Alignment = Properties->align ;
129
146
assert (Alignment == 0 || IsPowerOfTwo (Alignment));
130
147
@@ -147,13 +164,13 @@ ur_result_t SanitizerInterceptor::allocateMemory(
147
164
148
165
void *Allocated = nullptr ;
149
166
150
- if (Type == USMMemoryType::DEVICE ) {
167
+ if (Type == AllocType::DEVICE_USM ) {
151
168
UR_CALL (context.urDdiTable .USM .pfnDeviceAlloc (
152
169
Context, Device, Properties, Pool, NeededSize, &Allocated));
153
- } else if (Type == USMMemoryType::HOST ) {
170
+ } else if (Type == AllocType::HOST_USM ) {
154
171
UR_CALL (context.urDdiTable .USM .pfnHostAlloc (Context, Properties, Pool,
155
172
NeededSize, &Allocated));
156
- } else if (Type == USMMemoryType::SHARE ) {
173
+ } else if (Type == AllocType::SHARED_USM ) {
157
174
UR_CALL (context.urDdiTable .USM .pfnSharedAlloc (
158
175
Context, Device, Properties, Pool, NeededSize, &Allocated));
159
176
} else {
@@ -173,31 +190,31 @@ ur_result_t SanitizerInterceptor::allocateMemory(
173
190
174
191
*ResultPtr = reinterpret_cast <void *>(UserBegin);
175
192
176
- auto AllocInfo = std::make_shared<USMAllocInfo >(
177
- USMAllocInfo {AllocBegin, UserBegin, UserEnd, NeededSize, Type});
193
+ auto AI = std::make_shared<AllocInfo >(
194
+ AllocInfo {AllocBegin, UserBegin, UserEnd, NeededSize, Type});
178
195
179
196
// For updating shadow memory
180
197
if (DeviceInfo) { // device/shared USM
181
198
std::scoped_lock<ur_shared_mutex> Guard (DeviceInfo->Mutex );
182
- DeviceInfo->AllocInfos .emplace_back (AllocInfo );
199
+ DeviceInfo->AllocInfos .emplace_back (AI );
183
200
} else { // host USM's AllocInfo needs to insert into all devices
184
201
for (auto &pair : ContextInfo->DeviceMap ) {
185
202
auto DeviceInfo = pair.second ;
186
203
std::scoped_lock<ur_shared_mutex> Guard (DeviceInfo->Mutex );
187
- DeviceInfo->AllocInfos .emplace_back (AllocInfo );
204
+ DeviceInfo->AllocInfos .emplace_back (AI );
188
205
}
189
206
}
190
207
191
208
// For memory release
192
209
{
193
210
std::scoped_lock<ur_shared_mutex> Guard (ContextInfo->Mutex );
194
- ContextInfo->AllocatedUSMMap [AllocBegin] = std::move (AllocInfo );
211
+ ContextInfo->AllocatedUSMMap [AllocBegin] = std::move (AI );
195
212
}
196
213
197
214
context.logger .info (
198
215
" AllocInfos(AllocBegin={}, User={}-{}, NeededSize={}, Type={})" ,
199
216
(void *)AllocBegin, (void *)UserBegin, (void *)UserEnd, NeededSize,
200
- Type);
217
+ ToString ( Type) );
201
218
202
219
return UR_RESULT_SUCCESS;
203
220
}
@@ -282,8 +299,8 @@ void SanitizerInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel,
282
299
auto KernelName = getKernelName (Kernel);
283
300
284
301
context.logger .always (" \n ====ERROR: DeviceSanitizer: {} on {}" ,
285
- DeviceSanitizerFormat (AH->ErrorType ),
286
- DeviceSanitizerFormat (AH->MemoryType ));
302
+ ToString (AH->ErrorType ),
303
+ ToString (AH->MemoryType ));
287
304
context.logger .always (
288
305
" {} of size {} at kernel <{}> LID({}, {}, {}) GID({}, "
289
306
" {}, {})" ,
@@ -475,7 +492,7 @@ ur_result_t SanitizerInterceptor::enqueueMemSetShadow(
475
492
// / ref: https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm#mapping
476
493
ur_result_t SanitizerInterceptor::enqueueAllocInfo (
477
494
ur_context_handle_t Context, ur_device_handle_t Device,
478
- ur_queue_handle_t Queue, std::shared_ptr<USMAllocInfo > &AllocInfo,
495
+ ur_queue_handle_t Queue, std::shared_ptr<AllocInfo > &AllocInfo,
479
496
ur_event_handle_t &LastEvent) {
480
497
// Init zero
481
498
UR_CALL (enqueueMemSetShadow (Context, Device, Queue, AllocInfo->AllocBegin ,
@@ -496,18 +513,21 @@ ur_result_t SanitizerInterceptor::enqueueAllocInfo(
496
513
497
514
int ShadowByte;
498
515
switch (AllocInfo->Type ) {
499
- case USMMemoryType::HOST :
516
+ case AllocType::HOST_USM :
500
517
ShadowByte = kUsmHostRedzoneMagic ;
501
518
break ;
502
- case USMMemoryType::DEVICE :
519
+ case AllocType::DEVICE_USM :
503
520
ShadowByte = kUsmDeviceRedzoneMagic ;
504
521
break ;
505
- case USMMemoryType::SHARE :
522
+ case AllocType::SHARED_USM :
506
523
ShadowByte = kUsmSharedRedzoneMagic ;
507
524
break ;
508
- case USMMemoryType ::MEM_BUFFER:
525
+ case AllocType ::MEM_BUFFER:
509
526
ShadowByte = kMemBufferRedzoneMagic ;
510
527
break ;
528
+ case AllocType::DEVICE_GLOBAL:
529
+ ShadowByte = kDeviceGlobalRedZoneMagic ;
530
+ break ;
511
531
default :
512
532
ShadowByte = 0xff ;
513
533
assert (false && " Unknow AllocInfo Type" );
@@ -553,6 +573,62 @@ ur_result_t SanitizerInterceptor::updateShadowMemory(ur_queue_handle_t Queue) {
553
573
return UR_RESULT_SUCCESS;
554
574
}
555
575
576
+ ur_result_t
577
+ SanitizerInterceptor::registerDeviceGlobals (ur_context_handle_t Context,
578
+ ur_program_handle_t Program) {
579
+ std::vector<ur_device_handle_t > Devices;
580
+ getProgramDevices (Program, Devices);
581
+
582
+ for (auto Device : Devices) {
583
+ ur_queue_handle_t Queue;
584
+ ur_result_t Result = context.urDdiTable .Queue .pfnCreate (
585
+ Context, Device, nullptr , &Queue);
586
+ if (Result != UR_RESULT_SUCCESS) {
587
+ context.logger .error (" Failed to create command queue: {}" , Result);
588
+ return Result;
589
+ }
590
+
591
+ uint64_t NumOfDeviceGlobal;
592
+ Result = context.urDdiTable .Enqueue .pfnDeviceGlobalVariableRead (
593
+ Queue, Program, kSPIR_AsanDeviceGlobalCount , true ,
594
+ sizeof (NumOfDeviceGlobal), 0 , &NumOfDeviceGlobal, 0 , nullptr ,
595
+ nullptr );
596
+ if (Result == UR_RESULT_ERROR_INVALID_ARGUMENT) {
597
+ context.logger .info (" No device globals" );
598
+ continue ;
599
+ } else if (Result != UR_RESULT_SUCCESS) {
600
+ context.logger .error (" Device Global[{}] Read Failed: {}" ,
601
+ kSPIR_AsanDeviceGlobalCount , Result);
602
+ return Result;
603
+ }
604
+
605
+ std::vector<DeviceGlobalInfo> GVInfos (NumOfDeviceGlobal);
606
+ Result = context.urDdiTable .Enqueue .pfnDeviceGlobalVariableRead (
607
+ Queue, Program, kSPIR_AsanDeviceGlobalMetadata , true ,
608
+ sizeof (DeviceGlobalInfo) * NumOfDeviceGlobal, 0 , &GVInfos[0 ], 0 ,
609
+ nullptr , nullptr );
610
+ if (Result != UR_RESULT_SUCCESS) {
611
+ context.logger .error (" Device Global[{}] Read Failed: {}" ,
612
+ kSPIR_AsanDeviceGlobalMetadata , Result);
613
+ return Result;
614
+ }
615
+
616
+ auto ContextInfo = getContextInfo (Context);
617
+ auto DeviceInfo = ContextInfo->getDeviceInfo (Device);
618
+ for (size_t i = 0 ; i < NumOfDeviceGlobal; i++) {
619
+ auto AI = std::make_shared<AllocInfo>(AllocInfo{
620
+ GVInfos[i].Addr , GVInfos[i].Addr ,
621
+ GVInfos[i].Addr + GVInfos[i].Size , GVInfos[i].SizeWithRedZone ,
622
+ AllocType::DEVICE_GLOBAL});
623
+
624
+ std::scoped_lock<ur_shared_mutex> Guard (DeviceInfo->Mutex );
625
+ DeviceInfo->AllocInfos .emplace_back (AI);
626
+ }
627
+ }
628
+
629
+ return UR_RESULT_SUCCESS;
630
+ }
631
+
556
632
ur_result_t SanitizerInterceptor::insertContext (ur_context_handle_t Context) {
557
633
auto ContextInfo = std::make_shared<ur_sanitizer_layer::ContextInfo>();
558
634
0 commit comments