Skip to content

Commit d8fde1b

Browse files
authored
Merge pull request #2266 from zhaomaosu/reuse-shadow-memory
[DeviceASAN] Re-use shadow if required size is not larger than last one
2 parents d88481b + bc5105f commit d8fde1b

File tree

3 files changed

+163
-124
lines changed

3 files changed

+163
-124
lines changed

source/loader/layers/sanitizer/asan/asan_interceptor.cpp

Lines changed: 34 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -773,28 +773,6 @@ ur_result_t AsanInterceptor::prepareLaunch(
773773
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
774774
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;
775775

776-
auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
777-
Device = DeviceInfo->Handle,
778-
Queue](size_t Size, uptr &Ptr) {
779-
void *Allocated = nullptr;
780-
auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc(
781-
Context, Device, nullptr, nullptr, Size, &Allocated);
782-
if (URes != UR_RESULT_SUCCESS) {
783-
return URes;
784-
}
785-
// Initialize shadow memory
786-
URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size);
787-
if (URes != UR_RESULT_SUCCESS) {
788-
[[maybe_unused]] auto URes =
789-
getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
790-
assert(URes == UR_RESULT_SUCCESS &&
791-
"urUSMFree failed at allocating shadow memory");
792-
Allocated = nullptr;
793-
}
794-
Ptr = (uptr)Allocated;
795-
return URes;
796-
};
797-
798776
auto LocalMemoryUsage =
799777
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
800778
auto PrivateMemoryUsage =
@@ -806,86 +784,45 @@ ur_result_t AsanInterceptor::prepareLaunch(
806784

807785
// Write shadow memory offset for local memory
808786
if (getOptions().DetectLocals) {
809-
// CPU needn't this
810-
if (DeviceInfo->Type == DeviceType::GPU_PVC ||
811-
DeviceInfo->Type == DeviceType::GPU_DG2) {
812-
const size_t LocalMemorySize =
813-
GetDeviceLocalMemorySize(DeviceInfo->Handle);
814-
const size_t LocalShadowMemorySize =
815-
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
816-
817-
getContext()->logger.debug(
818-
"LocalMemory(WorkGroup={}, LocalMemorySize={}, "
819-
"LocalShadowMemorySize={})",
820-
NumWG, LocalMemorySize, LocalShadowMemorySize);
821-
822-
if (EnqueueAllocateShadowMemory(
823-
LocalShadowMemorySize,
824-
LaunchInfo.Data.Host.LocalShadowOffset) !=
825-
UR_RESULT_SUCCESS) {
826-
getContext()->logger.warning(
827-
"Failed to allocate shadow memory for local "
828-
"memory, maybe the number of workgroup ({}) is too "
829-
"large",
830-
NumWG);
831-
getContext()->logger.warning(
832-
"Skip checking local memory of kernel <{}>",
833-
GetKernelName(Kernel));
834-
} else {
835-
LaunchInfo.Data.Host.LocalShadowOffsetEnd =
836-
LaunchInfo.Data.Host.LocalShadowOffset +
837-
LocalShadowMemorySize - 1;
838-
839-
ContextInfo->Stats.UpdateShadowMalloced(
840-
LocalShadowMemorySize);
841-
842-
getContext()->logger.info(
843-
"ShadowMemory(Local, {} - {})",
844-
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
845-
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
846-
}
787+
if (DeviceInfo->Shadow->AllocLocalShadow(
788+
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
789+
LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
790+
UR_RESULT_SUCCESS) {
791+
getContext()->logger.warning(
792+
"Failed to allocate shadow memory for local "
793+
"memory, maybe the number of workgroup ({}) is too "
794+
"large",
795+
NumWG);
796+
getContext()->logger.warning(
797+
"Skip checking local memory of kernel <{}>",
798+
GetKernelName(Kernel));
799+
} else {
800+
getContext()->logger.info(
801+
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
802+
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
803+
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
847804
}
848805
}
849806

850807
// Write shadow memory offset for private memory
851808
if (getOptions().DetectPrivates) {
852-
if (DeviceInfo->Type == DeviceType::CPU) {
853-
LaunchInfo.Data.Host.PrivateShadowOffset =
854-
DeviceInfo->Shadow->ShadowBegin;
855-
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
856-
DeviceInfo->Type == DeviceType::GPU_DG2) {
857-
const size_t PrivateShadowMemorySize =
858-
(NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
859-
860-
getContext()->logger.debug("PrivateMemory(WorkGroup={}, "
861-
"PrivateShadowMemorySize={})",
862-
NumWG, PrivateShadowMemorySize);
863-
864-
if (EnqueueAllocateShadowMemory(
865-
PrivateShadowMemorySize,
866-
LaunchInfo.Data.Host.PrivateShadowOffset) !=
867-
UR_RESULT_SUCCESS) {
868-
getContext()->logger.warning(
869-
"Failed to allocate shadow memory for private "
870-
"memory, maybe the number of workgroup ({}) is too "
871-
"large",
872-
NumWG);
873-
getContext()->logger.warning(
874-
"Skip checking private memory of kernel <{}>",
875-
GetKernelName(Kernel));
876-
} else {
877-
LaunchInfo.Data.Host.PrivateShadowOffsetEnd =
878-
LaunchInfo.Data.Host.PrivateShadowOffset +
879-
PrivateShadowMemorySize - 1;
880-
881-
ContextInfo->Stats.UpdateShadowMalloced(
882-
PrivateShadowMemorySize);
883-
884-
getContext()->logger.info(
885-
"ShadowMemory(Private, {} - {})",
886-
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
887-
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
888-
}
809+
if (DeviceInfo->Shadow->AllocPrivateShadow(
810+
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
811+
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
812+
UR_RESULT_SUCCESS) {
813+
getContext()->logger.warning(
814+
"Failed to allocate shadow memory for private "
815+
"memory, maybe the number of workgroup ({}) is too "
816+
"large",
817+
NumWG);
818+
getContext()->logger.warning(
819+
"Skip checking private memory of kernel <{}>",
820+
GetKernelName(Kernel));
821+
} else {
822+
getContext()->logger.info(
823+
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
824+
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
825+
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
889826
}
890827
}
891828

@@ -970,24 +907,6 @@ ContextInfo::~ContextInfo() {
970907

971908
AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
972909
[[maybe_unused]] ur_result_t Result;
973-
auto Type = GetDeviceType(Context, Device);
974-
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
975-
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
976-
if (Host.PrivateShadowOffset) {
977-
ContextInfo->Stats.UpdateShadowFreed(Host.PrivateShadowOffsetEnd -
978-
Host.PrivateShadowOffset + 1);
979-
Result = getContext()->urDdiTable.USM.pfnFree(
980-
Context, (void *)Host.PrivateShadowOffset);
981-
assert(Result == UR_RESULT_SUCCESS);
982-
}
983-
if (Host.LocalShadowOffset) {
984-
ContextInfo->Stats.UpdateShadowFreed(Host.LocalShadowOffsetEnd -
985-
Host.LocalShadowOffset + 1);
986-
Result = getContext()->urDdiTable.USM.pfnFree(
987-
Context, (void *)Host.LocalShadowOffset);
988-
assert(Result == UR_RESULT_SUCCESS);
989-
}
990-
}
991910
if (Host.LocalArgs) {
992911
Result = getContext()->urDdiTable.USM.pfnFree(Context,
993912
(void *)Host.LocalArgs);

source/loader/layers/sanitizer/asan/asan_shadow.cpp

Lines changed: 97 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -132,16 +132,23 @@ ur_result_t ShadowMemoryGPU::Setup() {
132132
}
133133

134134
ur_result_t ShadowMemoryGPU::Destory() {
135-
if (ShadowBegin == 0) {
136-
return UR_RESULT_SUCCESS;
135+
if (PrivateShadowOffset != 0) {
136+
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
137+
Context, (void *)PrivateShadowOffset));
138+
PrivateShadowOffset = 0;
137139
}
138-
static ur_result_t Result = [this]() {
139-
auto Result = getContext()->urDdiTable.VirtualMem.pfnFree(
140-
Context, (const void *)ShadowBegin, GetShadowSize());
141-
getContext()->urDdiTable.Context.pfnRelease(Context);
142-
return Result;
143-
}();
144-
return Result;
140+
if (LocalShadowOffset != 0) {
141+
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
142+
Context, (void *)LocalShadowOffset));
143+
LocalShadowOffset = 0;
144+
}
145+
if (ShadowBegin != 0) {
146+
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
147+
Context, (const void *)ShadowBegin, GetShadowSize()));
148+
UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
149+
ShadowBegin = ShadowEnd = 0;
150+
}
151+
return UR_RESULT_SUCCESS;
145152
}
146153

147154
ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
@@ -257,6 +264,87 @@ ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
257264
return UR_RESULT_SUCCESS;
258265
}
259266

267+
/// Provide the shadow region that covers work-group local memory for one
/// kernel launch, re-using the previously allocated buffer when it is already
/// large enough.
///
/// \param Queue  Queue used to zero-initialize a freshly allocated buffer.
/// \param NumWG  Number of work-groups of the launch; the required size is
///               (NumWG * device local memory size) >> ASAN_SHADOW_SCALE.
/// \param Begin  [out] First address of the shadow region (only written on
///               success).
/// \param End    [out] Last address (inclusive) of the region required by this
///               launch (only written on success).
/// \return UR_RESULT_SUCCESS on success; otherwise the error reported by the
///         failing free / allocation / initialization call. On failure no
///         buffer is kept, so the next call allocates from scratch.
ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
                                              uint32_t NumWG, uptr &Begin,
                                              uptr &End) {
    const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
    const size_t RequiredShadowSize =
        (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
    // NOTE(review): a function-local static is shared by every ShadowMemoryGPU
    // instance, while LocalShadowOffset is a per-instance member. With more
    // than one GPU device the recorded size can belong to another instance's
    // buffer; this probably wants to be a data member - confirm and move it.
    static size_t LastAllocedSize = 0;
    if (RequiredShadowSize > LastAllocedSize) {
        auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
        if (LocalShadowOffset) {
            // The current buffer is too small: release it before growing.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
                Context, (void *)LocalShadowOffset));
            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
            LocalShadowOffset = 0;
            LastAllocedSize = 0;
        }

        // Allocate into a local pointer so that the member is only updated
        // once the buffer is fully usable.
        void *Allocated = nullptr;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, RequiredShadowSize, &Allocated));

        // Initialize shadow memory
        ur_result_t URes =
            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
        if (URes != UR_RESULT_SUCCESS) {
            // Do not report success (or account the allocation) for a buffer
            // that could not be initialized; hand the error to the caller so
            // it can skip local-memory checking for this kernel.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Allocated));
            return URes;
        }

        LocalShadowOffset = (uptr)Allocated;
        LastAllocedSize = RequiredShadowSize;
        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
    }

    Begin = LocalShadowOffset;
    End = LocalShadowOffset + RequiredShadowSize - 1;
    return UR_RESULT_SUCCESS;
}
307+
308+
/// Provide the shadow region that covers private memory for one kernel
/// launch, re-using the previously allocated buffer when it is already large
/// enough.
///
/// \param Queue  Queue used to zero-initialize a freshly allocated buffer.
/// \param NumWG  Number of work-groups of the launch; the required size is
///               (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE.
/// \param Begin  [out] First address of the shadow region (only written on
///               success).
/// \param End    [out] Last address (inclusive) of the region required by this
///               launch (only written on success).
/// \return UR_RESULT_SUCCESS on success; otherwise the error reported by the
///         failing free / allocation / initialization call. On failure no
///         buffer is kept, so the next call allocates from scratch.
ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
                                                uint32_t NumWG, uptr &Begin,
                                                uptr &End) {
    const size_t RequiredShadowSize =
        (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
    // NOTE(review): a function-local static is shared by every ShadowMemoryGPU
    // instance, while PrivateShadowOffset is a per-instance member. With more
    // than one GPU device the recorded size can belong to another instance's
    // buffer; this probably wants to be a data member - confirm and move it.
    static size_t LastAllocedSize = 0;
    if (RequiredShadowSize > LastAllocedSize) {
        auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
        if (PrivateShadowOffset) {
            // The current buffer is too small: release it before growing.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
                Context, (void *)PrivateShadowOffset));
            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
            PrivateShadowOffset = 0;
            LastAllocedSize = 0;
        }

        // Allocate into a local pointer so that the member is only updated
        // once the buffer is fully usable.
        void *Allocated = nullptr;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, RequiredShadowSize, &Allocated));

        // Initialize shadow memory
        ur_result_t URes =
            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
        if (URes != UR_RESULT_SUCCESS) {
            // Do not report success (or account the allocation) for a buffer
            // that could not be initialized; hand the error to the caller so
            // it can skip private-memory checking for this kernel.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Allocated));
            return URes;
        }

        PrivateShadowOffset = (uptr)Allocated;
        LastAllocedSize = RequiredShadowSize;
        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
    }

    Begin = PrivateShadowOffset;
    End = PrivateShadowOffset + RequiredShadowSize - 1;
    return UR_RESULT_SUCCESS;
}
347+
260348
uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
261349
if (Ptr & 0xFF00000000000000ULL) { // Device USM
262350
return ShadowBegin + 0x80000000000ULL +

source/loader/layers/sanitizer/asan/asan_shadow.hpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,14 @@ struct ShadowMemory {
4141

4242
virtual size_t GetShadowSize() = 0;
4343

44+
virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
45+
uint32_t NumWG, uptr &Begin,
46+
uptr &End) = 0;
47+
48+
virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
49+
uint32_t NumWG, uptr &Begin,
50+
uptr &End) = 0;
51+
4452
ur_context_handle_t Context{};
4553

4654
ur_device_handle_t Device{};
@@ -64,6 +72,20 @@ struct ShadowMemoryCPU final : public ShadowMemory {
6472
uptr Size, u8 Value) override;
6573

6674
size_t GetShadowSize() override { return 0x80000000000ULL; }
75+
76+
ur_result_t AllocLocalShadow(ur_queue_handle_t, uint32_t, uptr &Begin,
77+
uptr &End) override {
78+
Begin = ShadowBegin;
79+
End = ShadowEnd;
80+
return UR_RESULT_SUCCESS;
81+
}
82+
83+
ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr &Begin,
84+
uptr &End) override {
85+
Begin = ShadowBegin;
86+
End = ShadowEnd;
87+
return UR_RESULT_SUCCESS;
88+
}
6789
};
6890

6991
struct ShadowMemoryGPU : public ShadowMemory {
@@ -78,12 +100,22 @@ struct ShadowMemoryGPU : public ShadowMemory {
78100

79101
ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;
80102

103+
ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
104+
uptr &Begin, uptr &End) override final;
105+
106+
ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumWG,
107+
uptr &Begin, uptr &End) override final;
108+
81109
ur_mutex VirtualMemMapsMutex;
82110

83111
std::unordered_map<
84112
uptr, std::pair<ur_physical_mem_handle_t,
85113
std::unordered_set<std::shared_ptr<AllocInfo>>>>
86114
VirtualMemMaps;
115+
116+
uptr LocalShadowOffset = 0;
117+
118+
uptr PrivateShadowOffset = 0;
87119
};
88120

89121
/// Shadow Memory layout of GPU PVC device

0 commit comments

Comments
 (0)