Skip to content

Commit 9f78383

Browse files
authored
Merge pull request #1533 from AllanZyne/sanitizer-buffer
[DeviceSanitizer] Support detecting out-of-bounds errors on sycl::buffer
2 parents c015f89 + b944210 commit 9f78383

File tree

6 files changed

+1150
-19
lines changed

6 files changed

+1150
-19
lines changed

source/loader/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ if(UR_ENABLE_SANITIZER)
108108
${CMAKE_CURRENT_SOURCE_DIR}/../ur/ur.cpp
109109
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.cpp
110110
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_allocator.hpp
111+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.cpp
112+
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_buffer.hpp
111113
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.cpp
112114
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_interceptor.hpp
113115
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan_libdevice.hpp
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
6+
* See LICENSE.TXT
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* @file asan_buffer.cpp
10+
*
11+
*/
12+
13+
#include "asan_buffer.hpp"
14+
#include "asan_interceptor.hpp"
15+
#include "ur_sanitizer_layer.hpp"
16+
#include "ur_sanitizer_utils.hpp"
17+
18+
namespace ur_sanitizer_layer {
19+
20+
ur_result_t EnqueueMemCopyRectHelper(
21+
ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
22+
ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
23+
size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
24+
bool Blocking, uint32_t NumEventsInWaitList,
25+
const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) {
26+
// If user doesn't determine src/dst row pitch and slice pitch, just use
27+
// region for it.
28+
if (SrcRowPitch == 0) {
29+
SrcRowPitch = Region.width;
30+
}
31+
32+
if (SrcSlicePitch == 0) {
33+
SrcSlicePitch = SrcRowPitch * Region.height;
34+
}
35+
36+
if (DstRowPitch == 0) {
37+
DstRowPitch = Region.width;
38+
}
39+
40+
if (DstSlicePitch == 0) {
41+
DstSlicePitch = DstRowPitch * Region.height;
42+
}
43+
44+
// Calculate the src and dst addresses that actually will be copied.
45+
char *SrcOrigin = pSrc + SrcOffset.x + SrcRowPitch * SrcOffset.y +
46+
SrcSlicePitch * SrcOffset.z;
47+
char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y +
48+
DstSlicePitch * DstOffset.z;
49+
50+
std::vector<ur_event_handle_t> Events;
51+
Events.reserve(Region.depth);
52+
// For now, USM doesn't support 3D memory copy operation, so we can only
53+
// loop call 2D memory copy function to implement it.
54+
for (size_t i = 0; i < Region.depth; i++) {
55+
ur_event_handle_t NewEvent{};
56+
UR_CALL(context.urDdiTable.Enqueue.pfnUSMMemcpy2D(
57+
Queue, Blocking, DstOrigin + (i * DstSlicePitch), DstRowPitch,
58+
SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width,
59+
Region.height, NumEventsInWaitList, EventWaitList, &NewEvent));
60+
61+
Events.push_back(NewEvent);
62+
}
63+
64+
UR_CALL(context.urDdiTable.Enqueue.pfnEventsWait(Queue, Events.size(),
65+
Events.data(), Event));
66+
67+
return UR_RESULT_SUCCESS;
68+
}
69+
70+
ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
71+
// Sub-buffers don't maintain own allocations but rely on parent buffer.
72+
if (SubBuffer) {
73+
UR_CALL(SubBuffer->Parent->getHandle(Device, Handle));
74+
Handle += SubBuffer->Origin;
75+
return UR_RESULT_SUCCESS;
76+
}
77+
78+
auto &Allocation = Allocations[Device];
79+
if (!Allocation) {
80+
ur_usm_desc_t USMDesc{};
81+
USMDesc.align = getAlignment();
82+
ur_usm_pool_handle_t Pool{};
83+
ur_result_t URes = context.interceptor->allocateMemory(
84+
Context, Device, &USMDesc, Pool, Size, AllocType::MEM_BUFFER,
85+
ur_cast<void **>(&Allocation));
86+
if (URes != UR_RESULT_SUCCESS) {
87+
context.logger.error(
88+
"Failed to allocate {} bytes memory for buffer {}", Size, this);
89+
return URes;
90+
}
91+
92+
if (HostPtr) {
93+
ManagedQueue Queue(Context, Device);
94+
URes = context.urDdiTable.Enqueue.pfnUSMMemcpy(
95+
Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr);
96+
if (URes != UR_RESULT_SUCCESS) {
97+
context.logger.error("Failed to copy {} bytes data from host "
98+
"pointer {} to buffer {}",
99+
Size, HostPtr, this);
100+
return URes;
101+
}
102+
}
103+
}
104+
105+
Handle = Allocation;
106+
107+
return UR_RESULT_SUCCESS;
108+
}
109+
110+
ur_result_t MemBuffer::free() {
111+
for (const auto &[_, Ptr] : Allocations) {
112+
ur_result_t URes = context.interceptor->releaseMemory(Context, Ptr);
113+
if (URes != UR_RESULT_SUCCESS) {
114+
context.logger.error("Failed to free buffer handle {}", Ptr);
115+
return URes;
116+
}
117+
}
118+
Allocations.clear();
119+
return UR_RESULT_SUCCESS;
120+
}
121+
122+
size_t MemBuffer::getAlignment() {
123+
// Choose an alignment that is at most 128 and is the next power of 2
124+
// for sizes less than 128.
125+
// TODO: If we don't set the alignment size explicitly, the device will
126+
// usually choose a very large size (more than 1k). Then sanitizer will
127+
// allocate extra unnessary memory. Not sure if this will impact
128+
// performance.
129+
size_t MsbIdx = 63 - __builtin_clz(Size);
130+
size_t Alignment = (1 << (MsbIdx + 1));
131+
if (Alignment > 128) {
132+
Alignment = 128;
133+
}
134+
return Alignment;
135+
}
136+
137+
} // namespace ur_sanitizer_layer
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
6+
* See LICENSE.TXT
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* @file asan_buffer.hpp
10+
*
11+
*/
12+
13+
#pragma once
14+
15+
#include <atomic>
16+
#include <memory>
17+
#include <optional>
18+
19+
#include "common.hpp"
20+
21+
namespace ur_sanitizer_layer {
22+
23+
struct MemBuffer {
24+
// Buffer constructor
25+
MemBuffer(ur_context_handle_t Context, size_t Size, char *HostPtr)
26+
: Context(Context), Size(Size), HostPtr(HostPtr) {}
27+
28+
// Sub-buffer constructor
29+
MemBuffer(std::shared_ptr<MemBuffer> Parent, size_t Origin, size_t Size)
30+
: Context(Parent->Context), Size(Size), SubBuffer{{Parent, Origin}} {}
31+
32+
ur_result_t getHandle(ur_device_handle_t Device, char *&Handle);
33+
34+
ur_result_t free();
35+
36+
size_t getAlignment();
37+
38+
std::unordered_map<ur_device_handle_t, char *> Allocations;
39+
40+
enum AccessMode { UNKNOWN, READ_WRITE, READ_ONLY, WRITE_ONLY };
41+
42+
struct Mapping {
43+
size_t Offset;
44+
size_t Size;
45+
};
46+
47+
std::unordered_map<void *, Mapping> Mappings;
48+
49+
ur_context_handle_t Context;
50+
51+
size_t Size;
52+
53+
char *HostPtr{};
54+
55+
struct SubBuffer_t {
56+
std::shared_ptr<MemBuffer> Parent;
57+
size_t Origin;
58+
};
59+
60+
std::optional<SubBuffer_t> SubBuffer;
61+
62+
std::atomic<int32_t> RefCount = 1;
63+
64+
ur_shared_mutex Mutex;
65+
};
66+
67+
ur_result_t EnqueueMemCopyRectHelper(
68+
ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset,
69+
ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch,
70+
size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch,
71+
bool Blocking, uint32_t NumEventsInWaitList,
72+
const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event);
73+
74+
} // namespace ur_sanitizer_layer

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ ur_result_t SanitizerInterceptor::allocateMemory(
259259
} else if (Type == AllocType::SHARED_USM) {
260260
UR_CALL(context.urDdiTable.USM.pfnSharedAlloc(
261261
Context, Device, Properties, Pool, NeededSize, &Allocated));
262+
} else if (Type == AllocType::MEM_BUFFER) {
263+
UR_CALL(context.urDdiTable.USM.pfnDeviceAlloc(
264+
Context, Device, Properties, Pool, NeededSize, &Allocated));
262265
} else {
263266
context.logger.error("Unsupport memory type");
264267
return UR_RESULT_ERROR_INVALID_ARGUMENT;
@@ -662,13 +665,54 @@ ur_result_t SanitizerInterceptor::eraseKernel(ur_kernel_handle_t Kernel) {
662665
return UR_RESULT_SUCCESS;
663666
}
664667

668+
ur_result_t
669+
SanitizerInterceptor::insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer) {
670+
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
671+
assert(m_MemBufferMap.find(ur_cast<ur_mem_handle_t>(MemBuffer.get())) ==
672+
m_MemBufferMap.end());
673+
m_MemBufferMap.emplace(reinterpret_cast<ur_mem_handle_t>(MemBuffer.get()),
674+
MemBuffer);
675+
return UR_RESULT_SUCCESS;
676+
}
677+
678+
ur_result_t SanitizerInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) {
679+
std::scoped_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
680+
assert(m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end());
681+
m_MemBufferMap.erase(MemHandle);
682+
return UR_RESULT_SUCCESS;
683+
}
684+
685+
std::shared_ptr<MemBuffer>
686+
SanitizerInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
687+
std::shared_lock<ur_shared_mutex> Guard(m_MemBufferMapMutex);
688+
if (m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()) {
689+
return m_MemBufferMap[MemHandle];
690+
}
691+
return nullptr;
692+
}
693+
665694
ur_result_t SanitizerInterceptor::prepareLaunch(
666695
ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
667696
ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
668697
USMLaunchInfo &LaunchInfo) {
669698
auto Program = GetProgram(Kernel);
670699

671700
do {
701+
// Set membuffer arguments
702+
auto KernelInfo = getKernelInfo(Kernel);
703+
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
704+
char *ArgPointer = nullptr;
705+
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
706+
ur_result_t URes = context.urDdiTable.Kernel.pfnSetArgPointer(
707+
Kernel, ArgIndex, nullptr, &ArgPointer);
708+
if (URes != UR_RESULT_SUCCESS) {
709+
context.logger.error(
710+
"Failed to set buffer {} as the {} arg to kernel {}: {}",
711+
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
712+
URes);
713+
}
714+
}
715+
672716
// Set launch info argument
673717
auto ArgNums = GetKernelNumArgs(Kernel);
674718
if (ArgNums) {

source/loader/layers/sanitizer/asan_interceptor.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#pragma once
1414

1515
#include "asan_allocator.hpp"
16+
#include "asan_buffer.hpp"
1617
#include "asan_libdevice.hpp"
1718
#include "common.hpp"
1819
#include "ur_sanitizer_layer.hpp"
@@ -81,8 +82,10 @@ struct QueueInfo {
8182

8283
struct KernelInfo {
8384
ur_kernel_handle_t Handle;
84-
8585
ur_shared_mutex Mutex;
86+
std::atomic<int32_t> RefCount = 1;
87+
std::unordered_map<uint32_t, std::shared_ptr<MemBuffer>> BufferArgs;
88+
8689
// Need preserve the order of local arguments
8790
std::map<uint32_t, LocalArgsInfo> LocalArgs;
8891

@@ -194,6 +197,10 @@ class SanitizerInterceptor {
194197
ur_result_t insertKernel(ur_kernel_handle_t Kernel);
195198
ur_result_t eraseKernel(ur_kernel_handle_t Kernel);
196199

200+
ur_result_t insertMemBuffer(std::shared_ptr<MemBuffer> MemBuffer);
201+
ur_result_t eraseMemBuffer(ur_mem_handle_t MemHandle);
202+
std::shared_ptr<MemBuffer> getMemBuffer(ur_mem_handle_t MemHandle);
203+
197204
std::optional<AllocationIterator> findAllocInfoByAddress(uptr Address);
198205

199206
std::shared_ptr<ContextInfo> getContextInfo(ur_context_handle_t Context) {
@@ -245,6 +252,10 @@ class SanitizerInterceptor {
245252
m_KernelMap;
246253
ur_shared_mutex m_KernelMapMutex;
247254

255+
std::unordered_map<ur_mem_handle_t, std::shared_ptr<MemBuffer>>
256+
m_MemBufferMap;
257+
ur_shared_mutex m_MemBufferMapMutex;
258+
248259
/// Assumption: all USM chunks are allocated in one VA
249260
AllocationMap m_AllocationMap;
250261
ur_shared_mutex m_AllocationMapMutex;

0 commit comments

Comments
 (0)