Skip to content

Commit 1a1839b

Browse files
Merge pull request #1879 from AllanZyne/review/yang/fix_dsan_destruction
[DeviceSanitizer] Fix interceptor destruction order
2 parents 9d2e9ca + fe18b4a commit 1a1839b

File tree

7 files changed

+196
-4
lines changed

7 files changed

+196
-4
lines changed

source/loader/layers/sanitizer/asan_interceptor.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,18 @@ SanitizerInterceptor::~SanitizerInterceptor() {
186186
DestroyShadowMemoryOnCPU();
187187
DestroyShadowMemoryOnPVC();
188188
DestroyShadowMemoryOnDG2();
189+
190+
// We must release these objects before releasing adapters, since
191+
// they may use the adapter in their destructor
192+
m_Quarantine = nullptr;
193+
m_MemBufferMap.clear();
194+
m_AllocationMap.clear();
195+
m_KernelMap.clear();
196+
m_ContextMap.clear();
197+
198+
for (auto Adapter : m_Adapters) {
199+
getContext()->urDdiTable.Global.pfnAdapterRelease(Adapter);
200+
}
189201
}
190202

191203
/// The memory chunk allocated from the underlying allocator looks like this:

source/loader/layers/sanitizer/asan_interceptor.hpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <optional>
2323
#include <queue>
2424
#include <unordered_map>
25+
#include <unordered_set>
2526
#include <vector>
2627

2728
namespace ur_sanitizer_layer {
@@ -48,9 +49,8 @@ struct DeviceInfo {
4849
std::queue<std::shared_ptr<AllocInfo>> Quarantine;
4950
size_t QuarantineSize = 0;
5051

51-
// TODO: re-enable retaining and releasing device handles in DeviceInfo
52-
// constructor/destructor. See PR
53-
// https://github.com/oneapi-src/unified-runtime/pull/1883
52+
// Device handles are special and alive in the whole process lifetime,
53+
// so we needn't retain&release here.
5454
explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {}
5555

5656
ur_result_t allocShadowMemory(ur_context_handle_t Context);
@@ -199,6 +199,16 @@ class SanitizerInterceptor {
199199
ur_result_t eraseMemBuffer(ur_mem_handle_t MemHandle);
200200
std::shared_ptr<MemBuffer> getMemBuffer(ur_mem_handle_t MemHandle);
201201

202+
ur_result_t holdAdapter(ur_adapter_handle_t Adapter) {
203+
std::scoped_lock<ur_shared_mutex> Guard(m_AdaptersMutex);
204+
if (m_Adapters.find(Adapter) != m_Adapters.end()) {
205+
return UR_RESULT_SUCCESS;
206+
}
207+
UR_CALL(getContext()->urDdiTable.Global.pfnAdapterRetain(Adapter));
208+
m_Adapters.insert(Adapter);
209+
return UR_RESULT_SUCCESS;
210+
}
211+
202212
std::optional<AllocationIterator> findAllocInfoByAddress(uptr Address);
203213

204214
std::shared_ptr<ContextInfo> getContextInfo(ur_context_handle_t Context) {
@@ -260,6 +270,9 @@ class SanitizerInterceptor {
260270

261271
std::unique_ptr<Quarantine> m_Quarantine;
262272
logger::Logger &logger;
273+
274+
std::unordered_set<ur_adapter_handle_t> m_Adapters;
275+
ur_shared_mutex m_AdaptersMutex;
263276
};
264277

265278
} // namespace ur_sanitizer_layer

source/loader/layers/sanitizer/ur_sanddi.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,38 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices,
5252

5353
} // namespace
5454

55+
///////////////////////////////////////////////////////////////////////////////
56+
/// @brief Intercept function for urAdapterGet
57+
__urdlllocal ur_result_t UR_APICALL urAdapterGet(
58+
uint32_t
59+
NumEntries, ///< [in] the number of adapters to be added to phAdapters.
60+
///< If phAdapters is not NULL, then NumEntries should be greater than
61+
///< zero, otherwise ::UR_RESULT_ERROR_INVALID_SIZE,
62+
///< will be returned.
63+
ur_adapter_handle_t *
64+
phAdapters, ///< [out][optional][range(0, NumEntries)] array of handle of adapters.
65+
///< If NumEntries is less than the number of adapters available, then
66+
///< ::urAdapterGet shall only retrieve that number of platforms.
67+
uint32_t *
68+
pNumAdapters ///< [out][optional] returns the total number of adapters available.
69+
) {
70+
auto pfnAdapterGet = getContext()->urDdiTable.Global.pfnAdapterGet;
71+
72+
if (nullptr == pfnAdapterGet) {
73+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
74+
}
75+
76+
ur_result_t result = pfnAdapterGet(NumEntries, phAdapters, pNumAdapters);
77+
if (result == UR_RESULT_SUCCESS && phAdapters) {
78+
const uint32_t NumAdapters = pNumAdapters ? *pNumAdapters : NumEntries;
79+
for (uint32_t i = 0; i < NumAdapters; ++i) {
80+
UR_CALL(getContext()->interceptor->holdAdapter(phAdapters[i]));
81+
}
82+
}
83+
84+
return result;
85+
}
86+
5587
///////////////////////////////////////////////////////////////////////////////
5688
/// @brief Intercept function for urUSMHostAlloc
5789
__urdlllocal ur_result_t UR_APICALL urUSMHostAlloc(
@@ -1324,6 +1356,36 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer(
13241356
return result;
13251357
}
13261358

1359+
///////////////////////////////////////////////////////////////////////////////
1360+
/// @brief Exported function for filling application's Global table
1361+
/// with current process' addresses
1362+
///
1363+
/// @returns
1364+
/// - ::UR_RESULT_SUCCESS
1365+
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
1366+
/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
1367+
__urdlllocal ur_result_t UR_APICALL urGetGlobalProcAddrTable(
1368+
ur_api_version_t version, ///< [in] API version requested
1369+
ur_global_dditable_t
1370+
*pDdiTable ///< [in,out] pointer to table of DDI function pointers
1371+
) {
1372+
if (nullptr == pDdiTable) {
1373+
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
1374+
}
1375+
1376+
if (UR_MAJOR_VERSION(ur_sanitizer_layer::getContext()->version) !=
1377+
UR_MAJOR_VERSION(version) ||
1378+
UR_MINOR_VERSION(ur_sanitizer_layer::getContext()->version) >
1379+
UR_MINOR_VERSION(version)) {
1380+
return UR_RESULT_ERROR_UNSUPPORTED_VERSION;
1381+
}
1382+
1383+
ur_result_t result = UR_RESULT_SUCCESS;
1384+
1385+
pDdiTable->pfnAdapterGet = ur_sanitizer_layer::urAdapterGet;
1386+
1387+
return result;
1388+
}
13271389
///////////////////////////////////////////////////////////////////////////////
13281390
/// @brief Exported function for filling application's Context table
13291391
/// with current process' addresses
@@ -1597,6 +1659,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable,
15971659

15981660
urDdiTable = *dditable;
15991661

1662+
if (UR_RESULT_SUCCESS == result) {
1663+
result = ur_sanitizer_layer::urGetGlobalProcAddrTable(
1664+
UR_API_VERSION_CURRENT, &dditable->Global);
1665+
}
1666+
16001667
if (UR_RESULT_SUCCESS == result) {
16011668
result = ur_sanitizer_layer::urGetContextProcAddrTable(
16021669
UR_API_VERSION_CURRENT, &dditable->Context);

source/loader/ur_lib.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ void context_t::initLayers() const {
5757
}
5858

5959
void context_t::tearDownLayers() const {
60-
for (auto &[layer, destroy] : layers) {
60+
for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
61+
auto [layer, destroy] = *it;
6162
layer->tearDown();
6263
destroy();
6364
}

test/layers/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,7 @@ add_subdirectory(validation)
88
if(UR_ENABLE_TRACING)
99
add_subdirectory(tracing)
1010
endif()
11+
12+
if(UR_ENABLE_SANITIZER)
13+
add_subdirectory(sanitizer)
14+
endif()

test/layers/sanitizer/CMakeLists.txt

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright (C) 2023-2024 Intel Corporation
2+
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
# See LICENSE.TXT
4+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
set(UR_SANITIZER_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR})
7+
set(SAN_TEST_PREFIX sanitizer_test)
8+
9+
function(add_sanitizer_test_executable name)
10+
add_ur_executable(${SAN_TEST_PREFIX}-${name}
11+
${ARGN})
12+
target_link_libraries(${SAN_TEST_PREFIX}-${name}
13+
PRIVATE
14+
${PROJECT_NAME}::loader
15+
${PROJECT_NAME}::headers
16+
${PROJECT_NAME}::testing
17+
${PROJECT_NAME}::mock
18+
GTest::gtest_main)
19+
endfunction()
20+
21+
function(set_sanitizer_test_properties name)
22+
set_tests_properties(${name} PROPERTIES LABELS "sanitizer")
23+
set_property(TEST ${name} PROPERTY ENVIRONMENT
24+
"UR_LOG_SANITIZER=level:debug\;flush:debug\;output:stdout")
25+
endfunction()
26+
27+
function(add_sanitizer_test name)
28+
add_sanitizer_test_executable(${name} ${ARGN})
29+
30+
add_test(NAME ${name}
31+
COMMAND ${SAN_TEST_PREFIX}-${name}
32+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
33+
34+
set_sanitizer_test_properties(${name})
35+
endfunction()
36+
37+
add_sanitizer_test(asan asan.cpp)

test/layers/sanitizer/asan.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
6+
* See LICENSE.TXT
7+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8+
*
9+
* @file asan.cpp
10+
*
11+
*/
12+
13+
#include <gtest/gtest.h>
14+
#include <ur_api.h>
15+
16+
TEST(DeviceAsan, Initialization) {
17+
ur_result_t status;
18+
19+
ur_loader_config_handle_t loaderConfig;
20+
status = urLoaderConfigCreate(&loaderConfig);
21+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
22+
status = urLoaderConfigEnableLayer(loaderConfig, "UR_LAYER_ASAN");
23+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
24+
25+
status = urLoaderInit(0, loaderConfig);
26+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
27+
28+
ur_adapter_handle_t adapter;
29+
status = urAdapterGet(1, &adapter, nullptr);
30+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
31+
32+
ur_platform_handle_t platform;
33+
status = urPlatformGet(&adapter, 1, 1, &platform, nullptr);
34+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
35+
36+
ur_device_handle_t device;
37+
status = urDeviceGet(platform, UR_DEVICE_TYPE_DEFAULT, 1, &device, nullptr);
38+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
39+
40+
ur_context_handle_t context;
41+
status = urContextCreate(1, &device, nullptr, &context);
42+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
43+
44+
status = urContextRelease(context);
45+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
46+
47+
status = urDeviceRelease(device);
48+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
49+
50+
status = urAdapterRelease(adapter);
51+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
52+
53+
status = urLoaderTearDown();
54+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
55+
56+
status = urLoaderConfigRelease(loaderConfig);
57+
ASSERT_EQ(status, UR_RESULT_SUCCESS);
58+
}

0 commit comments

Comments
 (0)