Skip to content

Commit be7057c

Browse files
committed
Use pointer metadata
1 parent be3ed4c commit be7057c

File tree

2 files changed

+138
-70
lines changed

2 files changed

+138
-70
lines changed

source/adapters/native_cpu/context.hpp

Lines changed: 113 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010

1111
#pragma once
1212

13-
#include <unordered_map>
13+
#include <mutex>
14+
#include <set>
1415
#include <ur_api.h>
1516

1617
#include "common.hpp"
@@ -19,40 +20,131 @@
1920

2021
namespace native_cpu {
2122
struct usm_alloc_info {
22-
const ur_usm_type_t type;
23+
ur_usm_type_t type;
2324
const void *base_ptr;
24-
const size_t size;
25-
const ur_device_handle_t device;
26-
const ur_usm_pool_handle_t pool;
27-
usm_alloc_info(ur_usm_type_t type, const void *base_ptr, size_t size,
28-
ur_device_handle_t device, ur_usm_pool_handle_t pool)
29-
: type(type), base_ptr(base_ptr), size(size), device(device), pool(pool) {
30-
}
25+
size_t size;
26+
ur_device_handle_t device;
27+
ur_usm_pool_handle_t pool;
28+
29+
// We store a pointer to the actual allocation because it is needed when
30+
// freeing memory.
31+
void *base_alloc_ptr;
32+
constexpr usm_alloc_info(ur_usm_type_t type, const void *base_ptr,
33+
size_t size, ur_device_handle_t device,
34+
ur_usm_pool_handle_t pool, void *base_alloc_ptr)
35+
: type(type), base_ptr(base_ptr), size(size), device(device), pool(pool),
36+
base_alloc_ptr(base_alloc_ptr) {}
3137
};
38+
39+
constexpr usm_alloc_info usm_alloc_info_null_entry(UR_USM_TYPE_UNKNOWN, nullptr,
40+
0, nullptr, nullptr,
41+
nullptr);
42+
43+
constexpr size_t alloc_header_size = sizeof(usm_alloc_info);
44+
45+
// Computes the padding that we need to add to ensure the
46+
// pointer returned by UR is aligned as the user requested.
47+
// Returns the number of filler bytes that, added to alloc_header_size, makes
// the sum a multiple of `alignment`. With a raw allocation that is itself
// aligned to `alignment`, the address `raw + alloc_header_size + padding`
// is then aligned as well.
//
// `alignment` is expected to be at least alignof(usm_alloc_info); this is
// only enforced by the assert. An alignment of 0 yields no padding.
static inline size_t get_padding(uint32_t alignment) {
  assert(alignment >= alignof(usm_alloc_info) &&
         "memory not aligned to usm_alloc_info");
  if (!alignment)
    return 0;
  // When alignment exceeds the header size the remainder is the header size
  // itself, so a single expression covers both the "small" and "large"
  // alignment cases.
  const size_t remainder = alloc_header_size % alignment;
  return remainder == 0 ? 0 : alignment - remainder;
}
60+
61+
// In order to satisfy the MemAllocInfo queries we allocate extra memory
62+
// for the native_cpu::usm_alloc_info struct.
63+
// To satisfy the alignment requirements we "pad" the memory
64+
// allocation so that the pointer returned to the user
65+
// always satisfies (ptr % align) == 0.
66+
// Allocates room for the usm_alloc_info header, the alignment padding and
// `size` user bytes, with the raw block aligned to `alignment`.
//
// Returns the raw block (not the user pointer), or nullptr when the
// allocation fails, when `alignment` is 0, or when the total size would
// overflow size_t. The block must be released with _aligned_free on MSVC
// and free elsewhere.
static inline void *malloc_impl(uint32_t alignment, size_t size) {
  assert(alignment >= alignof(usm_alloc_info) &&
         "memory not aligned to usm_alloc_info");
  // Guard the modulo below in builds where the assert is compiled out.
  if (alignment == 0)
    return nullptr;

  const size_t overhead = alloc_header_size + get_padding(alignment);
  if (size > SIZE_MAX - overhead)
    return nullptr;
  size_t total = overhead + size;

#ifdef _MSC_VER
  return _aligned_malloc(total, alignment);
#else
  // aligned_alloc is only guaranteed to succeed when the requested size is
  // an integral multiple of the alignment, so round the total up.
  const size_t remainder = total % alignment;
  if (remainder != 0) {
    const size_t fill = alignment - remainder;
    if (total > SIZE_MAX - fill)
      return nullptr;
    total += fill;
  }
  return std::aligned_alloc(alignment, total);
#endif
}
80+
81+
// The info struct is retrieved by subtracting its size from the pointer
82+
// returned to the user.
83+
static inline uint8_t *get_alloc_info_addr(const void *ptr) {
84+
return (uint8_t *)const_cast<void *>(ptr) - alloc_header_size;
85+
}
86+
87+
// Returns a copy of the usm_alloc_info read from the address computed by
// get_alloc_info_addr(ptr).
static usm_alloc_info get_alloc_info(void *ptr) {
  const auto *header =
      reinterpret_cast<const usm_alloc_info *>(get_alloc_info_addr(ptr));
  return *header;
}
90+
3291
} // namespace native_cpu
3392

3493
struct ur_context_handle_t_ : RefCounted {
3594
ur_context_handle_t_(ur_device_handle_t_ *phDevices) : _device{phDevices} {}
3695

3796
ur_device_handle_t _device;
3897

39-
void add_alloc_info_entry(const void *ptr, ur_usm_type_t type, size_t size,
40-
ur_usm_pool_handle_t pool) {
41-
native_cpu::usm_alloc_info info(type, ptr, size, this->_device, pool);
42-
alloc_info.insert(std::make_pair(ptr, info));
98+
// Frees the allocation whose user pointer is `ptr` and stops tracking it.
//
// Returns UR_RESULT_ERROR_INVALID_MEM_OBJECT when `ptr` is not in
// `allocations` or its header reports UR_USM_TYPE_UNKNOWN; otherwise
// UR_RESULT_SUCCESS. Holds alloc_mutex for the whole operation.
ur_result_t remove_alloc(void *ptr) {
  std::lock_guard<std::mutex> lock(alloc_mutex);
  // Only pointers handed out by add_alloc have a header in front of them,
  // so confirm the pointer is tracked before touching that memory.
  const auto tracked = allocations.find(ptr);
  if (tracked == allocations.end())
    return UR_RESULT_ERROR_INVALID_MEM_OBJECT;

  const native_cpu::usm_alloc_info info = native_cpu::get_alloc_info(ptr);
  UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN,
            UR_RESULT_ERROR_INVALID_MEM_OBJECT);

  // The header lives inside the block being released; take the base pointer
  // out before freeing.
  void *const base = info.base_alloc_ptr;
#ifdef _MSC_VER
  _aligned_free(base);
#else
  free(base);
#endif
  allocations.erase(tracked);
  return UR_RESULT_SUCCESS;
}
44111

45-
native_cpu::usm_alloc_info get_alloc_info_entry(const void *ptr) const {
46-
auto it = alloc_info.find(ptr);
47-
if (it == alloc_info.end()) {
48-
return native_cpu::usm_alloc_info(UR_USM_TYPE_UNKNOWN, ptr, 0, nullptr,
49-
nullptr);
112+
const native_cpu::usm_alloc_info &
113+
get_alloc_info_entry(const void *ptr) const {
114+
auto it = allocations.find(ptr);
115+
if (it == allocations.end()) {
116+
return native_cpu::usm_alloc_info_null_entry;
50117
}
51-
return it->second;
118+
119+
return *(native_cpu::usm_alloc_info *)native_cpu::get_alloc_info_addr(ptr);
52120
}
53121

54-
void remove_alloc_info_entry(void *ptr) { alloc_info.erase(ptr); }
122+
void *add_alloc(uint32_t alignment, ur_usm_type_t type, size_t size,
123+
ur_usm_pool_handle_t pool) {
124+
std::lock_guard<std::mutex> lock(alloc_mutex);
125+
// We need to ensure that we align to at least alignof(usm_alloc_info),
126+
// otherwise its start address may be unaligned.
127+
alignment =
128+
std::max<size_t>(alignment, alignof(native_cpu::usm_alloc_info));
129+
void *alloc = native_cpu::malloc_impl(alignment, size);
130+
if (!alloc)
131+
return nullptr;
132+
// Compute the address of the pointer that we'll return to the user.
133+
void *ptr = native_cpu::alloc_header_size +
134+
native_cpu::get_padding(alignment) + (uint8_t *)alloc;
135+
uint8_t *info_addr = native_cpu::get_alloc_info_addr(ptr);
136+
if (!info_addr)
137+
return nullptr;
138+
// Do a placement new of the alloc_info to avoid allocation and copy
139+
auto info = new (info_addr)
140+
native_cpu::usm_alloc_info(type, ptr, size, this->_device, pool, alloc);
141+
if (!info)
142+
return nullptr;
143+
allocations.insert(ptr);
144+
return ptr;
145+
}
55146

56147
private:
57-
std::unordered_map<const void *, native_cpu::usm_alloc_info> alloc_info;
148+
std::mutex alloc_mutex;
149+
std::set<const void *> allocations;
58150
};

source/adapters/native_cpu/usm.cpp

Lines changed: 25 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,20 @@
1717

1818
namespace native_cpu {
1919

20-
static void *malloc_impl(uint32_t alignment, size_t size) {
21-
void *ptr = nullptr;
22-
if (alignment) {
23-
ptr = std::aligned_alloc(alignment, size);
24-
} else {
25-
ptr = malloc(size);
26-
}
27-
return ptr;
20+
// Shared implementation for the USM allocation entry points: validates the
// request, asks the context for the memory, and stores the result in *ppMem.
//
// Checks are made in this order: alignment must have at most one bit set
// (UR_RESULT_ERROR_INVALID_VALUE), ppMem must be non-null
// (UR_RESULT_ERROR_INVALID_NULL_POINTER), size must be non-zero
// (UR_RESULT_ERROR_INVALID_USM_SIZE). A failed allocation reports
// UR_RESULT_ERROR_OUT_OF_RESOURCES.
static ur_result_t alloc_helper(ur_context_handle_t hContext,
                                const ur_usm_desc_t *pUSMDesc, size_t size,
                                void **ppMem, ur_usm_type_t type) {
  // Without a descriptor, fall back to an alignment of 1.
  const auto alignment = pUSMDesc ? pUSMDesc->align : 1u;
  UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE);
  UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
  // TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
  UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);

  void *const user_ptr = hContext->add_alloc(alignment, type, size, nullptr);
  UR_ASSERT(user_ptr != nullptr, UR_RESULT_ERROR_OUT_OF_RESOURCES);

  *ppMem = user_ptr;
  return UR_RESULT_SUCCESS;
}
2935

3036
} // namespace native_cpu
@@ -34,18 +40,8 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
3440
ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
3541
std::ignore = pool;
3642

37-
auto alignment = pUSMDesc ? pUSMDesc->align : 0u;
38-
UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE);
39-
UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
40-
// TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
41-
UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);
42-
43-
void *ptr = native_cpu::malloc_impl(alignment, size);
44-
UR_ASSERT(ptr != nullptr, UR_RESULT_ERROR_OUT_OF_HOST_MEMORY);
45-
hContext->add_alloc_info_entry(ptr, UR_USM_TYPE_HOST, size, nullptr);
46-
*ppMem = ptr;
47-
48-
return UR_RESULT_SUCCESS;
43+
return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
44+
UR_USM_TYPE_HOST);
4945
}
5046

5147
UR_APIEXPORT ur_result_t UR_APICALL
@@ -55,18 +51,8 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
5551
std::ignore = hDevice;
5652
std::ignore = pool;
5753

58-
auto alignment = pUSMDesc ? pUSMDesc->align : 0u;
59-
UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE);
60-
UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
61-
// TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
62-
UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);
63-
64-
void *ptr = native_cpu::malloc_impl(alignment, size);
65-
UR_ASSERT(ptr != nullptr, UR_RESULT_ERROR_OUT_OF_RESOURCES);
66-
*ppMem = ptr;
67-
hContext->add_alloc_info_entry(ptr, UR_USM_TYPE_DEVICE, size, nullptr);
68-
69-
return UR_RESULT_SUCCESS;
54+
return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
55+
UR_USM_TYPE_DEVICE);
7056
}
7157

7258
UR_APIEXPORT ur_result_t UR_APICALL
@@ -76,29 +62,19 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
7662
std::ignore = hDevice;
7763
std::ignore = pool;
7864

79-
auto alignment = pUSMDesc ? pUSMDesc->align : 0u;
80-
UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE);
81-
UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
82-
// TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
83-
UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);
84-
85-
void *ptr = native_cpu::malloc_impl(alignment, size);
86-
UR_ASSERT(ptr != nullptr, UR_RESULT_ERROR_OUT_OF_HOST_MEMORY);
87-
*ppMem = ptr;
88-
hContext->add_alloc_info_entry(ptr, UR_USM_TYPE_SHARED, size, nullptr);
89-
90-
return UR_RESULT_SUCCESS;
65+
return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
66+
UR_USM_TYPE_SHARED);
9167
}
9268

9369
UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
9470
void *pMem) {
9571

9672
UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
73+
UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_POINTER);
9774

98-
hContext->remove_alloc_info_entry(pMem);
99-
free(pMem);
75+
auto res = hContext->remove_alloc(pMem);
10076

101-
return UR_RESULT_SUCCESS;
77+
return res;
10278
}
10379

10480
UR_APIEXPORT ur_result_t UR_APICALL
@@ -113,7 +89,8 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
11389
DIE_NO_IMPLEMENTATION;
11490
}
11591

116-
native_cpu::usm_alloc_info alloc_info = hContext->get_alloc_info_entry(pMem);
92+
const native_cpu::usm_alloc_info &alloc_info =
93+
hContext->get_alloc_info_entry(pMem);
11794
switch (propName) {
11895
case UR_USM_ALLOC_INFO_TYPE:
11996
return ReturnValue(alloc_info.type);
@@ -123,7 +100,6 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
123100
return ReturnValue(alloc_info.device);
124101
case UR_USM_ALLOC_INFO_POOL:
125102
return ReturnValue(alloc_info.pool);
126-
;
127103
default:
128104
DIE_NO_IMPLEMENTATION;
129105
}

0 commit comments

Comments
 (0)