Skip to content

Commit a89657c

Browse files
Merge pull request #1699 from PietroGhg/pietro/usm_fixes
[NATIVECPU] Implement urUSMGetMemAllocInfo and aligned alloc
2 parents 2355a7d + be7057c commit a89657c

File tree

2 files changed

+172
-36
lines changed

2 files changed

+172
-36
lines changed

source/adapters/native_cpu/context.hpp

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,141 @@
1010

1111
#pragma once
1212

13+
#include <mutex>
14+
#include <set>
1315
#include <ur_api.h>
1416

1517
#include "common.hpp"
1618
#include "device.hpp"
19+
#include "ur/ur.hpp"
20+
21+
namespace native_cpu {
22+
struct usm_alloc_info {
23+
ur_usm_type_t type;
24+
const void *base_ptr;
25+
size_t size;
26+
ur_device_handle_t device;
27+
ur_usm_pool_handle_t pool;
28+
29+
// We store a pointer to the actual allocation because it is needed when
30+
// freeing memory.
31+
void *base_alloc_ptr;
32+
constexpr usm_alloc_info(ur_usm_type_t type, const void *base_ptr,
33+
size_t size, ur_device_handle_t device,
34+
ur_usm_pool_handle_t pool, void *base_alloc_ptr)
35+
: type(type), base_ptr(base_ptr), size(size), device(device), pool(pool),
36+
base_alloc_ptr(base_alloc_ptr) {}
37+
};
38+
39+
constexpr usm_alloc_info usm_alloc_info_null_entry(UR_USM_TYPE_UNKNOWN, nullptr,
40+
0, nullptr, nullptr,
41+
nullptr);
42+
43+
constexpr size_t alloc_header_size = sizeof(usm_alloc_info);
44+
45+
// Computes the padding that we need to add to ensure the
46+
// pointer returned by UR is aligned as the user requested.
47+
static size_t get_padding(uint32_t alignment) {
48+
assert(alignment >= alignof(usm_alloc_info) &&
49+
"memory not aligned to usm_alloc_info");
50+
if (!alignment || alloc_header_size % alignment == 0)
51+
return 0;
52+
size_t padd = 0;
53+
if (alignment <= alloc_header_size) {
54+
padd = alignment - (alloc_header_size % alignment);
55+
} else {
56+
padd = alignment - alloc_header_size;
57+
}
58+
return padd;
59+
}
60+
61+
// In order to satisfy the MemAllocInfo queries we allocate extra memory
62+
// for the native_cpu::usm_alloc_info struct.
63+
// To satisfy the alignment requirements we "pad" the memory
64+
// allocation so that the pointer returned to the user
65+
// always satisfies (ptr % align) == 0.
66+
static inline void *malloc_impl(uint32_t alignment, size_t size) {
67+
void *ptr = nullptr;
68+
assert(alignment >= alignof(usm_alloc_info) &&
69+
"memory not aligned to usm_alloc_info");
70+
#ifdef _MSC_VER
71+
ptr = _aligned_malloc(alloc_header_size + get_padding(alignment) + size,
72+
alignment);
73+
74+
#else
75+
ptr = std::aligned_alloc(alignment,
76+
alloc_header_size + get_padding(alignment) + size);
77+
#endif
78+
return ptr;
79+
}
80+
81+
// The info struct is retrieved by subtracting its size from the pointer
82+
// returned to the user.
83+
static inline uint8_t *get_alloc_info_addr(const void *ptr) {
84+
return (uint8_t *)const_cast<void *>(ptr) - alloc_header_size;
85+
}
86+
87+
// Returns a copy of the usm_alloc_info header stored in front of `ptr`.
// `ptr` must be a pointer for which such a header exists; no validation is
// performed here.
static usm_alloc_info get_alloc_info(void *ptr) {
  const auto *header =
      reinterpret_cast<const usm_alloc_info *>(get_alloc_info_addr(ptr));
  return *header;
}
90+
91+
} // namespace native_cpu
1792

1893
struct ur_context_handle_t_ : RefCounted {
1994
ur_context_handle_t_(ur_device_handle_t_ *phDevices) : _device{phDevices} {}
2095

2196
ur_device_handle_t _device;
97+
98+
ur_result_t remove_alloc(void *ptr) {
99+
std::lock_guard<std::mutex> lock(alloc_mutex);
100+
const native_cpu::usm_alloc_info &info = native_cpu::get_alloc_info(ptr);
101+
UR_ASSERT(info.type != UR_USM_TYPE_UNKNOWN,
102+
UR_RESULT_ERROR_INVALID_MEM_OBJECT);
103+
#ifdef _MSC_VER
104+
_aligned_free(info.base_alloc_ptr);
105+
#else
106+
free(info.base_alloc_ptr);
107+
#endif
108+
allocations.erase(ptr);
109+
return UR_RESULT_SUCCESS;
110+
}
111+
112+
const native_cpu::usm_alloc_info &
113+
get_alloc_info_entry(const void *ptr) const {
114+
auto it = allocations.find(ptr);
115+
if (it == allocations.end()) {
116+
return native_cpu::usm_alloc_info_null_entry;
117+
}
118+
119+
return *(native_cpu::usm_alloc_info *)native_cpu::get_alloc_info_addr(ptr);
120+
}
121+
122+
void *add_alloc(uint32_t alignment, ur_usm_type_t type, size_t size,
123+
ur_usm_pool_handle_t pool) {
124+
std::lock_guard<std::mutex> lock(alloc_mutex);
125+
// We need to ensure that we align to at least alignof(usm_alloc_info),
126+
// otherwise its start address may be unaligned.
127+
alignment =
128+
std::max<size_t>(alignment, alignof(native_cpu::usm_alloc_info));
129+
void *alloc = native_cpu::malloc_impl(alignment, size);
130+
if (!alloc)
131+
return nullptr;
132+
// Compute the address of the pointer that we'll return to the user.
133+
void *ptr = native_cpu::alloc_header_size +
134+
native_cpu::get_padding(alignment) + (uint8_t *)alloc;
135+
uint8_t *info_addr = native_cpu::get_alloc_info_addr(ptr);
136+
if (!info_addr)
137+
return nullptr;
138+
// Do a placement new of the alloc_info to avoid allocation and copy
139+
auto info = new (info_addr)
140+
native_cpu::usm_alloc_info(type, ptr, size, this->_device, pool, alloc);
141+
if (!info)
142+
return nullptr;
143+
allocations.insert(ptr);
144+
return ptr;
145+
}
146+
147+
private:
148+
std::mutex alloc_mutex;
149+
std::set<const void *> allocations;
22150
};

source/adapters/native_cpu/usm.cpp

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,90 +8,98 @@
88
//
99
//===----------------------------------------------------------------------===//
1010

11+
#include "ur/ur.hpp"
1112
#include "ur_api.h"
1213

1314
#include "common.hpp"
15+
#include "context.hpp"
16+
#include <cstdlib>
1417

15-
UR_APIEXPORT ur_result_t UR_APICALL
16-
urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
17-
ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
18-
std::ignore = hContext;
19-
std::ignore = pUSMDesc;
20-
std::ignore = pool;
18+
namespace native_cpu {
2119

20+
// Shared implementation of the host/device/shared USM allocation entry
// points.
//
// hContext: context that will own and track the allocation.
// pUSMDesc: optional descriptor; only its `align` member is read. When null
//           an alignment of 1 is requested.
// size:     number of bytes to allocate; must be greater than zero.
// ppMem:    receives the pointer to the new allocation on success.
// type:     USM type recorded for the allocation.
//
// Returns UR_RESULT_SUCCESS on success, otherwise:
//   UR_RESULT_ERROR_INVALID_NULL_HANDLE  - hContext is null
//   UR_RESULT_ERROR_INVALID_VALUE        - alignment is not a power of two
//   UR_RESULT_ERROR_INVALID_NULL_POINTER - ppMem is null
//   UR_RESULT_ERROR_INVALID_USM_SIZE     - size is zero
//   UR_RESULT_ERROR_OUT_OF_RESOURCES     - the allocation failed
static ur_result_t alloc_helper(ur_context_handle_t hContext,
                                const ur_usm_desc_t *pUSMDesc, size_t size,
                                void **ppMem, ur_usm_type_t type) {
  // The context is dereferenced below, so reject a null handle up front.
  UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_HANDLE);
  auto alignment = pUSMDesc ? pUSMDesc->align : 1u;
  // x & (x - 1) clears the lowest set bit; the result is zero only for
  // powers of two (and for zero).
  UR_ASSERT((alignment & (alignment - 1)) == 0, UR_RESULT_ERROR_INVALID_VALUE);
  UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
  // TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
  UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);

  auto *ptr = hContext->add_alloc(alignment, type, size, nullptr);
  UR_ASSERT(ptr != nullptr, UR_RESULT_ERROR_OUT_OF_RESOURCES);
  *ppMem = ptr;

  return UR_RESULT_SUCCESS;
}
3035

36+
} // namespace native_cpu
37+
38+
// Allocates host USM memory: delegates to native_cpu::alloc_helper, tagging
// the allocation as UR_USM_TYPE_HOST. The `pool` argument is not used.
UR_APIEXPORT ur_result_t UR_APICALL
urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc,
               ur_usm_pool_handle_t pool, size_t size, void **ppMem) {
  std::ignore = pool;

  const ur_usm_type_t allocKind = UR_USM_TYPE_HOST;
  return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem, allocKind);
}
46+
3147
// Allocates device USM memory. In the post-change code (lines whose diff
// marker ends in '+' or carries two numbers) hDevice and pool are ignored and
// the request is forwarded to native_cpu::alloc_helper with
// UR_USM_TYPE_DEVICE. Lines whose marker ends in '-' appear to be the
// pre-change implementation that this commit removes.
UR_APIEXPORT ur_result_t UR_APICALL
3248
urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
3349
const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool,
3450
size_t size, void **ppMem) {
35-
std::ignore = hContext;
3651
std::ignore = hDevice;
37-
std::ignore = pUSMDesc;
3852
std::ignore = pool;
3953

40-
UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
41-
// TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
42-
UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);
43-
44-
*ppMem = malloc(size);
45-
46-
return UR_RESULT_SUCCESS;
54+
// Argument validation and the allocation itself happen in alloc_helper.
return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
55+
UR_USM_TYPE_DEVICE);
4756
}
4857

4958
// Allocates shared USM memory. In the post-change code (lines whose diff
// marker ends in '+' or carries two numbers) hDevice and pool are ignored and
// the request is forwarded to native_cpu::alloc_helper with
// UR_USM_TYPE_SHARED. Lines whose marker ends in '-' appear to be the
// pre-change implementation that this commit removes.
UR_APIEXPORT ur_result_t UR_APICALL
5059
urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice,
5160
const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t pool,
5261
size_t size, void **ppMem) {
53-
std::ignore = hContext;
5462
std::ignore = hDevice;
55-
std::ignore = pUSMDesc;
5663
std::ignore = pool;
5764

58-
UR_ASSERT(ppMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
59-
// TODO: Check Max size when UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE is implemented
60-
UR_ASSERT(size > 0, UR_RESULT_ERROR_INVALID_USM_SIZE);
61-
62-
*ppMem = malloc(size);
63-
64-
return UR_RESULT_SUCCESS;
65+
// Argument validation and the allocation itself happen in alloc_helper.
return native_cpu::alloc_helper(hContext, pUSMDesc, size, ppMem,
66+
UR_USM_TYPE_SHARED);
6567
}
6668

6769
// Frees a USM allocation. In the post-change code (lines whose diff marker
// ends in '+' or carries two numbers) both pMem and hContext are rejected
// with UR_RESULT_ERROR_INVALID_NULL_POINTER when null, and the release is
// delegated to hContext->remove_alloc, whose result is returned. Lines whose
// marker ends in '-' appear to be the pre-change implementation that this
// commit removes.
UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext,
6870
void *pMem) {
69-
std::ignore = hContext;
7071

7172
UR_ASSERT(pMem, UR_RESULT_ERROR_INVALID_NULL_POINTER);
73+
UR_ASSERT(hContext, UR_RESULT_ERROR_INVALID_NULL_POINTER);
7274

73-
free(pMem);
75+
// The context recovers the underlying block from pMem and frees it.
auto res = hContext->remove_alloc(pMem);
7476

75-
return UR_RESULT_SUCCESS;
77+
return res;
7678
}
7779

7880
UR_APIEXPORT ur_result_t UR_APICALL
7981
urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem,
8082
ur_usm_alloc_info_t propName, size_t propSize,
8183
void *pPropValue, size_t *pPropSizeRet) {
82-
std::ignore = hContext;
83-
std::ignore = pMem;
84-
std::ignore = propName;
85-
std::ignore = propSize;
86-
std::ignore = pPropValue;
87-
std::ignore = pPropSizeRet;
8884

85+
UR_ASSERT(pMem != nullptr, UR_RESULT_ERROR_INVALID_NULL_POINTER);
8986
UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet);
87+
if (propName == UR_USM_ALLOC_INFO_BASE_PTR) {
88+
// TODO: logic to compute base ptr given ptr
89+
DIE_NO_IMPLEMENTATION;
90+
}
9091

92+
const native_cpu::usm_alloc_info &alloc_info =
93+
hContext->get_alloc_info_entry(pMem);
9194
switch (propName) {
9295
case UR_USM_ALLOC_INFO_TYPE:
93-
// Todo implement this in context
94-
return ReturnValue(UR_USM_TYPE_DEVICE);
96+
return ReturnValue(alloc_info.type);
97+
case UR_USM_ALLOC_INFO_SIZE:
98+
return ReturnValue(alloc_info.size);
99+
case UR_USM_ALLOC_INFO_DEVICE:
100+
return ReturnValue(alloc_info.device);
101+
case UR_USM_ALLOC_INFO_POOL:
102+
return ReturnValue(alloc_info.pool);
95103
default:
96104
DIE_NO_IMPLEMENTATION;
97105
}

0 commit comments

Comments
 (0)