Skip to content

Commit 9c59759

Browse files
authored
Merge pull request #408 from kswiecicki/memspace-hbw
Add memspace "highest bandwidth"
2 parents 3c2513c + c69bed7 commit 9c59759

19 files changed

+608
-109
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,11 @@ using umfMemspaceHostAllGet.
223223
Memspace backed by all available NUMA nodes discovered on the platform sorted by capacity.
224224
Can be retrieved using umfMemspaceHighestCapacityGet.
225225

226+
#### Highest bandwidth memspace
227+
228+
Memspace backed by the aggregated list of NUMA nodes that offer the highest bandwidth when each available NUMA node is selected, in turn, as the initiator.
229+
Querying the bandwidth value requires HMAT support on the platform. Calling `umfMemspaceHighestBandwidthGet()` will return NULL if it's not supported.
230+
226231
### Proxy library
227232

228233
UMF provides the UMF proxy library (`umf_proxy`) that makes it possible

include/umf/memspace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ umf_memspace_handle_t umfMemspaceHostAllGet(void);
5656
///
5757
umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);
5858

59+
/// \brief Retrieves predefined highest bandwidth memspace.
60+
/// \return highest bandwidth memspace handle on success or NULL on failure.
61+
///
62+
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);
63+
5964
#ifdef __cplusplus
6065
}
6166
#endif

scripts/qemu/run-build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ make -j $(nproc)
3939
echo password | sudo sync;
4040
echo password | sudo sh -c "/usr/bin/echo 3 > /proc/sys/vm/drop_caches"
4141

42-
ctest --output-on-failure
42+
ctest --verbose
4343

4444
# run tests bound to a numa node
4545
numactl -N 0 ctest --output-on-failure

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
8282
memory_targets/memory_target_numa.c
8383
memspaces/memspace_numa.c
8484
memspaces/memspace_host_all.c
85-
memspaces/memspace_highest_capacity.c)
85+
memspaces/memspace_highest_capacity.c
86+
memspaces/memspace_highest_bandwidth.c)
8687

8788
set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
8889
provider/provider_os_memory_linux.c)

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ UMF_1.0 {
3030
umfMemoryProviderPutIPCHandle;
3131
umfMemspaceCreateFromNumaArray;
3232
umfMemspaceDestroy;
33+
umfMemspaceHighestBandwidthGet;
3334
umfMemspaceHighestCapacityGet;
3435
umfMemspaceHostAllGet;
3536
umfOpenIPCHandle;

src/libumf_linux.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void __attribute__((destructor)) umfDestroy(void) {
2929
umfMemoryTrackerDestroy(t);
3030
umfMemspaceHostAllDestroy();
3131
umfMemspaceHighestCapacityDestroy();
32+
umfMemspaceHighestBandwidthDestroy();
3233
umfDestroyTopology();
3334
}
3435

src/memory_target.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,21 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,
7979

8080
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
8181
size_t *capacity) {
82-
assert(memoryTarget);
83-
assert(capacity);
82+
if (!memoryTarget || !capacity) {
83+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
84+
}
85+
8486
return memoryTarget->ops->get_capacity(memoryTarget->priv, capacity);
8587
}
88+
89+
umf_result_t
90+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
91+
umf_memory_target_handle_t dstMemoryTarget,
92+
size_t *bandwidth) {
93+
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
94+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
95+
}
96+
97+
return srcMemoryTarget->ops->get_bandwidth(
98+
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
99+
}

src/memory_target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,
3737
umf_memory_target_handle_t *outHandle);
3838
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
3939
size_t *capacity);
40+
umf_result_t
41+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
42+
umf_memory_target_handle_t dstMemoryTarget,
43+
size_t *bandwidth);
4044

4145
#ifdef __cplusplus
4246
}

src/memory_target_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t {
4141
umf_memory_provider_handle_t *provider);
4242

4343
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
44+
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
45+
size_t *bandwidth);
4446
} umf_memory_target_ops_t;
4547

4648
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "base_alloc_global.h"
2020
#include "memory_target_numa.h"
2121
#include "topology.h"
22+
#include "utils_log.h"
2223

2324
struct numa_memory_target_t {
2425
unsigned physical_id;
@@ -124,6 +125,10 @@ static umf_result_t numa_clone(void *memTarget, void **outMemTarget) {
124125
}
125126

126127
static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
128+
if (!memTarget || !capacity) {
129+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
130+
}
131+
127132
hwloc_topology_t topology = umfGetTopology();
128133
if (!topology) {
129134
return UMF_RESULT_ERROR_NOT_SUPPORTED;
@@ -143,12 +148,61 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
143148
return UMF_RESULT_SUCCESS;
144149
}
145150

151+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
152+
void *dstMemoryTarget,
153+
size_t *bandwidth) {
154+
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
155+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
156+
}
157+
158+
hwloc_topology_t topology = umfGetTopology();
159+
if (!topology) {
160+
return UMF_RESULT_ERROR_NOT_SUPPORTED;
161+
}
162+
163+
hwloc_obj_t srcNumaNode = hwloc_get_obj_by_type(
164+
topology, HWLOC_OBJ_NUMANODE,
165+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id);
166+
if (!srcNumaNode) {
167+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
168+
}
169+
170+
hwloc_obj_t dstNumaNode = hwloc_get_obj_by_type(
171+
topology, HWLOC_OBJ_NUMANODE,
172+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
173+
if (!dstNumaNode) {
174+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
175+
}
176+
177+
// Given NUMA nodes aren't local, HWLOC returns an error in such case.
178+
if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
179+
*bandwidth = 0;
180+
return UMF_RESULT_SUCCESS;
181+
}
182+
183+
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
184+
.type = HWLOC_LOCATION_TYPE_CPUSET};
185+
hwloc_uint64_t value = 0;
186+
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
187+
dstNumaNode, &initiator, 0, &value);
188+
if (ret) {
189+
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
190+
srcNumaNode->os_index, dstNumaNode->os_index);
191+
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
192+
: UMF_RESULT_ERROR_UNKNOWN;
193+
}
194+
195+
*bandwidth = value;
196+
return UMF_RESULT_SUCCESS;
197+
}
198+
146199
struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
147200
.version = UMF_VERSION_CURRENT,
148201
.initialize = numa_initialize,
149202
.finalize = numa_finalize,
150203
.pool_create_from_memspace = numa_pool_create_from_memspace,
151204
.clone = numa_clone,
152205
.get_capacity = numa_get_capacity,
206+
.get_bandwidth = numa_get_bandwidth,
153207
.memory_provider_create_from_memspace =
154208
numa_memory_provider_create_from_memspace};

src/memspace.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,85 @@ umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
208208

209209
return UMF_RESULT_SUCCESS;
210210
}
211+
212+
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
213+
umfGetTargetFn getTarget,
214+
umf_memspace_handle_t *filteredMemspace) {
215+
if (!hMemspace || !getTarget) {
216+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
217+
}
218+
219+
umf_memory_target_handle_t *uniqueBestNodes =
220+
umf_ba_global_alloc(hMemspace->size * sizeof(*uniqueBestNodes));
221+
if (!uniqueBestNodes) {
222+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
223+
}
224+
225+
umf_result_t ret = UMF_RESULT_SUCCESS;
226+
227+
size_t numUniqueBestNodes = 0;
228+
for (size_t nodeIdx = 0; nodeIdx < hMemspace->size; nodeIdx++) {
229+
umf_memory_target_handle_t target = NULL;
230+
ret = getTarget(hMemspace->nodes[nodeIdx], hMemspace->nodes,
231+
hMemspace->size, &target);
232+
if (ret != UMF_RESULT_SUCCESS) {
233+
goto err_free_best_targets;
234+
}
235+
236+
// check if the target is already present in the best nodes
237+
size_t bestTargetIdx;
238+
for (bestTargetIdx = 0; bestTargetIdx < numUniqueBestNodes;
239+
bestTargetIdx++) {
240+
if (uniqueBestNodes[bestTargetIdx] == target) {
241+
break;
242+
}
243+
}
244+
245+
// if the target is not present, add it to the best nodes
246+
if (bestTargetIdx == numUniqueBestNodes) {
247+
uniqueBestNodes[numUniqueBestNodes++] = target;
248+
}
249+
}
250+
251+
// copy the unique best nodes into a new memspace
252+
umf_memspace_handle_t newMemspace =
253+
umf_ba_global_alloc(sizeof(*newMemspace));
254+
if (!newMemspace) {
255+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
256+
goto err_free_best_targets;
257+
}
258+
259+
newMemspace->size = numUniqueBestNodes;
260+
newMemspace->nodes =
261+
umf_ba_global_alloc(sizeof(*newMemspace->nodes) * newMemspace->size);
262+
if (!newMemspace->nodes) {
263+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
264+
goto err_free_new_memspace;
265+
}
266+
267+
size_t cloneIdx = 0;
268+
for (size_t cloneIdx = 0; cloneIdx < newMemspace->size; cloneIdx++) {
269+
ret = umfMemoryTargetClone(uniqueBestNodes[cloneIdx],
270+
&newMemspace->nodes[cloneIdx]);
271+
if (ret != UMF_RESULT_SUCCESS) {
272+
goto err_free_cloned_nodes;
273+
}
274+
}
275+
276+
*filteredMemspace = newMemspace;
277+
umf_ba_global_free(uniqueBestNodes);
278+
279+
return UMF_RESULT_SUCCESS;
280+
281+
err_free_cloned_nodes:
282+
while (cloneIdx != 0) {
283+
cloneIdx--;
284+
umfMemoryTargetDestroy(newMemspace->nodes[cloneIdx]);
285+
}
286+
umf_ba_global_free(newMemspace->nodes);
287+
err_free_new_memspace:
288+
umf_ba_global_free(newMemspace);
289+
err_free_best_targets:
290+
umf_ba_global_free(uniqueBestNodes);
291+
return ret;
292+
}

src/memspace_internal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t,
3939
umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
4040
umfGetPropertyFn getProperty);
4141

42+
typedef umf_result_t (*umfGetTargetFn)(umf_memory_target_handle_t initiator,
43+
umf_memory_target_handle_t *nodes,
44+
size_t numNodes,
45+
umf_memory_target_handle_t *target);
46+
47+
///
48+
/// \brief Creates a new memspace from the unique targets that getTarget() selects for each node of hMemspace
49+
///
50+
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
51+
umfGetTargetFn getTarget,
52+
umf_memspace_handle_t *filteredMemspace);
53+
4254
///
4355
/// \brief Destroys memspace
4456
/// \param hMemspace handle to memspace
@@ -47,6 +59,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);
4759

4860
void umfMemspaceHostAllDestroy(void);
4961
void umfMemspaceHighestCapacityDestroy(void);
62+
void umfMemspaceHighestBandwidthDestroy(void);
5063

5164
#ifdef __cplusplus
5265
}

0 commit comments

Comments
 (0)