Skip to content

Commit 670398b

Browse files
authored
Merge pull request #509 from kswiecicki/memspace-lat
Add memspace "lowest latency"
2 parents 34eb606 + 8f394d8 commit 670398b

20 files changed

+506
-210
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,11 @@ Can be retrieved using umfMemspaceHighestCapacityGet.
229229
Memspace backed by an aggregated list of NUMA nodes identified as highest bandwidth after selecting each available NUMA node as the initiator.
230230
Querying the bandwidth value requires HMAT support on the platform. Calling `umfMemspaceHighestBandwidthGet()` will return NULL if it's not supported.
231231

232+
#### Lowest latency memspace
233+
234+
Memspace backed by an aggregated list of NUMA nodes identified as having the lowest latency after selecting each available NUMA node as the initiator.
235+
Querying the latency value requires HMAT support on the platform. Calling `umfMemspaceLowestLatencyGet()` will return NULL if HMAT is not supported.
236+
232237
### Proxy library
233238

234239
UMF provides the UMF proxy library (`umf_proxy`) that makes it possible

include/umf/memspace.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,17 @@ umf_memspace_handle_t umfMemspaceHostAllGet(void);
5757
umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);
5858

5959
/// \brief Retrieves predefined highest bandwidth memspace.
60-
/// \return highest bandwidth memspace handle on success or NULL on failure.
60+
/// \return highest bandwidth memspace handle on success or NULL on
61+
/// failure (no HMAT support).
6162
///
6263
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);
6364

65+
/// \brief Retrieves predefined lowest latency memspace.
66+
/// \return lowest latency memspace handle on success or NULL on
67+
/// failure (no HMAT support).
68+
///
69+
umf_memspace_handle_t umfMemspaceLowestLatencyGet(void);
70+
6471
#ifdef __cplusplus
6572
}
6673
#endif

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
8585
memspaces/memspace_numa.c
8686
memspaces/memspace_host_all.c
8787
memspaces/memspace_highest_capacity.c
88-
memspaces/memspace_highest_bandwidth.c)
88+
memspaces/memspace_highest_bandwidth.c
89+
memspaces/memspace_lowest_latency.c)
8990

9091
set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
9192
provider/provider_os_memory_linux.c)

src/libumf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ void umfTearDown(void) {
3434
umfMemspaceHostAllDestroy();
3535
umfMemspaceHighestCapacityDestroy();
3636
umfMemspaceHighestBandwidthDestroy();
37+
umfMemspaceLowestLatencyDestroy();
3738
umfDestroyTopology();
3839
#endif
3940
// make sure TRACKER is not used after being destroyed

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ UMF_1.0 {
3838
umfMemspaceHighestBandwidthGet;
3939
umfMemspaceHighestCapacityGet;
4040
umfMemspaceHostAllGet;
41+
umfMemspaceLowestLatencyGet;
4142
umfOpenIPCHandle;
4243
umfOsMemoryProviderOps;
4344
umfPoolAlignedMalloc;

src/memory_target.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,15 @@ umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
9797
return srcMemoryTarget->ops->get_bandwidth(
9898
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
9999
}
100+
101+
umf_result_t
102+
umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget,
103+
umf_memory_target_handle_t dstMemoryTarget,
104+
size_t *latency) {
105+
if (!srcMemoryTarget || !dstMemoryTarget || !latency) {
106+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
107+
}
108+
109+
return srcMemoryTarget->ops->get_latency(srcMemoryTarget->priv,
110+
dstMemoryTarget->priv, latency);
111+
}

src/memory_target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ umf_result_t
3939
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
4040
umf_memory_target_handle_t dstMemoryTarget,
4141
size_t *bandwidth);
42+
umf_result_t
43+
umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget,
44+
umf_memory_target_handle_t dstMemoryTarget,
45+
size_t *latency);
4246

4347
#ifdef __cplusplus
4448
}

src/memory_target_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t {
4141
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
4242
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
4343
size_t *bandwidth);
44+
umf_result_t (*get_latency)(void *srcMemoryTarget, void *dstMemoryTarget,
45+
size_t *latency);
4446
} umf_memory_target_ops_t;
4547

4648
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -167,13 +167,26 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
167167
return UMF_RESULT_SUCCESS;
168168
}
169169

170-
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
171-
void *dstMemoryTarget,
172-
size_t *bandwidth) {
173-
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
174-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
170+
// Kinds of memory attributes that can be queried for a pair of NUMA nodes.
typedef enum memattr_type_t {
    MEMATTR_TYPE_BANDWIDTH,
    MEMATTR_TYPE_LATENCY
} memattr_type_t;

// Returns the worst possible value of the given memory attribute:
// 0 for bandwidth and SIZE_MAX for latency.
// Any other 'type' trips an assert and, when asserts are compiled out,
// yields 0.
static size_t memattr_get_worst_value(memattr_type_t type) {
    if (type == MEMATTR_TYPE_BANDWIDTH) {
        return 0;
    }

    if (type == MEMATTR_TYPE_LATENCY) {
        return SIZE_MAX;
    }

    assert(0); // Should not be reachable
    return 0;
}
176186

187+
static umf_result_t query_attribute_value(void *srcMemoryTarget,
188+
void *dstMemoryTarget, size_t *value,
189+
memattr_type_t type) {
177190
hwloc_topology_t topology = umfGetTopology();
178191
if (!topology) {
179192
return UMF_RESULT_ERROR_NOT_SUPPORTED;
@@ -195,23 +208,75 @@ static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
195208

196209
// Given NUMA nodes aren't local, HWLOC returns an error in such case.
197210
if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
198-
*bandwidth = 0;
211+
// Since we want to skip such query, we return the worst possible
212+
// value for given memory attribute.
213+
*value = memattr_get_worst_value(type);
199214
return UMF_RESULT_SUCCESS;
200215
}
201216

217+
enum hwloc_memattr_id_e hwlocMemAttrType = INT_MAX;
218+
switch (type) {
219+
case MEMATTR_TYPE_BANDWIDTH:
220+
hwlocMemAttrType = HWLOC_MEMATTR_ID_BANDWIDTH;
221+
break;
222+
case MEMATTR_TYPE_LATENCY:
223+
hwlocMemAttrType = HWLOC_MEMATTR_ID_LATENCY;
224+
break;
225+
default:
226+
assert(0); // Shouldn't be reachable.
227+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
228+
}
229+
202230
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
203231
.type = HWLOC_LOCATION_TYPE_CPUSET};
204-
hwloc_uint64_t value = 0;
205-
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
206-
dstNumaNode, &initiator, 0, &value);
232+
233+
hwloc_uint64_t memAttrValue = 0;
234+
int ret = hwloc_memattr_get_value(topology, hwlocMemAttrType, dstNumaNode,
235+
&initiator, 0, &memAttrValue);
207236
if (ret) {
208-
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
209-
srcNumaNode->os_index, dstNumaNode->os_index);
210237
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
211238
: UMF_RESULT_ERROR_UNKNOWN;
212239
}
213240

214-
*bandwidth = value;
241+
*value = memAttrValue;
242+
243+
return UMF_RESULT_SUCCESS;
244+
}
245+
246+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
247+
void *dstMemoryTarget,
248+
size_t *bandwidth) {
249+
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
250+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
251+
}
252+
253+
umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget,
254+
bandwidth, MEMATTR_TYPE_BANDWIDTH);
255+
if (ret) {
256+
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
257+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id,
258+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
259+
return ret;
260+
}
261+
262+
return UMF_RESULT_SUCCESS;
263+
}
264+
265+
static umf_result_t numa_get_latency(void *srcMemoryTarget,
266+
void *dstMemoryTarget, size_t *latency) {
267+
if (!srcMemoryTarget || !dstMemoryTarget || !latency) {
268+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
269+
}
270+
271+
umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget,
272+
latency, MEMATTR_TYPE_LATENCY);
273+
if (ret) {
274+
LOG_ERR("Retrieving latency for initiator node %u to node %u failed.",
275+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id,
276+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
277+
return ret;
278+
}
279+
215280
return UMF_RESULT_SUCCESS;
216281
}
217282

@@ -223,5 +288,6 @@ struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
223288
.clone = numa_clone,
224289
.get_capacity = numa_get_capacity,
225290
.get_bandwidth = numa_get_bandwidth,
291+
.get_latency = numa_get_latency,
226292
.memory_provider_create_from_memspace =
227293
numa_memory_provider_create_from_memspace};

src/memspace_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);
6060
void umfMemspaceHostAllDestroy(void);
6161
void umfMemspaceHighestCapacityDestroy(void);
6262
void umfMemspaceHighestBandwidthDestroy(void);
63+
void umfMemspaceLowestLatencyDestroy(void);
6364

6465
#ifdef __cplusplus
6566
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
6+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
*
8+
*/
9+
10+
#include <assert.h>
11+
#include <ctype.h>
12+
#include <hwloc.h>
13+
#include <stdlib.h>
14+
15+
#include "base_alloc_global.h"
16+
#include "memory_target_numa.h"
17+
#include "memspace_internal.h"
18+
#include "memspace_numa.h"
19+
#include "topology.h"
20+
#include "utils_common.h"
21+
#include "utils_concurrency.h"
22+
#include "utils_log.h"
23+
24+
// Selects, from 'nodes', the memory target with the lowest latency as seen
// from 'initiator' and stores it in '*target'.
//
// Parameters:
//   initiator - memory target used as the source of every latency query
//   nodes     - array of candidate memory targets
//   numNodes  - number of entries in 'nodes'; must be greater than 0
//   target    - [out] receives the selected memory target
//
// Returns UMF_RESULT_ERROR_INVALID_ARGUMENT when a pointer argument is NULL
// or 'numNodes' is 0, the first error reported by
// umfMemoryTargetGetLatency() if any query fails, and UMF_RESULT_SUCCESS
// otherwise.
//
// On ties the earliest candidate wins. If no candidate reports a latency
// below SIZE_MAX, the first candidate is selected.
static umf_result_t getBestLatencyTarget(umf_memory_target_handle_t initiator,
                                         umf_memory_target_handle_t *nodes,
                                         size_t numNodes,
                                         umf_memory_target_handle_t *target) {
    // Without this check an empty candidate list would make the final
    // 'nodes[bestNodeIdx]' read go out of bounds.
    if (!initiator || !nodes || !target || numNodes == 0) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    size_t bestNodeIdx = 0;
    size_t bestLatency = SIZE_MAX;
    for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) {
        size_t latency = SIZE_MAX;
        umf_result_t ret =
            umfMemoryTargetGetLatency(initiator, nodes[nodeIdx], &latency);
        if (ret) {
            return ret;
        }

        // Strict comparison keeps the first of several equally good nodes.
        if (latency < bestLatency) {
            bestNodeIdx = nodeIdx;
            bestLatency = latency;
        }
    }

    *target = nodes[bestNodeIdx];

    return UMF_RESULT_SUCCESS;
}
48+
49+
static umf_result_t
50+
umfMemspaceLowestLatencyCreate(umf_memspace_handle_t *hMemspace) {
51+
if (!hMemspace) {
52+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
53+
}
54+
55+
umf_memspace_handle_t hostAllMemspace = umfMemspaceHostAllGet();
56+
if (!hostAllMemspace) {
57+
return UMF_RESULT_ERROR_UNKNOWN;
58+
}
59+
60+
umf_memspace_handle_t lowLatencyMemspace = NULL;
61+
umf_result_t ret = umfMemspaceFilter(hostAllMemspace, getBestLatencyTarget,
62+
&lowLatencyMemspace);
63+
if (ret != UMF_RESULT_SUCCESS) {
64+
// HWLOC could possibly return an 'EINVAL' error, which in this context
65+
// means that the HMAT is unavailable and we can't obtain the
66+
// 'latency' value of any NUMA node.
67+
return ret;
68+
}
69+
70+
*hMemspace = lowLatencyMemspace;
71+
return UMF_RESULT_SUCCESS;
72+
}
73+
74+
static umf_memspace_handle_t UMF_MEMSPACE_LOWEST_LATENCY = NULL;
75+
static UTIL_ONCE_FLAG UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED =
76+
UTIL_ONCE_FLAG_INIT;
77+
78+
void umfMemspaceLowestLatencyDestroy(void) {
79+
if (UMF_MEMSPACE_LOWEST_LATENCY) {
80+
umfMemspaceDestroy(UMF_MEMSPACE_LOWEST_LATENCY);
81+
UMF_MEMSPACE_LOWEST_LATENCY = NULL;
82+
}
83+
}
84+
85+
static void umfMemspaceLowestLatencyInit(void) {
86+
umf_result_t ret =
87+
umfMemspaceLowestLatencyCreate(&UMF_MEMSPACE_LOWEST_LATENCY);
88+
if (ret != UMF_RESULT_SUCCESS) {
89+
LOG_ERR("Creating the lowest latency memspace failed with a %u error\n",
90+
ret);
91+
assert(ret == UMF_RESULT_ERROR_NOT_SUPPORTED);
92+
}
93+
94+
#if defined(_WIN32) && !defined(UMF_SHARED_LIBRARY)
95+
atexit(umfMemspaceLowestLatencyDestroy);
96+
#endif
97+
}
98+
99+
umf_memspace_handle_t umfMemspaceLowestLatencyGet(void) {
100+
util_init_once(&UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED,
101+
umfMemspaceLowestLatencyInit);
102+
return UMF_MEMSPACE_LOWEST_LATENCY;
103+
}

test/CMakeLists.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,23 +162,27 @@ if(LINUX) # OS-specific functions are implemented only for Linux now
162162
add_umf_test(
163163
NAME memspace_numa
164164
SRCS memspaces/memspace_numa.cpp
165-
LIBS ${LIBNUMA_LIBRARIES})
165+
LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES})
166166
add_umf_test(
167167
NAME provider_os_memory_config
168168
SRCS provider_os_memory_config.cpp
169-
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES})
169+
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES})
170170
add_umf_test(
171171
NAME memspace_host_all
172172
SRCS memspaces/memspace_host_all.cpp
173-
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES})
173+
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES})
174174
add_umf_test(
175175
NAME memspace_highest_capacity
176176
SRCS memspaces/memspace_highest_capacity.cpp
177-
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES})
177+
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES})
178178
add_umf_test(
179179
NAME memspace_highest_bandwidth
180180
SRCS memspaces/memspace_highest_bandwidth.cpp
181181
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES})
182+
add_umf_test(
183+
NAME memspace_lowest_latency
184+
SRCS memspaces/memspace_lowest_latency.cpp
185+
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES})
182186
add_umf_test(
183187
NAME mempolicy
184188
SRCS memspaces/mempolicy.cpp

0 commit comments

Comments
 (0)