Skip to content

Commit 0d4479b

Browse files
committed
Add memspace "lowest latency"
This memspace is composed analogously to the 'highest bandwidth' memspace, but it selects the NUMA nodes with the best (i.e. lowest) latency.
1 parent e5a15fc commit 0d4479b

10 files changed

+209
-13
lines changed

include/umf/memspace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);
6161
///
6262
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);
6363

64+
/// \brief Retrieves predefined lowest latency memspace.
65+
/// \return lowest latency memspace handle on success or NULL on failure.
66+
///
67+
umf_memspace_handle_t umfMemspaceLowestLatencyGet(void);
68+
6469
#ifdef __cplusplus
6570
}
6671
#endif

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
8484
memspaces/memspace_numa.c
8585
memspaces/memspace_host_all.c
8686
memspaces/memspace_highest_capacity.c
87-
memspaces/memspace_highest_bandwidth.c)
87+
memspaces/memspace_highest_bandwidth.c
88+
memspaces/memspace_lowest_latency.c)
8889

8990
set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
9091
provider/provider_os_memory_linux.c)

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ UMF_1.0 {
3636
umfMemspaceHighestBandwidthGet;
3737
umfMemspaceHighestCapacityGet;
3838
umfMemspaceHostAllGet;
39+
umfMemspaceLowestLatencyGet;
3940
umfOpenIPCHandle;
4041
umfOsMemoryProviderOps;
4142
umfPoolAlignedMalloc;

src/libumf_linux.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ void __attribute__((destructor)) umfDestroy(void) {
3030
umfMemspaceHostAllDestroy();
3131
umfMemspaceHighestCapacityDestroy();
3232
umfMemspaceHighestBandwidthDestroy();
33+
umfMemspaceLowestLatencyDestroy();
3334
umfDestroyTopology();
3435
}
3536

src/memory_target.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,15 @@ umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
9797
return srcMemoryTarget->ops->get_bandwidth(
9898
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
9999
}
100+
101+
umf_result_t
102+
umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget,
103+
umf_memory_target_handle_t dstMemoryTarget,
104+
size_t *latency) {
105+
if (!srcMemoryTarget || !dstMemoryTarget || !latency) {
106+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
107+
}
108+
109+
return srcMemoryTarget->ops->get_latency(srcMemoryTarget->priv,
110+
dstMemoryTarget->priv, latency);
111+
}

src/memory_target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ umf_result_t
3939
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
4040
umf_memory_target_handle_t dstMemoryTarget,
4141
size_t *bandwidth);
42+
umf_result_t
43+
umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget,
44+
umf_memory_target_handle_t dstMemoryTarget,
45+
size_t *latency);
4246

4347
#ifdef __cplusplus
4448
}

src/memory_target_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t {
4141
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
4242
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
4343
size_t *bandwidth);
44+
umf_result_t (*get_latency)(void *srcMemoryTarget, void *dstMemoryTarget,
45+
size_t *latency);
4446
} umf_memory_target_ops_t;
4547

4648
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 78 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -167,13 +167,26 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
167167
return UMF_RESULT_SUCCESS;
168168
}
169169

170-
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
171-
void *dstMemoryTarget,
172-
size_t *bandwidth) {
173-
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
174-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
170+
// Selects which memory attribute a query in this file refers to.
typedef enum memattr_type {
    MEMATTR_TYPE_BANDWIDTH,
    MEMATTR_TYPE_LATENCY
} memattr_type;

// Returns the least favourable value of the given attribute:
// 0 for bandwidth and SIZE_MAX for latency.
// An unknown attribute type trips an assertion (and yields 0 when
// assertions are compiled out).
static size_t memattr_get_worst_value(memattr_type type) {
    if (type == MEMATTR_TYPE_BANDWIDTH) {
        return 0;
    }

    if (type == MEMATTR_TYPE_LATENCY) {
        return SIZE_MAX;
    }

    assert(0); // Should not be reachable
    return 0;
}
176186

187+
static umf_result_t query_attribute_value(void *srcMemoryTarget,
188+
void *dstMemoryTarget, size_t *value,
189+
memattr_type type) {
177190
hwloc_topology_t topology = umfGetTopology();
178191
if (!topology) {
179192
return UMF_RESULT_ERROR_NOT_SUPPORTED;
@@ -195,23 +208,75 @@ static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
195208

196209
// Given NUMA nodes aren't local, HWLOC returns an error in such case.
197210
if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
198-
*bandwidth = 0;
211+
// Since we want to skip such query, we return the worst possible
212+
// value for given memory attribute.
213+
*value = memattr_get_worst_value(type);
199214
return UMF_RESULT_SUCCESS;
200215
}
201216

217+
enum hwloc_memattr_id_e hwlocMemAttrType = INT_MAX;
218+
switch (type) {
219+
case MEMATTR_TYPE_BANDWIDTH:
220+
hwlocMemAttrType = HWLOC_MEMATTR_ID_BANDWIDTH;
221+
break;
222+
case MEMATTR_TYPE_LATENCY:
223+
hwlocMemAttrType = HWLOC_MEMATTR_ID_LATENCY;
224+
break;
225+
default:
226+
assert(0); // Shouldn't be reachable.
227+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
228+
}
229+
202230
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
203231
.type = HWLOC_LOCATION_TYPE_CPUSET};
204-
hwloc_uint64_t value = 0;
205-
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
206-
dstNumaNode, &initiator, 0, &value);
232+
233+
hwloc_uint64_t memAttrValue = 0;
234+
int ret = hwloc_memattr_get_value(topology, hwlocMemAttrType, dstNumaNode,
235+
&initiator, 0, &memAttrValue);
207236
if (ret) {
208-
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
209-
srcNumaNode->os_index, dstNumaNode->os_index);
210237
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
211238
: UMF_RESULT_ERROR_UNKNOWN;
212239
}
213240

214-
*bandwidth = value;
241+
*value = memAttrValue;
242+
243+
return UMF_RESULT_SUCCESS;
244+
}
245+
246+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
247+
void *dstMemoryTarget,
248+
size_t *bandwidth) {
249+
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
250+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
251+
}
252+
253+
umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget,
254+
bandwidth, MEMATTR_TYPE_BANDWIDTH);
255+
if (ret) {
256+
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
257+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id,
258+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
259+
return ret;
260+
}
261+
262+
return UMF_RESULT_SUCCESS;
263+
}
264+
265+
static umf_result_t numa_get_latency(void *srcMemoryTarget,
266+
void *dstMemoryTarget, size_t *latency) {
267+
if (!srcMemoryTarget || !dstMemoryTarget || !latency) {
268+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
269+
}
270+
271+
umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget,
272+
latency, MEMATTR_TYPE_LATENCY);
273+
if (ret) {
274+
LOG_ERR("Retrieving latency for initiator node %u to node %u failed.",
275+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id,
276+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
277+
return ret;
278+
}
279+
215280
return UMF_RESULT_SUCCESS;
216281
}
217282

@@ -223,5 +288,6 @@ struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
223288
.clone = numa_clone,
224289
.get_capacity = numa_get_capacity,
225290
.get_bandwidth = numa_get_bandwidth,
291+
.get_latency = numa_get_latency,
226292
.memory_provider_create_from_memspace =
227293
numa_memory_provider_create_from_memspace};

src/memspace_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);
6060
void umfMemspaceHostAllDestroy(void);
6161
void umfMemspaceHighestCapacityDestroy(void);
6262
void umfMemspaceHighestBandwidthDestroy(void);
63+
void umfMemspaceLowestLatencyDestroy(void);
6364

6465
#ifdef __cplusplus
6566
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
6+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
*
8+
*/
9+
10+
#include <assert.h>
11+
#include <ctype.h>
12+
#include <hwloc.h>
13+
#include <stdlib.h>
14+
15+
#include "base_alloc_global.h"
16+
#include "memory_target_numa.h"
17+
#include "memspace_internal.h"
18+
#include "memspace_numa.h"
19+
#include "topology.h"
20+
#include "utils_common.h"
21+
#include "utils_concurrency.h"
22+
#include "utils_log.h"
23+
24+
// Picks, out of `nodes`, the memory target with the lowest latency as seen
// from `initiator`.
//
// initiator - memory target the latency is measured from
// nodes     - candidate memory targets (array of `numNodes` handles)
// numNodes  - number of entries in `nodes`; must be non-zero
// target    - [out] receives the selected handle on success
//
// Returns UMF_RESULT_ERROR_INVALID_ARGUMENT when `nodes` or `target` is NULL
// or `numNodes` is 0, the first error reported by
// umfMemoryTargetGetLatency() if a query fails, and UMF_RESULT_SUCCESS
// otherwise.
//
// On ties the earliest candidate wins. If no candidate reports a latency
// below SIZE_MAX, the first candidate is selected.
static umf_result_t getBestLatencyTarget(umf_memory_target_handle_t initiator,
                                         umf_memory_target_handle_t *nodes,
                                         size_t numNodes,
                                         umf_memory_target_handle_t *target) {
    // Without this guard an empty candidate list would make the final
    // `nodes[bestNodeIdx]` read past the end of the array.
    if (!nodes || numNodes == 0 || !target) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    size_t bestNodeIdx = 0;
    size_t bestLatency = SIZE_MAX;
    for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) {
        size_t latency = SIZE_MAX;
        umf_result_t ret =
            umfMemoryTargetGetLatency(initiator, nodes[nodeIdx], &latency);
        if (ret) {
            return ret;
        }

        // Strict comparison keeps the first of several equally good nodes.
        if (latency < bestLatency) {
            bestNodeIdx = nodeIdx;
            bestLatency = latency;
        }
    }

    *target = nodes[bestNodeIdx];

    return UMF_RESULT_SUCCESS;
}
48+
49+
static umf_result_t
50+
umfMemspaceLowestLatencyCreate(umf_memspace_handle_t *hMemspace) {
51+
if (!hMemspace) {
52+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
53+
}
54+
55+
umf_memspace_handle_t hostAllMemspace = umfMemspaceHostAllGet();
56+
if (!hostAllMemspace) {
57+
return UMF_RESULT_ERROR_UNKNOWN;
58+
}
59+
60+
umf_memspace_handle_t lowLatencyMemspace = NULL;
61+
umf_result_t ret = umfMemspaceFilter(hostAllMemspace, getBestLatencyTarget,
62+
&lowLatencyMemspace);
63+
if (ret != UMF_RESULT_SUCCESS) {
64+
// HWLOC could possibly return an 'EINVAL' error, which in this context
65+
// means that the HMAT is unavailable and we can't obtain the
66+
// 'latency' value of any NUMA node.
67+
return ret;
68+
}
69+
70+
*hMemspace = lowLatencyMemspace;
71+
return UMF_RESULT_SUCCESS;
72+
}
73+
74+
// File-local handle of the lowest latency memspace. It starts as NULL, is
// filled in by umfMemspaceLowestLatencyInit() when creation succeeds, and is
// reset to NULL by umfMemspaceLowestLatencyDestroy().
static umf_memspace_handle_t UMF_MEMSPACE_LOWEST_LATENCY = NULL;
75+
// Once-flag handed to util_init_once() by umfMemspaceLowestLatencyGet().
static UTIL_ONCE_FLAG UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED =
76+
UTIL_ONCE_FLAG_INIT;
77+
78+
void umfMemspaceLowestLatencyDestroy(void) {
79+
if (UMF_MEMSPACE_LOWEST_LATENCY) {
80+
umfMemspaceDestroy(UMF_MEMSPACE_LOWEST_LATENCY);
81+
UMF_MEMSPACE_LOWEST_LATENCY = NULL;
82+
}
83+
}
84+
85+
static void umfMemspaceLowestLatencyInit(void) {
86+
umf_result_t ret =
87+
umfMemspaceLowestLatencyCreate(&UMF_MEMSPACE_LOWEST_LATENCY);
88+
if (ret != UMF_RESULT_SUCCESS) {
89+
LOG_ERR("Creating the lowest latency memspace failed with a %u error\n",
90+
ret);
91+
assert(ret == UMF_RESULT_ERROR_NOT_SUPPORTED);
92+
}
93+
94+
#if defined(_WIN32) && !defined(UMF_SHARED_LIBRARY)
95+
atexit(umfMemspaceLowestLatencyDestroy);
96+
#endif
97+
}
98+
99+
umf_memspace_handle_t umfMemspaceLowestLatencyGet(void) {
100+
util_init_once(&UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED,
101+
umfMemspaceLowestLatencyInit);
102+
return UMF_MEMSPACE_LOWEST_LATENCY;
103+
}

0 commit comments

Comments
 (0)