Skip to content

Commit bee1cfe

Browse files
committed
Add memspace "lowest latency"
This memspace is composed in the same way as the 'highest bandwidth' memspace, but it selects the NUMA nodes with the best (i.e. lowest) latency.
1 parent c69bed7 commit bee1cfe

10 files changed

+192
-14
lines changed

include/umf/memspace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);
6161
///
6262
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);
6363

64+
/// \brief Retrieves the predefined lowest latency memspace.
65+
/// \return lowest latency memspace handle on success or NULL on failure
66+
///         (e.g. when the platform exposes no latency information).
67+
umf_memspace_handle_t umfMemspaceLowestLatencyGet(void);
68+
6469
#ifdef __cplusplus
6570
}
6671
#endif

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
8282
memspaces/memspace_numa.c
8383
memspaces/memspace_host_all.c
8484
memspaces/memspace_highest_capacity.c
85-
memspaces/memspace_highest_bandwidth.c)
85+
memspaces/memspace_highest_bandwidth.c
86+
memspaces/memspace_lowest_latency.c)
8687

8788
set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
8889
provider/provider_os_memory_linux.c)

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ UMF_1.0 {
3333
umfMemspaceHighestBandwidthGet;
3434
umfMemspaceHighestCapacityGet;
3535
umfMemspaceHostAllGet;
36+
umfMemspaceLowestLatencyGet;
3637
umfOpenIPCHandle;
3738
umfOsMemoryProviderOps;
3839
umfPoolAlignedMalloc;

src/libumf_linux.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ void __attribute__((destructor)) umfDestroy(void) {
3030
umfMemspaceHostAllDestroy();
3131
umfMemspaceHighestCapacityDestroy();
3232
umfMemspaceHighestBandwidthDestroy();
33+
umfMemspaceLowestLatencyDestroy();
3334
umfDestroyTopology();
3435
}
3536

src/memory_target.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,15 @@ umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
9797
return srcMemoryTarget->ops->get_bandwidth(
9898
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
9999
}
100+
101+
umf_result_t
102+
umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget,
103+
umf_memory_target_handle_t dstMemoryTarget,
104+
size_t *latency) {
105+
if (!srcMemoryTarget || !dstMemoryTarget || !latency) {
106+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
107+
}
108+
109+
return srcMemoryTarget->ops->get_latency(srcMemoryTarget->priv,
110+
dstMemoryTarget->priv, latency);
111+
}

src/memory_target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ umf_result_t
4141
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
4242
umf_memory_target_handle_t dstMemoryTarget,
4343
size_t *bandwidth);
44+
/// Queries the latency between 'srcMemoryTarget' (the initiator) and
/// 'dstMemoryTarget' and stores it in '*latency'.
/// Returns UMF_RESULT_ERROR_INVALID_ARGUMENT when any argument is NULL,
/// otherwise the result of the source target's 'get_latency' operation.
/// NOTE(review): the unit of '*latency' is backend-defined - confirm against
/// the memory target implementation.
umf_result_t
45+
umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget,
46+
umf_memory_target_handle_t dstMemoryTarget,
47+
size_t *latency);
4448

4549
#ifdef __cplusplus
4650
}

src/memory_target_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ typedef struct umf_memory_target_ops_t {
4343
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
4444
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
4545
size_t *bandwidth);
46+
umf_result_t (*get_latency)(void *srcMemoryTarget, void *dstMemoryTarget,
47+
size_t *latency);
4648
} umf_memory_target_ops_t;
4749

4850
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,11 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
148148
return UMF_RESULT_SUCCESS;
149149
}
150150

151-
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
152-
void *dstMemoryTarget,
153-
size_t *bandwidth) {
154-
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
155-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
156-
}
151+
// Memory attribute selector for query_attribute_value(); each enumerator is
// translated there to the matching HWLOC_MEMATTR_ID_* identifier.
enum memattr_type { MEMATTR_TYPE_BANDWIDTH, MEMATTR_TYPE_LATENCY };
157152

153+
static umf_result_t query_attribute_value(void *srcMemoryTarget,
154+
void *dstMemoryTarget, size_t *value,
155+
enum memattr_type type) {
158156
hwloc_topology_t topology = umfGetTopology();
159157
if (!topology) {
160158
return UMF_RESULT_ERROR_NOT_SUPPORTED;
@@ -176,23 +174,72 @@ static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
176174

177175
// Given NUMA nodes aren't local, HWLOC returns an error in such case.
178176
if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
179-
*bandwidth = 0;
177+
*value = 0;
180178
return UMF_RESULT_SUCCESS;
181179
}
182180

181+
enum hwloc_memattr_id_e hwlocMemAttrType = INT_MAX;
182+
switch (type) {
183+
case MEMATTR_TYPE_BANDWIDTH:
184+
hwlocMemAttrType = HWLOC_MEMATTR_ID_BANDWIDTH;
185+
break;
186+
case MEMATTR_TYPE_LATENCY:
187+
hwlocMemAttrType = HWLOC_MEMATTR_ID_LATENCY;
188+
break;
189+
default:
190+
assert(0); // Shouldn't be reachable.
191+
}
192+
183193
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
184194
.type = HWLOC_LOCATION_TYPE_CPUSET};
185-
hwloc_uint64_t value = 0;
186-
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
187-
dstNumaNode, &initiator, 0, &value);
195+
196+
hwloc_uint64_t memAttrValue = 0;
197+
int ret = hwloc_memattr_get_value(topology, hwlocMemAttrType, dstNumaNode,
198+
&initiator, 0, &memAttrValue);
188199
if (ret) {
189-
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
190-
srcNumaNode->os_index, dstNumaNode->os_index);
191200
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
192201
: UMF_RESULT_ERROR_UNKNOWN;
193202
}
194203

195-
*bandwidth = value;
204+
*value = memAttrValue;
205+
206+
return UMF_RESULT_SUCCESS;
207+
}
208+
209+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
210+
void *dstMemoryTarget,
211+
size_t *bandwidth) {
212+
if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
213+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
214+
}
215+
216+
umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget,
217+
bandwidth, MEMATTR_TYPE_BANDWIDTH);
218+
if (ret) {
219+
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
220+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id,
221+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
222+
return ret;
223+
}
224+
225+
return UMF_RESULT_SUCCESS;
226+
}
227+
228+
static umf_result_t numa_get_latency(void *srcMemoryTarget,
229+
void *dstMemoryTarget, size_t *latency) {
230+
if (!srcMemoryTarget || !dstMemoryTarget || !latency) {
231+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
232+
}
233+
234+
umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget,
235+
latency, MEMATTR_TYPE_LATENCY);
236+
if (ret) {
237+
LOG_ERR("Retrieving latency for initiator node %u to node %u failed.",
238+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id,
239+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
240+
return ret;
241+
}
242+
196243
return UMF_RESULT_SUCCESS;
197244
}
198245

@@ -204,5 +251,6 @@ struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
204251
.clone = numa_clone,
205252
.get_capacity = numa_get_capacity,
206253
.get_bandwidth = numa_get_bandwidth,
254+
.get_latency = numa_get_latency,
207255
.memory_provider_create_from_memspace =
208256
numa_memory_provider_create_from_memspace};

src/memspace_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);
6060
void umfMemspaceHostAllDestroy(void);
6161
void umfMemspaceHighestCapacityDestroy(void);
6262
void umfMemspaceHighestBandwidthDestroy(void);
63+
// Destroys the predefined lowest latency memspace if it has been created.
void umfMemspaceLowestLatencyDestroy(void);
6364

6465
#ifdef __cplusplus
6566
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
6+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
*
8+
*/
9+
10+
#include <assert.h>
11+
#include <ctype.h>
12+
#include <hwloc.h>
13+
#include <stdlib.h>
14+
15+
#include "base_alloc_global.h"
16+
#include "memory_target_numa.h"
17+
#include "memspace_internal.h"
18+
#include "memspace_numa.h"
19+
#include "topology.h"
20+
#include "utils_common.h"
21+
#include "utils_concurrency.h"
22+
#include "utils_log.h"
23+
24+
// Filter callback: picks, out of 'nodes', the memory target with the LOWEST
// latency as seen from 'initiator' and stores it in '*target'.
//
// Returns:
//  - UMF_RESULT_ERROR_INVALID_ARGUMENT if 'nodes' or 'target' is NULL or
//    'numNodes' is 0 (there is nothing to choose from),
//  - the first error reported by umfMemoryTargetGetLatency(),
//  - UMF_RESULT_SUCCESS otherwise.
//
// A reported latency of 0 is treated as "no measurement available" and never
// wins the comparison: the NUMA target's attribute query stores 0 for a
// destination whose cpuset does not intersect the initiator's, so accepting it
// would make every such node look like the fastest one. If no node reports a
// usable latency, the first node is returned.
static umf_result_t getBestLatencyTarget(umf_memory_target_handle_t initiator,
                                         umf_memory_target_handle_t *nodes,
                                         size_t numNodes,
                                         umf_memory_target_handle_t *target) {
    if (!nodes || !target || numNodes == 0) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    size_t bestNodeIdx = 0;
    size_t bestLatency = 0; // 0 means "no usable latency seen yet"
    for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) {
        size_t latency = 0;
        umf_result_t ret =
            umfMemoryTargetGetLatency(initiator, nodes[nodeIdx], &latency);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }

        if (latency == 0) {
            // No usable value for this node - skip it.
            continue;
        }

        // Lower latency is better; the first usable value seeds the minimum.
        if (bestLatency == 0 || latency < bestLatency) {
            bestNodeIdx = nodeIdx;
            bestLatency = latency;
        }
    }

    *target = nodes[bestNodeIdx];

    return UMF_RESULT_SUCCESS;
}
48+
49+
static umf_result_t
50+
umfMemspaceLowestLatencyCreate(umf_memspace_handle_t *hMemspace) {
51+
if (!hMemspace) {
52+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
53+
}
54+
55+
umf_memspace_handle_t hostAllMemspace = umfMemspaceHostAllGet();
56+
if (!hostAllMemspace) {
57+
return UMF_RESULT_ERROR_UNKNOWN;
58+
}
59+
60+
umf_memspace_handle_t lowLatencyMemspace = NULL;
61+
umf_result_t ret = umfMemspaceFilter(hostAllMemspace, getBestLatencyTarget,
62+
&lowLatencyMemspace);
63+
if (ret != UMF_RESULT_SUCCESS) {
64+
// HWLOC could possibly return an 'EINVAL' error, which in this context
65+
// means that the HMAT is unavailable and we can't obtain the
66+
// 'latency' value of any NUMA node.
67+
return ret;
68+
}
69+
70+
*hMemspace = lowLatencyMemspace;
71+
return UMF_RESULT_SUCCESS;
72+
}
73+
74+
// Cached handle of the predefined lowest latency memspace; stays NULL until
// it is created and is reset to NULL when destroyed.
static umf_memspace_handle_t UMF_MEMSPACE_LOWEST_LATENCY = NULL;
75+
// Once-flag passed to util_init_once() together with the init routine.
static UTIL_ONCE_FLAG UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED =
76+
UTIL_ONCE_FLAG_INIT;
77+
78+
void umfMemspaceLowestLatencyDestroy(void) {
79+
if (UMF_MEMSPACE_LOWEST_LATENCY) {
80+
umfMemspaceDestroy(UMF_MEMSPACE_LOWEST_LATENCY);
81+
UMF_MEMSPACE_LOWEST_LATENCY = NULL;
82+
}
83+
}
84+
85+
static void umfMemspaceLowestLatencyInit(void) {
86+
umf_result_t ret =
87+
umfMemspaceLowestLatencyCreate(&UMF_MEMSPACE_LOWEST_LATENCY);
88+
if (ret != UMF_RESULT_SUCCESS) {
89+
LOG_ERR("Creating the lowest latency memspace failed with a %u error\n",
90+
ret);
91+
assert(ret == UMF_RESULT_ERROR_NOT_SUPPORTED);
92+
}
93+
94+
#if defined(_WIN32) && !defined(UMF_SHARED_LIBRARY)
95+
atexit(umfMemspaceLowestLatencyDestroy);
96+
#endif
97+
}
98+
99+
umf_memspace_handle_t umfMemspaceLowestLatencyGet(void) {
100+
util_init_once(&UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED,
101+
umfMemspaceLowestLatencyInit);
102+
return UMF_MEMSPACE_LOWEST_LATENCY;
103+
}

0 commit comments

Comments
 (0)