From f1b563218f04ab58ec214a86479b3b4e923e0c5c Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Wed, 22 May 2024 09:42:18 +0000 Subject: [PATCH 1/3] Add memspace "lowest latency" This memspace is analogous to the 'highest bandwidth' memspace in its composition, but it focuses on the NUMA nodes with best latency. --- include/umf/memspace.h | 5 ++ src/CMakeLists.txt | 3 +- src/libumf.c | 1 + src/libumf.map | 1 + src/memory_target.c | 12 +++ src/memory_target.h | 4 + src/memory_target_ops.h | 2 + src/memory_targets/memory_target_numa.c | 90 ++++++++++++++++++--- src/memspace_internal.h | 1 + src/memspaces/memspace_lowest_latency.c | 103 ++++++++++++++++++++++++ 10 files changed, 209 insertions(+), 13 deletions(-) create mode 100644 src/memspaces/memspace_lowest_latency.c diff --git a/include/umf/memspace.h b/include/umf/memspace.h index 9467fd308..8acfa9325 100644 --- a/include/umf/memspace.h +++ b/include/umf/memspace.h @@ -61,6 +61,11 @@ umf_memspace_handle_t umfMemspaceHighestCapacityGet(void); /// umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void); +/// \brief Retrieves predefined lowest latency memspace. +/// \return lowest latency memspace handle on success or NULL on failure. +/// +umf_memspace_handle_t umfMemspaceLowestLatencyGet(void); + #ifdef __cplusplus } #endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2b1318baf..2e256e911 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,7 +85,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX memspaces/memspace_numa.c memspaces/memspace_host_all.c memspaces/memspace_highest_capacity.c - memspaces/memspace_highest_bandwidth.c) + memspaces/memspace_highest_bandwidth.c + memspaces/memspace_lowest_latency.c) set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX} provider/provider_os_memory_linux.c) diff --git a/src/libumf.c b/src/libumf.c index 87a8afef5..161baa387 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -34,6 +34,7 @@ void umfTearDown(void) { umfMemspaceHostAllDestroy(); umfMemspaceHighestCapacityDestroy(); umfMemspaceHighestBandwidthDestroy(); + umfMemspaceLowestLatencyDestroy(); umfDestroyTopology(); #endif // make sure TRACKER is not used after being destroyed diff --git a/src/libumf.map b/src/libumf.map index 23fd8867b..293a8c66d 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -38,6 +38,7 @@ UMF_1.0 { umfMemspaceHighestBandwidthGet; umfMemspaceHighestCapacityGet; umfMemspaceHostAllGet; + umfMemspaceLowestLatencyGet; umfOpenIPCHandle; umfOsMemoryProviderOps; umfPoolAlignedMalloc; diff --git a/src/memory_target.c b/src/memory_target.c index 6ec08eab8..3cbdb09d9 100644 --- a/src/memory_target.c +++ b/src/memory_target.c @@ -97,3 +97,15 @@ umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget, return srcMemoryTarget->ops->get_bandwidth( srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth); } + +umf_result_t +umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget, + umf_memory_target_handle_t dstMemoryTarget, + size_t *latency) { + if (!srcMemoryTarget || !dstMemoryTarget || !latency) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return srcMemoryTarget->ops->get_latency(srcMemoryTarget->priv, + dstMemoryTarget->priv, latency); +} diff --git a/src/memory_target.h b/src/memory_target.h index cb394ddff..c522cce24 100644 --- a/src/memory_target.h +++ b/src/memory_target.h @@ -39,6 +39,10 @@ umf_result_t umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget, umf_memory_target_handle_t dstMemoryTarget, size_t *bandwidth); 
+umf_result_t +umfMemoryTargetGetLatency(umf_memory_target_handle_t srcMemoryTarget, + umf_memory_target_handle_t dstMemoryTarget, + size_t *latency); #ifdef __cplusplus } diff --git a/src/memory_target_ops.h b/src/memory_target_ops.h index 767e939cc..b62b5545d 100644 --- a/src/memory_target_ops.h +++ b/src/memory_target_ops.h @@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t { umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity); umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget, size_t *bandwidth); + umf_result_t (*get_latency)(void *srcMemoryTarget, void *dstMemoryTarget, + size_t *latency); } umf_memory_target_ops_t; #ifdef __cplusplus diff --git a/src/memory_targets/memory_target_numa.c b/src/memory_targets/memory_target_numa.c index dc1bf435e..4b830ae9e 100644 --- a/src/memory_targets/memory_target_numa.c +++ b/src/memory_targets/memory_target_numa.c @@ -167,13 +167,26 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) { return UMF_RESULT_SUCCESS; } -static umf_result_t numa_get_bandwidth(void *srcMemoryTarget, - void *dstMemoryTarget, - size_t *bandwidth) { - if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; +typedef enum memattr_type_t { + MEMATTR_TYPE_BANDWIDTH, + MEMATTR_TYPE_LATENCY +} memattr_type_t; + +static size_t memattr_get_worst_value(memattr_type_t type) { + switch (type) { + case MEMATTR_TYPE_BANDWIDTH: + return 0; + case MEMATTR_TYPE_LATENCY: + return SIZE_MAX; + default: + assert(0); // Should not be reachable + return 0; } +} +static umf_result_t query_attribute_value(void *srcMemoryTarget, + void *dstMemoryTarget, size_t *value, + memattr_type_t type) { hwloc_topology_t topology = umfGetTopology(); if (!topology) { return UMF_RESULT_ERROR_NOT_SUPPORTED; @@ -195,23 +208,75 @@ static umf_result_t numa_get_bandwidth(void *srcMemoryTarget, // Given NUMA nodes aren't local, HWLOC returns an error in such case. if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) { - *bandwidth = 0; + // Since we want to skip such query, we return the worst possible + // value for given memory attribute. + *value = memattr_get_worst_value(type); return UMF_RESULT_SUCCESS; } + enum hwloc_memattr_id_e hwlocMemAttrType = INT_MAX; + switch (type) { + case MEMATTR_TYPE_BANDWIDTH: + hwlocMemAttrType = HWLOC_MEMATTR_ID_BANDWIDTH; + break; + case MEMATTR_TYPE_LATENCY: + hwlocMemAttrType = HWLOC_MEMATTR_ID_LATENCY; + break; + default: + assert(0); // Shouldn't be reachable. + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset, .type = HWLOC_LOCATION_TYPE_CPUSET}; - hwloc_uint64_t value = 0; - int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH, - dstNumaNode, &initiator, 0, &value); + + hwloc_uint64_t memAttrValue = 0; + int ret = hwloc_memattr_get_value(topology, hwlocMemAttrType, dstNumaNode, + &initiator, 0, &memAttrValue); if (ret) { - LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.", - srcNumaNode->os_index, dstNumaNode->os_index); return (errno == EINVAL) ? 
UMF_RESULT_ERROR_NOT_SUPPORTED : UMF_RESULT_ERROR_UNKNOWN; } - *bandwidth = value; + *value = memAttrValue; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t numa_get_bandwidth(void *srcMemoryTarget, + void *dstMemoryTarget, + size_t *bandwidth) { + if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget, + bandwidth, MEMATTR_TYPE_BANDWIDTH); + if (ret) { + LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.", + ((struct numa_memory_target_t *)srcMemoryTarget)->physical_id, + ((struct numa_memory_target_t *)dstMemoryTarget)->physical_id); + return ret; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t numa_get_latency(void *srcMemoryTarget, + void *dstMemoryTarget, size_t *latency) { + if (!srcMemoryTarget || !dstMemoryTarget || !latency) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_result_t ret = query_attribute_value(srcMemoryTarget, dstMemoryTarget, + latency, MEMATTR_TYPE_LATENCY); + if (ret) { + LOG_ERR("Retrieving latency for initiator node %u to node %u failed.", + ((struct numa_memory_target_t *)srcMemoryTarget)->physical_id, + ((struct numa_memory_target_t *)dstMemoryTarget)->physical_id); + return ret; + } + return UMF_RESULT_SUCCESS; } @@ -223,5 +288,6 @@ struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = { .clone = numa_clone, .get_capacity = numa_get_capacity, .get_bandwidth = numa_get_bandwidth, + .get_latency = numa_get_latency, .memory_provider_create_from_memspace = numa_memory_provider_create_from_memspace}; diff --git a/src/memspace_internal.h b/src/memspace_internal.h index 6ced67303..857049392 100644 --- a/src/memspace_internal.h +++ b/src/memspace_internal.h @@ -60,6 +60,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace); void umfMemspaceHostAllDestroy(void); void umfMemspaceHighestCapacityDestroy(void); void umfMemspaceHighestBandwidthDestroy(void); +void umfMemspaceLowestLatencyDestroy(void); #ifdef __cplusplus } diff --git a/src/memspaces/memspace_lowest_latency.c b/src/memspaces/memspace_lowest_latency.c new file mode 100644 index 000000000..fa6985362 --- /dev/null +++ b/src/memspaces/memspace_lowest_latency.c @@ -0,0 +1,103 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include +#include + +#include "base_alloc_global.h" +#include "memory_target_numa.h" +#include "memspace_internal.h" +#include "memspace_numa.h" +#include "topology.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +static umf_result_t getBestLatencyTarget(umf_memory_target_handle_t initiator, + umf_memory_target_handle_t *nodes, + size_t numNodes, + umf_memory_target_handle_t *target) { + size_t bestNodeIdx = 0; + size_t bestLatency = SIZE_MAX; + for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) { + size_t latency = SIZE_MAX; + umf_result_t ret = + umfMemoryTargetGetLatency(initiator, nodes[nodeIdx], &latency); + if (ret) { + return ret; + } + + if (latency < bestLatency) { + bestNodeIdx = nodeIdx; + bestLatency = latency; + } + } + + *target = nodes[bestNodeIdx]; + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t +umfMemspaceLowestLatencyCreate(umf_memspace_handle_t *hMemspace) { + if (!hMemspace) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_memspace_handle_t hostAllMemspace = umfMemspaceHostAllGet(); + if (!hostAllMemspace) { + return UMF_RESULT_ERROR_UNKNOWN; + } + + umf_memspace_handle_t lowLatencyMemspace = NULL; + umf_result_t ret = umfMemspaceFilter(hostAllMemspace, getBestLatencyTarget, + &lowLatencyMemspace); + if (ret != UMF_RESULT_SUCCESS) { + // HWLOC could possibly return an 'EINVAL' error, which in this context + // means that the HMAT is unavailable and we can't obtain the + // 'latency' value of any NUMA node. + return ret; + } + + *hMemspace = lowLatencyMemspace; + return UMF_RESULT_SUCCESS; +} + +static umf_memspace_handle_t UMF_MEMSPACE_LOWEST_LATENCY = NULL; +static UTIL_ONCE_FLAG UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED = + UTIL_ONCE_FLAG_INIT; + +void umfMemspaceLowestLatencyDestroy(void) { + if (UMF_MEMSPACE_LOWEST_LATENCY) { + umfMemspaceDestroy(UMF_MEMSPACE_LOWEST_LATENCY); + UMF_MEMSPACE_LOWEST_LATENCY = NULL; + } +} + +static void umfMemspaceLowestLatencyInit(void) { + umf_result_t ret = + umfMemspaceLowestLatencyCreate(&UMF_MEMSPACE_LOWEST_LATENCY); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Creating the lowest latency memspace failed with a %u error\n", + ret); + assert(ret == UMF_RESULT_ERROR_NOT_SUPPORTED); + } + +#if defined(_WIN32) && !defined(UMF_SHARED_LIBRARY) + atexit(umfMemspaceLowestLatencyDestroy); +#endif +} + +umf_memspace_handle_t umfMemspaceLowestLatencyGet(void) { + util_init_once(&UMF_MEMSPACE_LOWEST_LATENCY_INITIALIZED, + umfMemspaceLowestLatencyInit); + return UMF_MEMSPACE_LOWEST_LATENCY; +} From 2e3c0090d6e63ba1e9079ab3450e58bdfd82a706 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Wed, 22 May 2024 10:06:33 +0000 Subject: [PATCH 2/3] Add tests for memspace "lowest latency" Those tests are skipped with GTEST_SKIP() when latency property can't be queried (HMAT is not supported on the platform). 
--- test/CMakeLists.txt | 12 +- test/memspaces/memspace_fixtures.hpp | 221 ++++++++++++++++++ test/memspaces/memspace_helpers.hpp | 25 -- test/memspaces/memspace_highest_bandwidth.cpp | 177 +------------- test/memspaces/memspace_highest_capacity.cpp | 1 + test/memspaces/memspace_host_all.cpp | 1 + test/memspaces/memspace_lowest_latency.cpp | 44 ++++ test/memspaces/memspace_numa.cpp | 1 + test/test_valgrind.sh | 3 + 9 files changed, 289 insertions(+), 196 deletions(-) create mode 100644 test/memspaces/memspace_fixtures.hpp create mode 100644 test/memspaces/memspace_lowest_latency.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f88fa1789..f8a4982f4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -162,23 +162,27 @@ if(LINUX) # OS-specific functions are implemented only for Linux now add_umf_test( NAME memspace_numa SRCS memspaces/memspace_numa.cpp - LIBS ${LIBNUMA_LIBRARIES}) + LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) add_umf_test( NAME provider_os_memory_config SRCS provider_os_memory_config.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) add_umf_test( NAME memspace_host_all SRCS memspaces/memspace_host_all.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) add_umf_test( NAME memspace_highest_capacity SRCS memspaces/memspace_highest_capacity.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) add_umf_test( NAME memspace_highest_bandwidth SRCS memspaces/memspace_highest_bandwidth.cpp LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + add_umf_test( + NAME memspace_lowest_latency + SRCS memspaces/memspace_lowest_latency.cpp + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) add_umf_test( NAME mempolicy SRCS memspaces/mempolicy.cpp diff --git a/test/memspaces/memspace_fixtures.hpp b/test/memspaces/memspace_fixtures.hpp new file mode 100644 index 000000000..de01e9ae6 --- /dev/null +++ b/test/memspaces/memspace_fixtures.hpp @@ -0,0 +1,221 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef UMF_MEMSPACE_FIXTURES_HPP +#define UMF_MEMSPACE_FIXTURES_HPP + +#include "base.hpp" +#include "memspace_helpers.hpp" +#include "test_helpers.h" + +#include +#include +#include +#include +#include + +#define SIZE_4K (4096UL) +#define SIZE_4M (SIZE_4K * 1024UL) + +// In HWLOC v2.3.0, the 'hwloc_location_type_e' enum is defined inside an +// 'hwloc_location' struct. In newer versions, this enum is defined globally. +// To prevent compile errors in C++ tests related this scope change +// 'hwloc_location_type_e' has been aliased. 
+using hwloc_location_type_alias = decltype(hwloc_location::type);
+
+struct numaNodesTest : ::umf_test::test {
+    void SetUp() override {
+        ::umf_test::test::SetUp();
+
+        if (numa_available() == -1 || numa_all_nodes_ptr == nullptr) {
+            GTEST_FAIL() << "Failed to initialize libnuma";
+        }
+
+        int maxNode = numa_max_node();
+        if (maxNode < 0) {
+            GTEST_FAIL() << "No available numa nodes";
+        }
+
+        for (int i = 0; i <= maxNode; i++) {
+            if (numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
+                nodeIds.emplace_back(i);
+                maxNodeId = i;
+            }
+        }
+    }
+
+    std::vector<size_t> nodeIds;
+    unsigned long maxNodeId = 0;
+};
+
+using isQuerySupportedFunc = bool (*)(size_t);
+using memspaceGetFunc = umf_memspace_handle_t (*)();
+using memspaceGetParams = std::tuple<isQuerySupportedFunc, memspaceGetFunc>;
+
+struct memspaceGetTest : ::numaNodesTest,
+                         ::testing::WithParamInterface<memspaceGetParams> {
+    void SetUp() override {
+        ::numaNodesTest::SetUp();
+
+        auto [isQuerySupported, memspaceGet] = this->GetParam();
+
+        if (!isQuerySupported(nodeIds.front())) {
+            GTEST_SKIP();
+        }
+
+        hMemspace = memspaceGet();
+        ASSERT_NE(hMemspace, nullptr);
+    }
+
+    umf_memspace_handle_t hMemspace = nullptr;
+};
+
+struct memspaceProviderTest : ::memspaceGetTest {
+    void SetUp() override {
+        ::memspaceGetTest::SetUp();
+
+        if (::memspaceGetTest::IsSkipped()) {
+            GTEST_SKIP();
+        }
+
+        umf_result_t ret =
+            umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider);
+        ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
+        ASSERT_NE(hProvider, nullptr);
+    }
+
+    void TearDown() override {
+        ::memspaceGetTest::TearDown();
+
+        if (hProvider) {
+            umfMemoryProviderDestroy(hProvider);
+        }
+    }
+
+    umf_memory_provider_handle_t hProvider = nullptr;
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(memspaceGetTest);
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(memspaceProviderTest);
+
+TEST_P(memspaceGetTest, providerFromMemspace) {
+    umf_memory_provider_handle_t hProvider = nullptr;
+    umf_result_t ret =
+        umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider);
+    UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
+    UT_ASSERTne(hProvider, nullptr);
+
+    umfMemoryProviderDestroy(hProvider);
+}
+
+TEST_P(memspaceProviderTest, allocFree) {
+    void *ptr = nullptr;
+    size_t size = SIZE_4K;
+    size_t alignment = 0;
+
+    umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr);
+    UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
+    UT_ASSERTne(ptr, nullptr);
+
+    // Access the allocation, so that all the pages associated with it are
+    // allocated on some NUMA node.
+    memset(ptr, 0xFF, size);
+
+    ret = umfMemoryProviderFree(hProvider, ptr, size);
+    UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
+}
+
+static std::vector<int> getAllCpus() {
+    std::vector<int> allCpus;
+    for (int i = 0; i < numa_num_possible_cpus(); ++i) {
+        if (numa_bitmask_isbitset(numa_all_cpus_ptr, i)) {
+            allCpus.push_back(i);
+        }
+    }
+
+    return allCpus;
+}
+
+#define MAX_NODES 512
+
+TEST_P(memspaceProviderTest, allocLocalMt) {
+    auto pinAllocValidate = [&](umf_memory_provider_handle_t hProvider,
+                                int cpu) {
+        hwloc_topology_t topology = NULL;
+        UT_ASSERTeq(hwloc_topology_init(&topology), 0);
+        UT_ASSERTeq(hwloc_topology_load(topology), 0);
+
+        // Pin current thread to the provided CPU.
+        hwloc_cpuset_t pinCpuset = hwloc_bitmap_alloc();
+        UT_ASSERTeq(hwloc_bitmap_set(pinCpuset, cpu), 0);
+        UT_ASSERTeq(
+            hwloc_set_cpubind(topology, pinCpuset, HWLOC_CPUBIND_THREAD), 0);
+
+        // Confirm that the thread is pinned to the provided CPU.
+        hwloc_cpuset_t curCpuset = hwloc_bitmap_alloc();
+        UT_ASSERTeq(
+            hwloc_get_cpubind(topology, curCpuset, HWLOC_CPUBIND_THREAD), 0);
+        UT_ASSERT(hwloc_bitmap_isequal(curCpuset, pinCpuset));
+        hwloc_bitmap_free(curCpuset);
+        hwloc_bitmap_free(pinCpuset);
+
+        // Allocate some memory.
+        const size_t size = SIZE_4K;
+        const size_t alignment = 0;
+        void *ptr = nullptr;
+
+        umf_result_t ret =
+            umfMemoryProviderAlloc(hProvider, size, alignment, &ptr);
+        UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
+        UT_ASSERTne(ptr, nullptr);
+
+        // Access the allocation, so that all the pages associated with it are
+        // allocated on some NUMA node.
+        memset(ptr, 0xFF, size);
+
+        // Get the NUMA node responsible for this allocation.
+        int mode = -1;
+        std::vector<size_t> boundNodeIds;
+        size_t allocNodeId = SIZE_MAX;
+        getAllocationPolicy(ptr, maxNodeId, mode, boundNodeIds, allocNodeId);
+
+        // Get the CPUs associated with the specified NUMA node.
+        hwloc_obj_t allocNodeObj =
+            hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, allocNodeId);
+
+        unsigned nNodes = MAX_NODES;
+        std::vector<hwloc_obj_t> localNodes(MAX_NODES);
+        hwloc_location loc;
+        loc.location.object = allocNodeObj,
+        loc.type = hwloc_location_type_alias::HWLOC_LOCATION_TYPE_OBJECT;
+        UT_ASSERTeq(hwloc_get_local_numanode_objs(topology, &loc, &nNodes,
+                                                  localNodes.data(), 0),
+                    0);
+        UT_ASSERT(nNodes <= MAX_NODES);
+
+        // Confirm that the allocation from this thread was made to a local
+        // NUMA node.
+        UT_ASSERT(std::any_of(localNodes.begin(), localNodes.end(),
+                              [&allocNodeObj](hwloc_obj_t node) {
+                                  return node == allocNodeObj;
+                              }));
+
+        ret = umfMemoryProviderFree(hProvider, ptr, size);
+        UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
+
+        hwloc_topology_destroy(topology);
+    };
+
+    const auto cpus = getAllCpus();
+    std::vector<std::thread> threads;
+    for (auto cpu : cpus) {
+        threads.emplace_back(pinAllocValidate, hProvider, cpu);
+    }
+
+    for (auto &thread : threads) {
+        thread.join();
+    }
+}
+
+#endif /* UMF_MEMSPACE_FIXTURES_HPP */
diff --git a/test/memspaces/memspace_helpers.hpp b/test/memspaces/memspace_helpers.hpp
index b92beba63..9ce9a004e 100644
--- a/test/memspaces/memspace_helpers.hpp
+++ b/test/memspaces/memspace_helpers.hpp
@@ -17,31 +17,6 @@
 #define SIZE_4K (4096UL)
 #define SIZE_4M (SIZE_4K * 1024UL)
 
-struct numaNodesTest : ::umf_test::test {
-    void SetUp() override {
-        ::umf_test::test::SetUp();
-
-        if (numa_available() == -1 || numa_all_nodes_ptr == nullptr) {
-            GTEST_FAIL() << "Failed to initialize libnuma";
-        }
-
-        int maxNode = numa_max_node();
-        if (maxNode < 0) {
-            GTEST_FAIL() << "No available numa nodes";
-        }
-
-        for (int i = 0; i <= maxNode; i++) {
-            if (numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
-                nodeIds.emplace_back(i);
-                maxNodeId = i;
-            }
-        }
-    }
-
-    std::vector<size_t> nodeIds;
-    unsigned long maxNodeId = 0;
-};
-
 ///
 /// @brief Retrieves the memory policy information for \p ptr.
 /// @param ptr allocation pointer.
diff --git a/test/memspaces/memspace_highest_bandwidth.cpp b/test/memspaces/memspace_highest_bandwidth.cpp
index 7a56eeb26..2b5330751 100644
--- a/test/memspaces/memspace_highest_bandwidth.cpp
+++ b/test/memspaces/memspace_highest_bandwidth.cpp
@@ -2,23 +2,14 @@
 // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#include "memory_target_numa.h" +#include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" #include "test_helpers.h" #include -#include #include -using umf_test::test; - -// In HWLOC v2.3.0, the 'hwloc_location_type_e' enum is defined inside an -// 'hwloc_location' struct. In newer versions, this enum is defined globally. -// To prevent compile errors in C++ tests related this scope change -// 'hwloc_location_type_e' has been aliased. -using hwloc_location_type_alias = decltype(hwloc_location::type); - static bool canQueryBandwidth(size_t nodeId) { hwloc_topology_t topology = nullptr; int ret = hwloc_topology_init(&topology); @@ -43,161 +34,13 @@ static bool canQueryBandwidth(size_t nodeId) { return (ret == 0); } -struct memspaceHighestBandwidthTest : ::numaNodesTest { - void SetUp() override { - ::numaNodesTest::SetUp(); - - if (!canQueryBandwidth(nodeIds.front())) { - GTEST_SKIP(); - } - - hMemspace = umfMemspaceHighestBandwidthGet(); - ASSERT_NE(hMemspace, nullptr); - } - - umf_memspace_handle_t hMemspace = nullptr; -}; - -struct memspaceHighestBandwidthProviderTest : ::memspaceHighestBandwidthTest { - void SetUp() override { - ::memspaceHighestBandwidthTest::SetUp(); - - if (!canQueryBandwidth(nodeIds.front())) { - GTEST_SKIP(); - } - - umf_result_t ret = - umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ASSERT_NE(hProvider, nullptr); - } - - void TearDown() override { - ::memspaceHighestBandwidthTest::TearDown(); - - if (hProvider) { - umfMemoryProviderDestroy(hProvider); - } - } - - umf_memory_provider_handle_t hProvider = nullptr; -}; - -TEST_F(memspaceHighestBandwidthTest, providerFromMemspace) { - umf_memory_provider_handle_t hProvider = nullptr; - umf_result_t ret = - umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(hProvider, nullptr); - - umfMemoryProviderDestroy(hProvider); -} - -TEST_F(memspaceHighestBandwidthProviderTest, allocFree) { - void *ptr = nullptr; - size_t size = SIZE_4K; - size_t alignment = 0; - - umf_result_t ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(ptr, nullptr); - - // Access the allocation, so that all the pages associated with it are - // allocated on some NUMA node. - memset(ptr, 0xFF, size); - - ret = umfMemoryProviderFree(hProvider, ptr, size); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); -} +INSTANTIATE_TEST_SUITE_P(memspaceLowestLatencyTest, memspaceGetTest, + ::testing::Values(memspaceGetParams{ + canQueryBandwidth, + umfMemspaceHighestBandwidthGet})); -static std::vector getAllCpus() { - std::vector allCpus; - for (int i = 0; i < numa_num_possible_cpus(); ++i) { - if (numa_bitmask_isbitset(numa_all_cpus_ptr, i)) { - allCpus.push_back(i); - } - } - - return allCpus; -} - -#define MAX_NODES 512 - -TEST_F(memspaceHighestBandwidthProviderTest, allocLocalMt) { - auto pinAllocValidate = [&](umf_memory_provider_handle_t hProvider, - int cpu) { - hwloc_topology_t topology = NULL; - UT_ASSERTeq(hwloc_topology_init(&topology), 0); - UT_ASSERTeq(hwloc_topology_load(topology), 0); - - // Pin current thread to the provided CPU. 
- hwloc_cpuset_t pinCpuset = hwloc_bitmap_alloc(); - UT_ASSERTeq(hwloc_bitmap_set(pinCpuset, cpu), 0); - UT_ASSERTeq( - hwloc_set_cpubind(topology, pinCpuset, HWLOC_CPUBIND_THREAD), 0); - - // Confirm that the thread is pinned to the provided CPU. - hwloc_cpuset_t curCpuset = hwloc_bitmap_alloc(); - UT_ASSERTeq( - hwloc_get_cpubind(topology, curCpuset, HWLOC_CPUBIND_THREAD), 0); - UT_ASSERT(hwloc_bitmap_isequal(curCpuset, pinCpuset)); - hwloc_bitmap_free(curCpuset); - hwloc_bitmap_free(pinCpuset); - - // Allocate some memory. - const size_t size = SIZE_4K; - const size_t alignment = 0; - void *ptr = nullptr; - - umf_result_t ret = - umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - UT_ASSERTne(ptr, nullptr); - - // Access the allocation, so that all the pages associated with it are - // allocated on some NUMA node. - memset(ptr, 0xFF, size); - - // Get the NUMA node responsible for this allocation. - int mode = -1; - std::vector boundNodeIds; - size_t allocNodeId = SIZE_MAX; - getAllocationPolicy(ptr, maxNodeId, mode, boundNodeIds, allocNodeId); - - // Get the CPUs associated with the specified NUMA node. - hwloc_obj_t allocNodeObj = - hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, allocNodeId); - - unsigned nNodes = MAX_NODES; - std::vector localNodes(MAX_NODES); - hwloc_location loc; - loc.location.object = allocNodeObj, - loc.type = hwloc_location_type_alias::HWLOC_LOCATION_TYPE_OBJECT; - UT_ASSERTeq(hwloc_get_local_numanode_objs(topology, &loc, &nNodes, - localNodes.data(), 0), - 0); - UT_ASSERT(nNodes <= MAX_NODES); - - // Confirm that the allocation from this thread was made to a local - // NUMA node. - UT_ASSERT(std::any_of(localNodes.begin(), localNodes.end(), - [&allocNodeObj](hwloc_obj_t node) { - return node == allocNodeObj; - })); - - ret = umfMemoryProviderFree(hProvider, ptr, size); - UT_ASSERTeq(ret, UMF_RESULT_SUCCESS); - - hwloc_topology_destroy(topology); - }; - - const auto cpus = getAllCpus(); - std::vector threads; - for (auto cpu : cpus) { - threads.emplace_back(pinAllocValidate, hProvider, cpu); - } - - for (auto &thread : threads) { - thread.join(); - } -} +INSTANTIATE_TEST_SUITE_P(memspaceLowestLatencyProviderTest, + memspaceProviderTest, + ::testing::Values(memspaceGetParams{ + canQueryBandwidth, + umfMemspaceHighestBandwidthGet})); diff --git a/test/memspaces/memspace_highest_capacity.cpp b/test/memspaces/memspace_highest_capacity.cpp index 162aa18f7..b16f3f50d 100644 --- a/test/memspaces/memspace_highest_capacity.cpp +++ b/test/memspaces/memspace_highest_capacity.cpp @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "memory_target_numa.h" +#include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" #include "numa_helpers.h" diff --git a/test/memspaces/memspace_host_all.cpp b/test/memspaces/memspace_host_all.cpp index e0326709b..588b24f15 100644 --- a/test/memspaces/memspace_host_all.cpp +++ b/test/memspaces/memspace_host_all.cpp @@ -3,6 +3,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "memory_target_numa.h" +#include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" #include "numa_helpers.h" diff --git a/test/memspaces/memspace_lowest_latency.cpp b/test/memspaces/memspace_lowest_latency.cpp new file mode 100644 index 000000000..f0eabcf3e --- /dev/null +++ b/test/memspaces/memspace_lowest_latency.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache 
License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "memspace_fixtures.hpp" +#include "memspace_helpers.hpp" +#include "memspace_internal.h" +#include "test_helpers.h" + +#include +#include + +static bool canQueryLatency(size_t nodeId) { + hwloc_topology_t topology = nullptr; + int ret = hwloc_topology_init(&topology); + UT_ASSERTeq(ret, 0); + ret = hwloc_topology_load(topology); + UT_ASSERTeq(ret, 0); + + hwloc_obj_t numaNode = + hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, nodeId); + UT_ASSERTne(numaNode, nullptr); + + // Setup initiator structure. + struct hwloc_location initiator; + initiator.location.cpuset = numaNode->cpuset; + initiator.type = hwloc_location_type_alias::HWLOC_LOCATION_TYPE_CPUSET; + + hwloc_uint64_t value = 0; + ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_LATENCY, numaNode, + &initiator, 0, &value); + + hwloc_topology_destroy(topology); + return (ret == 0); +} + +INSTANTIATE_TEST_SUITE_P(memspaceLowestLatencyTest, memspaceGetTest, + ::testing::Values(memspaceGetParams{ + canQueryLatency, umfMemspaceLowestLatencyGet})); + +INSTANTIATE_TEST_SUITE_P(memspaceLowestLatencyProviderTest, + memspaceProviderTest, + ::testing::Values(memspaceGetParams{ + canQueryLatency, umfMemspaceLowestLatencyGet})); diff --git a/test/memspaces/memspace_numa.cpp b/test/memspaces/memspace_numa.cpp index c214ef189..225cccd9f 100644 --- a/test/memspaces/memspace_numa.cpp +++ b/test/memspaces/memspace_numa.cpp @@ -4,6 +4,7 @@ #include "memspaces/memspace_numa.h" #include "base.hpp" +#include "memspace_fixtures.hpp" #include "memspace_helpers.hpp" #include "memspace_internal.h" diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index 5680fefef..62252fcdb 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -103,6 +103,9 @@ for test in $(ls -1 umf_test-*); do umf_test-memspace_highest_bandwidth) FILTER='--gtest_filter="-*allocLocalMt*"' ;; + umf_test-memspace_lowest_latency) + FILTER='--gtest_filter="-*allocLocalMt*"' + ;; esac [ "$FILTER" != "" ] && echo -n "($FILTER) " From 8f394d8afc4e5273886d2b1a8baf99db5a969663 Mon Sep 17 00:00:00 2001 From: Krzysztof Swiecicki Date: Wed, 22 May 2024 10:08:25 +0000 Subject: [PATCH 3/3] Add brief introduction of lowest latency memspace --- README.md | 5 +++++ include/umf/memspace.h | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index de833eeab..9204ab7c9 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,11 @@ Can be retrieved using umfMemspaceHighestCapacityGet. Memspace backed by an aggregated list of NUMA nodes identified as highest bandwidth after selecting each available NUMA node as the initiator. Querying the bandwidth value requires HMAT support on the platform. Calling `umfMemspaceHighestBandwidthGet()` will return NULL if it's not supported. +#### Lowest latency memspace + +Memspace backed by an aggregated list of NUMA nodes identified as lowest latency after selecting each available NUMA node as the initiator. +Querying the latency value requires HMAT support on the platform. Calling `umfMemspaceLowestLatencyGet()` will return NULL if it's not supported. 
+ ### Proxy library UMF provides the UMF proxy library (`umf_proxy`) that makes it possible diff --git a/include/umf/memspace.h b/include/umf/memspace.h index 8acfa9325..694675974 100644 --- a/include/umf/memspace.h +++ b/include/umf/memspace.h @@ -57,12 +57,14 @@ umf_memspace_handle_t umfMemspaceHostAllGet(void); umf_memspace_handle_t umfMemspaceHighestCapacityGet(void); /// \brief Retrieves predefined highest bandwidth memspace. -/// \return highest bandwidth memspace handle on success or NULL on failure. +/// \return highest bandwidth memspace handle on success or NULL on +/// failure (no HMAT support). /// umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void); /// \brief Retrieves predefined lowest latency memspace. -/// \return lowest latency memspace handle on success or NULL on failure. +/// \return lowest latency memspace handle on success or NULL on +/// failure (no HMAT support). /// umf_memspace_handle_t umfMemspaceLowestLatencyGet(void);
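For reference, the sketch below shows how the entry point added in this series is meant to be consumed, using only the calls exercised by the tests above (umfMemspaceLowestLatencyGet, umfMemoryProviderCreateFromMemspace, umfMemoryProviderAlloc/Free/Destroy). It is an illustrative example, not part of the patch; the <umf/memory_provider.h> include and the main() wrapper are assumptions about the surrounding application, not something this series introduces.

    /* Illustrative consumer of the new "lowest latency" memspace. */
    #include <stdio.h>
    #include <string.h>

    #include <umf/memory_provider.h> /* assumed header for the provider API */
    #include <umf/memspace.h>

    int main(void) {
        /* NULL means the latency query is unsupported (e.g. no HMAT). */
        umf_memspace_handle_t memspace = umfMemspaceLowestLatencyGet();
        if (!memspace) {
            fprintf(stderr, "lowest latency memspace not available\n");
            return 1;
        }

        umf_memory_provider_handle_t provider = NULL;
        umf_result_t ret =
            umfMemoryProviderCreateFromMemspace(memspace, NULL, &provider);
        if (ret != UMF_RESULT_SUCCESS) {
            return 1;
        }

        void *ptr = NULL;
        ret = umfMemoryProviderAlloc(provider, 4096, 0, &ptr);
        if (ret == UMF_RESULT_SUCCESS) {
            memset(ptr, 0, 4096); /* touch the pages */
            umfMemoryProviderFree(provider, ptr, 4096);
        }

        umfMemoryProviderDestroy(provider);
        /* The predefined memspace is owned by the library (torn down in
         * umfTearDown), so it is not destroyed here. */
        return 0;
    }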