Skip to content

llama: move page cache via mbind to prevent cross-NUMA access #13335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@ target_include_directories(llama PRIVATE .)
target_include_directories(llama PUBLIC ../include)
target_compile_features (llama PRIVATE cxx_std_17) # don't bump

target_link_libraries(llama PUBLIC ggml)
# ggml is always required; libnuma is additionally needed on Linux, where
# llama-mmap.cpp calls mbind()/get_mempolicy() to keep mapped model pages
# on the local NUMA node.
target_link_libraries(llama PUBLIC ggml)
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    target_link_libraries(llama PUBLIC numa)
endif()

if (BUILD_SHARED_LIBS)
set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
38 changes: 38 additions & 0 deletions src/llama-mmap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
#include <cerrno>
#include <algorithm>

#ifdef __linux__
#include <numa.h>
#include <numaif.h>
#include <sched.h>
#endif

#ifdef __has_include
#if __has_include(<unistd.h>)
#include <unistd.h>
Expand Down Expand Up @@ -273,6 +279,27 @@ struct llama_mmap::impl {
#ifdef _POSIX_MAPPED_FILES
std::vector<std::pair<size_t, size_t>> mapped_fragments;

#ifdef __linux__
static void move_pages(void *addr, size_t size) {
int cpu, ret;
struct bitmask *nodemask = numa_allocate_nodemask();

/* Get memory policy of the calling thread. */
ret = get_mempolicy(nullptr, nodemask->maskp, nodemask->size, nullptr, 0);
if (ret || numa_bitmask_weight(nodemask) == 0) {
cpu = sched_getcpu();
if (cpu >= 0) {
numa_bitmask_clearall(nodemask);
numa_bitmask_setbit(nodemask, numa_node_of_cpu(cpu));
}
}
if (numa_bitmask_weight(nodemask) == 1) {
mbind(addr, size, MPOL_BIND, nodemask->maskp, nodemask->size, MPOL_MF_MOVE);
}
numa_free_nodemask(nodemask);
}
#endif

impl(struct llama_file * file, size_t prefetch, bool numa) {
size = file->size();
int fd = file->file_id();
Expand All @@ -291,6 +318,17 @@ struct llama_mmap::impl {
}

if (prefetch > 0) {
#ifdef __linux__
/*
* Given that we already pre-fault all memory when prefetch > 0, it is
* necessary to move any page cache pages that might have been
* instantiated during previous runs on different NUMA nodes. This call
* to move_pages() ensures that all memory-mapped pages are relocated
* according to the calling thread's memory policy or the CPU on which
* it is running.
*/
move_pages(addr, file->size());
#endif
if (posix_madvise(addr, std::min(file->size(), prefetch), POSIX_MADV_WILLNEED)) {
LLAMA_LOG_WARN("warning: posix_madvise(.., POSIX_MADV_WILLNEED) failed: %s\n",
strerror(errno));
Expand Down