From 0c2e8013d87bfd6c11a9f890fb2d03c51139719b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 9 Nov 2025 16:24:48 +0000 Subject: [PATCH 1/4] coredump: fix handling of num_pages This patch fixes the following error: $ sudo make -C test/others/criu-coredump run ... Traceback (most recent call last): File "/home/circleci/criu/coredump/coredump", line 55, in main() File "/home/circleci/criu/coredump/coredump", line 47, in main coredump(opts) File "/home/circleci/criu/coredump/coredump", line 14, in coredump cores = generator(os.path.realpath(opts['in'])) File "/home/circleci/criu/coredump/criu_coredump/coredump.py", line 192, in __call__ self.coredumps[pid] = self._gen_coredump(pid) File "/home/circleci/criu/coredump/criu_coredump/coredump.py", line 214, in _gen_coredump cd.vmas = self._gen_vmas(pid) File "/home/circleci/criu/coredump/criu_coredump/coredump.py", line 992, in _gen_vmas v.data = self._gen_mem_chunk(pid, vma, v.filesz) File "/home/circleci/criu/coredump/criu_coredump/coredump.py", line 879, in _gen_mem_chunk page_mem = self._get_page(pid, page_no) File "/home/circleci/criu/coredump/criu_coredump/coredump.py", line 797, in _get_page num_pages = m.get("nr_pages", m.compat_nr_pages) AttributeError: 'dict' object has no attribute 'compat_nr_pages' + exit 1 make[1]: *** [Makefile:3: run] Error 1 Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- coredump/criu_coredump/coredump.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 3c9cd45aaa..acb806ace7 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -794,7 +794,8 @@ def _get_page(self, pid, page_no): off = 0 # in pages for m in pagemap[1:]: found = False - num_pages = m.get("nr_pages", m.compat_nr_pages) + num_pages = m.get("nr_pages", m["compat_nr_pages"]) + for i in range(num_pages): if m["vaddr"] + i * PAGESIZE == page_no * PAGESIZE: found = True From fab840d6d9509f092a80ab77df8c0a5a17dd8d53 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 8 Nov 2025 15:57:22 +0000 Subject: [PATCH 2/4] Revert "plugins/amdgpu: Implement parallel restore" This functionality (#2527) is being reverted and excluded from this release due to issue #2812. It will be included in a subsequent release once all associated issues are resolved. Signed-off-by: Andrei Vagin --- Documentation/criu-amdgpu-plugin.txt | 1 - plugins/amdgpu/Makefile | 2 +- plugins/amdgpu/README.md | 23 +- plugins/amdgpu/amdgpu_plugin.c | 418 +++--------------------- plugins/amdgpu/amdgpu_plugin_topology.c | 2 +- plugins/amdgpu/amdgpu_plugin_topology.h | 1 - plugins/amdgpu/amdgpu_socket_utils.c | 320 ------------------ plugins/amdgpu/amdgpu_socket_utils.h | 54 --- 8 files changed, 51 insertions(+), 770 deletions(-) delete mode 100644 plugins/amdgpu/amdgpu_socket_utils.c delete mode 100644 plugins/amdgpu/amdgpu_socket_utils.h diff --git a/Documentation/criu-amdgpu-plugin.txt b/Documentation/criu-amdgpu-plugin.txt index fe76fc3bc6..68803f3dbc 100644 --- a/Documentation/criu-amdgpu-plugin.txt +++ b/Documentation/criu-amdgpu-plugin.txt @@ -15,7 +15,6 @@ Checkpoint / Restore inside a docker container Pytorch Tensorflow Using CRIU Image Streamer -Parallel Restore DESCRIPTION ----------- diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile index 870a039cdb..3d55f8bb48 100644 --- a/plugins/amdgpu/Makefile +++ b/plugins/amdgpu/Makefile @@ -27,7 +27,7 @@ endif criu-amdgpu.pb-c.c: criu-amdgpu.proto protoc --proto_path=. --c_out=. criu-amdgpu.proto -amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c amdgpu_socket_utils.c +amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) amdgpu_plugin_clean: diff --git a/plugins/amdgpu/README.md b/plugins/amdgpu/README.md index b808fbc4f0..1078eafe6f 100644 --- a/plugins/amdgpu/README.md +++ b/plugins/amdgpu/README.md @@ -3,8 +3,7 @@ Supporting ROCm with CRIU _Felix Kuehling _
_Rajneesh Bardwaj _
-_David Yat Sin _
-_Yanning Yang _ +_David Yat Sin _ # Introduction @@ -225,26 +224,6 @@ to resume execution on the GPUs. *This new plugin is enabled by the new hook `__RESUME_DEVICES_LATE` in our RFC patch series.* -## Restoring BO content in parallel - -Restoring the BO content is an important part in the restore of GPU state and -usually takes a significant amount of time. A possible location for this -procedure is the `cr_plugin_restore_file` hook. However, restoring in this hook -blocks the target process from performing other restore operations, which -hinders further optimization of the restore process. - -Therefore, a new plugin hook that runs in the master restore process is -introduced, and it interacts with the `cr_plugin_restore_file` hook to complete -the restore of BO content. Specifically, the target process only needs to send -the relevant BOs to the master restore process, while this new hook handles all -the restore of buffer objects. Through this method, during the restore of the BO -content, the target process can perform other restore operations, thus -accelerating the restore procedure. This is an implementation of the gCROP -method proposed in the ACM SoCC'24 paper: [On-demand and Parallel -Checkpoint/Restore for GPU Applications](https://dl.acm.org/doi/10.1145/3698038.3698510). - -*This optimization technique is enabled by the `__POST_FORKING` hook.* - ## Other CRIU changes In addition to the new plugins, we need to make some changes to CRIU itself to diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index 69194fbc79..96c0861628 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -28,13 +28,11 @@ #include "xmalloc.h" #include "criu-log.h" #include "files.h" -#include "pstree.h" #include "common/list.h" #include "amdgpu_plugin_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" -#include "amdgpu_socket_utils.h" #include "img-streamer.h" #include "image.h" @@ -66,18 +64,6 @@ bool plugin_added_to_inventory = false; bool plugin_disabled = false; -/* - * In the case of a single process (common case), this optimization can effectively - * reduce the restore latency with parallel restore. In the case of multiple processes, - * states are already restored in parallel within different processes. Therefore, this - * optimization does not introduce further improvement and will be disabled by default - * in this case. The flag, parallel_disabled, is used to control whether the - * optimization is enabled or disabled. - */ -bool parallel_disabled = false; - -pthread_t parallel_thread = 0; -int parallel_thread_result = 0; /**************************************************************************************************/ /* Call ioctl, restarting if it is interrupted */ @@ -365,15 +351,6 @@ int amdgpu_plugin_init(int stage) maps_init(&restore_maps); if (stage == CR_PLUGIN_STAGE__RESTORE) { - if (has_children(root_item)) { - pr_info("Parallel restore disabled\n"); - parallel_disabled = true; - } else { - if (install_parallel_sock() < 0) { - pr_err("Failed to install parallel socket\n"); - return -1; - } - } /* Default Values */ kfd_fw_version_check = true; kfd_sdma_fw_version_check = true; @@ -1462,9 +1439,14 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e) { - struct thread_data *thread_datas = NULL; + struct thread_data *thread_datas; int thread_i, ret = 0; - int offset = 0; + + thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); + if (!thread_datas) { + ret = -ENOMEM; + goto exit; + } for (int i = 0; i < e->num_of_bos; i++) { struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i]; @@ -1507,101 +1489,56 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf } } - if (!parallel_disabled) { - parallel_restore_cmd restore_cmd; - pr_info("Begin to send parallel restore cmd\n"); - ret = init_parallel_restore_cmd(e->num_of_bos, id, e->num_of_gpus, &restore_cmd); - if (ret) - goto exit_parallel; - - for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { - uint32_t target_gpu_id; - struct tp_node *dev; + thread_i = 0; + for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { + struct tp_node *dev; + int ret_thread = 0; + uint32_t target_gpu_id; - if (!e->device_entries[i]->gpu_id) - continue; + if (!e->device_entries[i]->gpu_id) + continue; - target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); - dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); - if (!dev) { - pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); - ret = -ENODEV; - goto exit_parallel; - } - parallel_restore_gpu_id_add(e->device_entries[i]->gpu_id, dev->drm_render_minor, &restore_cmd); + /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ + target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); - for (int j = 0; j < e->num_of_bos; j++) { - if (bo_buckets[j].gpu_id != e->device_entries[i]->gpu_id) - continue; - if (bo_buckets[j].alloc_flags & - (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { - parallel_restore_bo_add(bo_buckets[j].dmabuf_fd, bo_buckets[j].gpu_id, - bo_buckets[j].size, offset, &restore_cmd); - offset += bo_buckets[j].size; - } - } - } - ret = send_parallel_restore_cmd(&restore_cmd); -exit_parallel: - free_parallel_restore_cmd(&restore_cmd); - } else { - thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); - if (!thread_datas) { - ret = -ENOMEM; + /* We need the fd for actual_gpu_id */ + dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); + if (!dev) { + pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); + ret = -ENODEV; goto exit; } - thread_i = 0; - for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { - struct tp_node *dev; - int ret_thread = 0; - uint32_t target_gpu_id; - - if (!e->device_entries[i]->gpu_id) - continue; - - /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ - target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); - - /* We need the fd for actual_gpu_id */ - dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); - if (!dev) { - pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); - ret = -ENODEV; - goto exit; - } - - thread_datas[thread_i].id = id; - thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id; - thread_datas[thread_i].bo_buckets = bo_buckets; - thread_datas[thread_i].bo_entries = e->bo_entries; - thread_datas[thread_i].pid = e->pid; - thread_datas[thread_i].num_of_bos = e->num_of_bos; + thread_datas[thread_i].id = id; + thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id; + thread_datas[thread_i].bo_buckets = bo_buckets; + thread_datas[thread_i].bo_entries = e->bo_entries; + thread_datas[thread_i].pid = e->pid; + thread_datas[thread_i].num_of_bos = e->num_of_bos; - thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); - if (thread_datas[thread_i].drm_fd < 0) { - ret = -thread_datas[thread_i].drm_fd; - goto exit; - } + thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); + if (thread_datas[thread_i].drm_fd < 0) { + ret = -thread_datas[thread_i].drm_fd; + goto exit; + } - ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, - (void *)&thread_datas[thread_i]); - if (ret_thread) { - pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); - ret = -ret_thread; - goto exit; - } - thread_i++; + ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, + (void *)&thread_datas[thread_i]); + if (ret_thread) { + pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); + ret = -ret_thread; + goto exit; } + thread_i++; + } - for (int i = 0; i < e->num_of_gpus; i++) { - pthread_join(thread_datas[i].thread, NULL); - pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); + for (int i = 0; i < e->num_of_gpus; i++) { + pthread_join(thread_datas[i].thread, NULL); + pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); - if (thread_datas[i].ret) { - ret = thread_datas[i].ret; - goto exit; - } + if (thread_datas[i].ret) { + ret = thread_datas[i].ret; + goto exit; } } exit: @@ -1609,8 +1546,8 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD) close(bo_buckets[i].dmabuf_fd); } - if (thread_datas) - xfree(thread_datas); + + xfree(thread_datas); return ret; } @@ -1899,24 +1836,6 @@ int amdgpu_plugin_resume_devices_late(int target_pid) if (plugin_disabled) return -ENOTSUP; - if (!parallel_disabled) { - pr_info("Close parallel restore server\n"); - if (close_parallel_restore_server()) { - pr_err("Close parallel restore server fail\n"); - return -1; - } - - exit_code = pthread_join(parallel_thread, NULL); - if (exit_code) { - pr_err("Failed to join parallel thread ret:%d\n", exit_code); - return -1; - } - if (parallel_thread_result) { - pr_err("Parallel restore fail\n"); - return parallel_thread_result; - } - } - pr_info("Inside %s for target pid = %d\n", __func__, target_pid); fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -1943,244 +1862,3 @@ int amdgpu_plugin_resume_devices_late(int target_pid) } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, amdgpu_plugin_resume_devices_late) - -int sdma_copy_bo_helper(uint64_t size, int fd, FILE *storage_fp, void *buffer, size_t buffer_size, - amdgpu_device_handle h_dev, uint64_t max_copy_size, enum sdma_op_type type) -{ - return sdma_copy_bo((struct kfd_criu_bo_bucket){ 0, size, 0, 0, 0, 0, fd, 0 }, storage_fp, buffer, - buffer_size, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE); -} - -int init_dev(int dev_minor, amdgpu_device_handle *h_dev, uint64_t *max_copy_size) -{ - int ret = 0; - int drm_fd = -1; - uint32_t major, minor; - - struct amdgpu_gpu_info gpu_info = { 0 }; - - drm_fd = open_drm_render_device(dev_minor); - if (drm_fd < 0) { - return drm_fd; - } - - ret = amdgpu_device_initialize(drm_fd, &major, &minor, h_dev); - if (ret) { - pr_perror("Failed to initialize device"); - goto err; - } - - ret = amdgpu_query_gpu_info(*h_dev, &gpu_info); - if (ret) { - pr_perror("failed to query gpuinfo via libdrm"); - goto err; - } - *max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE : - SDMA_LINEAR_COPY_MAX_SIZE - 1; - return 0; -err: - amdgpu_device_deinitialize(*h_dev); - return ret; -} - -FILE *get_bo_contents_fp(int id, int gpu_id, size_t tot_size) -{ - char img_path[PATH_MAX]; - size_t image_size = 0; - FILE *bo_contents_fp = NULL; - - snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, id, gpu_id); - bo_contents_fp = open_img_file(img_path, false, &image_size); - if (!bo_contents_fp) { - pr_perror("Cannot fopen %s", img_path); - return NULL; - } - - if (tot_size != image_size) { - pr_err("%s size mismatch (current:%ld:expected:%ld)\n", img_path, image_size, tot_size); - fclose(bo_contents_fp); - return NULL; - } - return bo_contents_fp; -} - -struct parallel_thread_data { - pthread_t thread; - uint32_t gpu_id; - int minor; - parallel_restore_cmd *restore_cmd; - int ret; -}; - -void *parallel_restore_bo_contents(void *_thread_data) -{ - struct parallel_thread_data *thread_data = (struct parallel_thread_data *)_thread_data; - amdgpu_device_handle h_dev; - uint64_t max_copy_size; - size_t total_bo_size = 0, max_bo_size = 0, buffer_size = 0; - FILE *bo_contents_fp = NULL; - parallel_restore_entry *entry; - parallel_restore_cmd *restore_cmd = thread_data->restore_cmd; - int ret = 0; - int offset = 0; - void *buffer = NULL; - - ret = init_dev(thread_data->minor, &h_dev, &max_copy_size); - if (ret) { - goto err; - } - - for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) { - if (restore_cmd->entries[i].gpu_id == thread_data->gpu_id) { - total_bo_size += restore_cmd->entries[i].size; - max_bo_size = max(restore_cmd->entries[i].size, max_bo_size); - } - } - - buffer_size = kfd_max_buffer_size > 0 ? min(kfd_max_buffer_size, max_bo_size) : max_bo_size; - - bo_contents_fp = get_bo_contents_fp(restore_cmd->cmd_head.id, thread_data->gpu_id, total_bo_size); - if (bo_contents_fp == NULL) { - ret = -1; - goto err_sdma; - } - offset = ftell(bo_contents_fp); - - posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size); - if (!buffer) { - pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE."); - ret = -ENOMEM; - goto err_sdma; - } - - for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) { - if (restore_cmd->entries[i].gpu_id != thread_data->gpu_id) - continue; - - entry = &restore_cmd->entries[i]; - fseek(bo_contents_fp, entry->read_offset + offset, SEEK_SET); - ret = sdma_copy_bo_helper(entry->size, restore_cmd->fds_write[entry->write_id], bo_contents_fp, buffer, - buffer_size, h_dev, max_copy_size, SDMA_OP_VRAM_WRITE); - if (ret) { - pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); - goto err_sdma; - } - } - -err_sdma: - if (bo_contents_fp) - fclose(bo_contents_fp); - if (buffer) - xfree(buffer); - amdgpu_device_deinitialize(h_dev); -err: - thread_data->ret = ret; - return NULL; -} - -void *restore_device_parallel_worker(void *arg) -{ - while (1) { - parallel_restore_cmd restore_cmd = { 0 }; - struct parallel_thread_data *thread_datas = NULL; - int ret; - int error_occurred = 0, join_ret = 0, created_threads = 0; - - ret = recv_parallel_restore_cmd(&restore_cmd); - if (ret) { - if (ret == 1) { - *(int *)arg = 0; - goto exit; - } - goto err; - } - - thread_datas = xzalloc(sizeof(*thread_datas) * restore_cmd.cmd_head.gpu_num); - if (!thread_datas) { - ret = -ENOMEM; - goto err; - } - - for (; created_threads < restore_cmd.cmd_head.gpu_num; created_threads++) { - thread_datas[created_threads].gpu_id = restore_cmd.gpu_ids[created_threads].gpu_id; - thread_datas[created_threads].minor = restore_cmd.gpu_ids[created_threads].minor; - thread_datas[created_threads].restore_cmd = &restore_cmd; - - ret = pthread_create(&thread_datas[created_threads].thread, NULL, parallel_restore_bo_contents, - (void *)&thread_datas[created_threads]); - if (ret) { - pr_err("Failed to create thread[0x%x] ret:%d\n", thread_datas[created_threads].gpu_id, ret); - error_occurred = 1; - break; - } - } - - for (int i = 0; i < created_threads; i++) { - join_ret = pthread_join(thread_datas[i].thread, NULL); - if (join_ret != 0) { - pr_err("pthread_join failed for Thread[0x%x] ret:%d\n", - thread_datas[i].gpu_id, join_ret); - if (!error_occurred) { - ret = join_ret; - error_occurred = 1; - } - } - - pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); - - /* Check thread return value */ - if (thread_datas[i].ret && !error_occurred) { - ret = thread_datas[i].ret; - error_occurred = 1; - } - } - - if (thread_datas) - xfree(thread_datas); -err: - free_parallel_restore_cmd(&restore_cmd); - - if (ret) { - *(int *)arg = ret; - return NULL; - } - } -exit: - return NULL; -} - -/* - * While the background thread is running, some processing functions (e.g., stop_cgroupd) - * in the main thread need to block SIGCHLD. To prevent interference from this background - * thread, SIGCHLD is blocked in this thread. - */ -static int back_thread_create(pthread_t *newthread, void *(*f)(void *), void *arg) -{ - int ret = 0; - sigset_t blockmask, oldmask; - - sigemptyset(&blockmask); - sigaddset(&blockmask, SIGCHLD); - sigprocmask(SIG_BLOCK, &blockmask, &oldmask); - - ret = pthread_create(newthread, NULL, f, arg); - if (ret) { - pr_err("Create worker thread fail: %d\n", ret); - return -1; - } - - sigprocmask(SIG_SETMASK, &oldmask, NULL); - return 0; -} - -int amdgpu_plugin_post_forking(void) -{ - if (plugin_disabled) - return -ENOTSUP; - - if (parallel_disabled) - return 0; - - return back_thread_create(¶llel_thread, restore_device_parallel_worker, ¶llel_thread_result); -} -CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking) \ No newline at end of file diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c index 730f2e0284..5b4396a0cc 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.c +++ b/plugins/amdgpu/amdgpu_plugin_topology.c @@ -45,7 +45,7 @@ bool kfd_capability_check = true; */ int fd_next = -1; -int open_drm_render_device(int minor) +static int open_drm_render_device(int minor) { char path[128]; int fd, ret_fd; diff --git a/plugins/amdgpu/amdgpu_plugin_topology.h b/plugins/amdgpu/amdgpu_plugin_topology.h index e19f8e7ce9..c890e3ddae 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.h +++ b/plugins/amdgpu/amdgpu_plugin_topology.h @@ -118,7 +118,6 @@ struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32 struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor); struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index); -int open_drm_render_device(int minor); int node_get_drm_render_device(struct tp_node *node); void sys_close_drm_render_devices(struct tp_system *sys); diff --git a/plugins/amdgpu/amdgpu_socket_utils.c b/plugins/amdgpu/amdgpu_socket_utils.c deleted file mode 100644 index c8bf6d1ba3..0000000000 --- a/plugins/amdgpu/amdgpu_socket_utils.c +++ /dev/null @@ -1,320 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "amdgpu_socket_utils.h" -#include "criu-log.h" -#include "common/scm.h" -#include "fdstore.h" -#include "util-pie.h" -#include "util.h" - -int parallel_socket_addr_len; -struct sockaddr_un parallel_socket_addr; -int parallel_socket_id = 0; - -static void amdgpu_socket_name_gen(struct sockaddr_un *addr, int *len) -{ - addr->sun_family = AF_UNIX; - snprintf(addr->sun_path, UNIX_PATH_MAX, "x/criu-amdgpu-parallel-%s", criu_run_id); - *len = SUN_LEN(addr); - *addr->sun_path = '\0'; -} - -int install_parallel_sock(void) -{ - int ret = 0; - int sock_fd; - - sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (sock_fd < 0) { - pr_perror("socket creation failed"); - return -1; - } - - amdgpu_socket_name_gen(¶llel_socket_addr, ¶llel_socket_addr_len); - ret = bind(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); - if (ret < 0) { - pr_perror("bind failed"); - goto err; - } - - ret = listen(sock_fd, SOMAXCONN); - if (ret < 0) { - pr_perror("listen failed"); - goto err; - } - - parallel_socket_id = fdstore_add(sock_fd); - if (parallel_socket_id < 0) { - ret = -1; - goto err; - } -err: - close(sock_fd); - return ret; -} - -void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset, - parallel_restore_cmd *restore_cmd) -{ - parallel_restore_entry *restore_entry = &restore_cmd->entries[restore_cmd->cmd_head.entry_num]; - restore_entry->gpu_id = gpu_id; - restore_entry->write_id = restore_cmd->cmd_head.fd_write_num; - restore_entry->write_offset = 0; - restore_entry->read_offset = offset; - restore_entry->size = size; - - restore_cmd->fds_write[restore_cmd->cmd_head.fd_write_num] = dmabuf_fd; - - restore_cmd->cmd_head.entry_num += 1; - restore_cmd->cmd_head.fd_write_num += 1; -} - -void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd) -{ - restore_cmd->gpu_ids[restore_cmd->cmd_head.gpu_num] = (parallel_gpu_info){ gpu_id, minor }; - restore_cmd->cmd_head.gpu_num += 1; -} - -static int send_metadata(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send(sock_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { - pr_perror("Send parallel restore command head fail"); - return -1; - } - return 0; -} - -static int send_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) { - pr_perror("Send GPU ids of parallel restore command fail"); - return -1; - } - return 0; -} - -static int send_cmds(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send(sock_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) { - pr_perror("Send parallel restore command fail"); - return -1; - } - return 0; -} - -static int send_dmabuf_fds(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send_fds(sock_fd, NULL, 0, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) { - pr_perror("Send dmabuf fds fail"); - return -1; - } - return 0; -} - -int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) -{ - int sock_fd; - int ret = 0; - - sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (sock_fd < 0) { - pr_perror("Socket creation failed"); - return -1; - } - - ret = connect(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); - if (ret < 0) { - pr_perror("Connect failed"); - goto err; - } - - ret = send_metadata(sock_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = send_gpu_ids(sock_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = send_cmds(sock_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = send_dmabuf_fds(sock_fd, restore_cmd); - -err: - close(sock_fd); - return ret; -} - -int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd) -{ - restore_cmd->cmd_head.id = id; - restore_cmd->cmd_head.fd_write_num = 0; - restore_cmd->cmd_head.entry_num = 0; - restore_cmd->cmd_head.gpu_num = 0; - - restore_cmd->gpu_ids = xzalloc(gpu_num * sizeof(parallel_gpu_info)); - if (!restore_cmd->gpu_ids) - return -ENOMEM; - restore_cmd->fds_write = xzalloc(num * sizeof(int)); - if (!restore_cmd->fds_write) - return -ENOMEM; - restore_cmd->entries = xzalloc(num * sizeof(parallel_restore_entry)); - if (!restore_cmd->entries) - return -ENOMEM; - return 0; -} - -void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) -{ - if (restore_cmd->gpu_ids) - xfree(restore_cmd->gpu_ids); - if (restore_cmd->fds_write) - xfree(restore_cmd->fds_write); - if (restore_cmd->entries) - xfree(restore_cmd->entries); -} - -static int init_parallel_restore_cmd_by_head(parallel_restore_cmd *restore_cmd) -{ - restore_cmd->gpu_ids = xzalloc(restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info)); - if (!restore_cmd->gpu_ids) - return -ENOMEM; - restore_cmd->fds_write = xzalloc(restore_cmd->cmd_head.fd_write_num * sizeof(int)); - if (!restore_cmd->fds_write) - return -ENOMEM; - restore_cmd->entries = xzalloc(restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry)); - if (!restore_cmd->entries) - return -ENOMEM; - return 0; -} - -static int check_quit_cmd(parallel_restore_cmd *restore_cmd) -{ - return restore_cmd->cmd_head.fd_write_num == 0; -} - -static int recv_metadata(int client_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv(client_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { - pr_perror("Recv parallel restore command head fail"); - return -1; - } - return 0; -} - -static int recv_cmds(int client_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv(client_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) { - pr_perror("Recv parallel restore command fail"); - return -1; - } - return 0; -} - -static int recv_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) { - pr_perror("Send GPU ids of parallel restore command fail"); - return -1; - } - return 0; -} - -static int recv_dmabuf_fds(int client_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv_fds(client_fd, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) { - pr_perror("Recv dmabuf fds fail"); - return -1; - } - return 0; -} - -int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) -{ - int sock_fd, client_fd; - int ret = 0; - - sock_fd = fdstore_get(parallel_socket_id); - if (sock_fd < 0) - return -1; - - client_fd = accept(sock_fd, NULL, NULL); - if (client_fd < 0) { - ret = client_fd; - goto err_accept; - } - - ret = recv_metadata(client_fd, restore_cmd); - if (ret) { - goto err; - } - - // Return 1 to quit - if (check_quit_cmd(restore_cmd)) { - ret = 1; - goto err; - } - - ret = init_parallel_restore_cmd_by_head(restore_cmd); - if (ret) { - goto err; - } - - ret = recv_gpu_ids(client_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = recv_cmds(client_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = recv_dmabuf_fds(client_fd, restore_cmd); - -err: - close(client_fd); -err_accept: - close(sock_fd); - return ret; -} - -int close_parallel_restore_server(void) -{ - int sock_fd; - int ret = 0; - parallel_restore_cmd_head cmd_head; - - sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (sock_fd < 0) { - pr_perror("Socket creation failed"); - return -1; - } - - ret = connect(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); - if (ret < 0) { - pr_perror("Connect failed"); - goto err; - } - - memset(&cmd_head, 0, sizeof(parallel_restore_cmd_head)); - if (send(sock_fd, &cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { - pr_perror("Send parallel restore command head fail"); - return -1; - } - -err: - close(sock_fd); - return ret; -} \ No newline at end of file diff --git a/plugins/amdgpu/amdgpu_socket_utils.h b/plugins/amdgpu/amdgpu_socket_utils.h deleted file mode 100644 index d7200c6bd5..0000000000 --- a/plugins/amdgpu/amdgpu_socket_utils.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__ -#define __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__ - -typedef struct { - int id; - int fd_write_num; /* The number of buffer objects to be restored. */ - int entry_num; /* The number of restore commands.*/ - int gpu_num; -} parallel_restore_cmd_head; - -typedef struct { - int gpu_id; - int minor; -} parallel_gpu_info; - -typedef struct { - int gpu_id; - int write_id; - uint64_t read_offset; - uint64_t write_offset; - uint64_t size; -} parallel_restore_entry; - -typedef struct { - parallel_restore_cmd_head cmd_head; - int *fds_write; - parallel_gpu_info *gpu_ids; - parallel_restore_entry *entries; -} parallel_restore_cmd; - -/* - * For parallel_restore, a background thread in the main CRIU process is used to restore the GPU - * buffer object. However, initially, the ownership of these buffer objects and the metadata for - * restoration are all with the target process. Therefore, we introduce a series of functions to - * help the target process send these tasks to the main CRIU process. - */ -int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd); - -void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); - -int install_parallel_sock(void); - -int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); - -int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); - -void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset, - parallel_restore_cmd *restore_cmd); - -void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd); - -int close_parallel_restore_server(void); - -#endif \ No newline at end of file From e32958a2c40174f9c5e0632c8a6166e141108a6d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 9 Nov 2025 20:26:50 -0800 Subject: [PATCH 3/4] test/vdso-proxy: handle merged vma-s When we compare two list of vma-s, we need to take into account that some of them could be merged. Fixes #12286 Signed-off-by: Andrei Vagin --- test/zdtm/static/vdso-proxy.c | 51 +++++++++++++++-------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/test/zdtm/static/vdso-proxy.c b/test/zdtm/static/vdso-proxy.c index 43334974fd..a53e6cdc0d 100644 --- a/test/zdtm/static/vdso-proxy.c +++ b/test/zdtm/static/vdso-proxy.c @@ -70,6 +70,7 @@ static int parse_maps(struct vm_area *vmas) #endif v->is_vvar_or_vdso |= strstr(buf, "[vdso]") != NULL; v->is_vvar_or_vdso |= strstr(buf, "[vvar]") != NULL; + v->is_vvar_or_vdso |= strstr(buf, "[vvar_vclock]") != NULL; test_msg("[NOTE]\tVMA: [%#" PRIx64 ", %#" PRIx64 "]\n", v->start, v->end); } @@ -86,42 +87,35 @@ static int parse_maps(struct vm_area *vmas) return i; } -int compare_vmas(struct vm_area *vmax, struct vm_area *vmay) -{ - if (vmax->start > vmay->start) - return 1; - if (vmax->start < vmay->start) - return -1; - if (vmax->end > vmay->end) - return 1; - if (vmax->end < vmay->end) - return -1; - - return 0; -} - -static int check_vvar_vdso(struct vm_area *before, struct vm_area *after) +static int check_vvar_vdso(struct vm_area *before, int nr_before, struct vm_area *after, int nr_after) { int i, j = 0; - for (i = 0; i < MAX_VMAS && j < MAX_VMAS; i++, j++) { - int cmp = compare_vmas(&before[i], &after[j]); - - if (cmp == 0) - continue; - - if (cmp < 0) { /* Lost mapping */ + for (i = 0, j = 0; i < nr_before || j < nr_after;) { + if (j == nr_after || before[i].start < after[j].start) { test_msg("[NOTE]\tLost mapping: %#" PRIx64 "-%#" PRIx64 "\n", before[i].start, before[i].end); - j--; if (before[i].is_vvar_or_vdso) { fail("Lost vvar/vdso mapping"); return -1; } + i++; continue; } - - test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end); - i--; + if (i == nr_before || before[i].start > after[j].start) { + test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end); + j++; + continue; + } + if (before[i].end == after[j].end) { + i++; + j++; + } else if (before[i].end > after[j].end) { + before[i].start = after[j].end; + j++; + } else { + after[j].start = before[i].end; + i++; + } } return 0; @@ -129,11 +123,10 @@ static int check_vvar_vdso(struct vm_area *before, struct vm_area *after) static struct vm_area vmas_before[MAX_VMAS]; static struct vm_area vmas_after[MAX_VMAS]; +static int nr_before, nr_after; int main(int argc, char *argv[]) { - int nr_before, nr_after; - test_init(argc, argv); test_msg("[NOTE]\tMappings before:\n"); @@ -154,7 +147,7 @@ int main(int argc, char *argv[]) } /* After restore vDSO/VVAR blobs must remain in the old place. */ - if (check_vvar_vdso(vmas_before, vmas_after)) + if (check_vvar_vdso(vmas_before, nr_before, vmas_after, nr_after)) return -1; if (nr_before + 2 < nr_after) { From cd2d534b05d0e15a60188000d8896277b297c544 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 8 Nov 2025 06:53:19 +0000 Subject: [PATCH 4/4] criu: Version 4.2 (CRIUTIBILITY) Major changes: * plugins/amdgpu: Implement parallel restore * Handle processes with uprobes vma * Fix: getsockopt usage for SO_PASSCRED/SO_PASSSEC on Linux 6.16 * Relax ELF magic check to support MIPS libraries * pagemap: prevent integer overflow in pagemap_len This release's name is a nod to the growing challenge we face in maintaining compatibility across the rapidly evolving Linux kernel ecosystem. The full changelog can be found here: https://criu.org/Download/criu/4.2. Signed-off-by: Andrei Vagin --- Makefile.versions | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.versions b/Makefile.versions index 0b1a46a16f..3e6c9ed220 100644 --- a/Makefile.versions +++ b/Makefile.versions @@ -1,10 +1,10 @@ # # CRIU version. CRIU_VERSION_MAJOR := 4 -CRIU_VERSION_MINOR := 1 -CRIU_VERSION_SUBLEVEL := 1 +CRIU_VERSION_MINOR := 2 +CRIU_VERSION_SUBLEVEL := CRIU_VERSION_EXTRA := -CRIU_VERSION_NAME := CRISCV +CRIU_VERSION_NAME := CRIUTIBILITY CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA)) export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL