Skip to content

Commit a31648f

Browse files
davidhildenbrand, mstsirkin
authored and committed
virtio-mem: keep retrying on offline_and_remove_memory() errors in Sub Block Mode (SBM)
In case offline_and_remove_memory() fails in SBM, we leave a completely unplugged Linux memory block sticking around until we try plugging memory again. We won't try removing that memory block again. offline_and_remove_memory() may, for example, fail if we're racing with another alloc_contig_range() user, if allocating temporary memory fails, or if some memory notifier rejected the offlining request. Let's handle that case better, by simply retrying to offline and remove such memory. Tested using CONFIG_MEMORY_NOTIFIER_ERROR_INJECT. Signed-off-by: David Hildenbrand <david@redhat.com> Message-Id: <20230713145551.2824980-4-david@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
1 parent ddf4098 commit a31648f

File tree

1 file changed

+73
-19
lines changed

1 file changed

+73
-19
lines changed

drivers/virtio/virtio_mem.c

Lines changed: 73 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,13 @@ struct virtio_mem {
168168
/* The number of subblocks per Linux memory block. */
169169
uint32_t sbs_per_mb;
170170

171+
/*
172+
* Some of the Linux memory blocks tracked as "partially
173+
* plugged" are completely unplugged and can be offlined
174+
* and removed -- which previously failed.
175+
*/
176+
bool have_unplugged_mb;
177+
171178
/* Summary of all memory block states. */
172179
unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
173180

@@ -765,6 +772,34 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
765772
return virtio_mem_offline_and_remove_memory(vm, addr, size);
766773
}
767774

775+
/*
776+
* Try (offlining and) removing memory from Linux in case all subblocks are
777+
* unplugged. Can be called on online and offline memory blocks.
778+
*
779+
* May modify the state of memory blocks in virtio-mem.
780+
*/
781+
static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm,
782+
unsigned long mb_id)
783+
{
784+
int rc;
785+
786+
/*
787+
* Once all subblocks of a memory block were unplugged, offline and
788+
* remove it.
789+
*/
790+
if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
791+
return 0;
792+
793+
/* offline_and_remove_memory() works for online and offline memory. */
794+
mutex_unlock(&vm->hotplug_mutex);
795+
rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
796+
mutex_lock(&vm->hotplug_mutex);
797+
if (!rc)
798+
virtio_mem_sbm_set_mb_state(vm, mb_id,
799+
VIRTIO_MEM_SBM_MB_UNUSED);
800+
return rc;
801+
}
802+
768803
/*
769804
* See virtio_mem_offline_and_remove_memory(): Try to offline and remove
770805
* all Linux memory blocks covered by the big block.
@@ -1988,20 +2023,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
19882023
}
19892024

19902025
unplugged:
1991-
/*
1992-
* Once all subblocks of a memory block were unplugged, offline and
1993-
* remove it. This will usually not fail, as no memory is in use
1994-
* anymore - however some other notifiers might NACK the request.
1995-
*/
1996-
if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
1997-
mutex_unlock(&vm->hotplug_mutex);
1998-
rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
1999-
mutex_lock(&vm->hotplug_mutex);
2000-
if (!rc)
2001-
virtio_mem_sbm_set_mb_state(vm, mb_id,
2002-
VIRTIO_MEM_SBM_MB_UNUSED);
2003-
}
2004-
2026+
rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id);
2027+
if (rc)
2028+
vm->sbm.have_unplugged_mb = 1;
2029+
/* Ignore errors, this is not critical. We'll retry later. */
20052030
return 0;
20062031
}
20072032

@@ -2253,12 +2278,13 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
22532278

22542279
/*
22552280
* Try to unplug all blocks that couldn't be unplugged before, for example,
2256-
* because the hypervisor was busy.
2281+
* because the hypervisor was busy. Further, offline and remove any memory
2282+
* blocks where we previously failed.
22572283
*/
2258-
static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
2284+
static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
22592285
{
22602286
unsigned long id;
2261-
int rc;
2287+
int rc = 0;
22622288

22632289
if (!vm->in_sbm) {
22642290
virtio_mem_bbm_for_each_bb(vm, id,
@@ -2280,6 +2306,27 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
22802306
VIRTIO_MEM_SBM_MB_UNUSED);
22812307
}
22822308

2309+
if (!vm->sbm.have_unplugged_mb)
2310+
return 0;
2311+
2312+
/*
2313+
* Let's retry (offlining and) removing completely unplugged Linux
2314+
* memory blocks.
2315+
*/
2316+
vm->sbm.have_unplugged_mb = false;
2317+
2318+
mutex_lock(&vm->hotplug_mutex);
2319+
virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL)
2320+
rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
2321+
virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL)
2322+
rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
2323+
virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
2324+
rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
2325+
mutex_unlock(&vm->hotplug_mutex);
2326+
2327+
if (rc)
2328+
vm->sbm.have_unplugged_mb = true;
2329+
/* Ignore errors, this is not critical. We'll retry later. */
22832330
return 0;
22842331
}
22852332

@@ -2361,9 +2408,9 @@ static void virtio_mem_run_wq(struct work_struct *work)
23612408
virtio_mem_refresh_config(vm);
23622409
}
23632410

2364-
/* Unplug any leftovers from previous runs */
2411+
/* Cleanup any leftovers from previous runs */
23652412
if (!rc)
2366-
rc = virtio_mem_unplug_pending_mb(vm);
2413+
rc = virtio_mem_cleanup_pending_mb(vm);
23672414

23682415
if (!rc && vm->requested_size != vm->plugged_size) {
23692416
if (vm->requested_size > vm->plugged_size) {
@@ -2375,6 +2422,13 @@ static void virtio_mem_run_wq(struct work_struct *work)
23752422
}
23762423
}
23772424

2425+
/*
2426+
* Keep retrying to offline and remove completely unplugged Linux
2427+
* memory blocks.
2428+
*/
2429+
if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
2430+
rc = -EBUSY;
2431+
23782432
switch (rc) {
23792433
case 0:
23802434
vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;

0 commit comments

Comments
 (0)