Skip to content

Commit a81d95a

Browse files
committed
Merge tag 'kvm-x86-asyncpf-6.9' of https://github.com/kvm-x86/linux into HEAD
KVM async page fault changes for 6.9: - Always flush the async page fault workqueue when a work item is being removed, especially during vCPU destruction, to ensure that there are no workers running in KVM code when all references to KVM-the-module are gone, i.e. to prevent a use-after-free if kvm.ko is unloaded. - Grab a reference to the VM's mm_struct in the async #PF worker itself instead of gifting the worker a reference, e.g. so that there's no need to remember to *conditionally* clean up after the worker.
2 parents 4d4c028 + c2744ed commit a81d95a

File tree

2 files changed

+49
-25
lines changed

2 files changed

+49
-25
lines changed

include/linux/kvm_host.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,6 @@ struct kvm_async_pf {
238238
struct list_head link;
239239
struct list_head queue;
240240
struct kvm_vcpu *vcpu;
241-
struct mm_struct *mm;
242241
gpa_t cr2_or_gpa;
243242
unsigned long addr;
244243
struct kvm_arch_async_pf arch;

virt/kvm/async_pf.c

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
4646
{
4747
struct kvm_async_pf *apf =
4848
container_of(work, struct kvm_async_pf, work);
49-
struct mm_struct *mm = apf->mm;
5049
struct kvm_vcpu *vcpu = apf->vcpu;
50+
struct mm_struct *mm = vcpu->kvm->mm;
5151
unsigned long addr = apf->addr;
5252
gpa_t cr2_or_gpa = apf->cr2_or_gpa;
5353
int locked = 1;
@@ -56,15 +56,24 @@ static void async_pf_execute(struct work_struct *work)
5656
might_sleep();
5757

5858
/*
59-
* This work is run asynchronously to the task which owns
60-
* mm and might be done in another context, so we must
61-
* access remotely.
59+
* Attempt to pin the VM's host address space, and simply skip gup() if
60+
* acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
61+
* holds a reference to its associated mm_struct until the very end of
62+
* kvm_destroy_vm(), i.e. the struct itself won't be freed before this
63+
* work item is fully processed.
6264
*/
63-
mmap_read_lock(mm);
64-
get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
65-
if (locked)
66-
mmap_read_unlock(mm);
65+
if (mmget_not_zero(mm)) {
66+
mmap_read_lock(mm);
67+
get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
68+
if (locked)
69+
mmap_read_unlock(mm);
70+
mmput(mm);
71+
}
6772

73+
/*
74+
* Notify and kick the vCPU even if faulting in the page failed, e.g.
75+
* so that the vCPU can retry the fault synchronously.
76+
*/
6877
if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
6978
kvm_arch_async_page_present(vcpu, apf);
7079

@@ -74,20 +83,39 @@ static void async_pf_execute(struct work_struct *work)
7483
apf->vcpu = NULL;
7584
spin_unlock(&vcpu->async_pf.lock);
7685

77-
if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
78-
kvm_arch_async_page_present_queued(vcpu);
79-
8086
/*
81-
* apf may be freed by kvm_check_async_pf_completion() after
82-
* this point
87+
* The apf struct may be freed by kvm_check_async_pf_completion() as
88+
* soon as the lock is dropped. Nullify it to prevent improper usage.
8389
*/
90+
apf = NULL;
91+
92+
if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
93+
kvm_arch_async_page_present_queued(vcpu);
8494

8595
trace_kvm_async_pf_completed(addr, cr2_or_gpa);
8696

8797
__kvm_vcpu_wake_up(vcpu);
98+
}
8899

89-
mmput(mm);
90-
kvm_put_kvm(vcpu->kvm);
100+
static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
101+
{
102+
/*
103+
* The async #PF is "done", but KVM must wait for the work item itself,
104+
* i.e. async_pf_execute(), to run to completion. If KVM is a module,
105+
* KVM must ensure *no* code owned by KVM (the module) can be run
106+
* after the last call to module_put(). Note, flushing the work item
107+
* is always required when the item is taken off the completion queue.
108+
* E.g. even if the vCPU handles the item in the "normal" path, the VM
109+
* could be terminated before async_pf_execute() completes.
110+
*
111+
* Wake-all events skip the queue and go straight to done, i.e. don't
112+
* need to be flushed (but sanity check that the work wasn't queued).
113+
*/
114+
if (work->wakeup_all)
115+
WARN_ON_ONCE(work->work.func);
116+
else
117+
flush_work(&work->work);
118+
kmem_cache_free(async_pf_cache, work);
91119
}
92120

93121
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
@@ -112,11 +140,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
112140
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
113141
flush_work(&work->work);
114142
#else
115-
if (cancel_work_sync(&work->work)) {
116-
mmput(work->mm);
117-
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
143+
if (cancel_work_sync(&work->work))
118144
kmem_cache_free(async_pf_cache, work);
119-
}
120145
#endif
121146
spin_lock(&vcpu->async_pf.lock);
122147
}
@@ -126,7 +151,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
126151
list_first_entry(&vcpu->async_pf.done,
127152
typeof(*work), link);
128153
list_del(&work->link);
129-
kmem_cache_free(async_pf_cache, work);
154+
155+
spin_unlock(&vcpu->async_pf.lock);
156+
kvm_flush_and_free_async_pf_work(work);
157+
spin_lock(&vcpu->async_pf.lock);
130158
}
131159
spin_unlock(&vcpu->async_pf.lock);
132160

@@ -151,7 +179,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
151179

152180
list_del(&work->queue);
153181
vcpu->async_pf.queued--;
154-
kmem_cache_free(async_pf_cache, work);
182+
kvm_flush_and_free_async_pf_work(work);
155183
}
156184
}
157185

@@ -184,9 +212,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
184212
work->cr2_or_gpa = cr2_or_gpa;
185213
work->addr = hva;
186214
work->arch = *arch;
187-
work->mm = current->mm;
188-
mmget(work->mm);
189-
kvm_get_kvm(work->vcpu->kvm);
190215

191216
INIT_WORK(&work->work, async_pf_execute);
192217

0 commit comments

Comments
 (0)