@@ -46,8 +46,8 @@ static void async_pf_execute(struct work_struct *work)
 {
 	struct kvm_async_pf *apf =
 		container_of(work, struct kvm_async_pf, work);
-	struct mm_struct *mm = apf->mm;
 	struct kvm_vcpu *vcpu = apf->vcpu;
+	struct mm_struct *mm = vcpu->kvm->mm;
 	unsigned long addr = apf->addr;
 	gpa_t cr2_or_gpa = apf->cr2_or_gpa;
 	int locked = 1;
@@ -56,15 +56,24 @@ static void async_pf_execute(struct work_struct *work)
 	might_sleep();
 
 	/*
-	 * This work is run asynchronously to the task which owns
-	 * mm and might be done in another context, so we must
-	 * access remotely.
+	 * Attempt to pin the VM's host address space, and simply skip gup() if
+	 * acquiring a pin fails, i.e. if the process is exiting.  Note, KVM
+	 * holds a reference to its associated mm_struct until the very end of
+	 * kvm_destroy_vm(), i.e. the struct itself won't be freed before this
+	 * work item is fully processed.
 	 */
-	mmap_read_lock(mm);
-	get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
-	if (locked)
-		mmap_read_unlock(mm);
+	if (mmget_not_zero(mm)) {
+		mmap_read_lock(mm);
+		get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
+		if (locked)
+			mmap_read_unlock(mm);
+		mmput(mm);
+	}
 
+	/*
+	 * Notify and kick the vCPU even if faulting in the page failed, e.g.
+	 * so that the vCPU can retry the fault synchronously.
+	 */
 	if (IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC))
 		kvm_arch_async_page_present(vcpu, apf);
 
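The mmget_not_zero()/mmput() pairing above is the generic pattern for deferred work that touches another task's address space: take a temporary pin on the mm so it cannot be torn down mid-gup, and skip the fault-in entirely if the owning process is already exiting. A minimal standalone sketch of that pattern follows; the prefault_remote_page() helper is hypothetical and not part of this KVM change.

#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/mmap_lock.h>

/* Hypothetical helper: fault in one writable page of @mm at @addr. */
static bool prefault_remote_page(struct mm_struct *mm, unsigned long addr)
{
	int locked = 1;
	long npinned;

	/*
	 * mmget_not_zero() only succeeds while mm_users is non-zero, i.e.
	 * while the address space has not started exiting; bailing out here
	 * simply means there is nothing left worth faulting in.
	 */
	if (!mmget_not_zero(mm))
		return false;

	mmap_read_lock(mm);
	npinned = get_user_pages_remote(mm, addr, 1, FOLL_WRITE, NULL, &locked);
	if (locked)
		mmap_read_unlock(mm);

	mmput(mm);	/* drop the temporary pin taken above */
	return npinned == 1;
}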
@@ -74,20 +83,39 @@ static void async_pf_execute(struct work_struct *work)
 	apf->vcpu = NULL;
 	spin_unlock(&vcpu->async_pf.lock);
 
-	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
-		kvm_arch_async_page_present_queued(vcpu);
-
 	/*
-	 * apf may be freed by kvm_check_async_pf_completion() after
-	 * this point
+	 * The apf struct may be freed by kvm_check_async_pf_completion() as
+	 * soon as the lock is dropped.  Nullify it to prevent improper usage.
 	 */
+	apf = NULL;
+
+	if (!IS_ENABLED(CONFIG_KVM_ASYNC_PF_SYNC) && first)
+		kvm_arch_async_page_present_queued(vcpu);
 
 	trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
 	__kvm_vcpu_wake_up(vcpu);
+}
 
-	mmput(mm);
-	kvm_put_kvm(vcpu->kvm);
+static void kvm_flush_and_free_async_pf_work(struct kvm_async_pf *work)
+{
+	/*
+	 * The async #PF is "done", but KVM must wait for the work item itself,
+	 * i.e. async_pf_execute(), to run to completion.  If KVM is a module,
+	 * KVM must ensure *no* code owned by KVM (the module) can be run
+	 * after the last call to module_put().  Note, flushing the work item
+	 * is always required when the item is taken off the completion queue.
+	 * E.g. even if the vCPU handles the item in the "normal" path, the VM
+	 * could be terminated before async_pf_execute() completes.
+	 *
+	 * Wake-all events skip the queue and go straight to done, i.e. don't
+	 * need to be flushed (but sanity check that the work wasn't queued).
+	 */
+	if (work->wakeup_all)
+		WARN_ON_ONCE(work->work.func);
+	else
+		flush_work(&work->work);
+	kmem_cache_free(async_pf_cache, work);
 }
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
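The helper added above encodes a general rule for dynamically allocated objects that embed a work_struct: neither the memory nor, for a module, the code the callback runs may be released until the callback has finished, so the work is flushed before the object is freed. A minimal sketch of that flush-before-free shape, using hypothetical my_item/my_cache names rather than the KVM structures:

#include <linux/workqueue.h>
#include <linux/slab.h>

struct my_item {
	struct work_struct work;
	/* ... payload ... */
};

static struct kmem_cache *my_cache;

static void my_item_fn(struct work_struct *work)
{
	struct my_item *item = container_of(work, struct my_item, work);

	/* Deferred processing goes here; the item must NOT be freed here. */
}

static void my_item_submit(struct my_item *item)
{
	INIT_WORK(&item->work, my_item_fn);
	schedule_work(&item->work);
}

/*
 * Wait for a possibly still-running (or still-pending) callback before
 * handing the memory back to the slab cache, so neither the item nor the
 * code it executes disappears underneath the workqueue.
 */
static void my_item_flush_and_free(struct my_item *item)
{
	flush_work(&item->work);
	kmem_cache_free(my_cache, item);
}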
@@ -112,11 +140,8 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
 		flush_work(&work->work);
 #else
-		if (cancel_work_sync(&work->work)) {
-			mmput(work->mm);
-			kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
+		if (cancel_work_sync(&work->work))
 			kmem_cache_free(async_pf_cache, work);
-		}
 #endif
 		spin_lock(&vcpu->async_pf.lock);
 	}
@@ -126,7 +151,10 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_first_entry(&vcpu->async_pf.done,
 					 typeof(*work), link);
 		list_del(&work->link);
-		kmem_cache_free(async_pf_cache, work);
+
+		spin_unlock(&vcpu->async_pf.lock);
+		kvm_flush_and_free_async_pf_work(work);
+		spin_lock(&vcpu->async_pf.lock);
 	}
 	spin_unlock(&vcpu->async_pf.lock);
 
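The lock juggling in this hunk exists because flush_work() can sleep, which is not allowed under a spinlock; each iteration therefore unlinks the entry, drops the lock around the sleeping flush-and-free, and re-takes it before checking the list again. A condensed sketch of that drain loop, reusing the hypothetical my_item helpers from the previous sketch and assuming my_item also carries a list_head named link:

#include <linux/list.h>
#include <linux/spinlock.h>

struct my_queue {
	spinlock_t lock;
	struct list_head done;	/* struct my_item entries, linked via ->link */
};

static void my_queue_drain(struct my_queue *q)
{
	spin_lock(&q->lock);
	while (!list_empty(&q->done)) {
		struct my_item *item = list_first_entry(&q->done,
							struct my_item, link);

		list_del(&item->link);

		/* flush_work() may sleep, so it must run without the lock. */
		spin_unlock(&q->lock);
		my_item_flush_and_free(item);
		spin_lock(&q->lock);
	}
	spin_unlock(&q->lock);
}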
@@ -151,7 +179,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		kmem_cache_free(async_pf_cache, work);
+		kvm_flush_and_free_async_pf_work(work);
 	}
 }
 
@@ -184,9 +212,6 @@ bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	work->cr2_or_gpa = cr2_or_gpa;
 	work->addr = hva;
 	work->arch = *arch;
-	work->mm = current->mm;
-	mmget(work->mm);
-	kvm_get_kvm(work->vcpu->kvm);
 
 	INIT_WORK(&work->work, async_pf_execute);
 