Commit fcb732d

dwmw2 authored and bonzini committed
KVM: x86/xen: Fix runstate updates to be atomic when preempting vCPU
There are circumstances when kvm_xen_update_runstate_guest() should not
sleep because it ends up being called from __schedule() when the vCPU
is preempted:

[ 222.830825] kvm_xen_update_runstate_guest+0x24/0x100
[ 222.830878] kvm_arch_vcpu_put+0x14c/0x200
[ 222.830920] kvm_sched_out+0x30/0x40
[ 222.830960] __schedule+0x55c/0x9f0

To handle this, make it use the same trick as __kvm_xen_has_interrupt(),
of using the hva from the gfn_to_hva_cache directly. Then it can use
pagefault_disable() around the accesses and just bail out if the page
is absent (which is unlikely).

I almost switched to using a gfn_to_pfn_cache here and bailing out if
kvm_map_gfn() fails, like kvm_steal_time_set_preempted() does, but on
closer inspection it looks like kvm_map_gfn() will *always* fail in
atomic context for a page in IOMEM, which means it will silently fail
to make the update every single time for such guests, AFAICT. So I
didn't do it that way after all. And will probably fix that one too.

Cc: stable@vger.kernel.org
Fixes: 30b5c85 ("KVM: x86/xen: Add support for vCPU runstate information")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <b17a93e5ff4561e57b1238e3e7ccd0b613eb827e.camel@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 3915035 commit fcb732d
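
In essence, the fix replaces the sleeping kvm_write_guest_offset_cached() calls with direct __put_user()/__copy_to_user() accesses through the cached HVA, bracketed by pagefault_disable()/pagefault_enable() when the update happens at preemption time. Below is a minimal sketch of that pattern, assuming the gfn_to_hva_cache has already been revalidated against the current memslot generation; the helper name write_guest_u64_atomic is hypothetical, not part of the patch:

/*
 * Illustrative sketch only, not the committed code: write one 64-bit
 * value into guest memory via a previously validated gfn_to_hva_cache,
 * without sleeping when called from atomic (preemption) context.
 */
static int write_guest_u64_atomic(struct gfn_to_hva_cache *ghc,
                                  u64 val, bool atomic)
{
        u64 __user *uaddr = (u64 __user *)ghc->hva;
        int ret;

        if (atomic)
                pagefault_disable();  /* faults now fail fast instead of sleeping */

        ret = __put_user(val, uaddr); /* -EFAULT if the page is not present */

        if (atomic)
                pagefault_enable();

        return ret;                   /* caller bails out on failure, as the patch does */
}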

File tree

1 file changed (+67 -30 lines)

arch/x86/kvm/xen.c

Lines changed: 67 additions & 30 deletions
@@ -133,32 +133,57 @@ static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
 void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
+	struct gfn_to_hva_cache *ghc = &vx->runstate_cache;
+	struct kvm_memslots *slots = kvm_memslots(v->kvm);
+	bool atomic = (state == RUNSTATE_runnable);
 	uint64_t state_entry_time;
-	unsigned int offset;
+	int __user *user_state;
+	uint64_t __user *user_times;
 
 	kvm_xen_update_runstate(v, state);
 
 	if (!vx->runstate_set)
 		return;
 
-	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+	if (unlikely(slots->generation != ghc->generation || kvm_is_error_hva(ghc->hva)) &&
+	    kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len))
+		return;
+
+	/* We made sure it fits in a single page */
+	BUG_ON(!ghc->memslot);
+
+	if (atomic)
+		pagefault_disable();
 
-	offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
-#ifdef CONFIG_X86_64
 	/*
-	 * The only difference is alignment of uint64_t in 32-bit.
-	 * So the first field 'state' is accessed directly using
-	 * offsetof() (where its offset happens to be zero), while the
-	 * remaining fields which are all uint64_t, start at 'offset'
-	 * which we tweak here by adding 4.
+	 * The only difference between 32-bit and 64-bit versions of the
+	 * runstate struct is the alignment of uint64_t in 32-bit, which
+	 * means that the 64-bit version has an additional 4 bytes of
+	 * padding after the first field 'state'.
+	 *
+	 * So we use 'int __user *user_state' to point to the state field,
+	 * and 'uint64_t __user *user_times' for runstate_entry_time. So
+	 * the actual array of time[] in each state starts at user_times[1].
 	 */
+	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
+	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
+	user_state = (int __user *)ghc->hva;
+
+	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+	user_times = (uint64_t __user *)(ghc->hva +
+					 offsetof(struct compat_vcpu_runstate_info,
+						  state_entry_time));
+#ifdef CONFIG_X86_64
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
 		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
 		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
 
 	if (v->kvm->arch.xen.long_mode)
-		offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+		user_times = (uint64_t __user *)(ghc->hva +
+						 offsetof(struct vcpu_runstate_info,
+							  state_entry_time));
 #endif
 	/*
 	 * First write the updated state_entry_time at the appropriate
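
For reference, the concrete layout those BUILD_BUG_ON() assertions pin down, derived from the struct definitions (the compat variant packs uint64_t on a 4-byte boundary, so everything after 'state' shifts down by 4):

/*
 * Offsets implied by the assertions above:
 *
 *                        compat (32-bit ABI)   64-bit
 *   state                0x00                  0x00
 *   state_entry_time     0x04                  0x08  (4 bytes of padding after 'state')
 *   time[4]              0x0c                  0x10
 *   sizeof()             0x2c                  0x30
 */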
@@ -172,10 +197,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
 		     sizeof(state_entry_time));
 
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &state_entry_time, offset,
-					  sizeof(state_entry_time)))
-		return;
+	if (__put_user(state_entry_time, user_times))
+		goto out;
 	smp_wmb();
 
 	/*
@@ -189,11 +212,8 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
 
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &vx->current_runstate,
-					  offsetof(struct vcpu_runstate_info, state),
-					  sizeof(vx->current_runstate)))
-		return;
+	if (__put_user(vx->current_runstate, user_state))
+		goto out;
 
 	/*
 	 * Write the actual runstate times immediately after the
@@ -208,24 +228,23 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof(vx->runstate_times));
 
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &vx->runstate_times[0],
-					  offset + sizeof(u64),
-					  sizeof(vx->runstate_times)))
-		return;
-
+	if (__copy_to_user(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)))
+		goto out;
 	smp_wmb();
 
 	/*
 	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
 	 * runstate_entry_time field.
 	 */
-
 	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
-	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
-					  &state_entry_time, offset,
-					  sizeof(state_entry_time)))
-		return;
+	__put_user(state_entry_time, user_times);
+	smp_wmb();
+
+out:
+	mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
+
+	if (atomic)
+		pagefault_enable();
 }
 
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
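
The smp_wmb() calls preserve the existing seqlock-style protocol: the XEN_RUNSTATE_UPDATE bit in state_entry_time tells the guest an update is in flight, so a reader retries until the bit is clear and the value is stable. A hedged sketch of the guest-side reader this ordering is designed for, modeled on how Xen guests consume runstate data (not code from this patch):

/* Guest-side reader sketch (illustrative): retry while the hypervisor
 * is mid-update, and re-check state_entry_time for a torn read. */
static u64 read_runstate_time(volatile struct vcpu_runstate_info *rs, int state)
{
        u64 entry_time, t;

        do {
                entry_time = rs->state_entry_time;
                smp_rmb();              /* pairs with the writer's smp_wmb() */
                t = rs->time[state];
                smp_rmb();
        } while ((entry_time & XEN_RUNSTATE_UPDATE) ||
                 entry_time != rs->state_entry_time);

        return t;
}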
@@ -443,6 +462,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 
+		/* It must fit within a single page */
+		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
+			r = -EINVAL;
+			break;
+		}
+
 		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.vcpu_info_cache,
 					      data->u.gpa,
@@ -460,6 +485,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 
+		/* It must fit within a single page */
+		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct pvclock_vcpu_time_info) > PAGE_SIZE) {
+			r = -EINVAL;
+			break;
+		}
+
 		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.vcpu_time_info_cache,
 					      data->u.gpa,
@@ -481,6 +512,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			break;
 		}
 
+		/* It must fit within a single page */
+		if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE) {
+			r = -EINVAL;
+			break;
+		}
+
 		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
 					      &vcpu->arch.xen.runstate_cache,
 					      data->u.gpa,
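
These -EINVAL checks establish the invariant the runstate path now relies on: a valid gfn_to_hva_cache memslot implies the whole structure sits in one page, so a single HVA is enough. A worked example of the arithmetic, with hypothetical values (PAGE_SIZE 4096, sizeof(struct vcpu_runstate_info) 0x30 on 64-bit):

/* Hypothetical gpa whose page offset is 0xfe0: 0xfe0 + 0x30 = 0x1010,
 * which exceeds PAGE_SIZE (0x1000), so the struct would straddle two
 * pages and the attribute is rejected with -EINVAL. */
u64 gpa = 0x12345fe0;
if ((gpa & ~PAGE_MASK) + sizeof(struct vcpu_runstate_info) > PAGE_SIZE)
        return -EINVAL;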
