Commit 98a05fe

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "x86:

   - Do not register IRQ bypass consumer if posted interrupts not supported

   - Fix missed device interrupt due to non-atomic update of IRR

   - Use GFP_KERNEL_ACCOUNT for pid_table in ipiv

   - Make VMREAD error path play nice with noinstr

   - x86: Acquire SRCU read lock when handling fastpath MSR writes

   - Support linking rseq tests statically against glibc 2.35+

   - Fix reference count for stats file descriptors

   - Detect userspace setting invalid CR0

  Non-KVM:

   - Remove coccinelle script that has caused multiple confusion
     ("debugfs, coccinelle: check for obsolete DEFINE_SIMPLE_ATTRIBUTE()
     usage", acked by Greg)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
  KVM: selftests: Expand x86's sregs test to cover illegal CR0 values
  KVM: VMX: Don't fudge CR0 and CR4 for restricted L2 guest
  KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid
  Revert "debugfs, coccinelle: check for obsolete DEFINE_SIMPLE_ATTRIBUTE() usage"
  KVM: selftests: Verify stats fd is usable after VM fd has been closed
  KVM: selftests: Verify stats fd can be dup()'d and read
  KVM: selftests: Verify userspace can create "redundant" binary stats files
  KVM: selftests: Explicitly free vcpus array in binary stats test
  KVM: selftests: Clean up stats fd in common stats_test() helper
  KVM: selftests: Use pread() to read binary stats header
  KVM: Grab a reference to KVM for VM and vCPU stats file descriptors
  selftests/rseq: Play nice with binaries statically linked against glibc 2.35+
  Revert "KVM: SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"
  KVM: x86: Acquire SRCU read lock when handling fastpath MSR writes
  KVM: VMX: Use vmread_error() to report VM-Fail in "goto" path
  KVM: VMX: Make VMREAD error path play nice with noinstr
  KVM: x86/irq: Conditionally register IRQ bypass consumer again
  KVM: X86: Use GFP_KERNEL_ACCOUNT for pid_table in ipiv
  KVM: x86: check the kvm_cpu_get_interrupt result before using it
  KVM: x86: VMX: set irr_pending in kvm_apic_update_irr
  ...
2 parents c959e90 + 5a75911

14 files changed: +255 -186 lines changed

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
 KVM_X86_OP(get_cpl)
 KVM_X86_OP(set_segment)
 KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
 KVM_X86_OP(set_cr0)
 KVM_X86_OP_OPTIONAL(post_set_cr3)
 KVM_X86_OP(is_valid_cr4)
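
Aside: kvm-x86-ops.h is an X-macro table, so the single added line above is all it takes to wire up the new hook; each consumer defines KVM_X86_OP() to whatever expansion it needs and then includes the table. A simplified sketch of that pattern (the real definitions in kvm_host.h and x86.c have more variants, such as KVM_X86_OP_OPTIONAL_RET0):

    /* Each consumer defines KVM_X86_OP, then pulls in the table. */
    #define KVM_X86_OP(func) \
        DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
    #define KVM_X86_OP_OPTIONAL KVM_X86_OP
    #include <asm/kvm-x86-ops.h>  /* expands once per op listed in the table */
    #undef KVM_X86_OP
    #undef KVM_X86_OP_OPTIONAL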

arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 1 deletion
@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
     void (*set_segment)(struct kvm_vcpu *vcpu,
                         struct kvm_segment *var, int seg);
     void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+    bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
     void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
     void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-    bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+    bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
     void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
     int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
     void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);

arch/x86/kvm/lapic.c

Lines changed: 17 additions & 8 deletions
@@ -637,16 +637,22 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
     *max_irr = -1;
 
     for (i = vec = 0; i <= 7; i++, vec += 32) {
+        u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
+
+        irr_val = *p_irr;
         pir_val = READ_ONCE(pir[i]);
-        irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
+
         if (pir_val) {
+            pir_val = xchg(&pir[i], 0);
+
             prev_irr_val = irr_val;
-            irr_val |= xchg(&pir[i], 0);
-            *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
-            if (prev_irr_val != irr_val) {
-                max_updated_irr =
-                    __fls(irr_val ^ prev_irr_val) + vec;
-            }
+            do {
+                irr_val = prev_irr_val | pir_val;
+            } while (prev_irr_val != irr_val &&
+                     !try_cmpxchg(p_irr, &prev_irr_val, irr_val));
+
+            if (prev_irr_val != irr_val)
+                max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
         }
         if (irr_val)
             *max_irr = __fls(irr_val) + vec;

@@ -660,8 +666,11 @@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 {
     struct kvm_lapic *apic = vcpu->arch.apic;
+    bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
 
-    return __kvm_apic_update_irr(pir, apic->regs, max_irr);
+    if (unlikely(!apic->apicv_active && irr_updated))
+        apic->irr_pending = true;
+    return irr_updated;
 }
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
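
The substantive fix in __kvm_apic_update_irr() is that IRR is now updated with a compare-and-exchange loop instead of a plain load/OR/store, so vIRR bits that the CPU sets concurrently while posting interrupts can no longer be lost; kvm_apic_update_irr() additionally sets irr_pending whenever bits were merged with APICv disabled. Stripped of the KVM specifics, the lock-free merge follows this pattern (hypothetical helper, illustrative only):

    /* Atomically OR new_bits into *dst without dropping concurrent updates. */
    static void atomic_or_u32(u32 *dst, u32 new_bits)
    {
        u32 old = READ_ONCE(*dst), val;

        do {
            val = old | new_bits;
            /* Skip the cmpxchg entirely if every bit is already set. */
        } while (old != val && !try_cmpxchg(dst, &old, val));
    }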

arch/x86/kvm/svm/svm.c

Lines changed: 8 additions & 8 deletions
@@ -1786,6 +1786,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
     }
 }
 
+static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+    return true;
+}
+
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
     struct vcpu_svm *svm = to_svm(vcpu);

@@ -3986,14 +3991,8 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
-    struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
-
-    /*
-     * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
-     * can't read guest memory (dereference memslots) to decode the WRMSR.
-     */
-    if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
-        nrips && control->next_rip)
+    if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+        to_svm(vcpu)->vmcb->control.exit_info_1)
         return handle_fastpath_set_msr_irqoff(vcpu);
 
     return EXIT_FASTPATH_NONE;

@@ -4815,6 +4814,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
     .set_segment = svm_set_segment,
     .get_cpl = svm_get_cpl,
     .get_cs_db_l_bits = svm_get_cs_db_l_bits,
+    .is_valid_cr0 = svm_is_valid_cr0,
     .set_cr0 = svm_set_cr0,
     .post_set_cr3 = sev_post_set_cr3,
     .is_valid_cr4 = svm_is_valid_cr4,
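
The svm_exit_handlers_fastpath() hunk reverts the next_rip guard because the root cause is addressed elsewhere in this pull: per the "KVM: x86: Acquire SRCU read lock when handling fastpath MSR writes" commit, handle_fastpath_set_msr_irqoff() now takes the SRCU read lock itself and can safely dereference memslots to decode and skip the emulated WRMSR. That x86.c hunk is not shown on this page; its shape is roughly:

    /* arch/x86/kvm/x86.c -- sketch of the companion fix, not the verbatim hunk */
    fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
    {
        fastpath_t ret = EXIT_FASTPATH_NONE;

        kvm_vcpu_srcu_read_lock(vcpu);  /* memslots may now be dereferenced */
        /* ... decode the MSR, handle the fastpath write,
         *     kvm_skip_emulated_instruction(vcpu) ... */
        kvm_vcpu_srcu_read_unlock(vcpu);
        return ret;
    }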

arch/x86/kvm/vmx/vmenter.S

Lines changed: 4 additions & 4 deletions
@@ -303,10 +303,8 @@ SYM_FUNC_START(vmx_do_nmi_irqoff)
     VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
 SYM_FUNC_END(vmx_do_nmi_irqoff)
 
-
-.section .text, "ax"
-
 #ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
 /**
  * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
  * @field: VMCS field encoding that failed

@@ -335,7 +333,7 @@ SYM_FUNC_START(vmread_error_trampoline)
     mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
     mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
 
-    call vmread_error
+    call vmread_error_trampoline2
 
     /* Zero out @fault, which will be popped into the result register. */
     _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

@@ -357,6 +355,8 @@ SYM_FUNC_START(vmread_error_trampoline)
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
 
+.section .text, "ax"
+
 SYM_FUNC_START(vmx_do_interrupt_irqoff)
     VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
 SYM_FUNC_END(vmx_do_interrupt_irqoff)
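
The section shuffle is the point of this patch: the VM-Enter assembly in vmenter.S lives in .noinstr.text, and the old `.section .text` directive switched sections before the trampoline, leaving it instrumentable even though it can run where instrumentation is unsafe. Moving the directive after the #endif keeps vmread_error_trampoline in the non-instrumentable section, matching the noinstr C helper it now calls. For reference, noinstr on the C side boils down to section placement plus no-instrumentation attributes (a hypothetical reduction; the real macro in compiler_types.h adds more):

    /* Sketch of what the kernel's noinstr annotation amounts to. */
    #define noinstr_sketch \
        __attribute__((__section__(".noinstr.text"), \
                       __no_instrument_function__, __noinline__))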

arch/x86/kvm/vmx/vmx.c

Lines changed: 46 additions & 16 deletions
@@ -441,13 +441,23 @@ do { \
     pr_warn_ratelimited(fmt); \
 } while (0)
 
-void vmread_error(unsigned long field, bool fault)
+noinline void vmread_error(unsigned long field)
 {
-    if (fault)
+    vmx_insn_failed("vmread failed: field=%lx\n", field);
+}
+
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
+{
+    if (fault) {
         kvm_spurious_fault();
-    else
-        vmx_insn_failed("vmread failed: field=%lx\n", field);
+    } else {
+        instrumentation_begin();
+        vmread_error(field);
+        instrumentation_end();
+    }
 }
+#endif
 
 noinline void vmwrite_error(unsigned long field, unsigned long value)
 {

@@ -1503,6 +1513,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
     struct vcpu_vmx *vmx = to_vmx(vcpu);
     unsigned long old_rflags;
 
+    /*
+     * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
+     * is an unrestricted guest in order to mark L2 as needing emulation
+     * if L1 runs L2 as a restricted guest.
+     */
     if (is_unrestricted_guest(vcpu)) {
         kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
         vmx->rflags = rflags;

@@ -3037,6 +3052,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
     struct vcpu_vmx *vmx = to_vmx(vcpu);
     struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
+    /*
+     * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+     * as using VM86 is unnecessary if unrestricted guest is enabled, and
+     * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+     * should VM-Fail and KVM should reject userspace attempts to stuff
+     * CR0.PG=0 when L2 is active.
+     */
+    WARN_ON_ONCE(is_guest_mode(vcpu));
+
     vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
     vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
     vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);

@@ -3226,6 +3250,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
                           CPU_BASED_CR3_STORE_EXITING)
 
+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+    if (is_guest_mode(vcpu))
+        return nested_guest_cr0_valid(vcpu, cr0);
+
+    if (to_vmx(vcpu)->nested.vmxon)
+        return nested_host_cr0_valid(vcpu, cr0);
+
+    return true;
+}
+
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
     struct vcpu_vmx *vmx = to_vmx(vcpu);

@@ -3235,7 +3270,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
     old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 
     hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
-    if (is_unrestricted_guest(vcpu))
+    if (enable_unrestricted_guest)
         hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
     else {
         hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;

@@ -3263,7 +3298,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
     }
 #endif
 
-    if (enable_ept && !is_unrestricted_guest(vcpu)) {
+    if (enable_ept && !enable_unrestricted_guest) {
         /*
          * Ensure KVM has an up-to-date snapshot of the guest's CR3. If
          * the below code _enables_ CR3 exiting, vmx_cache_reg() will

@@ -3394,7 +3429,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
      * this bit, even if host CR4.MCE == 0.
      */
     hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
-    if (is_unrestricted_guest(vcpu))
+    if (enable_unrestricted_guest)
         hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
     else if (vmx->rmode.vm86_active)
         hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;

@@ -3414,7 +3449,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
     vcpu->arch.cr4 = cr4;
     kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
 
-    if (!is_unrestricted_guest(vcpu)) {
+    if (!enable_unrestricted_guest) {
         if (enable_ept) {
             if (!is_paging(vcpu)) {
                 hw_cr4 &= ~X86_CR4_PAE;

@@ -4651,7 +4686,8 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
     if (kvm_vmx->pid_table)
         return 0;
 
-    pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
+    pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+                        vmx_get_pid_table_order(kvm));
     if (!pages)
         return -ENOMEM;
 

@@ -5364,18 +5400,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
         val = (val & ~vmcs12->cr0_guest_host_mask) |
             (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-        if (!nested_guest_cr0_valid(vcpu, val))
-            return 1;
-
         if (kvm_set_cr0(vcpu, val))
             return 1;
         vmcs_writel(CR0_READ_SHADOW, orig_val);
         return 0;
     } else {
-        if (to_vmx(vcpu)->nested.vmxon &&
-            !nested_host_cr0_valid(vcpu, val))
-            return 1;
-
         return kvm_set_cr0(vcpu, val);
     }
 }

@@ -8203,6 +8232,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
     .set_segment = vmx_set_segment,
     .get_cpl = vmx_get_cpl,
     .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+    .is_valid_cr0 = vmx_is_valid_cr0,
     .set_cr0 = vmx_set_cr0,
     .is_valid_cr4 = vmx_is_valid_cr4,
     .set_cr4 = vmx_set_cr4,
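
Note that the two validity checks deleted from handle_set_cr0() are not lost: they moved into vmx_is_valid_cr0(), which common x86 code now consults on every CR0 load, closing the KVM_SET_SREGS{2} hole called out in the merge message. The x86.c side is not included on this page; the dispatch presumably runs through the static-call machinery along these lines (sketch only):

    /* arch/x86/kvm/x86.c -- sketch; the actual hunk is not shown above */
    static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
    {
        /* ... architectural reserved-bit checks common to VMX and SVM ... */
        return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
    }

    int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
    {
        if (!kvm_is_valid_cr0(vcpu, cr0))
            return 1;
        /* ... */
    }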

arch/x86/kvm/vmx/vmx_ops.h

Lines changed: 9 additions & 3 deletions
@@ -10,7 +10,7 @@
 #include "vmcs.h"
 #include "../x86.h"
 
-void vmread_error(unsigned long field, bool fault);
+void vmread_error(unsigned long field);
 void vmwrite_error(unsigned long field, unsigned long value);
 void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
 void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);

@@ -31,6 +31,13 @@ void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
  * void vmread_error_trampoline(unsigned long field, bool fault);
  */
 extern unsigned long vmread_error_trampoline;
+
+/*
+ * The second VMREAD error trampoline, called from the assembly trampoline,
+ * exists primarily to enable instrumentation for the VM-Fail path.
+ */
+void vmread_error_trampoline2(unsigned long field, bool fault);
+
 #endif
 
 static __always_inline void vmcs_check16(unsigned long field)

@@ -101,8 +108,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 
 do_fail:
     instrumentation_begin();
-    WARN_ONCE(1, KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
-    pr_warn_ratelimited(KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
+    vmread_error(field);
     instrumentation_end();
     return 0;
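
For context on the #ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT blocks: with a compiler that supports outputs from asm goto, __vmcs_readl() can jump straight to the do_fail label above while still producing the VMREAD result, so no assembly trampoline is needed at all. The modern path is shaped roughly like this (condensed from vmx_ops.h; the exception path and field-size checks are elided):

    asm_volatile_goto("1: vmread %[field], %[output]\n\t"
                      "jna %l[do_fail]\n\t"
                      _ASM_EXTABLE(1b, %l[do_exception])
                      : [output] "=r" (value)
                      : [field] "r" (field)
                      : "cc"
                      : do_fail, do_exception);
    return value;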
