Commit bb4409a

Merge tag 'kvm-x86-misc-6.13' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.13:

 - Clean up and optimize KVM's handling of writes to MSR_IA32_APICBASE.

 - Quirk KVM's misguided behavior of initializing certain feature MSRs to
   their maximum supported feature set, which can result in KVM creating
   invalid vCPU state.  E.g. initializing PERF_CAPABILITIES to a non-zero
   value results in the vCPU having invalid state if userspace hides PDCM
   from the guest, which can lead to save/restore failures.

 - Fix KVM's handling of non-canonical checks for vCPUs that support LA57
   to better follow the "architecture", in quotes because the actual
   behavior is poorly documented.  E.g. most MSR writes and descriptor
   table loads ignore CR4.LA57 and operate purely on whether the CPU
   supports LA57.

 - Bypass the register cache when querying CPL from kvm_sched_out(), as
   filling the cache from IRQ context is generally unsafe, and harden the
   cache accessors to try to prevent similar issues from occurring in the
   future.

 - Advertise AMD_IBPB_RET to userspace, and fix a related bug where KVM
   over-advertises SPEC_CTRL when trying to support cross-vendor VMs.

 - Minor cleanups
2 parents ef6fdc0 + a75b7bb commit bb4409a

30 files changed: +419 -156 lines changed


Documentation/virt/kvm/api.rst

Lines changed: 22 additions & 0 deletions
@@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
                                     or moved memslot isn't reachable, i.e KVM
                                     _may_ invalidate only SPTEs related to the
                                     memslot.
+
+KVM_X86_QUIRK_STUFF_FEATURE_MSRS    By default, at vCPU creation, KVM sets the
+                                    vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
+                                    MSR_IA32_ARCH_CAPABILITIES (0x10a),
+                                    MSR_PLATFORM_INFO (0xce), and all VMX MSRs
+                                    (0x480..0x492) to the maximal capabilities
+                                    supported by KVM.  KVM also sets
+                                    MSR_IA32_UCODE_REV (0x8b) to an arbitrary
+                                    value (which is different for Intel vs.
+                                    AMD).  Lastly, when guest CPUID is set (by
+                                    userspace), KVM modifies select VMX MSR
+                                    fields to force consistency between guest
+                                    CPUID and L2's effective ISA.  When this
+                                    quirk is disabled, KVM zeroes the vCPU's MSR
+                                    values (with two exceptions, see below),
+                                    i.e. treats the feature MSRs like CPUID
+                                    leaves and gives userspace full control of
+                                    the vCPU model definition.  This quirk does
+                                    not affect VMX MSRs CR0/CR4_FIXED1 (0x487
+                                    and 0x489), as KVM does not allow them to
+                                    be set by userspace (KVM sets them based on
+                                    guest CPUID, for safety purposes).
 =================================== ============================================
 
 7.32 KVM_CAP_MAX_VCPU_ID
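As context for the new quirk: a VMM that wants full control over the feature MSRs would disable KVM_X86_QUIRK_STUFF_FEATURE_MSRS per VM via KVM_ENABLE_CAP with KVM_CAP_DISABLE_QUIRKS2, before creating any vCPUs so that the zero-initialization described above applies. A minimal userspace sketch; vm_fd is assumed to be an already-created VM file descriptor and error handling is elided:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /*
     * Sketch: opt out of KVM stuffing feature MSRs, so PERF_CAPABILITIES,
     * ARCH_CAPABILITIES, PLATFORM_INFO, etc. start out zeroed and the vCPU
     * model is fully defined by userspace.
     */
    static int disable_feature_msr_stuffing(int vm_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_DISABLE_QUIRKS2,
                    /* args[0] must be a subset of the quirks KVM reports as disableable. */
                    .args[0] = KVM_X86_QUIRK_STUFF_FEATURE_MSRS,
            };

            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }

After this, the VMM is expected to populate the zeroed feature MSRs itself (e.g. via KVM_SET_MSRS), just as it already defines CPUID.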

Documentation/virt/kvm/x86/errata.rst

Lines changed: 12 additions & 0 deletions
@@ -33,6 +33,18 @@ Note however that any software (e.g ``WIN87EM.DLL``) expecting these features
 to be present likely predates these CPUID feature bits, and therefore
 doesn't know to check for them anyway.
 
+``KVM_SET_VCPU_EVENTS`` issue
+-----------------------------
+
+Invalid KVM_SET_VCPU_EVENTS input with respect to error codes *may* result in
+failed VM-Entry on Intel CPUs.  Pre-CET Intel CPUs require that exception
+injection through the VMCS correctly set the "error code valid" flag, e.g.
+require the flag be set when injecting a #GP, clear when injecting a #UD,
+clear when injecting a soft exception, etc.  Intel CPUs that enumerate
+IA32_VMX_BASIC[56] as '1' relax VMX's consistency checks, and AMD CPUs have no
+restrictions whatsoever.  KVM_SET_VCPU_EVENTS doesn't sanity check the vector
+versus "has_error_code", i.e. KVM's ABI follows AMD behavior.
+
 Nested virtualization features
 ------------------------------
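Because KVM itself doesn't reject a mismatched vector/"has_error_code" pair, a VMM that needs to run on pre-CET Intel hosts has to keep the two consistent when using KVM_SET_VCPU_EVENTS. A rough userspace sketch (vcpu_fd is assumed to be a vCPU file descriptor) injecting #GP(0) with the error-code flag set, as that hardware expects:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    #define GP_VECTOR 13    /* #GP architecturally delivers an error code */

    /* Sketch: inject a #GP(0) in a form pre-CET Intel CPUs accept at VM-Entry. */
    static int inject_gp_zero(int vcpu_fd)
    {
            struct kvm_vcpu_events events;

            /* Read-modify-write so unrelated event state isn't clobbered. */
            if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events))
                    return -1;

            events.exception.injected = 1;
            events.exception.nr = GP_VECTOR;
            events.exception.has_error_code = 1;    /* keep consistent with the vector */
            events.exception.error_code = 0;

            return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
    }

Injecting, say, a #UD would instead leave has_error_code clear, matching what the VMCS consistency checks require.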

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 0 deletions
@@ -34,6 +34,7 @@ KVM_X86_OP(set_msr)
 KVM_X86_OP(get_segment_base)
 KVM_X86_OP(get_segment)
 KVM_X86_OP(get_cpl)
+KVM_X86_OP(get_cpl_no_cache)
 KVM_X86_OP(set_segment)
 KVM_X86_OP(get_cs_db_l_bits)
 KVM_X86_OP(is_valid_cr0)

arch/x86/include/asm/kvm_host.h

Lines changed: 3 additions & 1 deletion
@@ -1655,6 +1655,7 @@ struct kvm_x86_ops {
 	void (*get_segment)(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 	int (*get_cpl)(struct kvm_vcpu *vcpu);
+	int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu);
 	void (*set_segment)(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
 	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -2358,7 +2359,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
 	 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |	\
 	 KVM_X86_QUIRK_FIX_HYPERCALL_INSN |	\
 	 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS |	\
-	 KVM_X86_QUIRK_SLOT_ZAP_ALL)
+	 KVM_X86_QUIRK_SLOT_ZAP_ALL |		\
+	 KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
 
 /*
  * KVM previously used a u32 field in kvm_run to indicate the hypercall was

arch/x86/include/uapi/asm/kvm.h

Lines changed: 1 addition & 0 deletions
@@ -440,6 +440,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN	(1 << 5)
 #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS	(1 << 6)
 #define KVM_X86_QUIRK_SLOT_ZAP_ALL		(1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS	(1 << 8)
 
 #define KVM_STATE_NESTED_FORMAT_VMX	0
 #define KVM_STATE_NESTED_FORMAT_SVM	1

arch/x86/kvm/cpuid.c

Lines changed: 9 additions & 3 deletions
@@ -690,7 +690,9 @@ void kvm_set_cpu_caps(void)
 	kvm_cpu_cap_set(X86_FEATURE_TSC_ADJUST);
 	kvm_cpu_cap_set(X86_FEATURE_ARCH_CAPABILITIES);
 
-	if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
+	if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) &&
+	    boot_cpu_has(X86_FEATURE_AMD_IBPB) &&
+	    boot_cpu_has(X86_FEATURE_AMD_IBRS))
 		kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL);
 	if (boot_cpu_has(X86_FEATURE_STIBP))
 		kvm_cpu_cap_set(X86_FEATURE_INTEL_STIBP);
@@ -755,16 +757,20 @@ void kvm_set_cpu_caps(void)
 		F(CLZERO) | F(XSAVEERPTR) |
 		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
 		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
-		F(AMD_PSFD)
+		F(AMD_PSFD) | F(AMD_IBPB_RET)
 	);
 
 	/*
 	 * AMD has separate bits for each SPEC_CTRL bit.
 	 * arch/x86/kernel/cpu/bugs.c is kind enough to
 	 * record that in cpufeatures so use them.
 	 */
-	if (boot_cpu_has(X86_FEATURE_IBPB))
+	if (boot_cpu_has(X86_FEATURE_IBPB)) {
 		kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB);
+		if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) &&
+		    !boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB))
+			kvm_cpu_cap_set(X86_FEATURE_AMD_IBPB_RET);
+	}
 	if (boot_cpu_has(X86_FEATURE_IBRS))
 		kvm_cpu_cap_set(X86_FEATURE_AMD_IBRS);
 	if (boot_cpu_has(X86_FEATURE_STIBP))
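From userspace, whether the running KVM advertises the new bit can be checked with KVM_GET_SUPPORTED_CPUID. A sketch, assuming AMD_IBPB_RET is reported in CPUID leaf 0x80000008 EBX bit 30 (the kernel's word-13 feature leaf; treat the exact bit position as an assumption) and that kvm_fd is an open /dev/kvm descriptor:

    #include <linux/kvm.h>
    #include <stdbool.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    /* Sketch: does this kernel's KVM advertise AMD_IBPB_RET to guests? */
    static bool kvm_advertises_amd_ibpb_ret(int kvm_fd)
    {
            struct kvm_cpuid2 *cpuid;
            bool advertised = false;
            unsigned int i;

            /* 128 entries is comfortably more than current KVM reports. */
            cpuid = calloc(1, sizeof(*cpuid) + 128 * sizeof(cpuid->entries[0]));
            if (!cpuid)
                    return false;
            cpuid->nent = 128;

            if (!ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid)) {
                    for (i = 0; i < cpuid->nent; i++) {
                            if (cpuid->entries[i].function == 0x80000008) {
                                    advertised = cpuid->entries[i].ebx & (1u << 30);
                                    break;
                            }
                    }
            }

            free(cpuid);
            return advertised;
    }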

arch/x86/kvm/cpuid.h

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 #ifndef ARCH_X86_KVM_CPUID_H
 #define ARCH_X86_KVM_CPUID_H
 
-#include "x86.h"
 #include "reverse_cpuid.h"
 #include <asm/cpu.h>
 #include <asm/processor.h>

arch/x86/kvm/emulate.c

Lines changed: 9 additions & 6 deletions
@@ -651,9 +651,10 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt)
 }
 
 static inline bool emul_is_noncanonical_address(u64 la,
-						struct x86_emulate_ctxt *ctxt)
+						struct x86_emulate_ctxt *ctxt,
+						unsigned int flags)
 {
-	return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt));
+	return !ctxt->ops->is_canonical_addr(ctxt, la, flags);
 }
 
 /*
@@ -1733,7 +1734,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 		if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
-						 ((u64)base3 << 32), ctxt))
+						 ((u64)base3 << 32), ctxt,
+						 X86EMUL_F_DT_LOAD))
 			return emulate_gp(ctxt, err_code);
 	}
 
@@ -2516,8 +2518,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 		ss_sel = cs_sel + 8;
 		cs.d = 0;
 		cs.l = 1;
-		if (emul_is_noncanonical_address(rcx, ctxt) ||
-		    emul_is_noncanonical_address(rdx, ctxt))
+		if (emul_is_noncanonical_address(rcx, ctxt, 0) ||
+		    emul_is_noncanonical_address(rdx, ctxt, 0))
 			return emulate_gp(ctxt, 0);
 		break;
 	}
@@ -3494,7 +3496,8 @@ static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
-	    emul_is_noncanonical_address(desc_ptr.address, ctxt))
+	    emul_is_noncanonical_address(desc_ptr.address, ctxt,
+					 X86EMUL_F_DT_LOAD))
 		return emulate_gp(ctxt, 0);
 	if (lgdt)
 		ctxt->ops->set_gdt(ctxt, &desc_ptr);
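The new X86EMUL_F_MSR and X86EMUL_F_DT_LOAD flags let the is_canonical_addr backend implement the behavior described in the merge summary: MSR writes and descriptor-table loads are checked against the CPU-supported address width regardless of CR4.LA57, while other accesses honor CR4.LA57. The following is only an illustrative sketch of that policy, not the actual KVM helper; the flag values simply mirror the kvm_emulate.h additions shown later in this diff:

    #include <stdbool.h>
    #include <stdint.h>

    #define X86EMUL_F_MSR           (1u << 4)       /* mirrors kvm_emulate.h */
    #define X86EMUL_F_DT_LOAD       (1u << 5)

    /*
     * Sketch: pick the virtual-address width for the canonicality check based
     * on the access type, then verify the upper bits are a sign extension.
     */
    static bool sketch_is_canonical(uint64_t addr, unsigned int flags,
                                    bool vcpu_has_la57, bool cr4_la57)
    {
            unsigned int vaddr_bits;

            if (flags & (X86EMUL_F_MSR | X86EMUL_F_DT_LOAD))
                    vaddr_bits = vcpu_has_la57 ? 57 : 48;   /* ignore CR4.LA57 */
            else
                    vaddr_bits = cr4_la57 ? 57 : 48;

            /* Canonical <=> bits [63:vaddr_bits-1] are all identical. */
            return ((int64_t)addr >> (vaddr_bits - 1)) == ((int64_t)addr >> 63);
    }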

arch/x86/kvm/kvm_cache_regs.h

Lines changed: 17 additions & 0 deletions
@@ -43,6 +43,18 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
 BUILD_KVM_GPR_ACCESSORS(r15, R15)
 #endif
 
+/*
+ * Using the register cache from interrupt context is generally not allowed, as
+ * caching a register and marking it available/dirty can't be done atomically,
+ * i.e. accesses from interrupt context may clobber state or read stale data if
+ * the vCPU task is in the process of updating the cache.  The exception is if
+ * KVM is handling a PMI IRQ/NMI VM-Exit, as that bound code sequence doesn't
+ * touch the cache, it runs after the cache is reset (post VM-Exit), and PMIs
+ * need to access several registers that are cacheable.
+ */
+#define kvm_assert_register_caching_allowed(vcpu)		\
+	lockdep_assert_once(in_task() || kvm_arch_pmi_in_guest(vcpu))
+
 /*
  * avail  dirty
  * 0	  0	  register in VMCS/VMCB
@@ -53,24 +65,28 @@ BUILD_KVM_GPR_ACCESSORS(r15, R15)
 static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
 					     enum kvm_reg reg)
 {
+	kvm_assert_register_caching_allowed(vcpu);
 	return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
 }
 
 static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
 					 enum kvm_reg reg)
 {
+	kvm_assert_register_caching_allowed(vcpu);
 	return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
 }
 
 static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
 					       enum kvm_reg reg)
 {
+	kvm_assert_register_caching_allowed(vcpu);
 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
 }
 
 static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
 					   enum kvm_reg reg)
 {
+	kvm_assert_register_caching_allowed(vcpu);
 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
 }
@@ -84,6 +100,7 @@ static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
 static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu,
 								 enum kvm_reg reg)
 {
+	kvm_assert_register_caching_allowed(vcpu);
 	return arch___test_and_set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
 }
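This assertion is also why the cache-bypassing get_cpl_no_cache hook was added earlier in the diff: per the merge summary, filling the cache from the sched-out path (IRQ context) is generally unsafe, so the CPL query there must not go through the cache. A hypothetical caller-side sketch; the exact call site, the kvm_x86_call() usage, and the preempted_in_kernel bookkeeping are assumptions based on the summary rather than a quote of the actual change:

    #include <linux/kvm_host.h>

    /*
     * Sketch: record whether the vCPU was preempted while in kernel mode
     * (CPL 0) without touching the register/segment cache.  Using the cached
     * get_cpl() here could trip kvm_assert_register_caching_allowed() and/or
     * populate the cache from an unsafe context.
     */
    static void sketch_note_preempted_state(struct kvm_vcpu *vcpu)
    {
            vcpu->arch.preempted_in_kernel =
                    kvm_x86_call(get_cpl_no_cache)(vcpu) == 0;
    }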

arch/x86/kvm/kvm_emulate.h

Lines changed: 5 additions & 0 deletions
@@ -94,6 +94,8 @@ struct x86_instruction_info {
 #define X86EMUL_F_FETCH			BIT(1)
 #define X86EMUL_F_IMPLICIT		BIT(2)
 #define X86EMUL_F_INVLPG		BIT(3)
+#define X86EMUL_F_MSR			BIT(4)
+#define X86EMUL_F_DT_LOAD		BIT(5)
 
 struct x86_emulate_ops {
 	void (*vm_bugged)(struct x86_emulate_ctxt *ctxt);
@@ -235,6 +237,9 @@ struct x86_emulate_ops {
 
 	gva_t (*get_untagged_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
 				   unsigned int flags);
+
+	bool (*is_canonical_addr)(struct x86_emulate_ctxt *ctxt, gva_t addr,
+				  unsigned int flags);
 };
 
 /* Type, address-of, and value of an instruction's operand. */
