Commit a382b06
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:
 "arm64:

   - Fix a couple of bugs affecting pKVM's PSCI relay implementation
     when running in the hVHE mode, resulting in the host being entered
     with the MMU in an unknown state, and EL2 being in the wrong mode

  x86:

   - Set RFLAGS.IF in C code on SVM to get VMRUN out of the STI shadow

   - Ensure DEBUGCTL is context switched on AMD to avoid running the
     guest with the host's value, which can lead to unexpected bus lock
     #DBs

   - Suppress DEBUGCTL.BTF on AMD (to match Intel), as KVM doesn't
     properly emulate BTF.  KVM's lack of context switching has meant
     BTF has always been broken to some extent

   - Always save DR masks for SNP vCPUs if DebugSwap is *supported*, as
     the guest can enable DebugSwap without KVM's knowledge

   - Fix a bug in mmu_stress_tests where a vCPU could finish the "writes
     to RO memory" phase without actually generating a write-protection
     fault

   - Fix a printf() goof in the SEV smoke test that causes build
     failures with -Werror

   - Explicitly zero EAX and EBX in CPUID.0x8000_0022 output when
     PERFMON_V2 isn't supported by KVM"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Explicitly zero EAX and EBX when PERFMON_V2 isn't supported by KVM
  KVM: selftests: Fix printf() format goof in SEV smoke test
  KVM: selftests: Ensure all vCPUs hit -EFAULT during initial RO stage
  KVM: SVM: Don't rely on DebugSwap to restore host DR0..DR3
  KVM: SVM: Save host DR masks on CPUs with DebugSwap
  KVM: arm64: Initialize SCTLR_EL1 in __kvm_hyp_init_cpu()
  KVM: arm64: Initialize HCR_EL2.E2H early
  KVM: x86: Snapshot the host's DEBUGCTL after disabling IRQs
  KVM: SVM: Manually context switch DEBUGCTL if LBR virtualization is disabled
  KVM: x86: Snapshot the host's DEBUGCTL in common x86
  KVM: SVM: Suppress DEBUGCTL.BTF on AMD
  KVM: SVM: Drop DEBUGCTL[5:2] from guest's effective value
  KVM: selftests: Assert that STI blocking isn't set after event injection
  KVM: SVM: Set RFLAGS.IF=1 in C code, to get VMRUN out of the STI shadow
2 parents 1110ce6 + ea9bd29 commit a382b06

16 files changed, +130 −62 lines changed

arch/arm64/include/asm/el2_setup.h

26 additions, 5 deletions

@@ -16,6 +16,32 @@
 #include <asm/sysreg.h>
 #include <linux/irqchip/arm-gic-v3.h>
 
+.macro init_el2_hcr   val
+        mov_q   x0, \val
+
+        /*
+         * Compliant CPUs advertise their VHE-onlyness with
+         * ID_AA64MMFR4_EL1.E2H0 < 0. On such CPUs HCR_EL2.E2H is RES1, but it
+         * can reset into an UNKNOWN state and might not read as 1 until it has
+         * been initialized explicitly.
+         *
+         * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+         * don't advertise it (they predate this relaxation).
+         *
+         * Initialize HCR_EL2.E2H so that later code can rely upon HCR_EL2.E2H
+         * indicating whether the CPU is running in E2H mode.
+         */
+        mrs_s   x1, SYS_ID_AA64MMFR4_EL1
+        sbfx    x1, x1, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
+        cmp     x1, #0
+        b.ge    .LnVHE_\@
+
+        orr     x0, x0, #HCR_E2H
+.LnVHE_\@:
+        msr     hcr_el2, x0
+        isb
+.endm
+
 .macro __init_el2_sctlr
         mov_q   x0, INIT_SCTLR_EL2_MMU_OFF
         msr     sctlr_el2, x0
@@ -244,11 +270,6 @@
 .Lskip_gcs_\@:
 .endm
 
-.macro __init_el2_nvhe_prepare_eret
-        mov     x0, #INIT_PSTATE_EL1
-        msr     spsr_el2, x0
-.endm
-
 .macro __init_el2_mpam
         /* Memory Partitioning And Monitoring: disable EL2 traps */
         mrs     x1, id_aa64pfr0_el1

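For readers who don't speak AArch64 assembly, the macro's E2H0 probe reduces to roughly the C sketch below. This is a paraphrase of the assembly above, not kernel code; read_sysreg_s() and sign_extend64() are existing kernel helpers, while init_el2_hcr_sketch() is a hypothetical name:

/* Hedged C sketch of init_el2_hcr's E2H0 probe; not actual kernel code. */
static inline u64 init_el2_hcr_sketch(u64 val)
{
        u64 mmfr4 = read_sysreg_s(SYS_ID_AA64MMFR4_EL1);
        /* E2H0 is a signed field; a negative value means "VHE-only CPU". */
        s64 e2h0 = sign_extend64(mmfr4 >> ID_AA64MMFR4_EL1_E2H0_SHIFT,
                                 ID_AA64MMFR4_EL1_E2H0_WIDTH - 1);

        if (e2h0 < 0)
                val |= HCR_E2H;         /* E2H is RES1 but may reset UNKNOWN */

        write_sysreg(val, hcr_el2);     /* the macro follows this with an ISB */
        return val;
}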
arch/arm64/kernel/head.S

3 additions, 19 deletions

@@ -298,25 +298,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
         msr     sctlr_el2, x0
         isb
 0:
-        mov_q   x0, HCR_HOST_NVHE_FLAGS
-
-        /*
-         * Compliant CPUs advertise their VHE-onlyness with
-         * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
-         * RES1 in that case. Publish the E2H bit early so that
-         * it can be picked up by the init_el2_state macro.
-         *
-         * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
-         * don't advertise it (they predate this relaxation).
-         */
-        mrs_s   x1, SYS_ID_AA64MMFR4_EL1
-        tbz     x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
-        orr     x0, x0, #HCR_E2H
-1:
-        msr     hcr_el2, x0
-        isb
 
+        init_el2_hcr    HCR_HOST_NVHE_FLAGS
         init_el2_state
 
         /* Hypervisor stub */
@@ -339,7 +322,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
         msr     sctlr_el1, x1
         mov     x2, xzr
 3:
-        __init_el2_nvhe_prepare_eret
+        mov     x0, #INIT_PSTATE_EL1
+        msr     spsr_el2, x0
 
         mov     w0, #BOOT_CPU_MODE_EL2
         orr     x0, x0, x2

arch/arm64/kvm/hyp/nvhe/hyp-init.S

7 additions, 3 deletions

@@ -73,8 +73,12 @@ __do_hyp_init:
         eret
 SYM_CODE_END(__kvm_hyp_init)
 
+/*
+ * Initialize EL2 CPU state to sane values.
+ *
+ * HCR_EL2.E2H must have been initialized already.
+ */
 SYM_CODE_START_LOCAL(__kvm_init_el2_state)
-        /* Initialize EL2 CPU state to sane values. */
         init_el2_state          // Clobbers x0..x2
         finalise_el2_state
         ret
@@ -206,9 +210,9 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
 
 2:      msr     SPsel, #1       // We want to use SP_EL{1,2}
 
-        bl      __kvm_init_el2_state
+        init_el2_hcr    0
 
-        __init_el2_nvhe_prepare_eret
+        bl      __kvm_init_el2_state
 
         /* Enable MMU, set vectors and stack. */
         mov     x0, x28

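Note the ordering in the second hunk: init_el2_hcr 0 now runs before the bl __kvm_init_el2_state, matching the new "HCR_EL2.E2H must have been initialized already" requirement, since the EL2 state initialization consults HCR_EL2.E2H. The __init_el2_nvhe_prepare_eret invocation disappears because that macro was deleted from el2_setup.h; its two instructions were inlined into head.S (above), and the PSCI relay (below) now establishes the return PSTATE itself.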
arch/arm64/kvm/hyp/nvhe/psci-relay.c

3 additions, 0 deletions

@@ -218,6 +218,9 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
         if (is_cpu_on)
                 release_boot_args(boot_args);
 
+        write_sysreg_el1(INIT_SCTLR_EL1_MMU_OFF, SYS_SCTLR);
+        write_sysreg(INIT_PSTATE_EL1, SPSR_EL2);
+
         __host_enter(host_ctxt);
 }

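These two writes address the "host entered with the MMU in an unknown state" half of the arm64 fixes: when a CPU comes back through the pKVM PSCI relay (CPU_ON or resume), SCTLR_EL1 and SPSR_EL2 are reset to known-good values (MMU off, INIT_PSTATE_EL1) before __host_enter() returns to the host, rather than inheriting whatever UNKNOWN values the CPU reset into.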
arch/x86/include/asm/kvm_host.h

1 addition, 0 deletions

@@ -780,6 +780,7 @@ struct kvm_vcpu_arch {
         u32 pkru;
         u32 hflags;
         u64 efer;
+        u64 host_debugctl;
         u64 apic_base;
         struct kvm_lapic *apic; /* kernel irqchip context */
         bool load_eoi_exitmap_pending;

arch/x86/kvm/cpuid.c

1 addition, 1 deletion

@@ -1763,7 +1763,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 
                 entry->ecx = entry->edx = 0;
                 if (!enable_pmu || !kvm_cpu_cap_has(X86_FEATURE_PERFMON_V2)) {
-                        entry->eax = entry->ebx;
+                        entry->eax = entry->ebx = 0;
                         break;
                 }
 
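The one-character bug here is easy to gloss over: the old statement copied EBX's current (stale) value into EAX rather than zeroing both registers. A plain-C illustration with hypothetical values, not kernel code:

u32 eax = 0x11, ebx = 0x22;

eax = ebx;      /* old code: eax == 0x22, ebx == 0x22 -- nothing zeroed */
eax = ebx = 0;  /* fixed: chained assignment clears both registers */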
arch/x86/kvm/svm/sev.c

17 additions, 7 deletions

@@ -4590,6 +4590,8 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm)
 
 void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
 {
+        struct kvm *kvm = svm->vcpu.kvm;
+
         /*
          * All host state for SEV-ES guests is categorized into three swap types
          * based on how it is handled by hardware during a world switch:
@@ -4613,14 +4615,22 @@ void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa)
 
         /*
          * If DebugSwap is enabled, debug registers are loaded but NOT saved by
-         * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
-         * saves and loads debug registers (Type-A).
+         * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU does
+         * not save or load debug registers. Sadly, KVM can't prevent SNP
+         * guests from lying about DebugSwap on secondary vCPUs, i.e. the
+         * SEV_FEATURES provided at "AP Create" isn't guaranteed to match what
+         * the guest has actually enabled (or not!) in the VMSA.
+         *
+         * If DebugSwap is *possible*, save the masks so that they're restored
+         * if the guest enables DebugSwap. But for the DRs themselves, do NOT
+         * rely on the CPU to restore the host values; KVM will restore them as
+         * needed in common code, via hw_breakpoint_restore(). Note, KVM does
+         * NOT support virtualizing Breakpoint Extensions, i.e. the mask MSRs
+         * don't need to be restored per se, KVM just needs to ensure they are
+         * loaded with the correct values *if* the CPU writes the MSRs.
          */
-        if (sev_vcpu_has_debug_swap(svm)) {
-                hostsa->dr0 = native_get_debugreg(0);
-                hostsa->dr1 = native_get_debugreg(1);
-                hostsa->dr2 = native_get_debugreg(2);
-                hostsa->dr3 = native_get_debugreg(3);
+        if (sev_vcpu_has_debug_swap(svm) ||
+            (sev_snp_guest(kvm) && cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP))) {
                 hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
                 hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
                 hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);

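Condensed into one predicate, the new policy reads as below. This is a sketch restating the hunk above, not additional kernel code; all identifiers are the ones used in the diff:

/*
 * Save the DR address masks into the host save area whenever the guest
 * *could* be using DebugSwap -- an SNP guest can enable it in its VMSA
 * without KVM's knowledge.  The DR0..DR3 values themselves are no longer
 * entrusted to the hardware swap; common x86 code restores them via
 * hw_breakpoint_restore() as needed.
 */
bool save_dr_masks = sev_vcpu_has_debug_swap(svm) ||
                     (sev_snp_guest(kvm) &&
                      cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP));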
arch/x86/kvm/svm/svm.c

49 additions, 0 deletions

@@ -3165,6 +3165,27 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                         kvm_pr_unimpl_wrmsr(vcpu, ecx, data);
                         break;
                 }
+
+                /*
+                 * AMD changed the architectural behavior of bits 5:2. On CPUs
+                 * without BusLockTrap, bits 5:2 control "external pins", but
+                 * on CPUs that support BusLockDetect, bit 2 enables BusLockTrap
+                 * and bits 5:3 are reserved-to-zero. Sadly, old KVM allowed
+                 * the guest to set bits 5:2 despite not actually virtualizing
+                 * Performance-Monitoring/Breakpoint external pins. Drop bits
+                 * 5:2 for backwards compatibility.
+                 */
+                data &= ~GENMASK(5, 2);
+
+                /*
+                 * Suppress BTF as KVM doesn't virtualize BTF, but there's no
+                 * way to communicate lack of support to the guest.
+                 */
+                if (data & DEBUGCTLMSR_BTF) {
+                        kvm_pr_unimpl_wrmsr(vcpu, MSR_IA32_DEBUGCTLMSR, data);
+                        data &= ~DEBUGCTLMSR_BTF;
+                }
+
                 if (data & DEBUGCTL_RESERVED_BITS)
                         return 1;
 
@@ -4189,6 +4210,18 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
 
         guest_state_enter_irqoff();
 
+        /*
+         * Set RFLAGS.IF prior to VMRUN, as the host's RFLAGS.IF at the time of
+         * VMRUN controls whether or not physical IRQs are masked (KVM always
+         * runs with V_INTR_MASKING_MASK).  Toggle RFLAGS.IF here to avoid the
+         * temptation to do STI+VMRUN+CLI, as AMD CPUs bleed the STI shadow
+         * into guest state if delivery of an event during VMRUN triggers a
+         * #VMEXIT, and the guest_state transitions already tell lockdep that
+         * IRQs are being enabled/disabled.  Note!  GIF=0 for the entirety of
+         * this path, so IRQs aren't actually unmasked while running host code.
+         */
+        raw_local_irq_enable();
+
         amd_clear_divider();
 
         if (sev_es_guest(vcpu->kvm))
@@ -4197,6 +4230,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
         else
                 __svm_vcpu_run(svm, spec_ctrl_intercepted);
 
+        raw_local_irq_disable();
+
         guest_state_exit_irqoff();
 }
 
@@ -4253,6 +4288,16 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
         clgi();
         kvm_load_guest_xsave_state(vcpu);
 
+        /*
+         * Hardware only context switches DEBUGCTL if LBR virtualization is
+         * enabled.  Manually load DEBUGCTL if necessary (and restore it after
+         * VM-Exit), as running with the host's DEBUGCTL can negatively affect
+         * guest state and can even be fatal, e.g. due to Bus Lock Detect.
+         */
+        if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
+            vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
+                update_debugctlmsr(svm->vmcb->save.dbgctl);
+
         kvm_wait_lapic_expire(vcpu);
 
         /*
@@ -4280,6 +4325,10 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu,
         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
                 kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
 
+        if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
+            vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
+                update_debugctlmsr(vcpu->arch.host_debugctl);
+
         kvm_load_host_xsave_state(vcpu);
         stgi();
 

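Taken together, the svm.c hunks give the run loop the following shape. This is an abridged sketch of the code above, not compilable as-is; GIF is 0 for the entire window, so RFLAGS.IF=1 never actually unmasks IRQs in host code:

/* In svm_vcpu_run(), before entering the guest: */
if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
    vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
        update_debugctlmsr(svm->vmcb->save.dbgctl);     /* load guest DEBUGCTL */

/* In svm_vcpu_enter_exit(): */
guest_state_enter_irqoff();
raw_local_irq_enable();                 /* RFLAGS.IF=1, set outside any STI shadow */
__svm_vcpu_run(svm, spec_ctrl_intercepted);             /* VMRUN */
raw_local_irq_disable();
guest_state_exit_irqoff();

/* Back in svm_vcpu_run(), after the exit: */
if (!(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) &&
    vcpu->arch.host_debugctl != svm->vmcb->save.dbgctl)
        update_debugctlmsr(vcpu->arch.host_debugctl);   /* restore host DEBUGCTL */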
arch/x86/kvm/svm/svm.h

1 addition, 1 deletion

@@ -584,7 +584,7 @@ static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
 /* svm.c */
 #define MSR_INVALID                     0xffffffffU
 
-#define DEBUGCTL_RESERVED_BITS          (~(0x3fULL))
+#define DEBUGCTL_RESERVED_BITS          (~DEBUGCTLMSR_LBR)
 
 extern bool dump_invalid_vmcb;
 

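Combined with the svm_set_msr() hunk above, the net effect on a guest WRMSR to MSR_IA32_DEBUGCTLMSR can be summarized as follows (a sketch, not verbatim kernel code):

data &= ~GENMASK(5, 2);         /* drop legacy "external pins"/BusLockTrap bits */
if (data & DEBUGCTLMSR_BTF)     /* BTF isn't virtualized: warn, then clear it */
        data &= ~DEBUGCTLMSR_BTF;
if (data & DEBUGCTL_RESERVED_BITS)      /* now everything except DEBUGCTLMSR_LBR */
        return 1;               /* reserved bit set -> #GP to the guest */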
arch/x86/kvm/svm/vmenter.S

1 addition, 9 deletions

@@ -170,12 +170,8 @@ SYM_FUNC_START(__svm_vcpu_run)
         mov VCPU_RDI(%_ASM_DI), %_ASM_DI
 
         /* Enter guest mode */
-        sti
-
 3:      vmrun %_ASM_AX
 4:
-        cli
-
         /* Pop @svm to RAX while it's the only available register. */
         pop %_ASM_AX
 
@@ -340,12 +336,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
         mov KVM_VMCB_pa(%rax), %rax
 
         /* Enter guest mode */
-        sti
-
 1:      vmrun %rax
-
-2:      cli
-
+2:
         /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
         FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
 

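The removed sti/cli pairs are the crux of the STI-shadow fix: the instruction immediately after STI still executes with interrupts blocked (the "STI shadow"), and on AMD CPUs that interruptibility state can bleed into guest state if event injection during VMRUN triggers a #VMEXIT -- exactly what the new "Assert that STI blocking isn't set after event injection" selftest checks. A before/after sketch, in comments only, mirroring the code above:

/*
 * Before:                              After:
 *     sti        ; shadow begins           raw_local_irq_enable();   (in C)
 *     vmrun      ; runs in the shadow      ...
 *     cli                                  vmrun   ; shadow long expired
 *                                          ...
 *                                          raw_local_irq_disable();  (in C)
 *
 * GIF=0 across the whole window, so setting RFLAGS.IF in C never lets a
 * physical IRQ in while host code is running.
 */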