
Commit b3233c7

Merge branch 'kvm-fixes-6.10-1' into HEAD
* Fixes and debugging help for the #VE sanity check. Also disable it by default, even for CONFIG_DEBUG_KERNEL, because it was found to trigger spuriously (most likely a processor erratum, as the exact symptoms vary by generation).

* Avoid WARN() when two NMIs arrive simultaneously while NMIs are blocked (GIF=0 or an interrupt shadow) and the processor supports virtual NMI. While KVM generally will not request an NMI window when virtual NMIs are supported, in this case it *does* have to single-step over the interrupt shadow or enable the STGI intercept in order to deliver the latched second NMI.

* Drop support for hand-tuning APIC timer advancement from userspace. Adaptive tuning is in place and has proved to work well, so drop the module parameter for manual configuration, and with it a few bugs that it had.
2 parents c3f38fa + 89a5881 commit b3233c7
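The virtual-NMI case in the second bullet is the subtle one, so here is a purely illustrative decision sketch. It is not KVM's code: the struct, its fields and nmi_window_needed() are made up for this example. It only restates the point from the commit message: vNMI hardware can latch and inject one NMI on its own, but a second NMI queued while NMIs are blocked still forces KVM to open an NMI window (single-step over the interrupt shadow or intercept STGI).

/*
 * Illustrative sketch only -- not KVM code.  With virtual NMI (vNMI) the
 * hardware latches a single pending NMI and injects it itself, so KVM
 * normally never requests an NMI window.  The exception: a second NMI
 * arrives while delivery is blocked (GIF=0 or an interrupt shadow) and one
 * NMI is already latched, so KVM must still open a window to deliver it.
 */
#include <stdbool.h>

struct vcpu_state {
	bool vnmi_supported;	/* CPU supports virtual NMI		*/
	bool vnmi_pending;	/* one NMI is already latched by hw	*/
	bool nmi_blocked;	/* GIF=0 or an interrupt shadow		*/
	int  queued_nmis;	/* NMIs KVM still needs to deliver	*/
};

static bool nmi_window_needed(const struct vcpu_state *v)
{
	if (!v->nmi_blocked || !v->queued_nmis)
		return false;

	if (!v->vnmi_supported)
		return true;	/* classic case: wait for a window */

	/* vNMI: only needed when a second NMI waits behind the latched one. */
	return v->vnmi_pending;
}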

15 files changed (+161, -69 lines)


arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions
@@ -2154,6 +2154,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 		       void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			     u64 addr, unsigned long roots);

arch/x86/include/asm/vmxfeatures.h

Lines changed: 1 addition & 1 deletion
@@ -77,7 +77,7 @@
 #define VMX_FEATURE_ENCLS_EXITING	( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
 #define VMX_FEATURE_RDSEED_EXITING	( 2*32+ 16) /* "" VM-Exit on RDSEED */
 #define VMX_FEATURE_PAGE_MOD_LOGGING	( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE	( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_EPT_VIOLATION_VE	( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */
 #define VMX_FEATURE_PT_CONCEAL_VMX	( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
 #define VMX_FEATURE_XSAVES		( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
 #define VMX_FEATURE_MODE_BASED_EPT_EXEC	( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */

arch/x86/kvm/Kconfig

Lines changed: 7 additions & 4 deletions
@@ -44,6 +44,7 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_PM_NOTIFIER if PM
 	select KVM_GENERIC_HARDWARE_ENABLING
+	select KVM_WERROR if WERROR
 	help
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions. You will need a fairly recent
@@ -66,7 +67,7 @@ config KVM_WERROR
 	# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
 	# Building KVM with -Werror and KASAN is still doable via enabling
 	# the kernel-wide WERROR=y.
-	depends on KVM && EXPERT && !KASAN
+	depends on KVM && ((EXPERT && !KASAN) || WERROR)
 	help
 	  Add -Werror to the build flags for KVM.
 
@@ -97,15 +98,17 @@ config KVM_INTEL
 
 config KVM_INTEL_PROVE_VE
 	bool "Check that guests do not receive #VE exceptions"
-	default KVM_PROVE_MMU || DEBUG_KERNEL
-	depends on KVM_INTEL
+	depends on KVM_INTEL && EXPERT
 	help
-
 	  Checks that KVM's page table management code will not incorrectly
 	  let guests receive a virtualization exception. Virtualization
 	  exceptions will be trapped by the hypervisor rather than injected
 	  in the guest.
 
+	  Note: some CPUs appear to generate spurious EPT Violations #VEs
+	  that trigger KVM's WARN, in particular with eptad=0 and/or nested
+	  virtualization.
+
 	  If unsure, say N.
 
 config X86_SGX_KVM

arch/x86/kvm/lapic.c

Lines changed: 21 additions & 18 deletions
@@ -59,7 +59,17 @@
 #define MAX_APIC_VECTOR			256
 #define APIC_VECTORS_PER_REG		32
 
-static bool lapic_timer_advance_dynamic __read_mostly;
+/*
+ * Enable local APIC timer advancement (tscdeadline mode only) with adaptive
+ * tuning. When enabled, KVM programs the host timer event to fire early, i.e.
+ * before the deadline expires, to account for the delay between taking the
+ * VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
+ * the guest, i.e. so that the interrupt arrives in the guest with minimal
+ * latency relative to the deadline programmed by the guest.
+ */
+static bool lapic_timer_advance __read_mostly = true;
+module_param(lapic_timer_advance, bool, 0444);
+
 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100 /* clock cycles */
 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000 /* clock cycles */
 #define LAPIC_TIMER_ADVANCE_NS_INIT	1000
@@ -1854,16 +1864,14 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
 
-	if (lapic_timer_advance_dynamic) {
-		adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
-		/*
-		 * If the timer fired early, reread the TSC to account for the
-		 * overhead of the above adjustment to avoid waiting longer
-		 * than is necessary.
-		 */
-		if (guest_tsc < tsc_deadline)
-			guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-	}
+	adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
+
+	/*
+	 * If the timer fired early, reread the TSC to account for the overhead
+	 * of the above adjustment to avoid waiting longer than is necessary.
+	 */
+	if (guest_tsc < tsc_deadline)
+		guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 
 	if (guest_tsc < tsc_deadline)
 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
@@ -2812,7 +2820,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 	return HRTIMER_NORESTART;
 }
 
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
 
@@ -2845,13 +2853,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
-	if (timer_advance_ns == -1) {
+	if (lapic_timer_advance)
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
-		lapic_timer_advance_dynamic = true;
-	} else {
-		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-		lapic_timer_advance_dynamic = false;
-	}
 
 	/*
 	 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing

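The new comment at the top of lapic.c describes the adaptive tuning only in prose; the standalone sketch below shows the feedback loop it implies. It is not the kernel's adjust_lapic_timer_advance(): the function name and all constants are illustrative, and units are simplified to nanoseconds throughout.

/*
 * Standalone illustration of adaptive timer advancement (not kernel code):
 * after each expiry, measure how early or late the interrupt was relative
 * to the guest's deadline and apply a fraction of that error to the advance
 * value, ignoring jitter and outliers and clamping the result.
 */
#include <stdint.h>
#include <stdlib.h>

#define ADJUST_MIN_NS	100	/* ignore noise below this */
#define ADJUST_MAX_NS	10000	/* ignore outliers above this */
#define ADJUST_STEP	8	/* apply 1/8 of the error per sample */
#define ADVANCE_MAX_NS	5000	/* never advance the timer by more than this */

static uint32_t adjust_timer_advance(uint32_t advance_ns, int64_t error_ns)
{
	/* error_ns < 0: interrupt arrived before the deadline (too early). */
	int64_t adj;

	if (llabs(error_ns) < ADJUST_MIN_NS || llabs(error_ns) > ADJUST_MAX_NS)
		return advance_ns;

	adj = error_ns / ADJUST_STEP;
	if (adj < 0 && (uint32_t)(-adj) > advance_ns)
		return 0;

	advance_ns += adj;
	return advance_ns > ADVANCE_MAX_NS ? ADVANCE_MAX_NS : advance_ns;
}

Because the replacement parameter is registered with mode 0444, it can only be set at module load time (for example kvm.lapic_timer_advance=0 on the kernel command line to disable advancement entirely) and is read-only afterwards; the per-VM timer_advance_ns hand-tuning knob is gone.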
arch/x86/kvm/lapic.h

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ struct kvm_lapic {
 
 struct dest_map;
 
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);

arch/x86/kvm/mmu/mmu.c

Lines changed: 35 additions & 8 deletions
@@ -336,16 +336,19 @@ static int is_cpuid_PSE36(void)
 #ifdef CONFIG_X86_64
 static void __set_spte(u64 *sptep, u64 spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
 	WRITE_ONCE(*sptep, spte);
 }
 
 static void __update_clear_spte_fast(u64 *sptep, u64 spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
 	WRITE_ONCE(*sptep, spte);
 }
 
 static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
 	return xchg(sptep, spte);
 }
 
@@ -4101,23 +4104,31 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 	return leaf;
 }
 
-/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
-static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			      int *root_level)
 {
-	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
-	struct rsvd_bits_validate *rsvd_check;
-	int root, leaf, level;
-	bool reserved = false;
+	int leaf;
 
 	walk_shadow_page_lockless_begin(vcpu);
 
 	if (is_tdp_mmu_active(vcpu))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
+		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level);
 	else
-		leaf = get_walk(vcpu, addr, sptes, &root);
+		leaf = get_walk(vcpu, addr, sptes, root_level);
 
 	walk_shadow_page_lockless_end(vcpu);
+	return leaf;
+}
 
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
+static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+{
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+	struct rsvd_bits_validate *rsvd_check;
+	int root, leaf, level;
+	bool reserved = false;
+
+	leaf = get_sptes_lockless(vcpu, addr, sptes, &root);
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
 		return reserved;
@@ -5921,6 +5932,22 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
+{
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+	int root_level, leaf, level;
+
+	leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level);
+	if (unlikely(leaf < 0))
+		return;
+
+	pr_err("%s %llx", msg, gpa);
+	for (level = root_level; level >= leaf; level--)
+		pr_cont(", spte[%d] = 0x%llx", level, sptes[level]);
+	pr_cont("\n");
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes);
+
 static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				      u64 addr, hpa_t root_hpa)
 {

arch/x86/kvm/mmu/spte.h

Lines changed: 9 additions & 0 deletions
@@ -3,6 +3,8 @@
 #ifndef KVM_X86_MMU_SPTE_H
 #define KVM_X86_MMU_SPTE_H
 
+#include <asm/vmx.h>
+
 #include "mmu.h"
 #include "mmu_internal.h"
 
@@ -276,6 +278,13 @@ static inline bool is_shadow_present_pte(u64 pte)
 	return !!(pte & SPTE_MMU_PRESENT_MASK);
 }
 
+static inline bool is_ept_ve_possible(u64 spte)
+{
+	return (shadow_present_mask & VMX_EPT_SUPPRESS_VE_BIT) &&
+	       !(spte & VMX_EPT_SUPPRESS_VE_BIT) &&
+	       (spte & VMX_EPT_RWX_MASK) != VMX_EPT_MISCONFIG_WX_VALUE;
+}
+
 /*
  * Returns true if A/D bits are supported in hardware and are enabled by KVM.
  * When enabled, KVM uses A/D bits for all non-nested MMUs. Because L1 can

arch/x86/kvm/mmu/tdp_iter.h

Lines changed: 2 additions & 0 deletions
@@ -21,11 +21,13 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
 
 static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
 	return xchg(rcu_dereference(sptep), new_spte);
 }
 
 static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
 	WRITE_ONCE(*rcu_dereference(sptep), new_spte);
 }
 

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 1 addition & 1 deletion
@@ -626,7 +626,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	 * SPTEs.
 	 */
 	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-			    0, iter->level, true);
+			    SHADOW_NONPRESENT_VALUE, iter->level, true);
 
 	return 0;
 }

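The tdp_mmu.c one-liner above ties into the same invariant that the new KVM_MMU_WARN_ON() checks enforce: when EPT-violation #VE is enabled, bit 63 of an EPT entry is the "suppress #VE" bit, so a not-present SPTE must keep that bit set (or be an intentional misconfiguration, as MMIO SPTEs are) or the guest can take a #VE instead of exiting to KVM. That is why KVM's non-present value is SHADOW_NONPRESENT_VALUE rather than plain 0. The snippet below is a standalone restatement of that per-SPTE check with illustrative constants and names, not the kernel's definitions.

/*
 * Standalone sketch of the #VE invariant (not kernel code).  The real masks
 * are VMX_EPT_SUPPRESS_VE_BIT, VMX_EPT_RWX_MASK, VMX_EPT_MISCONFIG_WX_VALUE
 * and SHADOW_NONPRESENT_VALUE; the values below are illustrative.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define EPT_SUPPRESS_VE_BIT	(1ULL << 63)
#define EPT_RWX_MASK		0x7ULL			/* read/write/exec bits */
#define EPT_MISCONFIG_WX	0x6ULL			/* write+exec: always a misconfig */
#define NONPRESENT_SPTE		EPT_SUPPRESS_VE_BIT	/* "zapped" value, not 0 */

static bool ept_ve_possible(uint64_t spte)
{
	/* A #VE can reach the guest if suppression is off and it's no misconfig. */
	return !(spte & EPT_SUPPRESS_VE_BIT) &&
	       (spte & EPT_RWX_MASK) != EPT_MISCONFIG_WX;
}

int main(void)
{
	assert(ept_ve_possible(0));			/* zapping to 0 is unsafe */
	assert(!ept_ve_possible(NONPRESENT_SPTE));	/* zapping to this is fine */
	return 0;
}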
arch/x86/kvm/svm/sev.c

Lines changed: 14 additions & 5 deletions
@@ -779,6 +779,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
 	 */
 	fpstate_set_confidential(&vcpu->arch.guest_fpu);
 	vcpu->arch.guest_state_protected = true;
+
+	/*
+	 * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
+	 * only after setting guest_state_protected because KVM_SET_MSRS allows
+	 * dynamic toggling of LBRV (for performance reason) on write access to
+	 * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+	 */
+	svm_enable_lbrv(vcpu);
 	return 0;
 }
 
@@ -2406,6 +2414,12 @@ void __init sev_hardware_setup(void)
 	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
 		goto out;
 
+	if (!lbrv) {
+		WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
+			  "LBRV must be present for SEV-ES support");
+		goto out;
+	}
+
 	/* Has the system been allocated ASIDs for SEV-ES? */
 	if (min_sev_asid == 1)
 		goto out;
@@ -3216,7 +3230,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
-	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
 
 	/*
 	 * An SEV-ES guest requires a VMSA area that is a separate from the
@@ -3268,10 +3281,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	/* Clear intercepts on selected MSRs */
 	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 }
 
 void sev_init_vmcb(struct vcpu_svm *svm)
