Skip to content

Commit f0f59d0

Browse files
committed
Merge tag 'kvm-x86-mmu-6.7' of https://github.com/kvm-x86/linux into HEAD
KVM x86 MMU changes for 6.7:

 - Clean up code that deals with honoring guest MTRRs when the VM has
   non-coherent DMA and host MTRRs are ignored, i.e. EPT is enabled.

 - Zap EPT entries when non-coherent DMA assignment stops/starts to
   prevent using stale entries with the wrong memtype.

 - Don't ignore guest PAT for CR0.CD=1 && KVM_X86_QUIRK_CD_NW_CLEARED=y,
   as there's zero reason to ignore guest PAT if the effective MTRR
   memtype is WB.  This will also allow for future optimizations of
   handling guest MTRR updates for VMs with non-coherent DMA and the
   quirk enabled.

 - Harden the fast page fault path to guard against encountering an
   invalid root when walking SPTEs.
2 parents f292dc8 + 1de9992 commit f0f59d0

File tree

5 files changed

+55
-21
lines changed

5 files changed

+55
-21
lines changed

arch/x86/kvm/mmu.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,13 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
237237
return -(u32)fault & errcode;
238238
}
239239

240+
bool __kvm_mmu_honors_guest_mtrrs(bool vm_has_noncoherent_dma);
241+
242+
static inline bool kvm_mmu_honors_guest_mtrrs(struct kvm *kvm)
243+
{
244+
return __kvm_mmu_honors_guest_mtrrs(kvm_arch_has_noncoherent_dma(kvm));
245+
}
246+
240247
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
241248

242249
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);

arch/x86/kvm/mmu/mmu.c

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3425,8 +3425,8 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
34253425
{
34263426
struct kvm_mmu_page *sp;
34273427
int ret = RET_PF_INVALID;
3428-
u64 spte = 0ull;
3429-
u64 *sptep = NULL;
3428+
u64 spte;
3429+
u64 *sptep;
34303430
uint retry_count = 0;
34313431

34323432
if (!page_fault_can_be_fast(fault))
@@ -3442,6 +3442,14 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
34423442
else
34433443
sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
34443444

3445+
/*
3446+
* It's entirely possible for the mapping to have been zapped
3447+
* by a different task, but the root page should always be
3448+
* available as the vCPU holds a reference to its root(s).
3449+
*/
3450+
if (WARN_ON_ONCE(!sptep))
3451+
spte = REMOVED_SPTE;
3452+
34453453
if (!is_shadow_present_pte(spte))
34463454
break;
34473455

@@ -4479,21 +4487,28 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
44794487
}
44804488
#endif
44814489

4482-
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
4490+
bool __kvm_mmu_honors_guest_mtrrs(bool vm_has_noncoherent_dma)
44834491
{
44844492
/*
4485-
* If the guest's MTRRs may be used to compute the "real" memtype,
4486-
* restrict the mapping level to ensure KVM uses a consistent memtype
4487-
* across the entire mapping. If the host MTRRs are ignored by TDP
4488-
* (shadow_memtype_mask is non-zero), and the VM has non-coherent DMA
4489-
* (DMA doesn't snoop CPU caches), KVM's ABI is to honor the memtype
4490-
* from the guest's MTRRs so that guest accesses to memory that is
4491-
* DMA'd aren't cached against the guest's wishes.
4493+
* If host MTRRs are ignored (shadow_memtype_mask is non-zero), and the
4494+
* VM has non-coherent DMA (DMA doesn't snoop CPU caches), KVM's ABI is
4495+
* to honor the memtype from the guest's MTRRs so that guest accesses
4496+
* to memory that is DMA'd aren't cached against the guest's wishes.
44924497
*
44934498
* Note, KVM may still ultimately ignore guest MTRRs for certain PFNs,
44944499
* e.g. KVM will force UC memtype for host MMIO.
44954500
*/
4496-
if (shadow_memtype_mask && kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
4501+
return vm_has_noncoherent_dma && shadow_memtype_mask;
4502+
}
4503+
4504+
int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
4505+
{
4506+
/*
4507+
* If the guest's MTRRs may be used to compute the "real" memtype,
4508+
* restrict the mapping level to ensure KVM uses a consistent memtype
4509+
* across the entire mapping.
4510+
*/
4511+
if (kvm_mmu_honors_guest_mtrrs(vcpu->kvm)) {
44974512
for ( ; fault->max_level > PG_LEVEL_4K; --fault->max_level) {
44984513
int page_num = KVM_PAGES_PER_HPAGE(fault->max_level);
44994514
gfn_t base = gfn_round_for_level(fault->gfn,

arch/x86/kvm/mtrr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
320320
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
321321
gfn_t start, end;
322322

323-
if (!tdp_enabled || !kvm_arch_has_noncoherent_dma(vcpu->kvm))
323+
if (!kvm_mmu_honors_guest_mtrrs(vcpu->kvm))
324324
return;
325325

326326
if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)

arch/x86/kvm/vmx/vmx.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7579,8 +7579,6 @@ static int vmx_vm_init(struct kvm *kvm)
75797579

75807580
static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
75817581
{
7582-
u8 cache;
7583-
75847582
/* We wanted to honor guest CD/MTRR/PAT, but doing so could result in
75857583
* memory aliases with conflicting memory types and sometimes MCEs.
75867584
* We have to be careful as to what are honored and when.
@@ -7607,11 +7605,10 @@ static u8 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
76077605

76087606
if (kvm_read_cr0_bits(vcpu, X86_CR0_CD)) {
76097607
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
7610-
cache = MTRR_TYPE_WRBACK;
7608+
return MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT;
76117609
else
7612-
cache = MTRR_TYPE_UNCACHABLE;
7613-
7614-
return (cache << VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IPAT_BIT;
7610+
return (MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT) |
7611+
VMX_EPT_IPAT_BIT;
76157612
}
76167613

76177614
return kvm_mtrr_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT;

arch/x86/kvm/x86.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,7 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon
962962
kvm_mmu_reset_context(vcpu);
963963

964964
if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
965-
kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
965+
kvm_mmu_honors_guest_mtrrs(vcpu->kvm) &&
966966
!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
967967
kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
968968
}
@@ -13313,15 +13313,30 @@ bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
1331313313
}
1331413314
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
1331513315

13316+
static void kvm_noncoherent_dma_assignment_start_or_stop(struct kvm *kvm)
13317+
{
13318+
/*
13319+
* Non-coherent DMA assignment and de-assignment will affect
13320+
* whether KVM honors guest MTRRs and cause changes in memtypes
13321+
* in TDP.
13322+
* So, pass %true unconditionally to indicate non-coherent DMA was,
13323+
* or will be involved, and that zapping SPTEs might be necessary.
13324+
*/
13325+
if (__kvm_mmu_honors_guest_mtrrs(true))
13326+
kvm_zap_gfn_range(kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
13327+
}
13328+
1331613329
void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
1331713330
{
13318-
atomic_inc(&kvm->arch.noncoherent_dma_count);
13331+
if (atomic_inc_return(&kvm->arch.noncoherent_dma_count) == 1)
13332+
kvm_noncoherent_dma_assignment_start_or_stop(kvm);
1331913333
}
1332013334
EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
1332113335

1332213336
void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
1332313337
{
13324-
atomic_dec(&kvm->arch.noncoherent_dma_count);
13338+
if (!atomic_dec_return(&kvm->arch.noncoherent_dma_count))
13339+
kvm_noncoherent_dma_assignment_start_or_stop(kvm);
1332513340
}
1332613341
EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
1332713342

0 commit comments

Comments (0)