
Commit 8177722

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "This is a bit on the large side, mostly due to two changes:

  - Changes to disable some broken PMU virtualization (see below for
    details under "x86 PMU")

  - Clean up SVM's enter/exit assembly code so that it can be compiled
    without OBJECT_FILES_NON_STANDARD. This fixes a warning "Unpatched
    return thunk in use. This should not happen!" when running KVM
    selftests.

  Everything else is small bugfixes and selftest changes:

  - Fix a mostly benign bug in the gfn_to_pfn_cache infrastructure
    where KVM would allow userspace to refresh the cache with a bogus
    GPA. The bug has existed for quite some time, but was exposed by a
    new sanity check added in 6.9 (to ensure a cache is either
    GPA-based or HVA-based).

  - Drop an unused param from gfn_to_pfn_cache_invalidate_start() that
    got left behind during a 6.9 cleanup.

  - Fix a math goof in x86's hugepage logic for
    KVM_SET_MEMORY_ATTRIBUTES that results in an array overflow
    (detected by KASAN).

  - Fix a bug where KVM incorrectly clears root_role.direct when
    userspace sets guest CPUID.

  - Fix a dirty logging bug where KVM fails to write-protect SPTEs
    used by a nested guest, if KVM is using Page-Modification Logging
    and the nested hypervisor is NOT using EPT.

  x86 PMU:

  - Drop support for virtualizing adaptive PEBS, as KVM's
    implementation is architecturally broken without an obvious/easy
    path forward, and because exposing adaptive PEBS can leak host
    LBRs to the guest, i.e. can leak host kernel addresses to the
    guest.

  - Set the enable bits for general purpose counters in
    PERF_GLOBAL_CTRL at RESET time, as done by both Intel and AMD
    processors.

  - Disable LBR virtualization on CPUs that don't support LBR
    callstacks, as KVM unconditionally uses
    PERF_SAMPLE_BRANCH_CALL_STACK when creating the perf event, and
    would fail on such CPUs.

  Tests:

  - Fix a flaw in the max_guest_memory selftest that results in it
    exhausting the supply of ucall structures when run with more than
    256 vCPUs.

  - Mark KVM_MEM_READONLY as supported for RISC-V in
    set_memory_region_test"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  KVM: Drop unused @may_block param from gfn_to_pfn_cache_invalidate_start()
  KVM: selftests: Add coverage of EPT-disabled to vmx_dirty_log_test
  KVM: x86/mmu: Fix and clarify comments about clearing D-bit vs. write-protecting
  KVM: x86/mmu: Remove function comments above clear_dirty_{gfn_range,pt_masked}()
  KVM: x86/mmu: Write-protect L2 SPTEs in TDP MMU when clearing dirty status
  KVM: x86/mmu: Precisely invalidate MMU root_role during CPUID update
  KVM: VMX: Disable LBR virtualization if the CPU doesn't support LBR callstacks
  perf/x86/intel: Expose existence of callback support to KVM
  KVM: VMX: Snapshot LBR capabilities during module initialization
  KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms
  KVM: x86: Snapshot if a vCPU's vendor model is AMD vs. Intel compatible
  KVM: x86: Stop compiling vmenter.S with OBJECT_FILES_NON_STANDARD
  KVM: SVM: Create a stack frame in __svm_sev_es_vcpu_run()
  KVM: SVM: Save/restore args across SEV-ES VMRUN via host save area
  KVM: SVM: Save/restore non-volatile GPRs in SEV-ES VMRUN via host save area
  KVM: SVM: Clobber RAX instead of RBX when discarding spec_ctrl_intercepted
  KVM: SVM: Drop 32-bit "support" from __svm_sev_es_vcpu_run()
  KVM: SVM: Wrap __svm_sev_es_vcpu_run() with #ifdef CONFIG_KVM_AMD_SEV
  KVM: SVM: Create a stack frame in __svm_vcpu_run() for unwinding
  KVM: SVM: Remove a useless zeroing of allocated memory
  ...
2 parents e43afae + 44ecfa3 commit 8177722

File tree: 25 files changed (+267, -159 lines)


arch/x86/events/intel/lbr.c

Lines changed: 1 addition & 0 deletions

@@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
 	lbr->from = x86_pmu.lbr_from;
 	lbr->to = x86_pmu.lbr_to;
 	lbr->info = x86_pmu.lbr_info;
+	lbr->has_callstack = x86_pmu_has_lbr_callstack();
 }
 EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
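
The new has_callstack flag is what the "KVM: VMX: Disable LBR virtualization if the CPU doesn't support LBR callstacks" commit keys off of. The vmx.c side is not part of this diff, so the following is only a hedged sketch of a consumer; the sketch_* names are made up for illustration.

/*
 * Sketch only: struct x86_pmu_lbr and x86_perf_get_lbr() are real, but this
 * consumer (and its sketch_* names) is illustrative, not the actual vmx.c
 * change merged here.
 */
static struct x86_pmu_lbr sketch_lbr_caps;

static void sketch_snapshot_lbr_caps(void)
{
	/* Fills nr/from/to/info and, after this patch, has_callstack. */
	x86_perf_get_lbr(&sketch_lbr_caps);
}

static bool sketch_lbr_virtualization_supported(void)
{
	/*
	 * KVM creates its LBR perf event with PERF_SAMPLE_BRANCH_CALL_STACK,
	 * so if the CPU lacks callstack support the event would fail to be
	 * created; treat LBRs as unsupported instead of exposing them.
	 */
	return sketch_lbr_caps.nr && sketch_lbr_caps.has_callstack;
}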

arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions

@@ -855,6 +855,7 @@ struct kvm_vcpu_arch {
 	int cpuid_nent;
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	struct kvm_hypervisor_cpuid kvm_cpuid;
+	bool is_amd_compatible;
 
 	/*
	 * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly

arch/x86/include/asm/perf_event.h

Lines changed: 1 addition & 0 deletions

@@ -555,6 +555,7 @@ struct x86_pmu_lbr {
 	unsigned int from;
 	unsigned int to;
 	unsigned int info;
+	bool has_callstack;
 };
 
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);

arch/x86/kvm/Makefile

Lines changed: 0 additions & 5 deletions

@@ -3,11 +3,6 @@
 ccflags-y += -I $(srctree)/arch/x86/kvm
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
-ifeq ($(CONFIG_FRAME_POINTER),y)
-OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
-OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
-endif
-
 include $(srctree)/virt/kvm/Makefile.kvm
 
 kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \

arch/x86/kvm/cpuid.c

Lines changed: 1 addition & 0 deletions

@@ -376,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	kvm_update_pv_runtime(vcpu);
 
+	vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
 	vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
 	vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);

arch/x86/kvm/cpuid.h

Lines changed: 10 additions & 0 deletions

@@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
 	return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
 }
 
+static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.is_amd_compatible;
+}
+
+static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
+{
+	return !guest_cpuid_is_amd_compatible(vcpu);
+}
+
 static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
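
For context, these wrappers exist so that hot paths read a bool snapshotted at KVM_SET_CPUID time instead of re-deriving the vendor from guest CPUID on every call. Roughly what the cached flag replaces is sketched below; this is an approximation, since the body of guest_cpuid_is_amd_or_hygon() is not part of this excerpt.

/*
 * Approximate sketch of the uncached lookup that callers such as
 * kvm_apic_local_deliver() would otherwise perform: find CPUID leaf 0 and
 * compare the vendor string on every invocation.
 */
static inline bool sketch_uncached_is_amd_compatible(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 0);

	return best && (is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
			is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
}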

arch/x86/kvm/lapic.c

Lines changed: 2 additions & 1 deletion

@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 	trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
 
 	r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
-	if (r && lvt_type == APIC_LVTPC)
+	if (r && lvt_type == APIC_LVTPC &&
+	    guest_cpuid_is_intel_compatible(apic->vcpu))
 		kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
 	return r;
 }

arch/x86/kvm/mmu/mmu.c

Lines changed: 6 additions & 5 deletions

@@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 				context->cpu_role.base.level, is_efer_nx(context),
 				guest_can_use(vcpu, X86_FEATURE_GBPAGES),
 				is_cr4_pse(context),
-				guest_cpuid_is_amd_or_hygon(vcpu));
+				guest_cpuid_is_amd_compatible(vcpu));
 }
 
 static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
@@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	 * that problem is swept under the rug; KVM's CPUID API is horrific and
 	 * it's all but impossible to solve it without introducing a new API.
 	 */
-	vcpu->arch.root_mmu.root_role.word = 0;
-	vcpu->arch.guest_mmu.root_role.word = 0;
-	vcpu->arch.nested_mmu.root_role.word = 0;
+	vcpu->arch.root_mmu.root_role.invalid = 1;
+	vcpu->arch.guest_mmu.root_role.invalid = 1;
+	vcpu->arch.nested_mmu.root_role.invalid = 1;
 	vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
 	vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
 	vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
@@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
 			 * by the memslot, KVM can't use a hugepage due to the
 			 * misaligned address regardless of memory attributes.
 			 */
-			if (gfn >= slot->base_gfn) {
+			if (gfn >= slot->base_gfn &&
+			    gfn + nr_pages <= slot->base_gfn + slot->npages) {
 				if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
 					hugepage_clear_mixed(slot, gfn, level);
 				else
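
To see why the old check could overflow (the "math goof" called out in the merge message), consider a memslot whose end is not hugepage aligned: the gfn handed to hugepage_has_attrs() is aligned down to a hugepage boundary, so the scanned range can run past the slot. Below is a standalone userspace model of that arithmetic, with made-up numbers; it is not kernel code.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t base_gfn = 0x100;	/* memslot starts at gfn 0x100   */
	uint64_t npages   = 0x280;	/* ...and spans 0x280 pages      */
	uint64_t nr_pages = 0x200;	/* pages covered by one hugepage */

	/* Tail of the range: the slot's last gfn aligned down to a hugepage. */
	uint64_t gfn = ((base_gfn + npages - 1) / nr_pages) * nr_pages;

	int old_check = gfn >= base_gfn;
	int new_check = gfn >= base_gfn &&
			gfn + nr_pages <= base_gfn + npages;

	printf("aligned gfn = %#llx, slot end = %#llx\n",
	       (unsigned long long)gfn,
	       (unsigned long long)(base_gfn + npages));
	printf("old check passes: %d (would scan up to gfn %#llx, past the slot)\n",
	       old_check, (unsigned long long)(gfn + nr_pages - 1));
	printf("new check passes: %d\n", new_check);
	return 0;
}

Run as-is, the old check accepts a range that extends 0x80 pages beyond the slot, which is the out-of-bounds walk KASAN flagged; the added upper-bound test rejects it.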

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 22 additions & 29 deletions

@@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 	}
 }
 
-/*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
- */
+static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
+{
+	/*
+	 * All TDP MMU shadow pages share the same role as their root, aside
+	 * from level, so it is valid to key off any shadow page to determine if
+	 * write protection is needed for an entire tree.
+	 */
+	return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
+}
+
 static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 				  gfn_t start, gfn_t end)
 {
-	u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
+	const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
+							    shadow_dirty_mask;
 	struct tdp_iter iter;
 	bool spte_set = false;
 
@@ -1573,7 +1577,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
-		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 				spte_ad_need_write_protect(iter.old_spte));
 
 		if (!(iter.old_spte & dbit))
@@ -1590,11 +1594,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 }
 
 /*
- * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
- * AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
- * If AD bits are not enabled, this will require clearing the writable bit on
- * each SPTE. Returns true if an SPTE has been changed and the TLBs need to
- * be flushed.
+ * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
+ * memslot. Returns true if an SPTE has been changed and the TLBs need to be
+ * flushed.
  */
 bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
 				  const struct kvm_memory_slot *slot)
@@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
 	return spte_set;
 }
 
-/*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
- */
 static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 				  gfn_t gfn, unsigned long mask, bool wrprot)
 {
-	u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
-						   shadow_dirty_mask;
+	const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
+									shadow_dirty_mask;
 	struct tdp_iter iter;
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 		if (!mask)
 			break;
 
-		KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+		KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
 				spte_ad_need_write_protect(iter.old_spte));
 
 		if (iter.level > PG_LEVEL_4K ||
@@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
 }
 
 /*
- * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
- * set in mask, starting at gfn. The given memslot is expected to contain all
- * the GFNs represented by set bits in the mask. If AD bits are enabled,
- * clearing the dirty status will involve clearing the dirty bit on each SPTE
- * or, if AD bits are not enabled, clearing the writable bit on each SPTE.
+ * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
+ * which a bit is set in mask, starting at gfn. The given memslot is expected to
+ * contain all the GFNs represented by set bits in the mask.
  */
 void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				       struct kvm_memory_slot *slot,
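
Distilled, the dirty-clearing mask is now chosen per tree via tdp_mmu_need_write_protect() rather than from the global kvm_ad_enabled() alone, which is what fixes the nested (L2, non-EPT) dirty-logging case called out in the merge message. The snippet below is only an illustrative restatement of the selection logic visible in the hunks above, not additional kernel code.

/*
 * Illustrative restatement of the mask selection above; the real logic lives
 * inline in clear_dirty_gfn_range() and clear_dirty_pt_masked().
 */
static u64 sketch_dirty_clear_mask(struct kvm_mmu_page *root, bool wrprot)
{
	/*
	 * Write-protect (clear the W-bit) if the caller asked for it, if this
	 * tree's SPTEs must be write-protected for dirty tracking (the L2
	 * case), or if A/D bits are disabled; otherwise clear the D-bit.
	 */
	if (wrprot || tdp_mmu_need_write_protect(root))
		return PT_WRITABLE_MASK;

	return shadow_dirty_mask;
}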

arch/x86/kvm/pmu.c

Lines changed: 14 additions & 2 deletions

@@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->pebs_data_cfg_mask = ~0ull;
 	bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
 
-	if (vcpu->kvm->arch.enable_pmu)
-		static_call(kvm_x86_pmu_refresh)(vcpu);
+	if (!vcpu->kvm->arch.enable_pmu)
+		return;
+
+	static_call(kvm_x86_pmu_refresh)(vcpu);
+
+	/*
+	 * At RESET, both Intel and AMD CPUs set all enable bits for general
+	 * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
+	 * was written for v1 PMUs don't unknowingly leave GP counters disabled
+	 * in the global controls). Emulate that behavior when refreshing the
+	 * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
+	 */
+	if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
+		pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
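
The new RESET default is simply the low nr_arch_gp_counters bits of PERF_GLOBAL_CTRL. A standalone illustration of the GENMASK_ULL() arithmetic follows; the macro is re-derived here as a simplified userspace stand-in, not the kernel header definition.

#include <stdio.h>

/* Simplified userspace stand-in for the kernel's GENMASK_ULL(). */
#define GENMASK_ULL(h, l) \
	((~0ULL >> (63 - (h))) & ~((1ULL << (l)) - 1ULL))

int main(void)
{
	/* e.g. 6 GP counters -> 0x3f, i.e. enable bits 0..5 set at "RESET". */
	for (unsigned int nr_gp = 1; nr_gp <= 8; nr_gp++)
		printf("nr_arch_gp_counters = %u -> global_ctrl = %#llx\n",
		       nr_gp, GENMASK_ULL(nr_gp - 1, 0));
	return 0;
}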
