
Commit b066935

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Address some fallout of the locking rework, this time affecting the
     way the vgic is configured

   - Fix an issue where the page table walker frees a subtree and then
     proceeds with walking what it has just freed...

   - Check that a given PA donated to the guest is actually memory (only
     affecting pKVM)

   - Correctly handle MTE CMOs by Set/Way

   - Fix the reported address of a watchpoint forwarded to userspace

   - Fix the freeing of the root of stage-2 page tables

   - Stop creating spurious PMU events to perform detection of the
     default PMU and use the existing PMU list instead

  x86:

   - Fix a memslot lookup bug in the NX recovery thread that could
     theoretically let userspace bypass the NX hugepage mitigation

   - Fix a s/BLOCKING/PENDING bug in SVM's vNMI support

   - Account exit stats for fastpath VM-Exits that never leave the super
     tight run-loop

   - Fix an out-of-bounds bug in the optimized APIC map code, and add a
     regression test for the race"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: selftests: Add test for race in kvm_recalculate_apic_map()
  KVM: x86: Bail from kvm_recalculate_phys_map() if x2APIC ID is out-of-bounds
  KVM: x86: Account fastpath-only VM-Exits in vCPU stats
  KVM: SVM: vNMI pending bit is V_NMI_PENDING_MASK not V_NMI_BLOCKING_MASK
  KVM: x86/mmu: Grab memslot for correct address space in NX recovery worker
  KVM: arm64: Document default vPMU behavior on heterogeneous systems
  KVM: arm64: Iterate arm_pmus list to probe for default PMU
  KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()
  KVM: arm64: Populate fault info for watchpoint
  KVM: arm64: Reload PTE after invoking walker callback on preorder traversal
  KVM: arm64: Handle trap of tagged Set/Way CMOs
  arm64: Add missing Set/Way CMO encodings
  KVM: arm64: Prevent unconditional donation of unmapped regions from the host
  KVM: arm64: vgic: Fix a comment
  KVM: arm64: vgic: Fix locking comment
  KVM: arm64: vgic: Wrap vgic_its_create() with config_lock
  KVM: arm64: vgic: Fix a circular locking issue
2 parents 9455b4b + f211b45

23 files changed (+248 −95 lines)

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 3 additions & 3 deletions
@@ -632,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
  *
  * The walker will walk the page-table entries corresponding to the input
  * address range specified, visiting entries according to the walker flags.
- * Invalid entries are treated as leaf entries. Leaf entries are reloaded
- * after invoking the walker callback, allowing the walker to descend into
- * a newly installed table.
+ * Invalid entries are treated as leaf entries. The visited page table entry is
+ * reloaded after invoking the walker callback, allowing the walker to descend
+ * into a newly installed table.
  *
  * Returning a negative error code from the walker callback function will
  * terminate the walk immediately with the same error code.

arch/arm64/include/asm/sysreg.h

Lines changed: 6 additions & 0 deletions
@@ -115,8 +115,14 @@
 #define SB_BARRIER_INSN			__SYS_BARRIER_INSN(0, 7, 31)
 
 #define SYS_DC_ISW			sys_insn(1, 0, 7, 6, 2)
+#define SYS_DC_IGSW			sys_insn(1, 0, 7, 6, 4)
+#define SYS_DC_IGDSW			sys_insn(1, 0, 7, 6, 6)
 #define SYS_DC_CSW			sys_insn(1, 0, 7, 10, 2)
+#define SYS_DC_CGSW			sys_insn(1, 0, 7, 10, 4)
+#define SYS_DC_CGDSW			sys_insn(1, 0, 7, 10, 6)
 #define SYS_DC_CISW			sys_insn(1, 0, 7, 14, 2)
+#define SYS_DC_CIGSW			sys_insn(1, 0, 7, 14, 4)
+#define SYS_DC_CIGDSW			sys_insn(1, 0, 7, 14, 6)
 
 /*
  * Automatically generated definitions for system registers, the
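
An aside on how these constants are built: sys_insn() packs the (Op0, Op1, CRn, CRm, Op2) operands of a SYS instruction into one value, and the pattern above is regular — each MTE variant keeps the CRm of its classic counterpart (6 = invalidate, 10 = clean, 14 = clean+invalidate) and only Op2 changes (2 = data, 4 = allocation tag, 6 = data+tag). A standalone sketch of that packing; the field shifts follow the arm64 sysreg convention but are assumptions of this sketch, not a copy of the header:

#include <stdint.h>
#include <stdio.h>

/* Illustrative field shifts for the (Op0, Op1, CRn, CRm, Op2) encoding. */
#define OP0_SHIFT	19
#define OP1_SHIFT	16
#define CRN_SHIFT	12
#define CRM_SHIFT	8
#define OP2_SHIFT	5

static uint32_t sys_insn(uint32_t op0, uint32_t op1, uint32_t crn,
			 uint32_t crm, uint32_t op2)
{
	return (op0 << OP0_SHIFT) | (op1 << OP1_SHIFT) |
	       (crn << CRN_SHIFT) | (crm << CRM_SHIFT) | (op2 << OP2_SHIFT);
}

int main(void)
{
	/* DC ISW and its MTE siblings: CRm stays 6, Op2 steps 2 -> 4 -> 6. */
	printf("DC ISW   = %#x\n", sys_insn(1, 0, 7, 6, 2));
	printf("DC IGSW  = %#x\n", sys_insn(1, 0, 7, 6, 4));
	printf("DC IGDSW = %#x\n", sys_insn(1, 0, 7, 6, 6));
	return 0;
}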

arch/arm64/kvm/hyp/include/hyp/switch.h

Lines changed: 6 additions & 2 deletions
@@ -412,17 +412,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return false;
 }
 
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	if (!__populate_fault_info(vcpu))
 		return true;
 
 	return false;
 }
+static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+	__alias(kvm_hyp_handle_memory_fault);
+static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+	__alias(kvm_hyp_handle_memory_fault);
 
 static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-	if (!__populate_fault_info(vcpu))
+	if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
 		return true;
 
 	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
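
The __alias() used above is the kernel's shorthand for GCC's alias attribute: kvm_hyp_handle_iabt_low and kvm_hyp_handle_watchpt_low become alternate names for the body of kvm_hyp_handle_memory_fault, with no wrapper call emitted. A minimal userspace sketch of the same technique (names and types here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

/* Single implementation shared by several exception-class "handlers". */
static bool handle_memory_fault(void *vcpu, unsigned long *exit_code)
{
	printf("populating fault info (exit_code=%lu)\n", *exit_code);
	return false;
}

/*
 * Aliases: two extra symbols for the same machine code. Unlike a
 * wrapper function, no extra call frame is emitted.
 */
static bool handle_iabt_low(void *vcpu, unsigned long *exit_code)
	__attribute__((alias("handle_memory_fault")));
static bool handle_watchpt_low(void *vcpu, unsigned long *exit_code)
	__attribute__((alias("handle_memory_fault")));

int main(void)
{
	unsigned long ec = 34;

	handle_iabt_low(NULL, &ec);	/* both land in handle_memory_fault */
	handle_watchpt_low(NULL, &ec);
	return 0;
}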

arch/arm64/kvm/hyp/nvhe/mem_protect.c

Lines changed: 7 additions & 7 deletions
@@ -575,18 +575,15 @@ struct pkvm_mem_donation {
 
 struct check_walk_data {
 	enum pkvm_page_state	desired;
-	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
+	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte, u64 addr);
 };
 
 static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
 				      enum kvm_pgtable_walk_flags visit)
 {
 	struct check_walk_data *d = ctx->arg;
 
-	if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
-		return -EINVAL;
-
-	return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
+	return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
 }
 
 static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return kvm_pgtable_walk(pgt, addr, size, &walker);
 }
 
-static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
+static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
 {
+	if (!addr_is_allowed_memory(addr))
+		return PKVM_NOPAGE;
+
 	if (!kvm_pte_valid(pte) && pte)
 		return PKVM_NOPAGE;
 
@@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx
 	return host_stage2_set_owner_locked(addr, size, host_id);
 }
 
-static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
+static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
 {
 	if (!kvm_pte_valid(pte))
 		return PKVM_NOPAGE;
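
The shape of this fix is worth spelling out: rather than special-casing disallowed PAs in the shared visitor, the address is threaded into each owner's get_page_state() callback, which can fold "not memory" into the ordinary PKVM_NOPAGE state. A toy version of that pattern, where all types and the range check are simplified stand-ins for the kernel's:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t pte_t;

enum page_state { PAGE_OWNED, PAGE_NOPAGE };

/* The callback now sees both the entry and the address being checked. */
struct check_walk_data {
	enum page_state desired;
	enum page_state (*get_page_state)(pte_t pte, uint64_t addr);
};

/* Toy stand-in for the kernel's memblock-backed range check. */
static int addr_is_allowed_memory(uint64_t addr)
{
	return addr < (1ULL << 32);
}

static enum page_state host_get_page_state(pte_t pte, uint64_t addr)
{
	/* A PA outside memory folds into the ordinary "no page" state... */
	if (!addr_is_allowed_memory(addr))
		return PAGE_NOPAGE;

	return pte ? PAGE_OWNED : PAGE_NOPAGE;
}

/* ...so the shared visitor needs no address special case at all. */
static int check_entry(const struct check_walk_data *d, pte_t pte,
		       uint64_t addr)
{
	return d->get_page_state(pte, addr) == d->desired ? 0 : -1;
}

int main(void)
{
	struct check_walk_data d = {
		.desired = PAGE_OWNED,
		.get_page_state = host_get_page_state,
	};

	printf("%d\n", check_entry(&d, 0x1, 0x1000));	 /* 0: owned */
	printf("%d\n", check_entry(&d, 0x1, 1ULL << 40)); /* -1: not memory */
	return 0;
}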

arch/arm64/kvm/hyp/nvhe/switch.c

Lines changed: 2 additions & 0 deletions
@@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
+	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };
 
@@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
+	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 16 additions & 1 deletion
@@ -209,14 +209,26 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		.flags	= flags,
 	};
 	int ret = 0;
+	bool reload = false;
 	kvm_pteref_t childp;
 	bool table = kvm_pte_table(ctx.old, level);
 
-	if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
+	if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
 		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
+		reload = true;
+	}
 
 	if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
 		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
+		reload = true;
+	}
+
+	/*
+	 * Reload the page table after invoking the walker callback for leaf
+	 * entries or after pre-order traversal, to allow the walker to descend
+	 * into a newly installed or replaced table.
+	 */
+	if (reload) {
 		ctx.old = READ_ONCE(*ptep);
 		table = kvm_pte_table(ctx.old, level);
 	}
@@ -1320,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
 	};
 
 	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
+
+	WARN_ON(mm_ops->page_count(pgtable) != 1);
+	mm_ops->put_page(pgtable);
 }
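
The bug the first hunk fixes is a classic stale read: a pre-order callback may free and replace the table that ctx.old was snapshotted from, so continuing the walk from the stale snapshot means walking freed memory. A compact, self-contained sketch of the reload discipline, using a toy PTE format (low bit marks a table) rather than the kernel's:

#include <stdio.h>

/* Toy PTE: bit 0 set means "points to a next-level table". */
typedef unsigned long pte_t;
#define PTE_TABLE	0x1UL

typedef int (*visitor_fn)(pte_t *ptep);

/*
 * Visit one entry. The callback is allowed to replace *ptep (e.g. install
 * a new table after freeing the old one), so the walker must re-read the
 * entry before deciding whether to descend; acting on the pre-callback
 * snapshot would mean walking into freed memory.
 */
static int visit_entry(pte_t *ptep, visitor_fn cb)
{
	pte_t old = *ptep;	/* snapshot, as the real walker does */
	int ret = cb(ptep);

	if (ret)
		return ret;

	old = *ptep;		/* reload: the callback may have changed it */
	if (old & PTE_TABLE)
		printf("descending into table %#lx\n", old & ~PTE_TABLE);
	else
		printf("leaf entry %#lx\n", old);
	return 0;
}

/* Example callback that swaps in a brand-new table. */
static int replace_with_new_table(pte_t *ptep)
{
	*ptep = 0x2000UL | PTE_TABLE;
	return 0;
}

int main(void)
{
	pte_t pte = 0x1000UL | PTE_TABLE;

	return visit_entry(&pte, replace_with_new_table);
}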

arch/arm64/kvm/hyp/vhe/switch.c

Lines changed: 1 addition & 0 deletions
@@ -110,6 +110,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
+	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };

arch/arm64/kvm/pmu-emul.c

Lines changed: 23 additions & 35 deletions
@@ -694,45 +694,23 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 
 static struct arm_pmu *kvm_pmu_probe_armpmu(void)
 {
-	struct perf_event_attr attr = { };
-	struct perf_event *event;
-	struct arm_pmu *pmu = NULL;
-
-	/*
-	 * Create a dummy event that only counts user cycles. As we'll never
-	 * leave this function with the event being live, it will never
-	 * count anything. But it allows us to probe some of the PMU
-	 * details. Yes, this is terrible.
-	 */
-	attr.type = PERF_TYPE_RAW;
-	attr.size = sizeof(attr);
-	attr.pinned = 1;
-	attr.disabled = 0;
-	attr.exclude_user = 0;
-	attr.exclude_kernel = 1;
-	attr.exclude_hv = 1;
-	attr.exclude_host = 1;
-	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
-	attr.sample_period = GENMASK(63, 0);
+	struct arm_pmu *tmp, *pmu = NULL;
+	struct arm_pmu_entry *entry;
+	int cpu;
 
-	event = perf_event_create_kernel_counter(&attr, -1, current,
-						 kvm_pmu_perf_overflow, &attr);
+	mutex_lock(&arm_pmus_lock);
 
-	if (IS_ERR(event)) {
-		pr_err_once("kvm: pmu event creation failed %ld\n",
-			    PTR_ERR(event));
-		return NULL;
-	}
+	cpu = smp_processor_id();
+	list_for_each_entry(entry, &arm_pmus, entry) {
+		tmp = entry->arm_pmu;
 
-	if (event->pmu) {
-		pmu = to_arm_pmu(event->pmu);
-		if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
-			pmu = NULL;
+		if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
+			pmu = tmp;
+			break;
+		}
 	}
 
-	perf_event_disable(event);
-	perf_event_release_kernel(event);
+	mutex_unlock(&arm_pmus_lock);
 
 	return pmu;
 }
@@ -912,7 +890,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		return -EBUSY;
 
 	if (!kvm->arch.arm_pmu) {
-		/* No PMU set, get the default one */
+		/*
+		 * No PMU set, get the default one.
+		 *
+		 * The observant among you will notice that the supported_cpus
+		 * mask does not get updated for the default PMU even though it
+		 * is quite possible the selected instance supports only a
+		 * subset of cores in the system. This is intentional, and
+		 * upholds the preexisting behavior on heterogeneous systems
+		 * where vCPUs can be scheduled on any core but the guest
+		 * counters could stop working.
+		 */
 		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
 		if (!kvm->arch.arm_pmu)
 			return -ENODEV;

arch/arm64/kvm/sys_regs.c

Lines changed: 19 additions & 0 deletions
@@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static bool access_dcgsw(struct kvm_vcpu *vcpu,
+			 struct sys_reg_params *p,
+			 const struct sys_reg_desc *r)
+{
+	if (!kvm_has_mte(vcpu->kvm)) {
+		kvm_inject_undefined(vcpu);
+		return false;
+	}
+
+	/* Treat MTE S/W ops as we treat the classic ones: with contempt */
+	return access_dcsw(vcpu, p, r);
+}
+
 static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
 {
 	switch (r->aarch32_map) {
@@ -1756,8 +1769,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
  */
 static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_DC_ISW), access_dcsw },
+	{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
+	{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
 	{ SYS_DESC(SYS_DC_CSW), access_dcsw },
+	{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
+	{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
 	{ SYS_DESC(SYS_DC_CISW), access_dcsw },
+	{ SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
+	{ SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },
 
 	DBG_BCR_BVR_WCR_WVR_EL1(0),
 	DBG_BCR_BVR_WCR_WVR_EL1(1),

arch/arm64/kvm/vgic/vgic-init.c

Lines changed: 21 additions & 6 deletions
@@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	 * KVM io device for the redistributor that belongs to this VCPU.
 	 */
 	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-		mutex_lock(&vcpu->kvm->arch.config_lock);
+		mutex_lock(&vcpu->kvm->slots_lock);
 		ret = vgic_register_redist_iodev(vcpu);
-		mutex_unlock(&vcpu->kvm->arch.config_lock);
+		mutex_unlock(&vcpu->kvm->slots_lock);
 	}
 	return ret;
 }
@@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
 
 /**
  * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
- * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
+ * is a GICv2. A GICv3 must be explicitly initialized by userspace using the
  * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
  * @kvm: kvm struct pointer
  */
@@ -446,11 +446,13 @@ int vgic_lazy_init(struct kvm *kvm)
 int kvm_vgic_map_resources(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
+	gpa_t dist_base;
 	int ret = 0;
 
 	if (likely(vgic_ready(kvm)))
 		return 0;
 
+	mutex_lock(&kvm->slots_lock);
 	mutex_lock(&kvm->arch.config_lock);
 	if (vgic_ready(kvm))
 		goto out;
@@ -463,13 +465,26 @@ int kvm_vgic_map_resources(struct kvm *kvm)
 	else
 		ret = vgic_v3_map_resources(kvm);
 
-	if (ret)
+	if (ret) {
 		__kvm_vgic_destroy(kvm);
-	else
-		dist->ready = true;
+		goto out;
+	}
+	dist->ready = true;
+	dist_base = dist->vgic_dist_base;
+	mutex_unlock(&kvm->arch.config_lock);
+
+	ret = vgic_register_dist_iodev(kvm, dist_base,
+				       kvm_vgic_global_state.type);
+	if (ret) {
+		kvm_err("Unable to register VGIC dist MMIO regions\n");
+		kvm_vgic_destroy(kvm);
+	}
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
 
 out:
 	mutex_unlock(&kvm->arch.config_lock);
+	mutex_unlock(&kvm->slots_lock);
 	return ret;
 }
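
The circular-locking fix above rests on an ordering rule: kvm->slots_lock must always be taken outside kvm->arch.config_lock, so the distributor iodev registration (which needs slots_lock) is hoisted out of the config_lock critical section, with dist_base snapshotted first. A generic pthreads sketch of the same discipline; the locks, state, and helpers here are hypothetical, not KVM's:

#include <pthread.h>
#include <stdbool.h>

/* Ordering rule: slots_lock is the outer lock, config_lock the inner one. */
static pthread_mutex_t slots_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER;

static bool ready;
static unsigned long dist_base_cfg = 0x8000000;

/* Needs slots_lock; must never be called with config_lock held. */
static int register_mmio_device(unsigned long base)
{
	(void)base;
	return 0;
}

static int map_resources(void)
{
	unsigned long dist_base;
	int ret;

	pthread_mutex_lock(&slots_lock);	/* outer lock first */
	pthread_mutex_lock(&config_lock);	/* inner lock second */

	ready = true;
	dist_base = dist_base_cfg;		/* snapshot config state */

	/* Drop the inner lock before the slots_lock-only operation. */
	pthread_mutex_unlock(&config_lock);

	ret = register_mmio_device(dist_base);

	pthread_mutex_unlock(&slots_lock);
	return ret;
}

int main(void)
{
	return map_resources();
}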
