
Commit 2a359e0

Author: Marc Zyngier <maz@kernel.org>
KVM: arm64: nv: Handle mapping of VNCR_EL2 at EL2
Now that we can handle faults triggered through VNCR_EL2, we need to map
the corresponding page at EL2. But where, you'll ask? Since each CPU in
the system can run a vcpu, we need a per-CPU mapping.

For that, we carve out a NR_CPUS range in the fixmap, giving us a
per-CPU va at which to map the guest's VNCR page.

The mapping occurs both on vcpu load and on the back of a fault, both
generating a request that will take care of the mapping. That mapping
will also get dropped on vcpu put.

Yes, this is a bit heavy handed, but it is simple. Eventually, we may
want to have a per-VM, per-CPU mapping, which would avoid all the TLBI
overhead.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-11-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
1 parent 069a05e commit 2a359e0
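
In outline, the lifecycle described above looks like this. This is an editorial sketch distilled from the diff below, reusing the patch's own names; it is not code from the patch itself:

	/* vcpu load (or a VNCR_EL2 fault): defer the work via a vcpu request */
	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
		kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);

	/* request handler: map the guest's VNCR page at this CPU's fixmap slot */
	__set_fixmap(vncr_fixmap(smp_processor_id()), vt->hpa, prot);
	host_data_set_flag(L1_VNCR_MAPPED);

	/* vcpu put: unconditionally drop the mapping again */
	clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
	host_data_clear_flag(L1_VNCR_MAPPED);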

File tree: 4 files changed, +103 / -9 lines


arch/arm64/include/asm/fixmap.h

Lines changed: 6 additions & 0 deletions

@@ -48,6 +48,12 @@ enum fixed_addresses {
 	FIX_EARLYCON_MEM_BASE,
 	FIX_TEXT_POKE0,
 
+#ifdef CONFIG_KVM
+	/* One slot per CPU, mapping the guest's VNCR page at EL2. */
+	FIX_VNCR_END,
+	FIX_VNCR = FIX_VNCR_END + NR_CPUS,
+#endif
+
 #ifdef CONFIG_ACPI_APEI_GHES
 	/* Used for GHES mapping from assorted contexts */
 	FIX_APEI_GHES_IRQ,
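
Fixmap indices convert to virtual addresses top-down, so this NR_CPUS-sized carve-out between FIX_VNCR_END and FIX_VNCR yields one page-sized, compile-time-fixed VA per CPU. A rough illustration, assuming the generic __fix_to_virt() helper from <asm-generic/fixmap.h>:

	/* Illustration only: a higher fixmap index means a lower virtual address */
	#define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))

	/* CPU 0 maps at __fix_to_virt(FIX_VNCR), CPU n at __fix_to_virt(FIX_VNCR - n) */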

arch/arm64/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions

@@ -658,6 +658,7 @@ struct kvm_host_data {
 #define KVM_HOST_DATA_FLAG_TRBE_ENABLED			4
 #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED	5
 #define KVM_HOST_DATA_FLAG_VCPU_IN_HYP_CONTEXT		6
+#define KVM_HOST_DATA_FLAG_L1_VNCR_MAPPED		7
 	unsigned long flags;
 
 	struct kvm_cpu_context host_ctxt;
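
The new bit is driven through the host_data_*_flag() helpers seen in nested.c below, which token-paste the short name onto the KVM_HOST_DATA_FLAG_ prefix. Roughly along these lines (a sketch of the pattern, not the exact in-tree definitions):

	/* Sketch: host_data_test_flag(L1_VNCR_MAPPED) tests bit 7 of kvm_host_data::flags */
	#define host_data_set_flag(nr)		set_bit(KVM_HOST_DATA_FLAG_##nr, host_data_ptr(flags))
	#define host_data_clear_flag(nr)	clear_bit(KVM_HOST_DATA_FLAG_##nr, host_data_ptr(flags))
	#define host_data_test_flag(nr)		test_bit(KVM_HOST_DATA_FLAG_##nr, host_data_ptr(flags))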

arch/arm64/include/asm/kvm_nested.h

Lines changed: 7 additions & 0 deletions

@@ -337,4 +337,11 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
 int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
 int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu);
 
+#define vncr_fixmap(c)						\
+	({							\
+		u32 __c = (c);					\
+		BUG_ON(__c >= NR_CPUS);				\
+		(FIX_VNCR - __c);				\
+	})
+
 #endif /* __ARM64_KVM_NESTED_H */
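
Since indices grow toward lower addresses, handing CPU c the slot FIX_VNCR - c keeps every per-CPU page inside the FIX_VNCR_END..FIX_VNCR carve-out. A small worked example with illustrative values:

	/*
	 * With NR_CPUS == 4 and FIX_VNCR_END == V, FIX_VNCR == V + 4, so:
	 *
	 *	vncr_fixmap(0) == V + 4		(== FIX_VNCR)
	 *	vncr_fixmap(3) == V + 1
	 *
	 * vncr_fixmap(4) would trip the BUG_ON() bounds check.
	 */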

arch/arm64/kvm/nested.c

Lines changed: 89 additions & 9 deletions

@@ -8,6 +8,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
+#include <asm/fixmap.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_mmu.h>
@@ -704,23 +705,35 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
 {
 	/*
-	 * The vCPU kept its reference on the MMU after the last put, keep
-	 * rolling with it.
+	 * If the vCPU kept its reference on the MMU after the last put,
+	 * keep rolling with it.
 	 */
-	if (vcpu->arch.hw_mmu)
-		return;
-
 	if (is_hyp_ctxt(vcpu)) {
-		vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+		if (!vcpu->arch.hw_mmu)
+			vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
 	} else {
-		write_lock(&vcpu->kvm->mmu_lock);
-		vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
-		write_unlock(&vcpu->kvm->mmu_lock);
+		if (!vcpu->arch.hw_mmu) {
+			scoped_guard(write_lock, &vcpu->kvm->mmu_lock)
+				vcpu->arch.hw_mmu = get_s2_mmu_nested(vcpu);
+		}
+
+		if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
+			kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu);
 	}
 }
 
 void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu)
 {
+	/* Unconditionally drop the VNCR mapping if we have one */
+	if (host_data_test_flag(L1_VNCR_MAPPED)) {
+		BUG_ON(vcpu->arch.vncr_tlb->cpu != smp_processor_id());
+		BUG_ON(is_hyp_ctxt(vcpu));
+
+		clear_fixmap(vncr_fixmap(vcpu->arch.vncr_tlb->cpu));
+		vcpu->arch.vncr_tlb->cpu = -1;
+		host_data_clear_flag(L1_VNCR_MAPPED);
+	}
+
 	/*
 	 * Keep a reference on the associated stage-2 MMU if the vCPU is
 	 * scheduling out and not in WFI emulation, suggesting it is likely to
@@ -1042,6 +1055,70 @@ int kvm_handle_vncr_abort(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static void kvm_map_l1_vncr(struct kvm_vcpu *vcpu)
+{
+	struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
+	pgprot_t prot;
+
+	guard(preempt)();
+	guard(read_lock)(&vcpu->kvm->mmu_lock);
+
+	/*
+	 * The request to map VNCR may have raced against some other
+	 * event, such as an interrupt, and may not be valid anymore.
+	 */
+	if (is_hyp_ctxt(vcpu))
+		return;
+
+	/*
+	 * Check that the pseudo-TLB is valid and that VNCR_EL2 still
+	 * contains the expected value. If it doesn't, we simply bail out
+	 * without a mapping -- a transformed MSR/MRS will generate the
+	 * fault and allows us to populate the pseudo-TLB.
+	 */
+	if (!vt->valid)
+		return;
+
+	if (read_vncr_el2(vcpu) != vt->gva)
+		return;
+
+	if (vt->wr.nG) {
+		u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+		u64 ttbr = ((tcr & TCR_A1) ?
+			    vcpu_read_sys_reg(vcpu, TTBR1_EL2) :
+			    vcpu_read_sys_reg(vcpu, TTBR0_EL2));
+		u16 asid;
+
+		asid = FIELD_GET(TTBR_ASID_MASK, ttbr);
+		if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, ASIDBITS, 16) ||
+		    !(tcr & TCR_ASID16))
+			asid &= GENMASK(7, 0);
+
+		if (asid != vt->wr.asid)
+			return;
+	}
+
+	vt->cpu = smp_processor_id();
+
+	if (vt->wr.pw && vt->wr.pr)
+		prot = PAGE_KERNEL;
+	else if (vt->wr.pr)
+		prot = PAGE_KERNEL_RO;
+	else
+		prot = PAGE_NONE;
+
+	/*
+	 * We can't map write-only (or no permission at all) in the kernel,
+	 * but the guest can do it if using POE, so we'll have to turn a
+	 * translation fault into a permission fault at runtime.
+	 * FIXME: WO doesn't work at all, need POE support in the kernel.
+	 */
+	if (pgprot_val(prot) != pgprot_val(PAGE_NONE)) {
+		__set_fixmap(vncr_fixmap(vt->cpu), vt->hpa, prot);
+		host_data_set_flag(L1_VNCR_MAPPED);
+	}
+}
+
 /*
  * Our emulated CPU doesn't support all the possible features. For the
  * sake of simplicity (and probably mental sanity), wipe out a number
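
For nG (non-global) entries, the pseudo-TLB is tagged with an ASID, so the mapping is only installed if the guest's current translation regime still resolves to the same ASID: TCR_EL2.A1 picks the TTBR that supplies it, and the comparison narrows to 8 bits unless 16-bit ASIDs are both implemented and enabled. A worked example with hypothetical values:

	/*
	 * Hypothetical values: TCR_EL2.A1 == 1 selects TTBR1_EL2; with
	 * TTBR1_EL2.ASID == 0x1a5 and only 8-bit ASIDs in use, the compared
	 * value is 0x1a5 & GENMASK(7, 0) == 0xa5, which must equal
	 * vt->wr.asid for the mapping to be installed.
	 */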
@@ -1582,6 +1659,9 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
 		write_unlock(&vcpu->kvm->mmu_lock);
 	}
 
+	if (kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu))
+		kvm_map_l1_vncr(vcpu);
+
 	/* Must be last, as may switch context! */
 	if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
 		kvm_inject_nested_irq(vcpu);
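
Putting the hunks together, the end-to-end flow is as follows; a condensed editorial view, noting that check_nested_vcpu_requests() runs from the arm64 vcpu-run path before each guest entry:

	/*
	 * kvm_vcpu_load_hw_mmu()		-- vcpu load, guest EL1 context
	 *   -> kvm_make_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu)
	 *
	 * check_nested_vcpu_requests()		-- before entering the guest
	 *   -> kvm_check_request(KVM_REQ_MAP_L1_VNCR_EL2, vcpu)
	 *   -> kvm_map_l1_vncr()		-- revalidates the pseudo-TLB,
	 *					   then __set_fixmap()
	 *
	 * kvm_vcpu_put_hw_mmu()		-- vcpu put
	 *   -> clear_fixmap(vncr_fixmap(cpu))	-- mapping unconditionally dropped
	 */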
