
Commit ea8d3cf

Author: Marc Zyngier
KVM: arm64: nv: Add pseudo-TLB backing VNCR_EL2
FEAT_NV2 introduces an interesting problem for NV, as VNCR_EL2.BADDR is a virtual address in the EL2&0 (or EL2, but we thankfully ignore this) translation regime.

As we need to replicate such mapping in the real EL2, it means that we need to remember that there is such a translation, and that any TLBI affecting EL2 can possibly affect this translation.

It also means that any invalidation driven by an MMU notifier must be able to shoot down any such mapping.

All in all, we need a data structure that represents this mapping, and that is extremely close to a TLB. Given that we can only use one of those per vcpu at any given time, we only allocate one. No effort is made to keep that structure small. If we need to start caching multiple of them, we may want to revisit that design point. But for now, it is kept simple so that we can reason about it.

Oh, and add a braindump of how things are supposed to work, because I will definitely page this out at some point. Yes, pun intended.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20250514103501.2225951-8-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
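(For orientation, here is a comment-style sketch of the translation chain a single pseudo-TLB entry caches. It merely restates the commit message and is not part of the patch.)

/*
 * What one pseudo-TLB entry caches, conceptually:
 *
 *   VNCR_EL2.BADDR (guest VA, EL2&0 regime)
 *     --[guest hypervisor's EL2 stage-1 tables]--> IPA
 *     --[host-maintained stage-2 tables]---------> host PA
 *
 * The host PA is what the CPU's real VNCR_EL2 must end up pointing
 * at (via a per-CPU fixmap mapping) while the vcpu runs.
 */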
Parent: bd914a9

5 files changed, 85 insertions(+), 0 deletions(-)

arch/arm64/include/asm/kvm_host.h

Lines changed: 5 additions & 0 deletions

@@ -731,6 +731,8 @@ struct vcpu_reset_state {
 	bool reset;
 };
 
+struct vncr_tlb;
+
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
 
@@ -825,6 +827,9 @@ struct kvm_vcpu_arch {
 
 	/* Per-vcpu CCSIDR override or NULL */
 	u32 *ccsidr;
+
+	/* Per-vcpu TLB for VNCR_EL2 -- NULL when !NV */
+	struct vncr_tlb *vncr_tlb;
 };
 
 /*

arch/arm64/include/asm/kvm_nested.h

Lines changed: 3 additions & 0 deletions

@@ -333,4 +333,7 @@ struct s1_walk_result {
 int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
 		       struct s1_walk_result *wr, u64 va);
 
+/* VNCR management */
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
+
 #endif /* __ARM64_KVM_NESTED_H */

arch/arm64/kvm/arm.c

Lines changed: 4 additions & 0 deletions

@@ -843,6 +843,10 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 		return ret;
 
 	if (vcpu_has_nv(vcpu)) {
+		ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
+		if (ret)
+			return ret;
+
 		ret = kvm_vgic_vcpu_nv_init(vcpu);
 		if (ret)
 			return ret;
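Note the ordering here: the pseudo-TLB is allocated before kvm_vgic_vcpu_nv_init(), so a failed allocation bails out of first-run setup early. The matching kfree() lives in kvm_arm_vcpu_destroy() (see the reset.c hunk below).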

arch/arm64/kvm/nested.c

Lines changed: 72 additions & 0 deletions

@@ -16,6 +16,24 @@
 
 #include "sys_regs.h"
 
+struct vncr_tlb {
+	/* The guest's VNCR_EL2 */
+	u64 gva;
+	struct s1_walk_info wi;
+	struct s1_walk_result wr;
+
+	u64 hpa;
+
+	/* -1 when not mapped on a CPU */
+	int cpu;
+
+	/*
+	 * true if the TLB is valid. Can only be changed with the
+	 * mmu_lock held.
+	 */
+	bool valid;
+};
+
 /*
  * Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
  * memory usage and potential number of different sets of S2 PTs in
@@ -811,6 +829,60 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 	kvm_uninit_stage2_mmu(kvm);
 }
 
+/*
+ * Dealing with VNCR_EL2 exposed by the *guest* is a complicated matter:
+ *
+ * - We introduce an internal representation of a vcpu-private TLB,
+ *   representing the mapping between the guest VA contained in VNCR_EL2,
+ *   the IPA the guest's EL2 PTs point to, and the actual PA this lives at.
+ *
+ * - On translation fault from a nested VNCR access, we create such a TLB.
+ *   If there is no mapping to describe, the guest inherits the fault.
+ *   Crucially, no actual mapping is done at this stage.
+ *
+ * - On vcpu_load() in a non-HYP context with HCR_EL2.NV==1, if the above
+ *   TLB exists, we map it in the fixmap for this CPU, and run with it. We
+ *   have to respect the permissions dictated by the guest, but not the
+ *   memory type (FWB is a must).
+ *
+ * - Note that we usually don't do a vcpu_load() on the back of a fault
+ *   (unless we are preempted), so the resolution of a translation fault
+ *   must go via a request that will map the VNCR page in the fixmap.
+ *   vcpu_load() might as well use the same mechanism.
+ *
+ * - On vcpu_put() in a non-HYP context with HCR_EL2.NV==1, if the TLB was
+ *   mapped, we unmap it. Yes it is that simple. The TLB still exists
+ *   though, and may be reused at a later load.
+ *
+ * - On permission fault, we simply forward the fault to the guest's EL2.
+ *   Get out of my way.
+ *
+ * - On any TLBI for the EL2&0 translation regime, we must find any TLB that
+ *   intersects with the TLBI request, invalidate it, and unmap the page
+ *   from the fixmap. Because we need to look at all the vcpu-private TLBs,
+ *   this requires some wide-ranging locking to ensure that nothing races
+ *   against it. This may require some refcounting to avoid the search when
+ *   no such TLB is present.
+ *
+ * - On MMU notifiers, we must invalidate our TLB in a similar way, but
+ *   looking at the IPA instead. The funny part is that there may not be a
+ *   stage-2 mapping for this page if L1 hasn't accessed it using LD/ST
+ *   instructions.
+ */
+
+int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu)
+{
+	if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
+		return 0;
+
+	vcpu->arch.vncr_tlb = kzalloc(sizeof(*vcpu->arch.vncr_tlb),
+				      GFP_KERNEL_ACCOUNT);
+	if (!vcpu->arch.vncr_tlb)
+		return -ENOMEM;
+
+	return 0;
+}
+
 /*
  * Our emulated CPU doesn't support all the possible features. For the
  * sake of simplicity (and probably mental sanity), wipe out a number
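The invalidation paths described in the comment above are left to later patches in the series. Purely as a sketch of the MMU-notifier case (the function name, page granularity, and locking below are assumptions, not part of this commit), matching on the IPA cached by the stage-1 walk result might look like:

/*
 * Hypothetical sketch only -- not part of this commit. Invalidate the
 * per-vcpu VNCR pseudo-TLBs when an MMU notifier tears down the guest
 * IPA range [start, end). Runs under the write-held mmu_lock, since
 * 'valid' may only change with that lock held.
 */
static void kvm_invalidate_vncr_ipa(struct kvm *kvm, u64 start, u64 end)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	lockdep_assert_held_write(&kvm->mmu_lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct vncr_tlb *vt = vcpu->arch.vncr_tlb;
		u64 ipa_start, ipa_end;

		if (!vt || !vt->valid)
			continue;

		/*
		 * Assumes wr.pa holds the IPA produced by the cached
		 * stage-1 walk, and page granularity; the real code
		 * would have to honour the block size of the walk.
		 */
		ipa_start = vt->wr.pa & PAGE_MASK;
		ipa_end   = ipa_start + PAGE_SIZE;

		if (ipa_end <= start || ipa_start >= end)
			continue;

		vt->valid = false;
		/* Unmapping from the fixmap would go via a vcpu request */
	}
}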

arch/arm64/kvm/reset.c

Lines changed: 1 addition & 0 deletions

@@ -159,6 +159,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
 	kfree(sve_state);
 	free_page((unsigned long)vcpu->arch.ctxt.vncr_array);
+	kfree(vcpu->arch.vncr_tlb);
 	kfree(vcpu->arch.ccsidr);
 }
 
