
Commit 146a050

Marc Zyngier authored and Oliver Upton committed
KVM: arm64: nv: Nested GICv3 emulation
KVM: arm64: nv: Nested GICv3 emulation

When entering a nested VM, we set up the hypervisor control interface based on what the guest hypervisor has set. In particular, we inspect each list register written by the guest hypervisor to see whether its HW bit is set. If so, we translate the hardware interrupt number from the guest's point of view into the real hardware interrupt number, if such a mapping exists.

Co-developed-by: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Jintack Lim <jintack@cs.columbia.edu>
[Christoffer: Redesigned execution flow around vcpu load/put]
Co-developed-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
[maz: Rewritten to support GICv3 instead of GICv2, NV2 support]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20250225172930.1850838-9-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
1 parent 21d29cd commit 146a050

File tree

7 files changed: +244, -1 lines changed

7 files changed

+244
-1
lines changed
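The crux of the commit message above is a bit-level rewrite of each list register: when L1 programs an LR with the HW bit set, the pINTID field it chose only makes sense in L1's world, and must be swapped for the host's hardware INTID (or the HW bit dropped) before the LR reaches real hardware. The standalone C sketch below mirrors that rewrite as done by vgic_v3_create_shadow_lr() in the diff that follows; the ICH_LR_* constants and struct mapped_irq here are simplified stand-ins for the kernel's definitions, not its actual headers.

/* lr_translate.c - illustrative only; build with: cc -o lr_translate lr_translate.c */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ICH_LR_HW               (1ULL << 61)    /* LR backed by a HW interrupt */
#define ICH_LR_EOI              (1ULL << 41)    /* maintenance IRQ on EOI (HW == 0 only) */
#define ICH_LR_PHYS_ID_SHIFT    32
#define ICH_LR_PHYS_ID_MASK     (0x3ffULL << ICH_LR_PHYS_ID_SHIFT)

/* Hypothetical stand-in for the relevant bits of struct vgic_irq */
struct mapped_irq {
        bool     hw;            /* has a host HW mapping */
        uint32_t hwintid;       /* host hardware INTID */
};

/*
 * If the guest hypervisor set the HW bit, replace its idea of the
 * physical INTID with the host's, or drop the HW bit if no mapping
 * exists.
 */
static uint64_t shadow_lr(uint64_t lr, const struct mapped_irq *irq)
{
        if (!(lr & ICH_LR_HW))
                return lr;

        if (!irq || !irq->hw)           /* no real mapping: nuke the HW bit */
                return lr & ~ICH_LR_HW;

        lr &= ~ICH_LR_EOI;              /* EOI is illegal together with HW */
        lr &= ~ICH_LR_PHYS_ID_MASK;
        lr |= (uint64_t)irq->hwintid << ICH_LR_PHYS_ID_SHIFT;
        return lr;
}

int main(void)
{
        struct mapped_irq timer = { .hw = true, .hwintid = 27 };
        uint64_t lr = ICH_LR_HW | (30ULL << ICH_LR_PHYS_ID_SHIFT) | 30;

        printf("guest LR %#llx -> shadow LR %#llx\n",
               (unsigned long long)lr,
               (unsigned long long)shadow_lr(lr, &timer));
        return 0;
}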

arch/arm64/include/asm/kvm_hyp.h

Lines changed: 2 additions & 0 deletions
@@ -76,6 +76,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
+u64 __gic_v3_get_lr(unsigned int lr);
+
 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
 void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);

arch/arm64/kvm/hyp/vgic-v3-sr.c

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 #define vtr_to_nr_pre_bits(v)   ((((u32)(v) >> 26) & 7) + 1)
 #define vtr_to_nr_apr_regs(v)   (1 << (vtr_to_nr_pre_bits(v) - 5))
 
-static u64 __gic_v3_get_lr(unsigned int lr)
+u64 __gic_v3_get_lr(unsigned int lr)
 {
        switch (lr & 0xf) {
        case 0:

arch/arm64/kvm/vgic/vgic-v3-nested.c

Lines changed: 218 additions & 0 deletions
@@ -16,16 +16,63 @@
 #include "vgic.h"
 
 #define ICH_LRN(n)      (ICH_LR0_EL2 + (n))
+#define ICH_AP0RN(n)    (ICH_AP0R0_EL2 + (n))
+#define ICH_AP1RN(n)    (ICH_AP1R0_EL2 + (n))
 
 struct mi_state {
        u16     eisr;
        u16     elrsr;
        bool    pend;
 };
 
+/*
+ * The shadow registers loaded to the hardware when running a L2 guest
+ * with the virtual IMO/FMO bits set.
+ */
+struct shadow_if {
+       struct vgic_v3_cpu_if   cpuif;
+       unsigned long           lr_map;
+};
+
+static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+
 /*
  * Nesting GICv3 support
  *
+ * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
+ * completely controls the interrupts injected via the list registers.
+ * Consequently, most of the state that is modified by the guest (by ACK-ing
+ * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
+ * keep a semi-consistent view of the interrupts.
+ *
+ * This still applies for a NV guest, but only while "InHost" (either
+ * running at EL2, or at EL0 with HCR_EL2.{E2H,TGE} == {1,1}).
+ *
+ * When running a L2 guest ("not InHost"), things are radically different,
+ * as the L1 guest is in charge of provisioning the interrupts via its own
+ * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
+ * page. This means that the flow described above does not apply (there is
+ * no state to rebuild in the L0 hypervisor), and that most things happen
+ * on L2 load/put:
+ *
+ * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
+ *   per-CPU data structure that is used to populate the actual LRs. This is
+ *   an extra copy that we could avoid, but life is short. In the process,
+ *   we remap any interrupt that has the HW bit set to the mapped interrupt
+ *   on the host, should the host consider it a HW one. This allows the HW
+ *   deactivation to take its course, such as for the timer.
+ *
+ * - on L2 put: perform the inverse transformation, so that the result of L2
+ *   running becomes visible to L1 in the VNCR-accessible registers.
+ *
+ * - there is nothing to do on L2 entry, as everything will have happened
+ *   on load. However, this is the point where we detect an interrupt
+ *   targeting L1 and prepare the grand switcheroo.
+ *
+ * - on L2 exit: emulate the HW bit, and deactivate the corresponding L1
+ *   interrupt. The L0 active state will be cleared by the HW if the L1
+ *   interrupt was itself backed by a HW interrupt.
+ *
  * System register emulation:
  *
  * We get two classes of registers:
@@ -42,6 +89,26 @@ struct mi_state {
  * trap) thanks to NV being set by L1.
  */
 
+bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
+{
+       u64 xmo;
+
+       if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+               xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
+               WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
+                         "Separate virtual IRQ/FIQ settings not supported\n");
+
+               return !!xmo;
+       }
+
+       return false;
+}
+
+static struct shadow_if *get_shadow_if(void)
+{
+       return this_cpu_ptr(&shadow_if);
+}
+
 static bool lr_triggers_eoi(u64 lr)
 {
        return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
@@ -123,3 +190,154 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
 
        return reg;
 }
+
+/*
+ * For LRs which have the HW bit set, such as timer interrupts, we modify
+ * them to have the host hardware interrupt number instead of the virtual
+ * one programmed by the guest hypervisor.
+ */
+static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
+                                    struct vgic_v3_cpu_if *s_cpu_if)
+{
+       unsigned long lr_map = 0;
+       int index = 0;
+
+       for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
+               u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+               struct vgic_irq *irq;
+
+               if (!(lr & ICH_LR_STATE))
+                       lr = 0;
+
+               if (!(lr & ICH_LR_HW))
+                       goto next;
+
+               /* We have the HW bit set, check for validity of pINTID */
+               irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+               if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI) {
+                       /* There was no real mapping, so nuke the HW bit */
+                       lr &= ~ICH_LR_HW;
+                       if (irq)
+                               vgic_put_irq(vcpu->kvm, irq);
+                       goto next;
+               }
+
+               /* It is illegal to have the EOI bit set with HW */
+               lr &= ~ICH_LR_EOI;
+
+               /* Translate the virtual mapping to the real one */
+               lr &= ~ICH_LR_PHYS_ID_MASK;
+               lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+
+               vgic_put_irq(vcpu->kvm, irq);
+
+next:
+               s_cpu_if->vgic_lr[index] = lr;
+               if (lr) {
+                       lr_map |= BIT(i);
+                       index++;
+               }
+       }
+
+       container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
+       s_cpu_if->used_lrs = index;
+}
+
+void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
+{
+       struct shadow_if *shadow_if = get_shadow_if();
+       int i, index = 0;
+
+       for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+               u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+               struct vgic_irq *irq;
+
+               if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
+                       goto next;
+
+               /*
+                * If we had a HW lr programmed by the guest hypervisor, we
+                * need to emulate the HW effect between the guest hypervisor
+                * and the nested guest.
+                */
+               irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+               if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
+                       goto next;
+
+               lr = __gic_v3_get_lr(index);
+               if (!(lr & ICH_LR_STATE))
+                       irq->active = false;
+
+               vgic_put_irq(vcpu->kvm, irq);
+next:
+               index++;
+       }
+}
+
+static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
+                                       struct vgic_v3_cpu_if *s_cpu_if)
+{
+       struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
+       int i;
+
+       s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+       s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
+       s_cpu_if->vgic_sre = host_if->vgic_sre;
+
+       for (i = 0; i < 4; i++) {
+               s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i));
+               s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i));
+       }
+
+       vgic_v3_create_shadow_lr(vcpu, s_cpu_if);
+}
+
+void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
+{
+       struct shadow_if *shadow_if = get_shadow_if();
+       struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif;
+
+       BUG_ON(!vgic_state_is_nested(vcpu));
+
+       vgic_v3_create_shadow_state(vcpu, cpu_if);
+
+       __vgic_v3_restore_vmcr_aprs(cpu_if);
+       __vgic_v3_activate_traps(cpu_if);
+
+       __vgic_v3_restore_state(cpu_if);
+}
+
+void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
+{
+       struct shadow_if *shadow_if = get_shadow_if();
+       struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
+       int i;
+
+       __vgic_v3_save_vmcr_aprs(s_cpu_if);
+       __vgic_v3_deactivate_traps(s_cpu_if);
+       __vgic_v3_save_state(s_cpu_if);
+
+       /*
+        * Translate the shadow state HW fields back to the virtual ones
+        * before copying the shadow struct back to the nested one.
+        */
+       __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = s_cpu_if->vgic_hcr;
+       __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr;
+
+       for (i = 0; i < 4; i++) {
+               __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i];
+               __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i];
+       }
+
+       for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+               u64 val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+               val &= ~ICH_LR_STATE;
+               val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+
+               __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
+               s_cpu_if->vgic_lr[i] = 0;
+       }
+
+       shadow_if->lr_map = 0;
+}
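One subtlety worth calling out in the code above is the lr_map/index bookkeeping: vgic_v3_create_shadow_lr() compacts the live L1 LRs into contiguous shadow slots, and the bitmap records which L1 LR each slot came from, so that vgic_v3_sync_nested() can walk from set bits back to slots. Below is a standalone illustration of just that bookkeeping, in plain C with simplified types (guest_lr/shadow_lr are hypothetical stand-ins, not kernel code):

/* lr_map_demo.c - illustrative only; build with: cc -o lr_map_demo lr_map_demo.c */
#include <stdint.h>
#include <stdio.h>

#define NR_LR 16        /* a GICv3 implementation has at most 16 LRs */

int main(void)
{
        /* Guest LRs 1, 5 and 9 hold live state; the rest are empty */
        uint64_t guest_lr[NR_LR] = { [1] = 0x101, [5] = 0x505, [9] = 0x909 };
        uint64_t shadow_lr[NR_LR] = { 0 };
        unsigned long lr_map = 0;
        int index = 0;

        /* Load side: compact live LRs into contiguous shadow slots */
        for (int i = 0; i < NR_LR; i++) {
                if (!guest_lr[i])
                        continue;
                shadow_lr[index++] = guest_lr[i];
                lr_map |= 1UL << i;
        }

        /* Sync side: the n-th set bit of lr_map maps guest LR i to shadow slot n */
        index = 0;
        for (int i = 0; i < NR_LR; i++) {
                if (!(lr_map & (1UL << i)))
                        continue;
                printf("guest LR%-2d <-> shadow slot %d (%#llx)\n",
                       i, index, (unsigned long long)shadow_lr[index]);
                index++;
        }
        return 0;
}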

arch/arm64/kvm/vgic/vgic-v3.c

Lines changed: 11 additions & 0 deletions
@@ -734,6 +734,12 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
+       /* If the vgic is nested, perform the full state loading */
+       if (vgic_state_is_nested(vcpu)) {
+               vgic_v3_load_nested(vcpu);
+               return;
+       }
+
        if (likely(!is_protected_kvm_enabled()))
                kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
 
@@ -747,6 +753,11 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
+       if (vgic_state_is_nested(vcpu)) {
+               vgic_v3_put_nested(vcpu);
+               return;
+       }
+
        if (likely(!is_protected_kvm_enabled()))
                kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
        WARN_ON(vgic_v4_put(vcpu));

arch/arm64/kvm/vgic/vgic.c

Lines changed: 6 additions & 0 deletions
@@ -872,6 +872,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
        int used_lrs;
 
+       /* If nesting, emulate the HW effect from L0 to L1 */
+       if (vgic_state_is_nested(vcpu)) {
+               vgic_v3_sync_nested(vcpu);
+               return;
+       }
+
        /* An empty ap_list_head implies used_lrs == 0 */
        if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
                return;

arch/arm64/kvm/vgic/vgic.h

Lines changed: 4 additions & 0 deletions
@@ -353,4 +353,8 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
        return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
 }
 
+void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
+
 #endif

include/kvm/arm_vgic.h

Lines changed: 2 additions & 0 deletions
@@ -437,6 +437,8 @@ int vgic_v4_load(struct kvm_vcpu *vcpu);
 void vgic_v4_commit(struct kvm_vcpu *vcpu);
 int vgic_v4_put(struct kvm_vcpu *vcpu);
 
+bool vgic_state_is_nested(struct kvm_vcpu *vcpu);
+
 /* CPU HP callbacks */
 void kvm_vgic_cpu_up(void);
 void kvm_vgic_cpu_down(void);
