#include "vgic.h"

#define ICH_LRN(n)	(ICH_LR0_EL2 + (n))
+ #define ICH_AP0RN(n)	(ICH_AP0R0_EL2 + (n))
+ #define ICH_AP1RN(n)	(ICH_AP1R0_EL2 + (n))

struct mi_state {
	u16	eisr;
	u16	elrsr;
	bool	pend;
};

+ /*
+ * The shadow registers loaded to the hardware when running an L2 guest
+ * with the virtual IMO/FMO bits set.
+ */
+ struct shadow_if {
+ 	struct vgic_v3_cpu_if	cpuif;
+ 	unsigned long		lr_map;
+ };
+
+ static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+
/*
 * Nesting GICv3 support
 *
+ * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
+ * completely controls the interrupts injected via the list registers.
+ * Consequently, most of the state that is modified by the guest (by ACK-ing
+ * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
+ * keep a semi-consistent view of the interrupts.
+ *
+ * This still applies for an NV guest, but only while "InHost" (either
+ * running at EL2, or at EL0 with HCR_EL2.{E2H,TGE} == {1,1}).
+ *
+ * When running an L2 guest ("not InHost"), things are radically different,
+ * as the L1 guest is in charge of provisioning the interrupts via its own
+ * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
+ * page. This means that the flow described above does work (there is no
+ * state to rebuild in the L0 hypervisor), and that most things happen on L2
+ * load/put:
+ *
+ * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
+ *   per-CPU data structure that is used to populate the actual LRs. This is
+ *   an extra copy that we could avoid, but life is short. In the process,
+ *   we remap any interrupt that has the HW bit set to the mapped interrupt
+ *   on the host, should the host consider it a HW one. This allows the HW
+ *   deactivation to take its course, such as for the timer.
+ *
+ * - on L2 put: perform the inverse transformation, so that the result of L2
+ *   running becomes visible to L1 in the VNCR-accessible registers.
+ *
+ * - there is nothing to do on L2 entry, as everything will have happened
+ *   on load. However, this is the point where we detect that an interrupt
+ *   is targeting L1 and prepare the grand switcheroo.
+ *
+ * - on L2 exit: emulate the HW bit, and deactivate the corresponding L1
+ *   interrupt. The L0 active state will be cleared by the HW if the L1
+ *   interrupt was itself backed by a HW interrupt.
+ *
76
* System register emulation:
30
77
*
31
78
* We get two classes of registers:
@@ -42,6 +89,26 @@ struct mi_state {
42
89
* trap) thanks to NV being set by L1.
43
90
*/
44
91
92
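+ /*
+ * Returns true when the vcpu is running an L2 guest whose interrupts are
+ * driven by L1 (virtual IMO/FMO set), i.e. when the shadow interface must
+ * be used instead of the host's own vGIC state.
+ */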
+ bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
+ {
+ 	u64 xmo;
+
+ 	if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ 		xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
+ 		WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
+ 			  "Separate virtual IRQ/FIQ settings not supported\n");
+
+ 		return !!xmo;
+ 	}
+
+ 	return false;
+ }
+
+ static struct shadow_if *get_shadow_if(void)
+ {
+ 	return this_cpu_ptr(&shadow_if);
+ }
+
static bool lr_triggers_eoi(u64 lr)
{
	return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
@@ -123,3 +190,154 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)

	return reg;
}
+
+ /*
+ * For LRs which have the HW bit set, such as timer interrupts, we modify
+ * them to have the host hardware interrupt number instead of the virtual
+ * one programmed by the guest hypervisor.
+ */
+ static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
+ 				     struct vgic_v3_cpu_if *s_cpu_if)
+ {
+ 	unsigned long lr_map = 0;
+ 	int index = 0;
+
+ 	for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
+ 		u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+ 		struct vgic_irq *irq;
+
+ 		if (!(lr & ICH_LR_STATE))
+ 			lr = 0;
+
+ 		if (!(lr & ICH_LR_HW))
+ 			goto next;
+
+ 		/* We have the HW bit set, check for validity of pINTID */
+ 		irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ 		if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI) {
+ 			/* There was no real mapping, so nuke the HW bit */
+ 			lr &= ~ICH_LR_HW;
+ 			if (irq)
+ 				vgic_put_irq(vcpu->kvm, irq);
+ 			goto next;
+ 		}
+
+ 		/* It is illegal to have the EOI bit set with HW */
+ 		lr &= ~ICH_LR_EOI;
+
+ 		/* Translate the virtual mapping to the real one */
+ 		lr &= ~ICH_LR_PHYS_ID_MASK;
+ 		lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+
+ 		vgic_put_irq(vcpu->kvm, irq);
+
+ next:
+ 		s_cpu_if->vgic_lr[index] = lr;
+ 		if (lr) {
+ 			lr_map |= BIT(i);
+ 			index++;
+ 		}
+ 	}
+
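+ 	/*
+ 	 * The shadow LRs are compacted: lr_map records which L1 LRs were
+ 	 * actually latched, so that sync/put can find their way back to
+ 	 * the corresponding VNCR-resident registers.
+ 	 */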
+ 	container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map;
+ 	s_cpu_if->used_lrs = index;
+ }
+
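+ /*
+ * On L2 exit: for each HW-mapped LR that L1 provisioned, check whether the
+ * corresponding shadow LR was deactivated while L2 ran, and if so clear
+ * the active state of the L1 interrupt backing it (emulating the HW bit on
+ * behalf of L1).
+ */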
+ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
+ {
+ 	struct shadow_if *shadow_if = get_shadow_if();
+ 	int i, index = 0;
+
+ 	for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+ 		u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+ 		struct vgic_irq *irq;
+
+ 		if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
+ 			goto next;
+
+ 		/*
+ 		 * If we had a HW lr programmed by the guest hypervisor, we
+ 		 * need to emulate the HW effect between the guest hypervisor
+ 		 * and the nested guest.
+ 		 */
+ 		irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ 		if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
+ 			goto next;
+
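+ 		/*
+ 		 * 'index' walks the compacted shadow LRs while 'i' walks the
+ 		 * L1 LRs; read back the hardware copy to see what state L2
+ 		 * left behind.
+ 		 */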
+ 		lr = __gic_v3_get_lr(index);
+ 		if (!(lr & ICH_LR_STATE))
+ 			irq->active = false;
+
+ 		vgic_put_irq(vcpu->kvm, irq);
+ next:
+ 		index++;
+ 	}
+ }
+
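+ /*
+ * Populate the shadow CPU interface from L1's VNCR-resident vGIC state
+ * (HCR, VMCR, APRs and LRs), keeping the host-controlled SRE setting.
+ */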
+ static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
+ 					struct vgic_v3_cpu_if *s_cpu_if)
+ {
+ 	struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ 	int i;
+
+ 	s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+ 	s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
+ 	s_cpu_if->vgic_sre = host_if->vgic_sre;
+
+ 	for (i = 0; i < 4; i++) {
+ 		s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i));
+ 		s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i));
+ 	}
+
+ 	vgic_v3_create_shadow_lr(vcpu, s_cpu_if);
+ }
+
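+ /*
+ * On L2 load: build the shadow state and hand it to the hardware, exactly
+ * as if it were a regular (non-nested) vGIC context.
+ */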
+ void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
+ {
+ 	struct shadow_if *shadow_if = get_shadow_if();
+ 	struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif;
+
+ 	BUG_ON(!vgic_state_is_nested(vcpu));
+
+ 	vgic_v3_create_shadow_state(vcpu, cpu_if);
+
+ 	__vgic_v3_restore_vmcr_aprs(cpu_if);
+ 	__vgic_v3_activate_traps(cpu_if);
+
+ 	__vgic_v3_restore_state(cpu_if);
+ }
+
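+ /*
+ * On L2 put: save the shadow state back from the hardware and copy it into
+ * L1's VNCR-resident registers, so that the result of running L2 becomes
+ * visible to L1.
+ */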
+ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
+ {
+ 	struct shadow_if *shadow_if = get_shadow_if();
+ 	struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
+ 	int i;
+
+ 	__vgic_v3_save_vmcr_aprs(s_cpu_if);
+ 	__vgic_v3_deactivate_traps(s_cpu_if);
+ 	__vgic_v3_save_state(s_cpu_if);
+
+ 	/*
+ 	 * Translate the shadow state HW fields back to the virtual ones
+ 	 * before copying the shadow struct back to the nested one.
+ 	 */
+ 	__vcpu_sys_reg(vcpu, ICH_HCR_EL2) = s_cpu_if->vgic_hcr;
+ 	__vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr;
+
+ 	for (i = 0; i < 4; i++) {
+ 		__vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i];
+ 		__vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i];
+ 	}
+
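+ 	/*
+ 	 * Only the state bits are propagated back into L1's LRs; everything
+ 	 * else is left as L1 originally programmed it.
+ 	 */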
+ 	for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+ 		u64 val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+ 		val &= ~ICH_LR_STATE;
+ 		val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE;
+
+ 		__vcpu_sys_reg(vcpu, ICH_LRN(i)) = val;
+ 		s_cpu_if->vgic_lr[i] = 0;
+ 	}
+
+ 	shadow_if->lr_map = 0;
+ }