
Commit d789fa6

yamahata authored and bonzini committed
KVM: TDX: Handle vCPU dissociation
Handle vCPU dissociation by invoking SEAMCALL TDH.VP.FLUSH, which flushes the address translation caches and the cached TD VMCS of a TD vCPU on its associated pCPU.

In TDX, a vCPU can only be associated with one pCPU at a time, and the association is established by invoking SEAMCALL TDH.VP.ENTER. For a new association to succeed, the vCPU must first be dissociated from its previously associated pCPU.

To facilitate vCPU dissociation, introduce a per-pCPU list, associated_tdvcpus. Add a vCPU to this list when it is loaded onto a new pCPU (i.e. when a vCPU is loaded for the first time or migrated to a new pCPU).

vCPU dissociation can happen under the following conditions:

- When the op hardware_disable is called. This op is called when virtualization is disabled on a given pCPU, e.g. when hot-unplugging a pCPU or during machine shutdown/suspend. In this case, dissociate all vCPUs from the pCPU by iterating over its per-pCPU list associated_tdvcpus.

- On vCPU migration to a new pCPU. Before adding a vCPU to the associated_tdvcpus list of the new pCPU, it must be dissociated from its old pCPU, which is done by issuing an IPI and executing SEAMCALL TDH.VP.FLUSH on the old pCPU. On a successful dissociation, the vCPU is removed from the associated_tdvcpus list of its previously associated pCPU.

- When tdx_mmu_release_hkid() is called. TDX mandates that all vCPUs be dissociated prior to the release of an hkid. Therefore, dissociation of all vCPUs is a must before executing SEAMCALL TDH.MNG.VPFLUSHDONE and subsequently freeing the hkid.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Co-developed-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Message-ID: <20241112073858.22312-1-yan.y.zhao@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
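At a glance, the three dissociation paths described above funnel through the same flush primitive. A simplified sketch of their shapes (the code lines are taken from the diff below; error handling and locking are elided here):

        /* vCPU migration -- tdx_vcpu_load(): flush on the old pCPU via IPI,
         * then publish the vCPU on the new pCPU's list. */
        tdx_flush_vp_on_cpu(vcpu);                      /* TDH.VP.FLUSH remotely */
        list_add(&tdx->cpu_list, &per_cpu(associated_tdvcpus, cpu));

        /* pCPU offline -- tdx_disable_virtualization_cpu(): already running on
         * the pCPU being disabled, so flush locally while walking its list. */
        list_for_each_entry_safe(tdx, tmp, tdvcpus, cpu_list)
                tdx_flush_vp(&arg);

        /* TD teardown -- tdx_mmu_release_hkid(): flush every vCPU, then tell
         * the TDX module all flushes are done before freeing the HKID. */
        kvm_for_each_vcpu(j, vcpu, kvm)
                tdx_flush_vp_on_cpu(vcpu);
        err = tdh_mng_vpflushdone(&kvm_tdx->td);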
1 parent 012426d commit d789fa6

File tree

4 files changed: +177 -10 lines changed

arch/x86/kvm/vmx/main.c

Lines changed: 20 additions & 2 deletions
@@ -10,6 +10,14 @@
 #include "tdx.h"
 #include "tdx_arch.h"
 
+static void vt_disable_virtualization_cpu(void)
+{
+	/* Note, TDX *and* VMX need to be disabled if TDX is enabled. */
+	if (enable_tdx)
+		tdx_disable_virtualization_cpu();
+	vmx_disable_virtualization_cpu();
+}
+
 static __init int vt_hardware_setup(void)
 {
 	int ret;
@@ -111,6 +119,16 @@ static void vt_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_vcpu_reset(vcpu, init_event);
 }
 
+static void vt_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_vcpu_load(vcpu, cpu);
+		return;
+	}
+
+	vmx_vcpu_load(vcpu, cpu);
+}
+
 static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
 	if (is_td_vcpu(vcpu)) {
@@ -199,7 +217,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.hardware_unsetup = vmx_hardware_unsetup,
 
 	.enable_virtualization_cpu = vmx_enable_virtualization_cpu,
-	.disable_virtualization_cpu = vmx_disable_virtualization_cpu,
+	.disable_virtualization_cpu = vt_disable_virtualization_cpu,
 	.emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,
 
 	.has_emulated_msr = vmx_has_emulated_msr,
@@ -216,7 +234,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.vcpu_reset = vt_vcpu_reset,
 
 	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
-	.vcpu_load = vmx_vcpu_load,
+	.vcpu_load = vt_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
 	.update_exception_bitmap = vmx_update_exception_bitmap,
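One subtlety in vt_disable_virtualization_cpu() above is that the order of the two calls matters: TDH.VP.FLUSH is a SEAMCALL, and SEAMCALLs can only be issued while the CPU is still in VMX operation, so the TDX flush has to run before vmx_disable_virtualization_cpu() executes VMXOFF. Annotated for exposition (the comment below is added here, not part of the patch):

        static void vt_disable_virtualization_cpu(void)
        {
                /*
                 * Flush TD vCPUs while VMXON is still in effect; SEAMCALL
                 * (and thus TDH.VP.FLUSH) is unusable after VMXOFF.
                 */
                if (enable_tdx)
                        tdx_disable_virtualization_cpu();
                vmx_disable_virtualization_cpu();
        }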

arch/x86/kvm/vmx/tdx.c

Lines changed: 151 additions & 8 deletions
@@ -162,6 +162,21 @@ static bool tdx_operand_busy(u64 err)
 }
 
 
+/*
+ * A per-CPU list of TD vCPUs associated with a given CPU.
+ * Protected by interrupt mask. Only manipulated by the CPU owning this per-CPU
+ * list.
+ * - When a vCPU is loaded onto a CPU, it is removed from the per-CPU list of
+ *   the old CPU during the IPI callback running on the old CPU, and then added
+ *   to the per-CPU list of the new CPU.
+ * - When a TD is tearing down, all vCPUs are disassociated from their current
+ *   running CPUs and removed from the per-CPU list during the IPI callback
+ *   running on those CPUs.
+ * - When a CPU is brought down, traverse the per-CPU list to disassociate all
+ *   associated TD vCPUs and remove them from the per-CPU list.
+ */
+static DEFINE_PER_CPU(struct list_head, associated_tdvcpus);
+
 static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
 {
 	tdx_guest_keyid_free(kvm_tdx->hkid);
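The list added in the hunk above follows a common kernel idiom: per-CPU data that only its owning CPU ever touches needs no lock, just masked interrupts to keep IPI callbacks from interleaving with the update. In isolation the idiom looks like this (a generic sketch, not patch code; the names mirror the patch):

        static DEFINE_PER_CPU(struct list_head, associated_tdvcpus);

        /* Writer running on the owning CPU: IRQ-off is the only protection. */
        unsigned long flags;

        local_irq_save(flags);
        list_add(&tdx->cpu_list, this_cpu_ptr(&associated_tdvcpus));
        local_irq_restore(flags);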
@@ -177,6 +192,22 @@ static inline bool is_hkid_assigned(struct kvm_tdx *kvm_tdx)
 	return kvm_tdx->hkid > 0;
 }
 
+static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_irqs_disabled();
+
+	list_del(&to_tdx(vcpu)->cpu_list);
+
+	/*
+	 * Ensure tdx->cpu_list is updated before setting vcpu->cpu to -1,
+	 * otherwise, a different CPU can see vcpu->cpu = -1 and add the vCPU
+	 * to its list before it's deleted from this CPU's list.
+	 */
+	smp_wmb();
+
+	vcpu->cpu = -1;
+}
+
 static void tdx_clear_page(struct page *page)
 {
 	const void *zero_page = (const void *) page_to_virt(ZERO_PAGE(0));
@@ -243,6 +274,83 @@ static void tdx_reclaim_control_page(struct page *ctrl_page)
 	__free_page(ctrl_page);
 }
 
+struct tdx_flush_vp_arg {
+	struct kvm_vcpu *vcpu;
+	u64 err;
+};
+
+static void tdx_flush_vp(void *_arg)
+{
+	struct tdx_flush_vp_arg *arg = _arg;
+	struct kvm_vcpu *vcpu = arg->vcpu;
+	u64 err;
+
+	arg->err = 0;
+	lockdep_assert_irqs_disabled();
+
+	/* Task migration can race with CPU offlining. */
+	if (unlikely(vcpu->cpu != raw_smp_processor_id()))
+		return;
+
+	/*
+	 * No need to do TDH_VP_FLUSH if the vCPU hasn't been initialized. The
+	 * list tracking still needs to be updated so that it's correct if/when
+	 * the vCPU does get initialized.
+	 */
+	if (to_tdx(vcpu)->state != VCPU_TD_STATE_UNINITIALIZED) {
+		/*
+		 * No need to retry. TDX Resources needed for TDH.VP.FLUSH are:
+		 * TDVPR as exclusive, TDR as shared, and TDCS as shared. This
+		 * vp flush function is called when destructing vCPU/TD or vCPU
+		 * migration. No other thread uses TDVPR in those cases.
+		 */
+		err = tdh_vp_flush(&to_tdx(vcpu)->vp);
+		if (unlikely(err && err != TDX_VCPU_NOT_ASSOCIATED)) {
+			/*
+			 * This function is called in IPI context. Do not use
+			 * printk to avoid console semaphore.
+			 * The caller prints out the error message, instead.
+			 */
+			if (err)
+				arg->err = err;
+		}
+	}
+
+	tdx_disassociate_vp(vcpu);
+}
+
+static void tdx_flush_vp_on_cpu(struct kvm_vcpu *vcpu)
+{
+	struct tdx_flush_vp_arg arg = {
+		.vcpu = vcpu,
+	};
+	int cpu = vcpu->cpu;
+
+	if (unlikely(cpu == -1))
+		return;
+
+	smp_call_function_single(cpu, tdx_flush_vp, &arg, 1);
+	if (KVM_BUG_ON(arg.err, vcpu->kvm))
+		pr_tdx_error(TDH_VP_FLUSH, arg.err);
+}
+
+void tdx_disable_virtualization_cpu(void)
+{
+	int cpu = raw_smp_processor_id();
+	struct list_head *tdvcpus = &per_cpu(associated_tdvcpus, cpu);
+	struct tdx_flush_vp_arg arg;
+	struct vcpu_tdx *tdx, *tmp;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	/* Safe variant needed as tdx_disassociate_vp() deletes the entry. */
+	list_for_each_entry_safe(tdx, tmp, tdvcpus, cpu_list) {
+		arg.vcpu = &tdx->vcpu;
+		tdx_flush_vp(&arg);
+	}
+	local_irq_restore(flags);
+}
+
 #define TDX_SEAMCALL_RETRIES 10000
 
 static void smp_func_do_phymem_cache_wb(void *unused)
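Note that tdx_flush_vp_on_cpu() passes wait=1 to smp_call_function_single(), so the call does not return until tdx_flush_vp() has finished on the target CPU. That synchronous completion is what makes it safe to read arg.err back on the calling CPU, and it is why tdx_flush_vp() only records the error instead of printing from IPI context. The kernel API in play (kernel/smp.c) is:

        int smp_call_function_single(int cpu, smp_call_func_t func,
                                     void *info, int wait);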
@@ -281,22 +389,21 @@ void tdx_mmu_release_hkid(struct kvm *kvm)
 	bool packages_allocated, targets_allocated;
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	cpumask_var_t packages, targets;
-	u64 err;
+	struct kvm_vcpu *vcpu;
+	unsigned long j;
 	int i;
+	u64 err;
 
 	if (!is_hkid_assigned(kvm_tdx))
 		return;
 
-	/* KeyID has been allocated but guest is not yet configured */
-	if (!kvm_tdx->td.tdr_page) {
-		tdx_hkid_free(kvm_tdx);
-		return;
-	}
-
 	packages_allocated = zalloc_cpumask_var(&packages, GFP_KERNEL);
 	targets_allocated = zalloc_cpumask_var(&targets, GFP_KERNEL);
 	cpus_read_lock();
 
+	kvm_for_each_vcpu(j, vcpu, kvm)
+		tdx_flush_vp_on_cpu(vcpu);
+
 	/*
 	 * TDH.PHYMEM.CACHE.WB tries to acquire the TDX module global lock
 	 * and can fail with TDX_OPERAND_BUSY when it fails to get the lock.
@@ -310,6 +417,16 @@ void tdx_mmu_release_hkid(struct kvm *kvm)
 	 * After the above flushing vps, there should be no more vCPU
 	 * associations, as all vCPU fds have been released at this stage.
 	 */
+	err = tdh_mng_vpflushdone(&kvm_tdx->td);
+	if (err == TDX_FLUSHVP_NOT_DONE)
+		goto out;
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error(TDH_MNG_VPFLUSHDONE, err);
+		pr_err("tdh_mng_vpflushdone() failed. HKID %d is leaked.\n",
+		       kvm_tdx->hkid);
+		goto out;
+	}
+
 	for_each_online_cpu(i) {
 		if (packages_allocated &&
 		    cpumask_test_and_set_cpu(topology_physical_package_id(i),
@@ -335,6 +452,7 @@ void tdx_mmu_release_hkid(struct kvm *kvm)
 		tdx_hkid_free(kvm_tdx);
 	}
 
+out:
 	mutex_unlock(&tdx_lock);
 	cpus_read_unlock();
 	free_cpumask_var(targets);
@@ -488,6 +606,27 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	struct vcpu_tdx *tdx = to_tdx(vcpu);
+
+	if (vcpu->cpu == cpu || !is_hkid_assigned(to_kvm_tdx(vcpu->kvm)))
+		return;
+
+	tdx_flush_vp_on_cpu(vcpu);
+
+	KVM_BUG_ON(cpu != raw_smp_processor_id(), vcpu->kvm);
+	local_irq_disable();
+	/*
+	 * Pairs with the smp_wmb() in tdx_disassociate_vp() to ensure
+	 * vcpu->cpu is read before tdx->cpu_list.
+	 */
+	smp_rmb();
+
+	list_add(&tdx->cpu_list, &per_cpu(associated_tdvcpus, cpu));
+	local_irq_enable();
+}
+
 void tdx_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
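The smp_rmb() in tdx_vcpu_load() and the smp_wmb() in tdx_disassociate_vp() form a small publication protocol. Shown side by side (simplified from the two functions above):

        /* Old pCPU, in IPI context (tdx_disassociate_vp): */
        list_del(&to_tdx(vcpu)->cpu_list);
        smp_wmb();               /* make the removal visible ...             */
        vcpu->cpu = -1;          /* ... before marking the vCPU unassociated */

        /* New pCPU (tdx_vcpu_load), after the flush IPI has returned: */
        local_irq_disable();
        smp_rmb();               /* vcpu->cpu == -1 implies the list_del ... */
        list_add(&tdx->cpu_list, /* ... is visible, so re-adding is safe     */
                 &per_cpu(associated_tdvcpus, cpu));
        local_irq_enable();

Without the pairing, a CPU could observe vcpu->cpu == -1 while the vCPU was still on the old CPU's list, and add it to a second list.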
@@ -2044,7 +2183,11 @@ void tdx_cleanup(void)
 
 int __init tdx_bringup(void)
 {
-	int r;
+	int r, i;
+
+	/* tdx_disable_virtualization_cpu() uses associated_tdvcpus. */
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(associated_tdvcpus, i));
 
 	if (!enable_tdx)
 		return 0;

arch/x86/kvm/vmx/tdx.h

Lines changed: 2 additions & 0 deletions
@@ -48,6 +48,8 @@ struct vcpu_tdx {
 
 	struct tdx_vp vp;
 
+	struct list_head cpu_list;
+
 	enum vcpu_tdx_state state;
 };
 
arch/x86/kvm/vmx/x86_ops.h

Lines changed: 4 additions & 0 deletions
@@ -122,13 +122,15 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
 void vmx_setup_mce(struct kvm_vcpu *vcpu);
 
 #ifdef CONFIG_KVM_INTEL_TDX
+void tdx_disable_virtualization_cpu(void);
 int tdx_vm_init(struct kvm *kvm);
 void tdx_mmu_release_hkid(struct kvm *kvm);
 void tdx_vm_destroy(struct kvm *kvm);
 int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 
 int tdx_vcpu_create(struct kvm_vcpu *vcpu);
 void tdx_vcpu_free(struct kvm_vcpu *vcpu);
+void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 
@@ -146,13 +148,15 @@ void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
 void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
 int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn);
 #else
+static inline void tdx_disable_virtualization_cpu(void) {}
 static inline int tdx_vm_init(struct kvm *kvm) { return -EOPNOTSUPP; }
 static inline void tdx_mmu_release_hkid(struct kvm *kvm) {}
 static inline void tdx_vm_destroy(struct kvm *kvm) {}
 static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
 
 static inline int tdx_vcpu_create(struct kvm_vcpu *vcpu) { return -EOPNOTSUPP; }
 static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
+static inline void tdx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {}
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
 