Skip to content

Commit 4b2abc4

Browse files
yanzhao56 and bonzini
authored and committed
KVM: TDX: Kick off vCPUs when SEAMCALL is busy during TD page removal
Kick off all vCPUs and prevent tdh_vp_enter() from executing whenever tdh_mem_range_block()/tdh_mem_track()/tdh_mem_page_remove() encounters contention, since the page removal path does not expect error and is less sensitive to the performance penalty caused by kicking off vCPUs.

Although KVM has protected SEPT zap-related SEAMCALLs with kvm->mmu_lock, KVM may still encounter TDX_OPERAND_BUSY due to the contention in the TDX module.
- tdh_mem_track() may contend with tdh_vp_enter().
- tdh_mem_range_block()/tdh_mem_page_remove() may contend with tdh_vp_enter() and TDCALLs.

Resources    SHARED users            EXCLUSIVE users
------------------------------------------------------------
TDCS epoch   tdh_vp_enter            tdh_mem_track
------------------------------------------------------------
SEPT tree    tdh_mem_page_remove     tdh_vp_enter (0-step mitigation)
             tdh_mem_range_block
------------------------------------------------------------
SEPT entry                           tdh_mem_range_block (Host lock)
                                     tdh_mem_page_remove (Host lock)
                                     tdg_mem_page_accept (Guest lock)
                                     tdg_mem_page_attr_rd (Guest lock)
                                     tdg_mem_page_attr_wr (Guest lock)

Use a TDX-specific per-VM flag wait_for_sept_zap along with KVM_REQ_OUTSIDE_GUEST_MODE to kick off vCPUs and prevent them from entering the TD, thereby avoiding the potential contention. Apply the kick-off and no-vCPU-entering only after each SEAMCALL busy error to minimize the window of no TD entry, as the contention due to 0-step mitigation or TDCALLs is expected to be rare.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
Message-ID: <20250227012021.1778144-5-binbin.wu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent b0327bb commit 4b2abc4

File tree

2 files changed

+61
-9
lines changed

2 files changed

+61
-9
lines changed

arch/x86/kvm/vmx/tdx.c

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,26 @@ static void tdx_clear_page(struct page *page)
295295
__mb();
296296
}
297297

298+
static void tdx_no_vcpus_enter_start(struct kvm *kvm)
299+
{
300+
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
301+
302+
lockdep_assert_held_write(&kvm->mmu_lock);
303+
304+
WRITE_ONCE(kvm_tdx->wait_for_sept_zap, true);
305+
306+
kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
307+
}
308+
309+
static void tdx_no_vcpus_enter_stop(struct kvm *kvm)
310+
{
311+
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
312+
313+
lockdep_assert_held_write(&kvm->mmu_lock);
314+
315+
WRITE_ONCE(kvm_tdx->wait_for_sept_zap, false);
316+
}
317+
298318
/* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
299319
static int __tdx_reclaim_page(struct page *page)
300320
{
@@ -980,6 +1000,14 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
9801000
*/
9811001
WARN_ON_ONCE(force_immediate_exit);
9821002

1003+
/*
1004+
* Wait until retry of SEPT-zap-related SEAMCALL completes before
1005+
* allowing vCPU entry to avoid contention with tdh_vp_enter() and
1006+
* TDCALLs.
1007+
*/
1008+
if (unlikely(READ_ONCE(to_kvm_tdx(vcpu->kvm)->wait_for_sept_zap)))
1009+
return EXIT_FASTPATH_EXIT_HANDLED;
1010+
9831011
trace_kvm_entry(vcpu, force_immediate_exit);
9841012

9851013
if (pi_test_on(&vt->pi_desc)) {
@@ -1493,15 +1521,24 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
14931521
if (KVM_BUG_ON(!is_hkid_assigned(kvm_tdx), kvm))
14941522
return -EINVAL;
14951523

1496-
do {
1524+
/*
1525+
* When zapping private page, write lock is held. So no race condition
1526+
* with other vcpu sept operation.
1527+
* Race with TDH.VP.ENTER due to (0-step mitigation) and Guest TDCALLs.
1528+
*/
1529+
err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
1530+
&level_state);
1531+
1532+
if (unlikely(tdx_operand_busy(err))) {
14971533
/*
1498-
* When zapping private page, write lock is held. So no race
1499-
* condition with other vcpu sept operation. Race only with
1500-
* TDH.VP.ENTER.
1534+
* The second retry is expected to succeed after kicking off all
1535+
* other vCPUs and prevent them from invoking TDH.VP.ENTER.
15011536
*/
1537+
tdx_no_vcpus_enter_start(kvm);
15021538
err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
15031539
&level_state);
1504-
} while (unlikely(tdx_operand_busy(err)));
1540+
tdx_no_vcpus_enter_stop(kvm);
1541+
}
15051542

15061543
if (KVM_BUG_ON(err, kvm)) {
15071544
pr_tdx_error_2(TDH_MEM_PAGE_REMOVE, err, entry, level_state);
@@ -1585,9 +1622,13 @@ static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
15851622
WARN_ON_ONCE(level != PG_LEVEL_4K);
15861623

15871624
err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
1588-
if (unlikely(tdx_operand_busy(err)))
1589-
return -EBUSY;
15901625

1626+
if (unlikely(tdx_operand_busy(err))) {
1627+
/* After no vCPUs enter, the second retry is expected to succeed */
1628+
tdx_no_vcpus_enter_start(kvm);
1629+
err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
1630+
tdx_no_vcpus_enter_stop(kvm);
1631+
}
15911632
if (tdx_is_sept_zap_err_due_to_premap(kvm_tdx, err, entry, level) &&
15921633
!KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm)) {
15931634
atomic64_dec(&kvm_tdx->nr_premapped);
@@ -1637,9 +1678,13 @@ static void tdx_track(struct kvm *kvm)
16371678

16381679
lockdep_assert_held_write(&kvm->mmu_lock);
16391680

1640-
do {
1681+
err = tdh_mem_track(&kvm_tdx->td);
1682+
if (unlikely(tdx_operand_busy(err))) {
1683+
/* After no vCPUs enter, the second retry is expected to succeed */
1684+
tdx_no_vcpus_enter_start(kvm);
16411685
err = tdh_mem_track(&kvm_tdx->td);
1642-
} while (unlikely(tdx_operand_busy(err)));
1686+
tdx_no_vcpus_enter_stop(kvm);
1687+
}
16431688

16441689
if (KVM_BUG_ON(err, kvm))
16451690
pr_tdx_error(TDH_MEM_TRACK, err);

arch/x86/kvm/vmx/tdx.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ struct kvm_tdx {
3737

3838
/* For KVM_TDX_INIT_MEM_REGION. */
3939
atomic64_t nr_premapped;
40+
41+
/*
42+
* Prevent vCPUs from TD entry to ensure SEPT zap related SEAMCALLs do
43+
* not contend with tdh_vp_enter() and TDCALLs.
44+
* Set/unset is protected with kvm->mmu_lock.
45+
*/
46+
bool wait_for_sept_zap;
4047
};
4148

4249
/* TDX module vCPU states */

0 commit comments

Comments
 (0)