Skip to content

Commit eac0b72

Browse files
yanzhao56 authored and
bonzini committed
KVM: TDX: Handle SEPT zap error due to page add error in premap
Move the handling of SEPT zap errors caused by unsuccessful execution of tdh_mem_page_add() in KVM_TDX_INIT_MEM_REGION from tdx_sept_drop_private_spte() to tdx_sept_zap_private_spte(). Introduce a new helper function tdx_is_sept_zap_err_due_to_premap() to detect this specific error. During the IOCTL KVM_TDX_INIT_MEM_REGION, KVM premaps leaf SPTEs in the mirror page table before the corresponding entry in the private page table is successfully installed by tdh_mem_page_add(). If an error occurs during the invocation of tdh_mem_page_add(), a mismatch between the mirror and private page tables results in SEAMCALLs for SEPT zap returning the error code TDX_EPT_ENTRY_STATE_INCORRECT. The error TDX_EPT_WALK_FAILED is not possible because, during KVM_TDX_INIT_MEM_REGION, KVM only premaps leaf SPTEs after successfully mapping non-leaf SPTEs. Unlike leaf SPTEs, there is no mismatch in non-leaf PTEs between the mirror and private page tables. Therefore, during zap, SEAMCALLs should find an empty leaf entry in the private EPT, leading to the error TDX_EPT_ENTRY_STATE_INCORRECT instead of TDX_EPT_WALK_FAILED. Since tdh_mem_range_block() is always invoked before tdh_mem_page_remove(), move the handling of SEPT zap errors from tdx_sept_drop_private_spte() to tdx_sept_zap_private_spte(). In tdx_sept_zap_private_spte(), return 0 for errors due to premap to skip executing other SEAMCALLs for zap, which are unnecessary. Return 1 to indicate no other errors, allowing the execution of other zap SEAMCALLs to continue. The failure of tdh_mem_page_add() is uncommon and has not been observed in real workloads. Currently, this failure is only hypothetically triggered by skipping the real SEAMCALL and faking the add error in the SEAMCALL wrapper. Additionally, without this fix, there will be no host crashes or other severe issues. Signed-off-by: Yan Zhao <yan.y.zhao@intel.com> Message-ID: <20250217085642.19696-1-yan.y.zhao@intel.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
1 parent 1f62531 commit eac0b72

File tree

1 file changed

+47
-19
lines changed

1 file changed

+47
-19
lines changed

arch/x86/kvm/vmx/tdx.c

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -780,20 +780,6 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
780780
&level_state);
781781
} while (unlikely(tdx_operand_busy(err)));
782782

783-
if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
784-
err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
785-
/*
786-
* Page is mapped by KVM_TDX_INIT_MEM_REGION, but hasn't called
787-
* tdh_mem_page_add().
788-
*/
789-
if ((!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) &&
790-
!KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm)) {
791-
atomic64_dec(&kvm_tdx->nr_premapped);
792-
tdx_unpin(kvm, page);
793-
return 0;
794-
}
795-
}
796-
797783
if (KVM_BUG_ON(err, kvm)) {
798784
pr_tdx_error_2(TDH_MEM_PAGE_REMOVE, err, entry, level_state);
799785
return -EIO;
@@ -831,8 +817,41 @@ int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
831817
return 0;
832818
}
833819

820+
/*
821+
* Check if the error returned from a SEPT zap SEAMCALL occurred because a page is
822+
* mapped by KVM_TDX_INIT_MEM_REGION without tdh_mem_page_add() being called
823+
* successfully.
824+
*
825+
* Since tdh_mem_sept_add() must have been invoked successfully before a
826+
* non-leaf entry is present in the mirrored page table, the SEPT ZAP related
827+
* SEAMCALLs should not encounter err TDX_EPT_WALK_FAILED. They should instead
828+
* find TDX_EPT_ENTRY_STATE_INCORRECT due to an empty leaf entry found in the
829+
* SEPT.
830+
*
831+
* Further check whether the entry returned from the SEPT walk has RWX permissions
832+
* to filter out anything unexpected.
833+
*
834+
* Note: @level is pg_level, not the tdx_level. The tdx_level extracted from
835+
* level_state returned from a SEAMCALL error is the same as that passed into
836+
* the SEAMCALL.
837+
*/
838+
static bool tdx_is_sept_zap_err_due_to_premap(struct kvm_tdx *kvm_tdx, u64 err,
					      u64 entry, int level)
{
	/*
	 * A successful SEAMCALL is not an error at all, and once the TD has
	 * reached TD_STATE_RUNNABLE an error is never attributed to premap.
	 */
	if (!err || kvm_tdx->state == TD_STATE_RUNNABLE)
		return false;

	/* Only an "entry state incorrect" error on the RCX operand qualifies. */
	if (err != (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))
		return false;

	/*
	 * A last-level entry that still carries any RWX permission bit is
	 * unexpected here: reject it so the caller reports the error.
	 */
	if (is_last_spte(entry, level) && (entry & VMX_EPT_RWX_MASK))
		return false;

	return true;
}
852+
834853
static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
835-
enum pg_level level)
854+
enum pg_level level, struct page *page)
836855
{
837856
int tdx_level = pg_level_to_tdx_sept_level(level);
838857
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
@@ -845,11 +864,19 @@ static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
845864
err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
846865
if (unlikely(tdx_operand_busy(err)))
847866
return -EBUSY;
867+
868+
if (tdx_is_sept_zap_err_due_to_premap(kvm_tdx, err, entry, level) &&
869+
!KVM_BUG_ON(!atomic64_read(&kvm_tdx->nr_premapped), kvm)) {
870+
atomic64_dec(&kvm_tdx->nr_premapped);
871+
tdx_unpin(kvm, page);
872+
return 0;
873+
}
874+
848875
if (KVM_BUG_ON(err, kvm)) {
849876
pr_tdx_error_2(TDH_MEM_RANGE_BLOCK, err, entry, level_state);
850877
return -EIO;
851878
}
852-
return 0;
879+
return 1;
853880
}
854881

855882
/*
@@ -923,6 +950,7 @@ int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
923950
int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
924951
enum pg_level level, kvm_pfn_t pfn)
925952
{
953+
struct page *page = pfn_to_page(pfn);
926954
int ret;
927955

928956
/*
@@ -933,8 +961,8 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
933961
if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
934962
return -EINVAL;
935963

936-
ret = tdx_sept_zap_private_spte(kvm, gfn, level);
937-
if (ret)
964+
ret = tdx_sept_zap_private_spte(kvm, gfn, level, page);
965+
if (ret <= 0)
938966
return ret;
939967

940968
/*
@@ -943,7 +971,7 @@ int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
943971
*/
944972
tdx_track(kvm);
945973

946-
return tdx_sept_drop_private_spte(kvm, gfn, level, pfn_to_page(pfn));
974+
return tdx_sept_drop_private_spte(kvm, gfn, level, page);
947975
}
948976

949977
static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)

0 commit comments

Comments
 (0)