Commit 02ab577

yamahata authored and bonzini committed
KVM: TDX: Implement hooks to propagate changes of TDP MMU mirror page table
Implement hooks in TDX to propagate changes of the mirror page table to
the private EPT, including changes for page table page adding/removing
and guest page adding/removing. TDX invokes the corresponding SEAMCALLs
in the hooks.

- Hook link_external_spt
  propagates the addition of a page table page into the private EPT.

- Hook set_external_spte
  tdx_sept_set_private_spte() in this patch only handles the addition of
  a guest private page when the TD is finalized. Later patches will
  handle the case of adding guest private pages before TD finalization.

- Hook free_external_spt
  It is invoked when a page table page is removed from the mirror page
  table, which currently can only occur at the TD tear down phase, after
  the hkid is freed.

- Hook remove_external_spte
  It is invoked when a guest private page is removed from the mirror
  page table, which can occur while the TD is active, e.g. during
  shared <-> private conversion and slot move/deletion. This hook is
  guaranteed to be triggered before the hkid is freed, because the gmem
  fd is released, and with it all private leaf mappings are zapped,
  before the hkid is freed at VM destroy.

  TDX invokes the following SEAMCALLs sequentially (see the sketch
  below):
  1) TDH.MEM.RANGE.BLOCK (remove the RWX bits from a private EPT entry),
  2) TDH.MEM.TRACK (increase the TD epoch),
  3) TDH.MEM.PAGE.REMOVE (remove the private EPT entry and untrack the
     guest page).

  TDH.MEM.PAGE.REMOVE can't succeed unless TDH.MEM.RANGE.BLOCK and
  TDH.MEM.TRACK were called successfully. SEAMCALL TDH.MEM.TRACK is
  called in tdx_track() to enforce that TLB tracking is performed by
  the TDX module for the private EPT.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Co-developed-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
- Remove TDX_ERROR_SEPT_BUSY and add tdx_operand_busy() helper (Binbin)

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
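For orientation, the remove_external_spte hook chains the three
SEAMCALLs as follows. This is condensed from the tdx.c diff below, with
the busy-retry loops and most error handling elided:

	int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
					 enum pg_level level, kvm_pfn_t pfn)
	{
		int ret;

		/* 1) TDH.MEM.RANGE.BLOCK: strip RWX from the private EPT entry. */
		ret = tdx_sept_zap_private_spte(kvm, gfn, level);
		if (ret)
			return ret;

		/*
		 * 2) TDH.MEM.TRACK: bump the TD epoch so the TDX module can
		 * verify TLB tracking before the removal below.
		 */
		tdx_track(kvm);

		/* 3) TDH.MEM.PAGE.REMOVE: drop the entry, untrack the page. */
		return tdx_sept_drop_private_spte(kvm, gfn, level,
						  pfn_to_page(pfn));
	}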
1 parent 22836e1 · commit 02ab577

File tree

4 files changed: +284 -3 lines changed

arch/x86/kvm/vmx/main.c

Lines changed: 13 additions & 1 deletion
@@ -36,9 +36,21 @@ static __init int vt_hardware_setup(void)
 	 * is KVM may allocate couple of more bytes than needed for
 	 * each VM.
 	 */
-	if (enable_tdx)
+	if (enable_tdx) {
 		vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
 					   sizeof(struct kvm_tdx));
+		/*
+		 * Note, TDX may fail to initialize in a later time in
+		 * vt_init(), in which case it is not necessary to setup
+		 * those callbacks.  But making them valid here even
+		 * when TDX fails to init later is fine because those
+		 * callbacks won't be called if the VM isn't TDX guest.
+		 */
+		vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
+		vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
+		vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
+		vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
+	}
 
 	return 0;
 }

arch/x86/kvm/vmx/tdx.c

Lines changed: 211 additions & 2 deletions
@@ -154,6 +154,12 @@ static DEFINE_MUTEX(tdx_lock);
 
 static atomic_t nr_configured_hkid;
 
+static bool tdx_operand_busy(u64 err)
+{
+	return (err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY;
+}
+
+
 static inline void tdx_hkid_free(struct kvm_tdx *kvm_tdx)
 {
 	tdx_guest_keyid_free(kvm_tdx->hkid);
@@ -525,6 +531,160 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
 	td_vmcs_write64(to_tdx(vcpu), SHARED_EPT_POINTER, root_hpa);
 }
 
+static void tdx_unpin(struct kvm *kvm, struct page *page)
+{
+	put_page(page);
+}
+
+static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
+			    enum pg_level level, struct page *page)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	u64 entry, level_state;
+	u64 err;
+
+	err = tdh_mem_page_aug(&kvm_tdx->td, gpa, tdx_level, page, &entry, &level_state);
+	if (unlikely(tdx_operand_busy(err))) {
+		tdx_unpin(kvm, page);
+		return -EBUSY;
+	}
+
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_PAGE_AUG, err, entry, level_state);
+		tdx_unpin(kvm, page);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, kvm_pfn_t pfn)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	struct page *page = pfn_to_page(pfn);
+
+	/* TODO: handle large pages. */
+	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
+		return -EINVAL;
+
+	/*
+	 * Because guest_memfd doesn't support page migration with
+	 * a_ops->migrate_folio (yet), no callback is triggered for KVM on page
+	 * migration.  Until guest_memfd supports page migration, prevent page
+	 * migration.
+	 * TODO: Once guest_memfd introduces callback on page migration,
+	 * implement it and remove get_page/put_page().
+	 */
+	get_page(page);
+
+	if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
+		return tdx_mem_page_aug(kvm, gfn, level, page);
+
+	/*
+	 * TODO: KVM_TDX_INIT_MEM_REGION support to populate before finalize
+	 * comes here for the initial memory.
+	 */
+	return -EOPNOTSUPP;
+}
+
+static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
+				      enum pg_level level, struct page *page)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	u64 err, entry, level_state;
+
+	/* TODO: handle large pages. */
+	if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
+		return -EINVAL;
+
+	if (KVM_BUG_ON(!is_hkid_assigned(kvm_tdx), kvm))
+		return -EINVAL;
+
+	do {
+		/*
+		 * When zapping private page, write lock is held. So no race
+		 * condition with other vcpu sept operation.  Race only with
+		 * TDH.VP.ENTER.
+		 */
+		err = tdh_mem_page_remove(&kvm_tdx->td, gpa, tdx_level, &entry,
+					  &level_state);
+	} while (unlikely(tdx_operand_busy(err)));
+
+	if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
+		     err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
+		/*
+		 * This page was mapped with KVM_MAP_MEMORY, but
+		 * KVM_TDX_INIT_MEM_REGION is not issued yet.
+		 */
+		if (!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) {
+			tdx_unpin(kvm, page);
+			return 0;
+		}
+	}
+
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_PAGE_REMOVE, err, entry, level_state);
+		return -EIO;
+	}
+
+	err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
+
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err);
+		return -EIO;
+	}
+	tdx_clear_page(page);
+	tdx_unpin(kvm, page);
+	return 0;
+}
+
+int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	gpa_t gpa = gfn_to_gpa(gfn);
+	struct page *page = virt_to_page(private_spt);
+	u64 err, entry, level_state;
+
+	err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, tdx_level, page, &entry,
+			       &level_state);
+	if (unlikely(tdx_operand_busy(err)))
+		return -EBUSY;
+
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_SEPT_ADD, err, entry, level_state);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
+				     enum pg_level level)
+{
+	int tdx_level = pg_level_to_tdx_sept_level(level);
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+	gpa_t gpa = gfn_to_gpa(gfn) & KVM_HPAGE_MASK(level);
+	u64 err, entry, level_state;
+
+	/* For now large page isn't supported yet. */
+	WARN_ON_ONCE(level != PG_LEVEL_4K);
+
+	err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
+	if (unlikely(tdx_operand_busy(err)))
+		return -EBUSY;
+	if (KVM_BUG_ON(err, kvm)) {
+		pr_tdx_error_2(TDH_MEM_RANGE_BLOCK, err, entry, level_state);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
  * Ensure shared and private EPTs to be flushed on all vCPUs.
  * tdh_mem_track() is the only caller that increases TD epoch. An increase in
@@ -549,7 +709,7 @@ void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int pgd_level)
  * occurs certainly after TD epoch increment and before the next
  * tdh_mem_track().
  */
-static void __always_unused tdx_track(struct kvm *kvm)
+static void tdx_track(struct kvm *kvm)
 {
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	u64 err;
@@ -562,14 +722,63 @@ static void __always_unused tdx_track(struct kvm *kvm)
 
 	do {
 		err = tdh_mem_track(&kvm_tdx->td);
-	} while (unlikely((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY));
+	} while (unlikely(tdx_operand_busy(err)));
 
 	if (KVM_BUG_ON(err, kvm))
 		pr_tdx_error(TDH_MEM_TRACK, err);
 
 	kvm_make_all_cpus_request(kvm, KVM_REQ_OUTSIDE_GUEST_MODE);
 }
 
+int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt)
+{
+	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+	/*
+	 * free_external_spt() is only called after hkid is freed when TD is
+	 * tearing down.
+	 * KVM doesn't (yet) zap page table pages in mirror page table while
+	 * TD is active, though guest pages mapped in mirror page table could be
+	 * zapped during TD is active, e.g. for shared <-> private conversion
+	 * and slot move/deletion.
+	 */
+	if (KVM_BUG_ON(is_hkid_assigned(kvm_tdx), kvm))
+		return -EINVAL;
+
+	/*
+	 * The HKID assigned to this TD was already freed and cache was
+	 * already flushed. We don't have to flush again.
+	 */
+	return tdx_reclaim_page(virt_to_page(private_spt));
+}
+
+int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+				 enum pg_level level, kvm_pfn_t pfn)
+{
+	int ret;
+
+	/*
+	 * HKID is released after all private pages have been removed, and set
+	 * before any might be populated. Warn if zapping is attempted when
+	 * there can't be anything populated in the private EPT.
+	 */
+	if (KVM_BUG_ON(!is_hkid_assigned(to_kvm_tdx(kvm)), kvm))
+		return -EINVAL;
+
+	ret = tdx_sept_zap_private_spte(kvm, gfn, level);
+	if (ret)
+		return ret;
+
+	/*
+	 * TDX requires TLB tracking before dropping private page.  Do
+	 * it here, although it is also done later.
+	 */
+	tdx_track(kvm);
+
+	return tdx_sept_drop_private_spte(kvm, gfn, level, pfn_to_page(pfn));
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	const struct tdx_sys_info_td_conf *td_conf = &tdx_sysinfo->td_conf;

arch/x86/kvm/vmx/tdx_arch.h

Lines changed: 23 additions & 0 deletions
@@ -121,6 +121,29 @@ struct td_params {
 #define TDX_MIN_TSC_FREQUENCY_KHZ		(100 * 1000)
 #define TDX_MAX_TSC_FREQUENCY_KHZ		(10 * 1000 * 1000)
 
+/* Additional Secure EPT entry information */
+#define TDX_SEPT_LEVEL_MASK		GENMASK_ULL(2, 0)
+#define TDX_SEPT_STATE_MASK		GENMASK_ULL(15, 8)
+#define TDX_SEPT_STATE_SHIFT		8
+
+enum tdx_sept_entry_state {
+	TDX_SEPT_FREE = 0,
+	TDX_SEPT_BLOCKED = 1,
+	TDX_SEPT_PENDING = 2,
+	TDX_SEPT_PENDING_BLOCKED = 3,
+	TDX_SEPT_PRESENT = 4,
+};
+
+static inline u8 tdx_get_sept_level(u64 sept_entry_info)
+{
+	return sept_entry_info & TDX_SEPT_LEVEL_MASK;
+}
+
+static inline u8 tdx_get_sept_state(u64 sept_entry_info)
+{
+	return (sept_entry_info & TDX_SEPT_STATE_MASK) >> TDX_SEPT_STATE_SHIFT;
+}
+
 #define MD_FIELD_ID_FEATURES0_TOPOLOGY_ENUM	BIT_ULL(20)
 
 /*
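As an aside, the tdx_get_sept_level()/tdx_get_sept_state() accessors
added above are not yet called anywhere in this commit. A hypothetical
debug helper (name and placement assumed, not part of the patch) could
use them to decode the level_state output of a failed SEAMCALL:

	/* Hypothetical example, not in this commit: decode the Secure EPT
	 * entry information returned in level_state by the SEAMCALL wrappers.
	 */
	static void tdx_dump_sept_info(u64 level_state)
	{
		pr_debug("SEPT level %u, state %u\n",
			 tdx_get_sept_level(level_state),
			 tdx_get_sept_state(level_state));
	}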

arch/x86/kvm/vmx/x86_ops.h

Lines changed: 37 additions & 0 deletions
@@ -132,6 +132,15 @@ void tdx_vcpu_free(struct kvm_vcpu *vcpu);
 
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
 
+int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt);
+int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, void *private_spt);
+int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+			      enum pg_level level, kvm_pfn_t pfn);
+int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+				 enum pg_level level, kvm_pfn_t pfn);
+
 void tdx_flush_tlb_current(struct kvm_vcpu *vcpu);
 void tdx_flush_tlb_all(struct kvm_vcpu *vcpu);
 void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
@@ -146,6 +155,34 @@ static inline void tdx_vcpu_free(struct kvm_vcpu *vcpu) {}
 
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
 
+static inline int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
+					    enum pg_level level,
+					    void *private_spt)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int tdx_sept_free_private_spt(struct kvm *kvm, gfn_t gfn,
+					    enum pg_level level,
+					    void *private_spt)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
+					    enum pg_level level,
+					    kvm_pfn_t pfn)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
+					       enum pg_level level,
+					       kvm_pfn_t pfn)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void tdx_flush_tlb_current(struct kvm_vcpu *vcpu) {}
 static inline void tdx_flush_tlb_all(struct kvm_vcpu *vcpu) {}
 static inline void tdx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level) {}
