
Commit 77ab80c

Merge branch 'kvm-tdx-enter-exit' into HEAD
This series introduces callbacks to facilitate the entry of a TD VCPU and the corresponding save/restore of host state.

A TD VCPU is entered via the SEAMCALL TDH.VP.ENTER. The TDX Module manages the save/restore of guest state and, in conjunction with the SEAMCALL interface, handles certain aspects of host state. However, there are specific elements of the host state that require additional attention, as detailed in the Intel TDX ABI documentation for TDH.VP.ENTER.

TDX is quite different from VMX in this regard. For VMX, the host VMM is heavily involved in restoring, managing and saving guest CPU state, whereas for TDX this is handled by the TDX Module. In that way, the TDX Module can protect the confidentiality and integrity of TD CPU state.

The TDX Module does not save/restore all host CPU state because the host VMM can do it more efficiently and selectively. CPU state referred to below is host CPU state. Often values are already held in memory so no explicit save is needed, and restoration may not be needed if the kernel is not using a feature.

TDX does not support PAUSE-loop exiting. According to the TDX Module Base arch. spec., hypercalls are expected to be used instead. Note that the Linux TDX guest supports existing hypercalls via TDG.VP.VMCALL.

This series requires TDX module 1.5.06.00.0744, or later, due to the removal of workarounds for the lack of the NO_RBP_MOD feature required by the kernel. NO_RBP_MOD is now required.
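For orientation, the entry path described above reduces to one SEAMCALL per entry. Below is a minimal sketch, assuming a struct vcpu_tdx that carries the tdx_vp handle plus a tdx_module_args block for guest GPRs; the field names vp, vp_enter_args and vp_enter_ret are illustrative, not necessarily the series' exact layout. tdh_vp_enter() is the wrapper declared in arch/x86/include/asm/tdx.h further down.

/*
 * Illustrative sketch only: enter the TD vCPU via TDH.VP.ENTER and keep
 * the raw SEAMCALL return code for exit handling.  The series' real
 * entry path additionally deals with host state (debug registers,
 * user-return MSRs, kernel GS base) around this call, per the
 * TDH.VP.ENTER ABI.
 */
static u64 tdx_vcpu_enter_exit(struct vcpu_tdx *tdx)
{
	/* Guest GPRs are passed in and returned through vp_enter_args. */
	tdx->vp_enter_ret = tdh_vp_enter(&tdx->vp, &tdx->vp_enter_args);

	return tdx->vp_enter_ret;
}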
2 parents: fcbe348 + 484612f

File tree

15 files changed: 513 additions & 149 deletions

arch/x86/include/asm/kvm_host.h

Lines changed: 10 additions & 2 deletions
@@ -606,8 +606,15 @@ struct kvm_pmu {
 struct kvm_pmu_ops;
 
 enum {
-	KVM_DEBUGREG_BP_ENABLED = 1,
-	KVM_DEBUGREG_WONT_EXIT = 2,
+	KVM_DEBUGREG_BP_ENABLED = BIT(0),
+	KVM_DEBUGREG_WONT_EXIT = BIT(1),
+	/*
+	 * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
+	 * hardware on exit from or enter to guest. KVM needn't switch them.
+	 * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
+	 * exit, host values need to be restored.
+	 */
+	KVM_DEBUGREG_AUTO_SWITCH = BIT(2),
 };
 
 struct kvm_mtrr {
@@ -2328,6 +2335,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 int kvm_add_user_return_msr(u32 msr);
 int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
+void kvm_user_return_msr_update_cache(unsigned int index, u64 val);
 
 static inline bool kvm_is_supported_user_return_msr(u32 msr)
 {
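kvm_user_return_msr_update_cache() exists because, per the commit message, the TDX Module restores some host MSR values itself around TDH.VP.ENTER; KVM then only needs to refresh its user-return cache rather than issue another WRMSR through kvm_set_user_return_msr(). A hedged sketch of the intended call pattern follows — the tdx_uret_msrs table and its fields are hypothetical, for illustration only.

/*
 * Sketch, assuming a hypothetical table of MSRs whose host-visible
 * values the TDX Module has already set by the time TDH.VP.ENTER
 * returns; only KVM's cached user-return value needs updating.
 */
struct tdx_uret_msr {
	u32 msr;
	int slot;	/* from kvm_find_user_return_msr() */
	u64 defval;	/* value the TDX Module leaves behind */
};

static struct tdx_uret_msr tdx_uret_msrs[4];	/* hypothetical */

static void tdx_user_return_msr_update_cache(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(tdx_uret_msrs); i++)
		kvm_user_return_msr_update_cache(tdx_uret_msrs[i].slot,
						 tdx_uret_msrs[i].defval);
}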

arch/x86/include/asm/tdx.h

Lines changed: 1 addition & 0 deletions
@@ -165,6 +165,7 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level)
 	return level - 1;
 }
 
+u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
 u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
 u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, int level, struct page *page, u64 *ext_err1, u64 *ext_err2);
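The tdh_vp_enter() declaration above is implemented alongside the other tdh_* wrappers in the host TDX code. A plausible shape, assuming the saved-register SEAMCALL variant __seamcall_saved_ret() and a helper returning the TDVPR page's physical address — treat both names as assumptions, not the committed implementation:

/*
 * Sketch: TDH.VP.ENTER takes the TDVPR physical address in RCX, and
 * guest GPR state is exchanged in and out through *args, hence the
 * saved-register SEAMCALL variant.
 */
u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args)
{
	args->rcx = tdx_tdvpr_pa(vp);	/* assumed helper */

	return __seamcall_saved_ret(TDH_VP_ENTER, args);
}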

arch/x86/kvm/vmx/common.h

Lines changed: 69 additions & 0 deletions
@@ -3,9 +3,78 @@
 #define __KVM_X86_VMX_COMMON_H
 
 #include <linux/kvm_host.h>
+#include <asm/posted_intr.h>
 
 #include "mmu.h"
 
+union vmx_exit_reason {
+	struct {
+		u32 basic : 16;
+		u32 reserved16 : 1;
+		u32 reserved17 : 1;
+		u32 reserved18 : 1;
+		u32 reserved19 : 1;
+		u32 reserved20 : 1;
+		u32 reserved21 : 1;
+		u32 reserved22 : 1;
+		u32 reserved23 : 1;
+		u32 reserved24 : 1;
+		u32 reserved25 : 1;
+		u32 bus_lock_detected : 1;
+		u32 enclave_mode : 1;
+		u32 smi_pending_mtf : 1;
+		u32 smi_from_vmx_root : 1;
+		u32 reserved30 : 1;
+		u32 failed_vmentry : 1;
+	};
+	u32 full;
+};
+
+struct vcpu_vt {
+	/* Posted interrupt descriptor */
+	struct pi_desc pi_desc;
+
+	/* Used if this vCPU is waiting for PI notification wakeup. */
+	struct list_head pi_wakeup_list;
+
+	union vmx_exit_reason exit_reason;
+
+	unsigned long exit_qualification;
+	u32 exit_intr_info;
+
+	/*
+	 * If true, guest state has been loaded into hardware, and host state
+	 * saved into vcpu_{vt,vmx,tdx}. If false, host state is loaded into
+	 * hardware.
+	 */
+	bool guest_state_loaded;
+
+#ifdef CONFIG_X86_64
+	u64 msr_host_kernel_gs_base;
+#endif
+
+	unsigned long host_debugctlmsr;
+};
+
+#ifdef CONFIG_KVM_INTEL_TDX
+
+static __always_inline bool is_td(struct kvm *kvm)
+{
+	return kvm->arch.vm_type == KVM_X86_TDX_VM;
+}
+
+static __always_inline bool is_td_vcpu(struct kvm_vcpu *vcpu)
+{
+	return is_td(vcpu->kvm);
+}
+
+#else
+
+static inline bool is_td(struct kvm *kvm) { return false; }
+static inline bool is_td_vcpu(struct kvm_vcpu *vcpu) { return false; }
+
+#endif
+
 static inline bool vt_is_tdx_private_gpa(struct kvm *kvm, gpa_t gpa)
 {
 	/* For TDX the direct mask is the shared mask. */
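The shared struct vcpu_vt is what lets the posted-interrupt code later in this commit work on VMX and TDX vCPUs alike. Combined with the static_assert added in main.c (next file), which pins vt to the same offset in struct vcpu_vmx and struct vcpu_tdx, the accessors can be written once against vcpu_vmx. A sketch of that pattern — the series defines such helpers, though exact placement and spelling may differ:

/*
 * Sketch of the accessor pattern.  Going through vcpu_vmx here is
 * valid for TDX vCPUs only because offsetof(struct vcpu_vmx, vt) ==
 * offsetof(struct vcpu_tdx, vt), which main.c asserts at build time.
 */
static inline struct vcpu_vt *to_vt(struct kvm_vcpu *vcpu)
{
	return &(container_of(vcpu, struct vcpu_vmx, vcpu)->vt);
}

static inline struct kvm_vcpu *vt_to_vcpu(struct vcpu_vt *vt)
{
	return &(container_of(vt, struct vcpu_vmx, vt)->vcpu);
}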

arch/x86/kvm/vmx/main.c

Lines changed: 44 additions & 4 deletions
@@ -10,6 +10,10 @@
 #include "tdx.h"
 #include "tdx_arch.h"
 
+#ifdef CONFIG_KVM_INTEL_TDX
+static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt));
+#endif
+
 static void vt_disable_virtualization_cpu(void)
 {
 	/* Note, TDX *and* VMX need to be disabled if TDX is enabled. */
@@ -141,6 +145,42 @@ static void vt_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
 	vmx_update_cpu_dirty_logging(vcpu);
 }
 
+static void vt_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_prepare_switch_to_guest(vcpu);
+		return;
+	}
+
+	vmx_prepare_switch_to_guest(vcpu);
+}
+
+static void vt_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu)) {
+		tdx_vcpu_put(vcpu);
+		return;
+	}
+
+	vmx_vcpu_put(vcpu);
+}
+
+static int vt_vcpu_pre_run(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu))
+		return tdx_vcpu_pre_run(vcpu);
+
+	return vmx_vcpu_pre_run(vcpu);
+}
+
+static fastpath_t vt_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
+{
+	if (is_td_vcpu(vcpu))
+		return tdx_vcpu_run(vcpu, force_immediate_exit);
+
+	return vmx_vcpu_run(vcpu, force_immediate_exit);
+}
+
 static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
 	if (is_td_vcpu(vcpu)) {
@@ -245,9 +285,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.vcpu_free = vt_vcpu_free,
 	.vcpu_reset = vt_vcpu_reset,
 
-	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
+	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
 	.vcpu_load = vt_vcpu_load,
-	.vcpu_put = vmx_vcpu_put,
+	.vcpu_put = vt_vcpu_put,
 
 	.update_exception_bitmap = vmx_update_exception_bitmap,
 	.get_feature_msr = vmx_get_feature_msr,
@@ -281,8 +321,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.flush_tlb_gva = vt_flush_tlb_gva,
 	.flush_tlb_guest = vt_flush_tlb_guest,
 
-	.vcpu_pre_run = vmx_vcpu_pre_run,
-	.vcpu_run = vmx_vcpu_run,
+	.vcpu_pre_run = vt_vcpu_pre_run,
+	.vcpu_run = vt_vcpu_run,
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = vmx_skip_emulated_instruction,
 	.update_emulated_instruction = vmx_update_emulated_instruction,
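Each wrapper above follows the same shape: branch once on is_td_vcpu() and otherwise fall through to the VMX implementation. Purely for illustration — not part of this commit — the pattern extends naturally to further ops as TDX support grows:

/*
 * Illustrative only; this commit leaves .handle_exit pointing at
 * vmx_handle_exit.  A later change following the same dispatch pattern
 * would look like this (tdx_handle_exit is hypothetical here).
 */
static int vt_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
{
	if (is_td_vcpu(vcpu))
		return tdx_handle_exit(vcpu, exit_fastpath);

	return vmx_handle_exit(vcpu, exit_fastpath);
}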

arch/x86/kvm/vmx/nested.c

Lines changed: 5 additions & 5 deletions
@@ -275,7 +275,7 @@ static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
 {
 	struct vmcs_host_state *dest, *src;
 
-	if (unlikely(!vmx->guest_state_loaded))
+	if (unlikely(!vmx->vt.guest_state_loaded))
 		return;
 
 	src = &prev->host_state;
@@ -425,7 +425,7 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 	 * tables also changed, but KVM should not treat EPT Misconfig
 	 * VM-Exits as writes.
 	 */
-	WARN_ON_ONCE(vmx->exit_reason.basic != EXIT_REASON_EPT_VIOLATION);
+	WARN_ON_ONCE(vmx->vt.exit_reason.basic != EXIT_REASON_EPT_VIOLATION);
 
 	/*
 	 * PML Full and EPT Violation VM-Exits both use bit 12 to report
@@ -4622,7 +4622,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 {
 	/* update exit information fields: */
 	vmcs12->vm_exit_reason = vm_exit_reason;
-	if (to_vmx(vcpu)->exit_reason.enclave_mode)
+	if (vmx_get_exit_reason(vcpu).enclave_mode)
 		vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
 	vmcs12->exit_qualification = exit_qualification;
 
@@ -6126,7 +6126,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
 	 * nested VM-Exit. Pass the original exit reason, i.e. don't hardcode
 	 * EXIT_REASON_VMFUNC as the exit reason.
 	 */
-	nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
+	nested_vmx_vmexit(vcpu, vmx->vt.exit_reason.full,
 			  vmx_get_intr_info(vcpu),
 			  vmx_get_exit_qual(vcpu));
 	return 1;
@@ -6571,7 +6571,7 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
 bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	union vmx_exit_reason exit_reason = vmx->exit_reason;
+	union vmx_exit_reason exit_reason = vmx->vt.exit_reason;
 	unsigned long exit_qual;
 	u32 exit_intr_info;
arch/x86/kvm/vmx/posted_intr.c

Lines changed: 9 additions & 9 deletions
@@ -33,7 +33,7 @@ static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
-	return &(to_vmx(vcpu)->pi_desc);
+	return &(to_vt(vcpu)->pi_desc);
 }
 
 static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
@@ -53,7 +53,7 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
 void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_vt *vt = to_vt(vcpu);
 	struct pi_desc old, new;
 	unsigned long flags;
 	unsigned int dest;
@@ -90,7 +90,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	 */
 	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
 		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
-		list_del(&vmx->pi_wakeup_list);
+		list_del(&vt->pi_wakeup_list);
 		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 	}
 
@@ -146,14 +146,14 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
 static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct vcpu_vt *vt = to_vt(vcpu);
 	struct pi_desc old, new;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
-	list_add_tail(&vmx->pi_wakeup_list,
+	list_add_tail(&vt->pi_wakeup_list,
 		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
 	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 
@@ -220,13 +220,13 @@ void pi_wakeup_handler(void)
 	int cpu = smp_processor_id();
 	struct list_head *wakeup_list = &per_cpu(wakeup_vcpus_on_cpu, cpu);
 	raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, cpu);
-	struct vcpu_vmx *vmx;
+	struct vcpu_vt *vt;
 
 	raw_spin_lock(spinlock);
-	list_for_each_entry(vmx, wakeup_list, pi_wakeup_list) {
+	list_for_each_entry(vt, wakeup_list, pi_wakeup_list) {
 
-		if (pi_test_on(&vmx->pi_desc))
-			kvm_vcpu_wake_up(&vmx->vcpu);
+		if (pi_test_on(&vt->pi_desc))
+			kvm_vcpu_wake_up(vt_to_vcpu(vt));
 	}
 	raw_spin_unlock(spinlock);
 }
