Skip to content

Commit 6cb0945

Browse files
committed
Merge tag 'x86_tdx_for_6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 TDX updates from Dave Hansen: "Avoid direct HLT instruction execution in TDX guests. TDX guests aren't expected to use the HLT instruction directly. It causes a virtualization exception (#VE). While the #VE _can_ be handled, the current handling is slow and buggy and the easiest thing is just to avoid HLT in the first place. Plus, the kernel already has paravirt infrastructure that makes it relatively painless. Make TDX guests require paravirt and add some TDX-specific paravirt handlers which avoid HLT in the normal halt routines. Also add a warning in case another HLT sneaks in. There was a report that this leads to a "major performance improvement" on specjbb2015, probably because of the extra #VE overhead or missed wakeups from the buggy HLT handling" * tag 'x86_tdx_for_6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/tdx: Emit warning if IRQs are enabled during HLT #VE handling x86/tdx: Fix arch_safe_halt() execution for TDX VMs x86/paravirt: Move halt paravirt calls under CONFIG_PARAVIRT
2 parents 92b71be + e8f4592 commit 6cb0945

File tree

8 files changed

+78
-40
lines changed

8 files changed

+78
-40
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,7 @@ config INTEL_TDX_GUEST
889889
depends on X86_64 && CPU_SUP_INTEL
890890
depends on X86_X2APIC
891891
depends on EFI_STUB
892+
depends on PARAVIRT
892893
select ARCH_HAS_CC_PLATFORM
893894
select X86_MEM_ENCRYPT
894895
select X86_MCE

arch/x86/coco/tdx/tdx.c

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <asm/ia32.h>
1515
#include <asm/insn.h>
1616
#include <asm/insn-eval.h>
17+
#include <asm/paravirt_types.h>
1718
#include <asm/pgtable.h>
1819
#include <asm/set_memory.h>
1920
#include <asm/traps.h>
@@ -392,13 +393,21 @@ static int handle_halt(struct ve_info *ve)
392393
{
393394
const bool irq_disabled = irqs_disabled();
394395

396+
/*
397+
* HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a
398+
* wake event may be consumed before requesting HLT emulation, leaving
399+
* the vCPU blocking indefinitely.
400+
*/
401+
if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled"))
402+
return -EIO;
403+
395404
if (__halt(irq_disabled))
396405
return -EIO;
397406

398407
return ve_instr_len(ve);
399408
}
400409

401-
void __cpuidle tdx_safe_halt(void)
410+
void __cpuidle tdx_halt(void)
402411
{
403412
const bool irq_disabled = false;
404413

@@ -409,6 +418,16 @@ void __cpuidle tdx_safe_halt(void)
409418
WARN_ONCE(1, "HLT instruction emulation failed\n");
410419
}
411420

421+
static void __cpuidle tdx_safe_halt(void)
422+
{
423+
tdx_halt();
424+
/*
425+
* "__cpuidle" section doesn't support instrumentation, so stick
426+
* with raw_* variant that avoids tracing hooks.
427+
*/
428+
raw_local_irq_enable();
429+
}
430+
412431
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
413432
{
414433
struct tdx_module_args args = {
@@ -1109,6 +1128,19 @@ void __init tdx_early_init(void)
11091128
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
11101129
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
11111130

1131+
/*
1132+
* Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
1133+
* will enable interrupts before HLT TDCALL invocation if executed
1134+
* in STI-shadow, possibly resulting in missed wakeup events.
1135+
*
1136+
* Modify all possible HLT execution paths to use TDX specific routines
1137+
* that directly execute TDCALL and toggle the interrupt state as
1138+
* needed after TDCALL completion. This also reduces HLT related #VEs
1139+
* in addition to having a reliable halt logic execution.
1140+
*/
1141+
pv_ops.irq.safe_halt = tdx_safe_halt;
1142+
pv_ops.irq.halt = tdx_halt;
1143+
11121144
/*
11131145
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
11141146
* bringup low level code. That raises #VE which cannot be handled

arch/x86/include/asm/irqflags.h

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,28 @@ static __always_inline void native_local_irq_restore(unsigned long flags)
7676

7777
#endif
7878

79+
#ifndef CONFIG_PARAVIRT
80+
#ifndef __ASSEMBLY__
81+
/*
82+
* Used in the idle loop; sti takes one instruction cycle
83+
* to complete:
84+
*/
85+
static __always_inline void arch_safe_halt(void)
86+
{
87+
native_safe_halt();
88+
}
89+
90+
/*
91+
* Used when interrupts are already enabled or to
92+
* shutdown the processor:
93+
*/
94+
static __always_inline void halt(void)
95+
{
96+
native_halt();
97+
}
98+
#endif /* __ASSEMBLY__ */
99+
#endif /* CONFIG_PARAVIRT */
100+
79101
#ifdef CONFIG_PARAVIRT_XXL
80102
#include <asm/paravirt.h>
81103
#else
@@ -97,24 +119,6 @@ static __always_inline void arch_local_irq_enable(void)
97119
native_irq_enable();
98120
}
99121

100-
/*
101-
* Used in the idle loop; sti takes one instruction cycle
102-
* to complete:
103-
*/
104-
static __always_inline void arch_safe_halt(void)
105-
{
106-
native_safe_halt();
107-
}
108-
109-
/*
110-
* Used when interrupts are already enabled or to
111-
* shutdown the processor:
112-
*/
113-
static __always_inline void halt(void)
114-
{
115-
native_halt();
116-
}
117-
118122
/*
119123
* For spinlocks, etc:
120124
*/

arch/x86/include/asm/paravirt.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn,
102102
PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
103103
}
104104

105+
static __always_inline void arch_safe_halt(void)
106+
{
107+
PVOP_VCALL0(irq.safe_halt);
108+
}
109+
110+
static inline void halt(void)
111+
{
112+
PVOP_VCALL0(irq.halt);
113+
}
114+
105115
#ifdef CONFIG_PARAVIRT_XXL
106116
static inline void load_sp0(unsigned long sp0)
107117
{
@@ -165,16 +175,6 @@ static inline void __write_cr4(unsigned long x)
165175
PVOP_VCALL1(cpu.write_cr4, x);
166176
}
167177

168-
static __always_inline void arch_safe_halt(void)
169-
{
170-
PVOP_VCALL0(irq.safe_halt);
171-
}
172-
173-
static inline void halt(void)
174-
{
175-
PVOP_VCALL0(irq.halt);
176-
}
177-
178178
static inline u64 paravirt_read_msr(unsigned msr)
179179
{
180180
return PVOP_CALL1(u64, cpu.read_msr, msr);

arch/x86/include/asm/paravirt_types.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,9 @@ struct pv_irq_ops {
120120
struct paravirt_callee_save save_fl;
121121
struct paravirt_callee_save irq_disable;
122122
struct paravirt_callee_save irq_enable;
123-
123+
#endif
124124
void (*safe_halt)(void);
125125
void (*halt)(void);
126-
#endif
127126
} __no_randomize_layout;
128127

129128
struct pv_mmu_ops {

arch/x86/include/asm/tdx.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
5858

5959
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
6060

61-
void tdx_safe_halt(void);
61+
void tdx_halt(void);
6262

6363
bool tdx_early_handle_ve(struct pt_regs *regs);
6464

@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
7272
#else
7373

7474
static inline void tdx_early_init(void) { };
75-
static inline void tdx_safe_halt(void) { };
75+
static inline void tdx_halt(void) { };
7676

7777
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
7878

arch/x86/kernel/paravirt.c

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ void paravirt_set_sched_clock(u64 (*func)(void))
7575
static_call_update(pv_sched_clock, func);
7676
}
7777

78+
static noinstr void pv_native_safe_halt(void)
79+
{
80+
native_safe_halt();
81+
}
82+
7883
#ifdef CONFIG_PARAVIRT_XXL
7984
static noinstr void pv_native_write_cr2(unsigned long val)
8085
{
@@ -100,11 +105,6 @@ static noinstr void pv_native_set_debugreg(int regno, unsigned long val)
100105
{
101106
native_set_debugreg(regno, val);
102107
}
103-
104-
static noinstr void pv_native_safe_halt(void)
105-
{
106-
native_safe_halt();
107-
}
108108
#endif
109109

110110
struct pv_info pv_info = {
@@ -161,9 +161,11 @@ struct paravirt_patch_template pv_ops = {
161161
.irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl),
162162
.irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable),
163163
.irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable),
164+
#endif /* CONFIG_PARAVIRT_XXL */
165+
166+
/* Irq HLT ops. */
164167
.irq.safe_halt = pv_native_safe_halt,
165168
.irq.halt = native_halt,
166-
#endif /* CONFIG_PARAVIRT_XXL */
167169

168170
/* Mmu ops. */
169171
.mmu.flush_tlb_user = native_flush_tlb_local,

arch/x86/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,7 @@ void __init select_idle_routine(void)
939939
static_call_update(x86_idle, mwait_idle);
940940
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
941941
pr_info("using TDX aware idle routine\n");
942-
static_call_update(x86_idle, tdx_safe_halt);
942+
static_call_update(x86_idle, tdx_halt);
943943
} else {
944944
static_call_update(x86_idle, default_idle);
945945
}

0 commit comments

Comments
 (0)