Commit 1eee4ef

Merge tag 'x86_urgent_for_v6.8_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

 - Make sure clearing CPU buffers using VERW happens at the latest
   possible point in the return-to-userspace path, otherwise memory
   accesses after the VERW execution could cause data to land in CPU
   buffers again

* tag 'x86_urgent_for_v6.8_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM/VMX: Move VERW closer to VMentry for MDS mitigation
  KVM/VMX: Use BT+JNC, i.e. EFLAGS.CF to select VMRESUME vs. VMLAUNCH
  x86/bugs: Use ALTERNATIVE() instead of mds_user_clear static key
  x86/entry_32: Add VERW just before userspace transition
  x86/entry_64: Add VERW just before userspace transition
  x86/bugs: Add asm helpers for executing VERW
2 parents 8c46ed3 + 43fb862 commit 1eee4ef

13 files changed: +112, -46 lines changed

Documentation/arch/x86/mds.rst

Lines changed: 27 additions & 11 deletions

@@ -95,6 +95,9 @@ The kernel provides a function to invoke the buffer clearing:
 
       mds_clear_cpu_buffers()
 
+Also the macro CLEAR_CPU_BUFFERS can be used in ASM late in the exit-to-user path.
+Other than CFLAGS.ZF, this macro doesn't clobber any registers.
+
 The mitigation is invoked on kernel/userspace, hypervisor/guest and C-state
 (idle) transitions.
 
@@ -138,17 +141,30 @@ Mitigation points
 
    When transitioning from kernel to user space the CPU buffers are flushed
    on affected CPUs when the mitigation is not disabled on the kernel
-   command line. The mitigation is enabled through the static key
-   mds_user_clear.
-
-   The mitigation is invoked in prepare_exit_to_usermode() which covers
-   all but one of the kernel to user space transitions. The exception
-   is when we return from a Non Maskable Interrupt (NMI), which is
-   handled directly in do_nmi().
-
-   (The reason that NMI is special is that prepare_exit_to_usermode() can
-   enable IRQs. In NMI context, NMIs are blocked, and we don't want to
-   enable IRQs with NMIs blocked.)
+   command line. The mitigation is enabled through the feature flag
+   X86_FEATURE_CLEAR_CPU_BUF.
+
+   The mitigation is invoked just before transitioning to userspace after
+   user registers are restored. This is done to minimize the window in
+   which kernel data could be accessed after VERW, e.g. via an NMI after
+   VERW.
+
+   **Corner case not handled**
+   Interrupts returning to kernel don't clear CPU buffers since the
+   exit-to-user path is expected to do that anyway. But there could be
+   a case when an NMI is generated in kernel after the exit-to-user path
+   has cleared the buffers. This case is not handled and NMIs returning to
+   kernel don't clear CPU buffers because:
+
+   1. It is rare to get an NMI after VERW, but before returning to userspace.
+   2. For an unprivileged user, there is no known way to make that NMI
+      less rare or target it.
+   3. It would take a large number of these precisely-timed NMIs to mount
+      an actual attack. There's presumably not enough bandwidth.
+   4. The NMI in question occurs after a VERW, i.e. when user state is
+      restored and most interesting data is already scrubbed. What's left
+      is only the data that NMI touches, and that may or may not be of
+      any interest.
 
 
 2. C-State transition
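
For context, the mds_clear_cpu_buffers() helper named in the documentation boils down to a single VERW with a memory operand. A condensed sketch of the in-tree helper from arch/x86/include/asm/nospec-branch.h (comments abridged and paraphrased):

    #include <linux/types.h>
    #include <asm/segment.h>        /* __KERNEL_DS */

    static __always_inline void mds_clear_cpu_buffers(void)
    {
            static const u16 ds = __KERNEL_DS;

            /*
             * Has to be the memory-operand form of VERW: the register form
             * only validates the selector. "cc" is clobbered because the
             * microcode-assisted VERW updates EFLAGS.ZF.
             */
            asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
    }

The CLEAR_CPU_BUFFERS asm macro added by this series is the equivalent for the late exit-to-user path, where no C code can run.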

arch/x86/entry/entry.S

Lines changed: 23 additions & 0 deletions

@@ -6,6 +6,9 @@
 #include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
 
 .pushsection .noinstr.text, "ax"
 
@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
 EXPORT_SYMBOL_GPL(entry_ibpb);
 
 .popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensures VERW can be
+ * used late in the exit-to-user path, after the page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+        UNWIND_HINT_UNDEFINED
+        ANNOTATE_NOENDBR
+        .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
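The exported symbol is data masquerading as code: a single selector word (__KERNEL_DS) placed in .entry.text so it stays mapped after the KPTI page-table switch. As a hedged illustration (not the in-tree call site — kernel users go through the CLEAR_CPU_BUFFERS macro, and KVM references the symbol from its VMentry assembly), the exported operand could be used from C like this; verw_mds_sketch() is a hypothetical name:

    #include <linux/types.h>

    extern u16 mds_verw_sel;        /* exported above for KVM */

    /* Hypothetical helper for illustration only. */
    static __always_inline void verw_mds_sketch(void)
    {
            /* Memory-operand VERW triggers the microcode buffer clear. */
            asm volatile("verw %[sel]" : : [sel] "m" (mds_verw_sel) : "cc");
    }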

arch/x86/entry/entry_32.S

Lines changed: 3 additions & 0 deletions

@@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
         BUG_IF_WRONG_CR3 no_user_check=1
         popfl
         popl    %eax
+        CLEAR_CPU_BUFFERS
 
         /*
          * Return back to the vDSO, which will pop ecx and edx.
@@ -954,6 +955,7 @@ restore_all_switch_stack:
 
         /* Restore user state */
         RESTORE_REGS pop=4              # skip orig_eax/error_code
+        CLEAR_CPU_BUFFERS
 .Lirq_return:
         /*
          * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi)
 
         /* Not on SYSENTER stack. */
         call    exc_nmi
+        CLEAR_CPU_BUFFERS
         jmp     .Lnmi_return
 
 .Lnmi_from_sysenter_stack:

arch/x86/entry/entry_64.S

Lines changed: 11 additions & 0 deletions

@@ -161,6 +161,7 @@ syscall_return_via_sysret:
 SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
         ANNOTATE_NOENDBR
         swapgs
+        CLEAR_CPU_BUFFERS
         sysretq
 SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
         ANNOTATE_NOENDBR
@@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
 
 .Lswapgs_and_iret:
         swapgs
+        CLEAR_CPU_BUFFERS
         /* Assert that the IRET frame indicates user mode. */
         testb   $3, 8(%rsp)
         jnz     .Lnative_iret
@@ -723,6 +725,8 @@ native_irq_return_ldt:
          */
         popq    %rax                            /* Restore user RAX */
 
+        CLEAR_CPU_BUFFERS
+
         /*
          * RSP now points to an ordinary IRET frame, except that the page
          * is read-only and RSP[31:16] are preloaded with the userspace
@@ -1449,6 +1453,12 @@ nmi_restore:
         std
         movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
 
+        /*
+         * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+         * an NMI in kernel after user state is restored. For an unprivileged
+         * user these conditions are hard to meet.
+         */
+
         /*
          * iretq reads the "iret" frame and exits the NMI stack in a
          * single instruction. We are returning to kernel mode, so this
@@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
         UNWIND_HINT_END_OF_STACK
         ENDBR
         mov     $-ENOSYS, %eax
+        CLEAR_CPU_BUFFERS
         sysretl
 SYM_CODE_END(entry_SYSCALL32_ignore)
 

arch/x86/entry/entry_64_compat.S

Lines changed: 1 addition & 0 deletions

@@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
         xorl    %r9d, %r9d
         xorl    %r10d, %r10d
         swapgs
+        CLEAR_CPU_BUFFERS
         sysretl
 SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
         ANNOTATE_NOENDBR

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 1 deletion

@@ -95,7 +95,7 @@
 #define X86_FEATURE_SYSENTER32      ( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD        ( 3*32+16) /* REP microcode works well */
 #define X86_FEATURE_AMD_LBR_V2      ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF   ( 3*32+18) /* "" Clear CPU buffers using VERW */
 #define X86_FEATURE_ACC_POWER       ( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL            ( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_ALWAYS          ( 3*32+21) /* "" Always-present feature */

arch/x86/include/asm/entry-common.h

Lines changed: 0 additions & 1 deletion

@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 
 static __always_inline void arch_exit_to_user_mode(void)
 {
-        mds_user_clear_cpu_buffers();
         amd_clear_divider();
 }
 #define arch_exit_to_user_mode arch_exit_to_user_mode

arch/x86/include/asm/nospec-branch.h

Lines changed: 13 additions & 12 deletions

@@ -315,6 +315,17 @@
 #endif
 .endm
 
+/*
+ * Macro to execute the VERW instruction that mitigates transient data
+ * sampling attacks such as MDS. On affected systems a microcode update
+ * overloaded the VERW instruction to also clear the CPU buffers. VERW
+ * clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+        ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE \
@@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
 DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
 
 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
 
 DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
 
+extern u16 mds_verw_sel;
+
 #include <asm/segment.h>
 
 /**
@@ -561,17 +573,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
         asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc");
 }
 
-/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
-        if (static_branch_likely(&mds_user_clear))
-                mds_clear_cpu_buffers();
-}
-
 /**
  * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
  *
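Because CLEAR_CPU_BUFFERS expands to an ALTERNATIVE, the VERW is patched into the instruction stream at boot when X86_FEATURE_CLEAR_CPU_BUF is set and left empty otherwise, so the hot exit path carries no runtime branch. A rough C-level model of the resulting behavior (model_clear_cpu_buffers() is a hypothetical name; the real mechanism rewrites code via the alternatives framework rather than testing the flag at runtime):

    static __always_inline void model_clear_cpu_buffers(void)
    {
            /* Models the boot-time patching outcome as a runtime check. */
            if (cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF))
                    mds_clear_cpu_buffers();
    }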

arch/x86/kernel/cpu/bugs.c

Lines changed: 6 additions & 9 deletions

@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
 /* Control unconditional IBPB in switch_mm() */
 DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
 /* Control MDS CPU buffer clear before idling (halt, mwait) */
 DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
 EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
         if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
                 mds_mitigation = MDS_MITIGATION_VMWERV;
 
-        static_branch_enable(&mds_user_clear);
+        setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
 
         if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
             (mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
          * For guests that can't determine whether the correct microcode is
          * present on host, enable the mitigation for UCODE_NEEDED as well.
          */
-        static_branch_enable(&mds_user_clear);
+        setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
 
         if (taa_nosmt || cpu_mitigations_auto_nosmt())
                 cpu_smt_disable(false);
@@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void)
          */
         if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
                                               boot_cpu_has(X86_FEATURE_RTM)))
-                static_branch_enable(&mds_user_clear);
+                setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
         else
                 static_branch_enable(&mmio_stale_data_clear);
 
@@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void)
         if (cpu_mitigations_off())
                 return;
 
-        if (!static_key_enabled(&mds_user_clear))
+        if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
                 goto out;
 
         /*
-         * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
-         * mitigation, if necessary.
+         * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+         * Stale Data mitigation, if necessary.
          */
         if (mds_mitigation == MDS_MITIGATION_OFF &&
             boot_cpu_has_bug(X86_BUG_MDS)) {
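
The selection paths above now force a synthetic CPU capability instead of flipping a static key. Since mitigation selection runs before the alternatives are applied, setup_force_cpu_cap() is enough to have every CLEAR_CPU_BUFFERS site patched to a live VERW at boot. A minimal sketch of the pattern (example_select_mitigation() is a hypothetical stand-in for the mds/taa/mmio selection functions above):

    static void __init example_select_mitigation(void)
    {
            if (cpu_mitigations_off() || !boot_cpu_has_bug(X86_BUG_MDS))
                    return;

            /* Force the flag; boot-time patching then arms each VERW site. */
            setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
    }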

arch/x86/kernel/nmi.c

Lines changed: 0 additions & 3 deletions

@@ -563,9 +563,6 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
         }
         if (this_cpu_dec_return(nmi_state))
                 goto nmi_restart;
-
-        if (user_mode(regs))
-                mds_user_clear_cpu_buffers();
 }
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
