Skip to content

Commit b2926a3

Browse files
rpedgeco authored and hansendc committed
x86/shstk: Handle thread shadow stack
When a process is duplicated, but the child shares the address space with the parent, there is potential for the threads sharing a single stack to cause conflicts for each other. In the normal non-CET case this is handled in two ways.

With regular CLONE_VM, a new stack is provided by userspace such that the parent and child have different stacks.

For vfork, the parent is suspended until the child exits. So as long as the child doesn't return from the vfork()/CLONE_VFORK calling function and sticks to a limited set of operations, the parent and child can share the same stack.

For shadow stack, these scenarios present similar sharing problems. For the CLONE_VM case, the child and the parent must have separate shadow stacks. Instead of changing clone to take a shadow stack, have the kernel just allocate one and switch to it.

Use the stack_size passed from the clone3() syscall for the thread shadow stack size. A compat-mode thread shadow stack size is further reduced to 1/4. This allows more threads to run in a 32-bit address space. clone() does not pass stack_size, which was added with clone3(). In that case, use the RLIMIT_STACK size and cap it at 4 GB.

For shadow stack enabled vfork(), the parent and child can share the same shadow stack, like they can share a normal stack. Since the parent is suspended until the child terminates, the child will not interfere with the parent while executing as long as it doesn't return from the vfork() and overwrite up the shadow stack. The child can safely overwrite down the shadow stack, as the parent can just overwrite this later. So CET does not add any additional limitations for vfork().

Free the shadow stack on thread exit by doing it in mm_release(). Skip this when exiting a vfork() child since the stack is shared with the parent.

During the clone operation, the shadow stack pointer of the new thread needs to be updated to point to the newly allocated shadow stack.
Since the ability to do this is confined to the FPU subsystem, change fpu_clone() to take the new shadow stack pointer, and update it internally inside the FPU subsystem. This part was suggested by Thomas Gleixner.

Co-developed-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Yu-cheng Yu <yu-cheng.yu@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/all/20230613001108.3040476-30-rick.p.edgecombe%40intel.com
1 parent 2d39a6a commit b2926a3

File tree

6 files changed

+103
-5
lines changed

6 files changed

+103
-5
lines changed

arch/x86/include/asm/fpu/sched.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111

1212
extern void save_fpregs_to_fpstate(struct fpu *fpu);
1313
extern void fpu__drop(struct fpu *fpu);
14-
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal);
14+
extern int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
15+
unsigned long shstk_addr);
1516
extern void fpu_flush_thread(void);
1617

1718
/*

arch/x86/include/asm/mmu_context.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,8 @@ do { \
186186
#else
187187
#define deactivate_mm(tsk, mm) \
188188
do { \
189+
if (!tsk->vfork_done) \
190+
shstk_free(tsk); \
189191
load_gs_index(0); \
190192
loadsegment(fs, 0); \
191193
} while (0)

arch/x86/include/asm/shstk.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,16 @@ struct thread_shstk {
1515

1616
long shstk_prctl(struct task_struct *task, int option, unsigned long features);
1717
void reset_thread_features(void);
18+
unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clone_flags,
19+
unsigned long stack_size);
1820
void shstk_free(struct task_struct *p);
1921
#else
2022
static inline long shstk_prctl(struct task_struct *task, int option,
2123
unsigned long arg2) { return -EINVAL; }
2224
static inline void reset_thread_features(void) {}
25+
static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
26+
unsigned long clone_flags,
27+
unsigned long stack_size) { return 0; }
2328
static inline void shstk_free(struct task_struct *p) {}
2429
#endif /* CONFIG_X86_USER_SHADOW_STACK */
2530

arch/x86/kernel/fpu/core.c

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -552,8 +552,36 @@ static inline void fpu_inherit_perms(struct fpu *dst_fpu)
552552
}
553553
}
554554

555+
/*
* Store @ssp as the user shadow stack pointer in the CET user state of
* @dst's fpstate xsave buffer.
*
* A passed ssp of zero will not cause any update. When
* CONFIG_X86_USER_SHADOW_STACK is not set, the function does nothing.
*
* Returns 0 on success (including the no-op cases above) and 1 if the
* CET user state could not be located in @dst's xsave buffer.
*/
556+
static int update_fpu_shstk(struct task_struct *dst, unsigned long ssp)
557+
{
558+
#ifdef CONFIG_X86_USER_SHADOW_STACK
559+
struct cet_user_state *xstate;
560+
561+
/* A zero ssp means no update is needed, so leave the buffer alone. */
562+
if (!ssp)
563+
return 0;
564+
565+
xstate = get_xsave_addr(&dst->thread.fpu.fpstate->regs.xsave,
566+
XFEATURE_CET_USER);
567+
568+
/*
569+
* If there is a non-zero ssp, then 'dst' must be configured with a shadow
570+
* stack and the fpu state should be up to date since it was just copied
571+
* from the parent in fpu_clone(). So there must be a valid non-init CET
572+
* state location in the buffer.
573+
*/
574+
if (WARN_ON_ONCE(!xstate))
575+
return 1;
576+
577+
/* Record the new shadow stack pointer in dst's saved CET user state. */
xstate->user_ssp = (u64)ssp;
578+
#endif
579+
return 0;
580+
}
581+
555582
/* Clone current's FPU state on fork */
556-
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
583+
int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
584+
unsigned long ssp)
557585
{
558586
struct fpu *src_fpu = &current->thread.fpu;
559587
struct fpu *dst_fpu = &dst->thread.fpu;
@@ -613,6 +641,12 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal)
613641
if (use_xsave())
614642
dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;
615643

644+
/*
645+
* Update shadow stack pointer, in case it changed during clone.
646+
*/
647+
if (update_fpu_shstk(dst, ssp))
648+
return 1;
649+
616650
trace_x86_fpu_copy_src(src_fpu);
617651
trace_x86_fpu_copy_dst(dst_fpu);
618652

arch/x86/kernel/process.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include <asm/unwind.h>
5151
#include <asm/tdx.h>
5252
#include <asm/mmu_context.h>
53+
#include <asm/shstk.h>
5354

5455
#include "process.h"
5556

@@ -121,6 +122,7 @@ void exit_thread(struct task_struct *tsk)
121122

122123
free_vm86(t);
123124

125+
shstk_free(tsk);
124126
fpu__drop(fpu);
125127
}
126128

@@ -142,6 +144,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
142144
struct inactive_task_frame *frame;
143145
struct fork_frame *fork_frame;
144146
struct pt_regs *childregs;
147+
unsigned long new_ssp;
145148
int ret = 0;
146149

147150
childregs = task_pt_regs(p);
@@ -179,7 +182,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
179182
frame->flags = X86_EFLAGS_FIXED;
180183
#endif
181184

182-
fpu_clone(p, clone_flags, args->fn);
185+
/*
186+
* Allocate a new shadow stack for the thread if needed. If shadow stack
187+
* is disabled, new_ssp will remain 0, and fpu_clone() will know not to
188+
* update it.
189+
*/
190+
new_ssp = shstk_alloc_thread_stack(p, clone_flags, args->stack_size);
191+
if (IS_ERR_VALUE(new_ssp))
192+
return PTR_ERR((void *)new_ssp);
193+
194+
fpu_clone(p, clone_flags, args->fn, new_ssp);
183195

184196
/* Kernel thread ? */
185197
if (unlikely(p->flags & PF_KTHREAD)) {
@@ -225,6 +237,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
225237
if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
226238
io_bitmap_share(p);
227239

240+
/*
241+
* If copy_thread() is failing, don't leak the shadow stack possibly
242+
* allocated in shstk_alloc_thread_stack() above.
243+
*/
244+
if (ret)
245+
shstk_free(p);
246+
228247
return ret;
229248
}
230249

arch/x86/kernel/shstk.c

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ static unsigned long alloc_shstk(unsigned long size)
4747
unsigned long addr, unused;
4848

4949
mmap_write_lock(mm);
50-
addr = do_mmap(NULL, addr, size, PROT_READ, flags,
50+
addr = do_mmap(NULL, 0, size, PROT_READ, flags,
5151
VM_SHADOW_STACK | VM_WRITE, 0, &unused, NULL);
5252

5353
mmap_write_unlock(mm);
@@ -126,6 +126,37 @@ void reset_thread_features(void)
126126
current->thread.features_locked = 0;
127127
}
128128

129+
/*
* Allocate a separate shadow stack for a newly cloned task when one is
* required.
*
* @tsk:         the new task; its thread.shstk base/size are filled in
*               when a stack is allocated
* @clone_flags: clone flags; only CLONE_VM and CLONE_VFORK are examined
* @stack_size:  requested size, passed through adjust_shstk_size()
*
* Returns 0 when no new shadow stack is set up (shadow stack is not
* enabled, or the flags are anything other than CLONE_VM without
* CLONE_VFORK). Returns the error value from alloc_shstk() when the
* allocation fails (test with IS_ERR_VALUE()). Otherwise returns
* base + size of the new region, which the caller passes on as the new
* shadow stack pointer.
*
* NOTE(review): the early-return paths leave tsk->thread.shstk untouched.
* Confirm that a later shstk_free(tsk) on a failed CLONE_VFORK clone
* cannot unmap a shadow stack the child merely shares with its parent.
*/
unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, unsigned long clone_flags,
130+
unsigned long stack_size)
131+
{
132+
struct thread_shstk *shstk = &tsk->thread.shstk;
133+
unsigned long addr, size;
134+
135+
/*
136+
* If shadow stack is not enabled on the new thread, skip any
137+
* switch to a new shadow stack.
138+
*/
139+
if (!features_enabled(ARCH_SHSTK_SHSTK))
140+
return 0;
141+
142+
/*
143+
* For CLONE_VM, except vfork, the child needs a separate shadow
144+
* stack.
145+
*/
146+
if ((clone_flags & (CLONE_VFORK | CLONE_VM)) != CLONE_VM)
147+
return 0;
148+
149+
size = adjust_shstk_size(stack_size);
150+
addr = alloc_shstk(size);
151+
/* Hand the allocation error back unchanged; nothing was recorded yet. */
if (IS_ERR_VALUE(addr))
152+
return addr;
153+
154+
/* Remember the region so it can be unmapped later by shstk_free(). */
shstk->base = addr;
155+
shstk->size = size;
156+
157+
/* The returned value is the end of the new region (base + size). */
return addr + size;
158+
}
159+
129160
void shstk_free(struct task_struct *tsk)
130161
{
131162
struct thread_shstk *shstk = &tsk->thread.shstk;
@@ -134,7 +165,13 @@ void shstk_free(struct task_struct *tsk)
134165
!features_enabled(ARCH_SHSTK_SHSTK))
135166
return;
136167

137-
if (!tsk->mm)
168+
/*
169+
* When fork() with CLONE_VM fails, the child (tsk) already has a
170+
* shadow stack allocated, and exit_thread() calls this function to
171+
* free it. In this case the parent (current) and the child share
172+
* the same mm struct.
173+
*/
174+
if (!tsk->mm || tsk->mm != current->mm)
138175
return;
139176

140177
unmap_shadow_stack(shstk->base, shstk->size);

0 commit comments

Comments
 (0)