Skip to content

Commit 24005d1

Browse files
Merge patch series "riscv: SCS support"
Sami Tolvanen <samitolvanen@google.com> says: This series adds Shadow Call Stack (SCS) support for RISC-V. SCS uses compiler instrumentation to store return addresses in a separate shadow stack to protect them against accidental or malicious overwrites. More information about SCS can be found here: https://clang.llvm.org/docs/ShadowCallStack.html Patch 1 is from Deepak, and it simplifies VMAP_STACK overflow handling by adding support for accessing per-CPU variables directly in assembly. The patch is included in this series to make IRQ stack switching cleaner with SCS, and I've simply rebased it and fixed a couple of minor issues. Patch 2 uses this functionality to clean up the stack switching by moving duplicate code into a single function. On RISC-V, the compiler uses the gp register for storing the current shadow call stack pointer, which is incompatible with global pointer relaxation. Patch 3 moves global pointer loading into a macro that can be easily disabled with SCS. Patch 4 implements SCS register loading and switching, and allows the feature to be enabled, and patch 5 adds separate per-CPU IRQ shadow call stacks when CONFIG_IRQ_STACKS is enabled. Patch 6 fixes the backward-edge CFI test in lkdtm for RISC-V. Note that this series requires Clang 17. Earlier Clang versions support SCS on RISC-V, but use the x18 register instead of gp, which isn't ideal. gcc has SCS support for arm64, but I'm not aware of plans to support RISC-V. Once the Zicfiss extension is ratified, it's probably preferable to use hardware-backed shadow stacks instead of SCS on hardware that supports the extension, and we may want to consider implementing CONFIG_DYNAMIC_SCS to patch between the implementation at runtime (similarly to the arm64 implementation, which switches to SCS when hardware PAC support isn't available). 
* b4-shazam-merge: lkdtm: Fix CFI_BACKWARD on RISC-V riscv: Use separate IRQ shadow call stacks riscv: Implement Shadow Call Stack riscv: Move global pointer loading to a macro riscv: Deduplicate IRQ stack switching riscv: VMAP_STACK overflow detection thread-safe Link: https://lore.kernel.org/r/20230927224757.1154247-8-samitolvanen@google.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2 parents 4630d6d + 245561b commit 24005d1

File tree

16 files changed

+248
-177
lines changed

16 files changed

+248
-177
lines changed

arch/riscv/Kconfig

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ config RISCV
4949
select ARCH_SUPPORTS_HUGETLBFS if MMU
5050
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
5151
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
52+
select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
5253
select ARCH_USE_MEMTEST
5354
select ARCH_USE_QUEUED_RWLOCKS
5455
select ARCH_USES_CFI_TRAPS if CFI_CLANG
@@ -175,6 +176,11 @@ config GCC_SUPPORTS_DYNAMIC_FTRACE
175176
def_bool CC_IS_GCC
176177
depends on $(cc-option,-fpatchable-function-entry=8)
177178

179+
config HAVE_SHADOW_CALL_STACK
180+
def_bool $(cc-option,-fsanitize=shadow-call-stack)
181+
# https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
182+
depends on $(ld-option,--no-relax-gp)
183+
178184
config ARCH_MMAP_RND_BITS_MIN
179185
default 18 if 64BIT
180186
default 8

arch/riscv/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ endif
5555
endif
5656
endif
5757

58+
ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
59+
KBUILD_LDFLAGS += --no-relax-gp
60+
endif
61+
5862
# ISA string setting
5963
riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
6064
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima

arch/riscv/include/asm/asm-prototypes.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
2525
DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
2626
DECLARE_DO_ERROR_INFO(do_trap_break);
2727

28-
asmlinkage unsigned long get_overflow_stack(void);
2928
asmlinkage void handle_bad_stack(struct pt_regs *regs);
3029
asmlinkage void do_page_fault(struct pt_regs *regs);
3130
asmlinkage void do_irq(struct pt_regs *regs);

arch/riscv/include/asm/asm.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,47 @@
8282
.endr
8383
.endm
8484

85+
#ifdef CONFIG_SMP
86+
#ifdef CONFIG_32BIT
87+
#define PER_CPU_OFFSET_SHIFT 2
88+
#else
89+
#define PER_CPU_OFFSET_SHIFT 3
90+
#endif
91+
92+
.macro asm_per_cpu dst sym tmp
93+
REG_L \tmp, TASK_TI_CPU_NUM(tp)
94+
slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT
95+
la \dst, __per_cpu_offset
96+
add \dst, \dst, \tmp
97+
REG_L \tmp, 0(\dst)
98+
la \dst, \sym
99+
add \dst, \dst, \tmp
100+
.endm
101+
#else /* CONFIG_SMP */
102+
.macro asm_per_cpu dst sym tmp
103+
la \dst, \sym
104+
.endm
105+
#endif /* CONFIG_SMP */
106+
107+
.macro load_per_cpu dst ptr tmp
108+
asm_per_cpu \dst \ptr \tmp
109+
REG_L \dst, 0(\dst)
110+
.endm
111+
112+
#ifdef CONFIG_SHADOW_CALL_STACK
113+
/* gp is used as the shadow call stack pointer instead */
114+
.macro load_global_pointer
115+
.endm
116+
#else
117+
/* load __global_pointer to gp */
118+
.macro load_global_pointer
119+
.option push
120+
.option norelax
121+
la gp, __global_pointer$
122+
.option pop
123+
.endm
124+
#endif /* CONFIG_SHADOW_CALL_STACK */
125+
85126
/* save all GPs except x1 ~ x5 */
86127
.macro save_from_x6_to_x31
87128
REG_S x6, PT_T1(sp)

arch/riscv/include/asm/irq_stack.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212

1313
DECLARE_PER_CPU(ulong *, irq_stack_ptr);
1414

15+
asmlinkage void call_on_irq_stack(struct pt_regs *regs,
16+
void (*func)(struct pt_regs *));
17+
1518
#ifdef CONFIG_VMAP_STACK
1619
/*
1720
* To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd

arch/riscv/include/asm/scs.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _ASM_SCS_H
3+
#define _ASM_SCS_H
4+
5+
#ifdef __ASSEMBLY__
6+
#include <asm/asm-offsets.h>
7+
8+
#ifdef CONFIG_SHADOW_CALL_STACK
9+
10+
/* Load init_shadow_call_stack to gp. */
11+
.macro scs_load_init_stack
12+
la gp, init_shadow_call_stack
13+
XIP_FIXUP_OFFSET gp
14+
.endm
15+
16+
/* Load the per-CPU IRQ shadow call stack to gp. */
17+
.macro scs_load_irq_stack tmp
18+
load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp
19+
.endm
20+
21+
/* Load task_scs_sp(current) to gp. */
22+
.macro scs_load_current
23+
REG_L gp, TASK_TI_SCS_SP(tp)
24+
.endm
25+
26+
/* Load task_scs_sp(current) to gp, but only if tp has changed. */
27+
.macro scs_load_current_if_task_changed prev
28+
beq \prev, tp, _skip_scs
29+
scs_load_current
30+
_skip_scs:
31+
.endm
32+
33+
/* Save gp to task_scs_sp(current). */
34+
.macro scs_save_current
35+
REG_S gp, TASK_TI_SCS_SP(tp)
36+
.endm
37+
38+
#else /* CONFIG_SHADOW_CALL_STACK */
39+
40+
.macro scs_load_init_stack
41+
.endm
42+
.macro scs_load_irq_stack tmp
43+
.endm
44+
.macro scs_load_current
45+
.endm
46+
.macro scs_load_current_if_task_changed prev
47+
.endm
48+
.macro scs_save_current
49+
.endm
50+
51+
#endif /* CONFIG_SHADOW_CALL_STACK */
52+
#endif /* __ASSEMBLY__ */
53+
54+
#endif /* _ASM_SCS_H */

arch/riscv/include/asm/thread_info.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@
3434

3535
#ifndef __ASSEMBLY__
3636

37-
extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
38-
extern unsigned long spin_shadow_stack;
39-
4037
#include <asm/processor.h>
4138
#include <asm/csr.h>
4239

@@ -60,8 +57,20 @@ struct thread_info {
6057
long user_sp; /* User stack pointer */
6158
int cpu;
6259
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
60+
#ifdef CONFIG_SHADOW_CALL_STACK
61+
void *scs_base;
62+
void *scs_sp;
63+
#endif
6364
};
6465

66+
#ifdef CONFIG_SHADOW_CALL_STACK
67+
#define INIT_SCS \
68+
.scs_base = init_shadow_call_stack, \
69+
.scs_sp = init_shadow_call_stack,
70+
#else
71+
#define INIT_SCS
72+
#endif
73+
6574
/*
6675
* macros/functions for gaining access to the thread information structure
6776
*
@@ -71,6 +80,7 @@ struct thread_info {
7180
{ \
7281
.flags = 0, \
7382
.preempt_count = INIT_PREEMPT_COUNT, \
83+
INIT_SCS \
7484
}
7585

7686
void arch_release_task_struct(struct task_struct *tsk);

arch/riscv/kernel/asm-offsets.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <asm/thread_info.h>
1515
#include <asm/ptrace.h>
1616
#include <asm/cpu_ops_sbi.h>
17+
#include <asm/stacktrace.h>
1718
#include <asm/suspend.h>
1819

1920
void asm_offsets(void);
@@ -38,7 +39,11 @@ void asm_offsets(void)
3839
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
3940
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
4041
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
42+
#ifdef CONFIG_SHADOW_CALL_STACK
43+
OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
44+
#endif
4145

46+
OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
4247
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
4348
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
4449
OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]);
@@ -479,4 +484,8 @@ void asm_offsets(void)
479484
OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
480485
OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
481486
OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
487+
488+
DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
489+
OFFSET(STACKFRAME_FP, stackframe, fp);
490+
OFFSET(STACKFRAME_RA, stackframe, ra);
482491
}

arch/riscv/kernel/entry.S

Lines changed: 60 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,13 @@
99

1010
#include <asm/asm.h>
1111
#include <asm/csr.h>
12+
#include <asm/scs.h>
1213
#include <asm/unistd.h>
14+
#include <asm/page.h>
1315
#include <asm/thread_info.h>
1416
#include <asm/asm-offsets.h>
1517
#include <asm/errata_list.h>
18+
#include <linux/sizes.h>
1619

1720
.section .irqentry.text, "ax"
1821

@@ -75,10 +78,11 @@ _save_context:
7578
csrw CSR_SCRATCH, x0
7679

7780
/* Load the global pointer */
78-
.option push
79-
.option norelax
80-
la gp, __global_pointer$
81-
.option pop
81+
load_global_pointer
82+
83+
/* Load the kernel shadow call stack pointer if coming from userspace */
84+
scs_load_current_if_task_changed s5
85+
8286
move a0, sp /* pt_regs */
8387
la ra, ret_from_exception
8488

@@ -125,6 +129,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
125129
addi s0, sp, PT_SIZE_ON_STACK
126130
REG_S s0, TASK_TI_KERNEL_SP(tp)
127131

132+
/* Save the kernel shadow call stack pointer */
133+
scs_save_current
134+
128135
/*
129136
* Save TP into the scratch register , so we can find the kernel data
130137
* structures again.
@@ -172,67 +179,15 @@ SYM_CODE_END(ret_from_exception)
172179

173180
#ifdef CONFIG_VMAP_STACK
174181
SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
175-
/*
176-
* Takes the pseudo-spinlock for the shadow stack, in case multiple
177-
* harts are concurrently overflowing their kernel stacks. We could
178-
* store any value here, but since we're overflowing the kernel stack
179-
* already we only have SP to use as a scratch register. So we just
180-
* swap in the address of the spinlock, as that's definitely non-zero.
181-
*
182-
* Pairs with a store_release in handle_bad_stack().
183-
*/
184-
1: la sp, spin_shadow_stack
185-
REG_AMOSWAP_AQ sp, sp, (sp)
186-
bnez sp, 1b
187-
188-
la sp, shadow_stack
189-
addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
182+
/* we reach here from kernel context, sscratch must be 0 */
183+
csrrw x31, CSR_SCRATCH, x31
184+
asm_per_cpu sp, overflow_stack, x31
185+
li x31, OVERFLOW_STACK_SIZE
186+
add sp, sp, x31
187+
/* zero out x31 again and restore x31 */
188+
xor x31, x31, x31
189+
csrrw x31, CSR_SCRATCH, x31
190190

191-
//save caller register to shadow stack
192-
addi sp, sp, -(PT_SIZE_ON_STACK)
193-
REG_S x1, PT_RA(sp)
194-
REG_S x5, PT_T0(sp)
195-
REG_S x6, PT_T1(sp)
196-
REG_S x7, PT_T2(sp)
197-
REG_S x10, PT_A0(sp)
198-
REG_S x11, PT_A1(sp)
199-
REG_S x12, PT_A2(sp)
200-
REG_S x13, PT_A3(sp)
201-
REG_S x14, PT_A4(sp)
202-
REG_S x15, PT_A5(sp)
203-
REG_S x16, PT_A6(sp)
204-
REG_S x17, PT_A7(sp)
205-
REG_S x28, PT_T3(sp)
206-
REG_S x29, PT_T4(sp)
207-
REG_S x30, PT_T5(sp)
208-
REG_S x31, PT_T6(sp)
209-
210-
la ra, restore_caller_reg
211-
tail get_overflow_stack
212-
213-
restore_caller_reg:
214-
//save per-cpu overflow stack
215-
REG_S a0, -8(sp)
216-
//restore caller register from shadow_stack
217-
REG_L x1, PT_RA(sp)
218-
REG_L x5, PT_T0(sp)
219-
REG_L x6, PT_T1(sp)
220-
REG_L x7, PT_T2(sp)
221-
REG_L x10, PT_A0(sp)
222-
REG_L x11, PT_A1(sp)
223-
REG_L x12, PT_A2(sp)
224-
REG_L x13, PT_A3(sp)
225-
REG_L x14, PT_A4(sp)
226-
REG_L x15, PT_A5(sp)
227-
REG_L x16, PT_A6(sp)
228-
REG_L x17, PT_A7(sp)
229-
REG_L x28, PT_T3(sp)
230-
REG_L x29, PT_T4(sp)
231-
REG_L x30, PT_T5(sp)
232-
REG_L x31, PT_T6(sp)
233-
234-
//load per-cpu overflow stack
235-
REG_L sp, -8(sp)
236191
addi sp, sp, -(PT_SIZE_ON_STACK)
237192

238193
//save context to overflow stack
@@ -270,6 +225,43 @@ SYM_CODE_START(ret_from_fork)
270225
tail syscall_exit_to_user_mode
271226
SYM_CODE_END(ret_from_fork)
272227

228+
#ifdef CONFIG_IRQ_STACKS
229+
/*
230+
* void call_on_irq_stack(struct pt_regs *regs,
231+
* void (*func)(struct pt_regs *));
232+
*
233+
* Calls func(regs) using the per-CPU IRQ stack.
234+
*/
235+
SYM_FUNC_START(call_on_irq_stack)
236+
/* Create a frame record to save ra and s0 (fp) */
237+
addi sp, sp, -STACKFRAME_SIZE_ON_STACK
238+
REG_S ra, STACKFRAME_RA(sp)
239+
REG_S s0, STACKFRAME_FP(sp)
240+
addi s0, sp, STACKFRAME_SIZE_ON_STACK
241+
242+
/* Switch to the per-CPU shadow call stack */
243+
scs_save_current
244+
scs_load_irq_stack t0
245+
246+
/* Switch to the per-CPU IRQ stack and call the handler */
247+
load_per_cpu t0, irq_stack_ptr, t1
248+
li t1, IRQ_STACK_SIZE
249+
add sp, t0, t1
250+
jalr a1
251+
252+
/* Switch back to the thread shadow call stack */
253+
scs_load_current
254+
255+
/* Switch back to the thread stack and restore ra and s0 */
256+
addi sp, s0, -STACKFRAME_SIZE_ON_STACK
257+
REG_L ra, STACKFRAME_RA(sp)
258+
REG_L s0, STACKFRAME_FP(sp)
259+
addi sp, sp, STACKFRAME_SIZE_ON_STACK
260+
261+
ret
262+
SYM_FUNC_END(call_on_irq_stack)
263+
#endif /* CONFIG_IRQ_STACKS */
264+
273265
/*
274266
* Integer register context switch
275267
* The callee-saved registers must be saved and restored.
@@ -299,6 +291,8 @@ SYM_FUNC_START(__switch_to)
299291
REG_S s9, TASK_THREAD_S9_RA(a3)
300292
REG_S s10, TASK_THREAD_S10_RA(a3)
301293
REG_S s11, TASK_THREAD_S11_RA(a3)
294+
/* Save the kernel shadow call stack pointer */
295+
scs_save_current
302296
/* Restore context from next->thread */
303297
REG_L ra, TASK_THREAD_RA_RA(a4)
304298
REG_L sp, TASK_THREAD_SP_RA(a4)
@@ -316,6 +310,8 @@ SYM_FUNC_START(__switch_to)
316310
REG_L s11, TASK_THREAD_S11_RA(a4)
317311
/* The offset of thread_info in task_struct is zero. */
318312
move tp, a1
313+
/* Switch to the next shadow call stack */
314+
scs_load_current
319315
ret
320316
SYM_FUNC_END(__switch_to)
321317

0 commit comments

Comments
 (0)