Skip to content

Commit cdc2022

Browse files
committed
Merge tag 'core-entry-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull generic syscall updates from Ingo Molnar:
 "Move various entry functions from kernel/entry/common.c to a header
  file, and always-inline them, to improve syscall entry performance on
  s390 by ~11%"

* tag 'core-entry-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  entry: Move syscall_enter_from_user_mode() to header file
  entry: Move enter_from_user_mode() to header file
  entry: Move exit to usermode functions to header file
2 parents ab9517f + 221a164 commit cdc2022

File tree

2 files changed

+103
-100
lines changed

2 files changed

+103
-100
lines changed

include/linux/entry-common.h

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
#include <linux/syscalls.h>
88
#include <linux/seccomp.h>
99
#include <linux/sched.h>
10+
#include <linux/context_tracking.h>
11+
#include <linux/livepatch.h>
12+
#include <linux/resume_user_mode.h>
13+
#include <linux/tick.h>
14+
#include <linux/kmsan.h>
1015

1116
#include <asm/entry-common.h>
1217

@@ -98,7 +103,19 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {}
98103
* done between establishing state and enabling interrupts. The caller must
99104
* enable interrupts before invoking syscall_enter_from_user_mode_work().
100105
*/
101-
void enter_from_user_mode(struct pt_regs *regs);
106+
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
107+
{
108+
arch_enter_from_user_mode(regs);
109+
lockdep_hardirqs_off(CALLER_ADDR0);
110+
111+
CT_WARN_ON(__ct_state() != CONTEXT_USER);
112+
user_exit_irqoff();
113+
114+
instrumentation_begin();
115+
kmsan_unpoison_entry_regs(regs);
116+
trace_hardirqs_off_finish();
117+
instrumentation_end();
118+
}
102119

103120
/**
104121
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
@@ -117,6 +134,9 @@ void enter_from_user_mode(struct pt_regs *regs);
117134
*/
118135
void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
119136

137+
long syscall_trace_enter(struct pt_regs *regs, long syscall,
138+
unsigned long work);
139+
120140
/**
121141
* syscall_enter_from_user_mode_work - Check and handle work before invoking
122142
* a syscall
@@ -140,7 +160,15 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
140160
* ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter()
141161
* 2) Invocation of audit_syscall_entry()
142162
*/
143-
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
163+
static __always_inline long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
164+
{
165+
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
166+
167+
if (work & SYSCALL_WORK_ENTER)
168+
syscall = syscall_trace_enter(regs, syscall, work);
169+
170+
return syscall;
171+
}
144172

145173
/**
146174
* syscall_enter_from_user_mode - Establish state and check and handle work
@@ -159,7 +187,19 @@ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
159187
* Returns: The original or a modified syscall number. See
160188
* syscall_enter_from_user_mode_work() for further explanation.
161189
*/
162-
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
190+
static __always_inline long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
191+
{
192+
long ret;
193+
194+
enter_from_user_mode(regs);
195+
196+
instrumentation_begin();
197+
local_irq_enable();
198+
ret = syscall_enter_from_user_mode_work(regs, syscall);
199+
instrumentation_end();
200+
201+
return ret;
202+
}
163203

164204
/**
165205
* local_irq_enable_exit_to_user - Exit to user variant of local_irq_enable()
@@ -258,6 +298,43 @@ static __always_inline void arch_exit_to_user_mode(void) { }
258298
*/
259299
void arch_do_signal_or_restart(struct pt_regs *regs);
260300

301+
/**
302+
* exit_to_user_mode_loop - do any pending work before leaving to user space
303+
*/
304+
unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
305+
unsigned long ti_work);
306+
307+
/**
308+
* exit_to_user_mode_prepare - call exit_to_user_mode_loop() if required
309+
* @regs: Pointer to pt_regs on entry stack
310+
*
311+
* 1) check that interrupts are disabled
312+
* 2) call tick_nohz_user_enter_prepare()
313+
* 3) call exit_to_user_mode_loop() if any flags from
314+
* EXIT_TO_USER_MODE_WORK are set
315+
* 4) check that interrupts are still disabled
316+
*/
317+
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
318+
{
319+
unsigned long ti_work;
320+
321+
lockdep_assert_irqs_disabled();
322+
323+
/* Flush pending rcuog wakeup before the last need_resched() check */
324+
tick_nohz_user_enter_prepare();
325+
326+
ti_work = read_thread_flags();
327+
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
328+
ti_work = exit_to_user_mode_loop(regs, ti_work);
329+
330+
arch_exit_to_user_mode_prepare(regs, ti_work);
331+
332+
/* Ensure that kernel state is sane for a return to userspace */
333+
kmap_assert_nomap();
334+
lockdep_assert_irqs_disabled();
335+
lockdep_sys_exit();
336+
}
337+
261338
/**
262339
* exit_to_user_mode - Fixup state when exiting to user mode
263340
*
@@ -276,7 +353,17 @@ void arch_do_signal_or_restart(struct pt_regs *regs);
276353
* non-instrumentable.
277354
* The caller has to invoke syscall_exit_to_user_mode_work() before this.
278355
*/
279-
void exit_to_user_mode(void);
356+
static __always_inline void exit_to_user_mode(void)
357+
{
358+
instrumentation_begin();
359+
trace_hardirqs_on_prepare();
360+
lockdep_hardirqs_on_prepare();
361+
instrumentation_end();
362+
363+
user_enter_irqoff();
364+
arch_exit_to_user_mode();
365+
lockdep_hardirqs_on(CALLER_ADDR0);
366+
}
280367

281368
/**
282369
* syscall_exit_to_user_mode_work - Handle work before returning to user mode

kernel/entry/common.c

Lines changed: 12 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,6 @@
1515
#define CREATE_TRACE_POINTS
1616
#include <trace/events/syscalls.h>
1717

18-
/* See comment for enter_from_user_mode() in entry-common.h */
19-
static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
20-
{
21-
arch_enter_from_user_mode(regs);
22-
lockdep_hardirqs_off(CALLER_ADDR0);
23-
24-
CT_WARN_ON(__ct_state() != CONTEXT_USER);
25-
user_exit_irqoff();
26-
27-
instrumentation_begin();
28-
kmsan_unpoison_entry_regs(regs);
29-
trace_hardirqs_off_finish();
30-
instrumentation_end();
31-
}
32-
33-
void noinstr enter_from_user_mode(struct pt_regs *regs)
34-
{
35-
__enter_from_user_mode(regs);
36-
}
37-
3818
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
3919
{
4020
if (unlikely(audit_context())) {
@@ -45,7 +25,7 @@ static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
4525
}
4626
}
4727

48-
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
28+
long syscall_trace_enter(struct pt_regs *regs, long syscall,
4929
unsigned long work)
5030
{
5131
long ret = 0;
@@ -85,67 +65,24 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
8565
return ret ? : syscall;
8666
}
8767

88-
static __always_inline long
89-
__syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
90-
{
91-
unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
92-
93-
if (work & SYSCALL_WORK_ENTER)
94-
syscall = syscall_trace_enter(regs, syscall, work);
95-
96-
return syscall;
97-
}
98-
99-
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall)
100-
{
101-
return __syscall_enter_from_user_work(regs, syscall);
102-
}
103-
104-
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
105-
{
106-
long ret;
107-
108-
__enter_from_user_mode(regs);
109-
110-
instrumentation_begin();
111-
local_irq_enable();
112-
ret = __syscall_enter_from_user_work(regs, syscall);
113-
instrumentation_end();
114-
115-
return ret;
116-
}
117-
11868
noinstr void syscall_enter_from_user_mode_prepare(struct pt_regs *regs)
11969
{
120-
__enter_from_user_mode(regs);
70+
enter_from_user_mode(regs);
12171
instrumentation_begin();
12272
local_irq_enable();
12373
instrumentation_end();
12474
}
12575

126-
/* See comment for exit_to_user_mode() in entry-common.h */
127-
static __always_inline void __exit_to_user_mode(void)
128-
{
129-
instrumentation_begin();
130-
trace_hardirqs_on_prepare();
131-
lockdep_hardirqs_on_prepare();
132-
instrumentation_end();
133-
134-
user_enter_irqoff();
135-
arch_exit_to_user_mode();
136-
lockdep_hardirqs_on(CALLER_ADDR0);
137-
}
138-
139-
void noinstr exit_to_user_mode(void)
140-
{
141-
__exit_to_user_mode();
142-
}
143-
14476
/* Workaround to allow gradual conversion of architecture code */
14577
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
14678

147-
static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
148-
unsigned long ti_work)
79+
/**
80+
* exit_to_user_mode_loop - do any pending work before leaving to user space
81+
* @regs: Pointer to pt_regs on entry stack
82+
* @ti_work: TIF work flags as read by the caller
83+
*/
84+
__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
85+
unsigned long ti_work)
14986
{
15087
/*
15188
* Before returning to user space ensure that all pending work
@@ -190,27 +127,6 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
190127
return ti_work;
191128
}
192129

193-
static void exit_to_user_mode_prepare(struct pt_regs *regs)
194-
{
195-
unsigned long ti_work;
196-
197-
lockdep_assert_irqs_disabled();
198-
199-
/* Flush pending rcuog wakeup before the last need_resched() check */
200-
tick_nohz_user_enter_prepare();
201-
202-
ti_work = read_thread_flags();
203-
if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
204-
ti_work = exit_to_user_mode_loop(regs, ti_work);
205-
206-
arch_exit_to_user_mode_prepare(regs, ti_work);
207-
208-
/* Ensure that kernel state is sane for a return to userspace */
209-
kmap_assert_nomap();
210-
lockdep_assert_irqs_disabled();
211-
lockdep_sys_exit();
212-
}
213-
214130
/*
215131
* If SYSCALL_EMU is set, then the only reason to report is when
216132
* SINGLESTEP is set (i.e. PTRACE_SYSEMU_SINGLESTEP). This syscall
@@ -295,20 +211,20 @@ __visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs)
295211
instrumentation_begin();
296212
__syscall_exit_to_user_mode_work(regs);
297213
instrumentation_end();
298-
__exit_to_user_mode();
214+
exit_to_user_mode();
299215
}
300216

301217
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
302218
{
303-
__enter_from_user_mode(regs);
219+
enter_from_user_mode(regs);
304220
}
305221

306222
noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
307223
{
308224
instrumentation_begin();
309225
exit_to_user_mode_prepare(regs);
310226
instrumentation_end();
311-
__exit_to_user_mode();
227+
exit_to_user_mode();
312228
}
313229

314230
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)

0 commit comments

Comments
 (0)