Commit 43a43fa

futex: improve user space accesses
Josh Poimboeuf reports that he got a "will-it-scale.per_process_ops 1.9% improvement" report for his patch that changed __get_user() to use pointer masking instead of the explicit speculation barrier. However, that patch doesn't actually work in the general case, because some (very bad) architecture-specific code actually depends on __get_user() also working on kernel addresses.

A profile showed that the offending __get_user() was the futex code, which really should be fixed up to not use that horrid legacy case. Rewrite futex_get_value_locked() to use the modern user access helpers, and inline it so that the compiler not only avoids the function call for a few instructions, but can do CSE on the address masking.

It also turns out the x86 futex functions have unnecessary barriers in other places, so let's fix those up too.

Link: https://lore.kernel.org/all/20241115230653.hfvzyf3aqqntgp63@jpoimboe/
Reported-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
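For context, the pointer masking the message refers to replaces a speculation barrier in front of the user access with a branchless clamp of the pointer itself. A rough user-space sketch of the idea, not the kernel's actual implementation (the constant and helper name here are invented for illustration; the real x86 code does this with, roughly, a cmov inside masked_user_access_begin()):

	#include <stdint.h>

	/* Invented for illustration: an address guaranteed to fault. */
	#define GUARD_ADDR	(((uintptr_t)1 << 47) - 1)

	static inline const void *mask_user_ptr(const void *ptr)
	{
		uintptr_t addr = (uintptr_t)ptr;

		/*
		 * Branchless clamp: a kernel address collapses to the
		 * guaranteed-faulting GUARD_ADDR, so even a mispredicted
		 * speculative path can never read kernel memory. The old
		 * approach instead stalled speculation with a barrier on
		 * every access, which is what made __get_user() slower.
		 */
		return (const void *)(addr > GUARD_ADDR ? GUARD_ADDR : addr);
	}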
1 parent 4e07155

3 files changed: +63 -26 lines
arch/x86/include/asm/futex.h

Lines changed: 6 additions & 2 deletions
@@ -48,7 +48,9 @@ do { \
 static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 		u32 __user *uaddr)
 {
-	if (!user_access_begin(uaddr, sizeof(u32)))
+	if (can_do_masked_user_access())
+		uaddr = masked_user_access_begin(uaddr);
+	else if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 {
 	int ret = 0;
 
-	if (!user_access_begin(uaddr, sizeof(u32)))
+	if (can_do_masked_user_access())
+		uaddr = masked_user_access_begin(uaddr);
+	else if (!user_access_begin(uaddr, sizeof(u32)))
 		return -EFAULT;
 	asm volatile("\n"
 		"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"

kernel/futex/core.c

Lines changed: 0 additions & 22 deletions
@@ -451,28 +451,6 @@ struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *
 	return NULL;
 }
 
-int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
-{
-	int ret;
-
-	pagefault_disable();
-	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
-	pagefault_enable();
-
-	return ret;
-}
-
-int futex_get_value_locked(u32 *dest, u32 __user *from)
-{
-	int ret;
-
-	pagefault_disable();
-	ret = __get_user(*dest, from);
-	pagefault_enable();
-
-	return ret ? -EFAULT : 0;
-}
-
 /**
  * wait_for_owner_exiting - Block until the owner has exited
  * @ret: owner's current futex lock status

kernel/futex/futex.h

Lines changed: 57 additions & 2 deletions
@@ -6,6 +6,7 @@
 #include <linux/rtmutex.h>
 #include <linux/sched/wake_q.h>
 #include <linux/compat.h>
+#include <linux/uaccess.h>
 
 #ifdef CONFIG_PREEMPT_RT
 #include <linux/rcuwait.h>
@@ -225,10 +226,64 @@ extern bool __futex_wake_mark(struct futex_q *q);
 extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
 
 extern int fault_in_user_writeable(u32 __user *uaddr);
-extern int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval);
-extern int futex_get_value_locked(u32 *dest, u32 __user *from);
 extern struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, union futex_key *key);
 
+static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32 uval, u32 newval)
+{
+	int ret;
+
+	pagefault_disable();
+	ret = futex_atomic_cmpxchg_inatomic(curval, uaddr, uval, newval);
+	pagefault_enable();
+
+	return ret;
+}
+
+/*
+ * This does a plain atomic user space read, and the user pointer has
+ * already been verified earlier by get_futex_key() to be both aligned
+ * and actually in user space, just like futex_atomic_cmpxchg_inatomic().
+ *
+ * We still want to avoid any speculation, and while __get_user() is
+ * the traditional model for this, it's actually slower than doing
+ * this manually these days.
+ *
+ * We could just have a per-architecture special function for it,
+ * the same way we do futex_atomic_cmpxchg_inatomic(), but rather
+ * than force everybody to do that, write it out long-hand using
+ * the low-level user-access infrastructure.
+ *
+ * This looks a bit overkill, but generally just results in a couple
+ * of instructions.
+ */
+static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
+{
+	u32 val;
+
+	if (can_do_masked_user_access())
+		from = masked_user_access_begin(from);
+	else if (!user_read_access_begin(from, sizeof(*from)))
+		return -EFAULT;
+	unsafe_get_user(val, from, Efault);
+	user_access_end();
+	*dest = val;
+	return 0;
+Efault:
+	user_access_end();
+	return -EFAULT;
+}
+
+static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
+{
+	int ret;
+
+	pagefault_disable();
+	ret = futex_read_inatomic(dest, from);
+	pagefault_enable();
+
+	return ret;
+}
+
 extern void __futex_unqueue(struct futex_q *q);
 extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb);
 extern int futex_unqueue(struct futex_q *q);
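For a sense of how the inlined helper is used, this is roughly the shape of a caller like futex_wait_setup() (a simplified sketch from memory, not verbatim kernel code): page faults are disabled inside futex_get_value_locked(), so -EFAULT means the caller must drop its locks, fault the page in with a normal get_user(), and retry.

	u32 uval;
	int ret;

	/* Hash-bucket lock held; the read must not sleep. */
	ret = futex_get_value_locked(&uval, uaddr);
	if (ret) {
		futex_q_unlock(hb);
		/* Take the fault for real, with faults enabled. */
		if (get_user(uval, uaddr))
			return -EFAULT;
		goto retry;
	}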
