
Commit 494e7fe

Merge tag 'bpf_res_spin_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf resilient spinlock support from Alexei Starovoitov:
 "This patch set introduces Resilient Queued Spin Lock (or rqspinlock with res_spin_lock() and res_spin_unlock() APIs). This is a qspinlock variant which recovers the kernel from a stalled state when the lock acquisition path cannot make forward progress. This can occur when a lock acquisition attempt enters a deadlock situation (e.g. AA, or ABBA), or more generally, when the owner of the lock (which we’re trying to acquire) isn’t making forward progress.

  Deadlock detection is the main mechanism used to provide instant recovery, with the timeout mechanism acting as a final line of defense. Detection is triggered immediately when beginning the waiting loop of a lock slow path.

  Additionally, BPF programs attached to different parts of the kernel can introduce new control flow into the kernel, which increases the likelihood of deadlocks in code not written to handle reentrancy. There have been multiple syzbot reports surfacing deadlocks in internal kernel code due to the diverse ways in which BPF programs can be attached to different parts of the kernel. By switching the BPF subsystem’s lock usage to rqspinlock, all of these issues are mitigated at runtime.

  This spin lock implementation allows BPF maps to become safer and remove mechanisms that have fallen short in assuring safety when nesting programs in arbitrary ways in the same context or across different contexts.

  We run benchmarks that stress locking scalability and perform comparison against the baseline (qspinlock). For the rqspinlock case, we replace the default qspinlock with it in the kernel, such that all spin locks in the kernel use the rqspinlock slow path. As such, benchmarks that stress kernel spin locks end up exercising rqspinlock.

  More details in the cover letter in commit 6ffb901 ("Merge branch 'resilient-queued-spin-lock'")"

* tag 'bpf_res_spin_lock' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (24 commits)
  selftests/bpf: Add tests for rqspinlock
  bpf: Maintain FIFO property for rqspinlock unlock
  bpf: Implement verifier support for rqspinlock
  bpf: Introduce rqspinlock kfuncs
  bpf: Convert lpm_trie.c to rqspinlock
  bpf: Convert percpu_freelist.c to rqspinlock
  bpf: Convert hashtab.c to rqspinlock
  rqspinlock: Add locktorture support
  rqspinlock: Add entry to Makefile, MAINTAINERS
  rqspinlock: Add macros for rqspinlock usage
  rqspinlock: Add basic support for CONFIG_PARAVIRT
  rqspinlock: Add a test-and-set fallback
  rqspinlock: Add deadlock detection and recovery
  rqspinlock: Protect waiters in trylock fallback from stalls
  rqspinlock: Protect waiters in queue from stalls
  rqspinlock: Protect pending bit owners from stalls
  rqspinlock: Hardcode cond_acquire loops for arm64
  rqspinlock: Add support for timeouts
  rqspinlock: Drop PV and virtualization support
  rqspinlock: Add rqspinlock.h header
  ...
2 parents fa593d0 + 6ffb901 commit 494e7fe
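
Note (illustrative, not part of the merge): the practical difference from a plain qspinlock is that acquisition can fail with an error instead of spinning forever, so callers must check a return value. The sketch below shows that usage pattern; the res_spin_lock()/res_spin_unlock() names come from the message above, while the struct layout, the my_elem_update() helper, and the exact error codes are assumptions made for the example.

/*
 * Sketch only. Assumes <asm-generic/rqspinlock.h> provides rqspinlock_t
 * and the res_spin_lock()/res_spin_unlock() API named in the pull message;
 * the error codes shown are examples, not guaranteed by this merge.
 */
#include <asm-generic/rqspinlock.h>

struct my_elem {
        rqspinlock_t lock;      /* resilient lock protecting the element */
        u64 value;
};

static int my_elem_update(struct my_elem *e, u64 new_value)
{
        int ret;

        /* May fail instead of hanging, e.g. on AA/ABBA deadlock or timeout. */
        ret = res_spin_lock(&e->lock);
        if (ret)
                return ret;     /* e.g. -EDEADLK or -ETIMEDOUT */

        e->value = new_value;
        res_spin_unlock(&e->lock);
        return 0;
}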

27 files changed: +2312 −417 lines changed
MAINTAINERS

Lines changed: 2 additions & 0 deletions
@@ -4361,6 +4361,8 @@ F: include/uapi/linux/filter.h
 F: kernel/bpf/
 F: kernel/trace/bpf_trace.c
 F: lib/buildid.c
+F: arch/*/include/asm/rqspinlock.h
+F: include/asm-generic/rqspinlock.h
 F: lib/test_bpf.c
 F: net/bpf/
 F: net/core/filter.c

arch/arm64/include/asm/rqspinlock.h

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RQSPINLOCK_H
#define _ASM_RQSPINLOCK_H

#include <asm/barrier.h>

/*
 * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
 * version based on [0]. In rqspinlock code, our conditional expression involves
 * checking the value _and_ additionally a timeout. However, on arm64, the
 * WFE-based implementation may never spin again if no stores occur to the
 * locked byte in the lock word. As such, we may be stuck forever if
 * event-stream based unblocking is not available on the platform for WFE spin
 * loops (arch_timer_evtstrm_available).
 *
 * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
 * copy-paste.
 *
 * While we rely on the implementation to amortize the cost of sampling
 * cond_expr for us, it will not happen when event stream support is
 * unavailable, time_expr check is amortized. This is not the common case, and
 * it would be difficult to fit our logic in the time_expr_ns >= time_limit_ns
 * comparison, hence just let it be. In case of event-stream, the loop is woken
 * up at microsecond granularity.
 *
 * [0]: https://lore.kernel.org/lkml/20250203214911.898276-1-ankur.a.arora@oracle.com
 */

#ifndef smp_cond_load_acquire_timewait

#define smp_cond_time_check_count 200

#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
                                         time_limit_ns) ({ \
        typeof(ptr) __PTR = (ptr); \
        __unqual_scalar_typeof(*ptr) VAL; \
        unsigned int __count = 0; \
        for (;;) { \
                VAL = READ_ONCE(*__PTR); \
                if (cond_expr) \
                        break; \
                cpu_relax(); \
                if (__count++ < smp_cond_time_check_count) \
                        continue; \
                if ((time_expr_ns) >= (time_limit_ns)) \
                        break; \
                __count = 0; \
        } \
        (typeof(*ptr))VAL; \
})

#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
                                         time_expr_ns, time_limit_ns) \
({ \
        typeof(ptr) __PTR = (ptr); \
        __unqual_scalar_typeof(*ptr) VAL; \
        for (;;) { \
                VAL = smp_load_acquire(__PTR); \
                if (cond_expr) \
                        break; \
                __cmpwait_relaxed(__PTR, VAL); \
                if ((time_expr_ns) >= (time_limit_ns)) \
                        break; \
        } \
        (typeof(*ptr))VAL; \
})

#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
                                       time_expr_ns, time_limit_ns) \
({ \
        __unqual_scalar_typeof(*ptr) _val; \
        int __wfe = arch_timer_evtstrm_available(); \
 \
        if (likely(__wfe)) { \
                _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
                                                        time_expr_ns, \
                                                        time_limit_ns); \
        } else { \
                _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
                                                        time_expr_ns, \
                                                        time_limit_ns); \
                smp_acquire__after_ctrl_dep(); \
        } \
        (typeof(*ptr))_val; \
})

#endif

#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)

#include <asm-generic/rqspinlock.h>

#endif /* _ASM_RQSPINLOCK_H */
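
Note (illustrative, not part of the diff): the wrapper at the bottom passes a (time_expr_ns, time_limit_ns) pair of (0, 1), which can never fire on its own; the rqspinlock core is expected to fold its own deadline check into the condition expression instead. A hedged sketch of that usage follows, where timed_out(), struct deadline, and the lock-word access are hypothetical placeholders:

/*
 * Sketch only: spin until the lock word clears or our own deadline,
 * tracked by the hypothetical timed_out() helper, expires. VAL is the
 * value variable exposed by the smp_cond_load_*() family of macros.
 */
static int example_wait_for_unlock(rqspinlock_t *lock, struct deadline *ts)
{
        int ret = 0;

        res_smp_cond_load_acquire_timewait(&lock->locked,
                                           !VAL || timed_out(ts, &ret));
        return ret;     /* 0 if the lock word cleared, an error if we gave up */
}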

arch/x86/include/asm/rqspinlock.h

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_RQSPINLOCK_H
#define _ASM_X86_RQSPINLOCK_H

#include <asm/paravirt.h>

#ifdef CONFIG_PARAVIRT
DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);

#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled
static __always_inline bool resilient_virt_spin_lock_enabled(void)
{
        return static_branch_likely(&virt_spin_lock_key);
}

#ifdef CONFIG_QUEUED_SPINLOCKS
typedef struct qspinlock rqspinlock_t;
#else
typedef struct rqspinlock rqspinlock_t;
#endif
extern int resilient_tas_spin_lock(rqspinlock_t *lock);

#define resilient_virt_spin_lock resilient_virt_spin_lock
static inline int resilient_virt_spin_lock(rqspinlock_t *lock)
{
        return resilient_tas_spin_lock(lock);
}

#endif /* CONFIG_PARAVIRT */

#include <asm-generic/rqspinlock.h>

#endif /* _ASM_X86_RQSPINLOCK_H */
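
Note (illustrative, not part of the diff): per the "Add basic support for CONFIG_PARAVIRT" and "Add a test-and-set fallback" commits in the shortlog, the intent is that paravirtualized guests skip the queued slow path in favour of the resilient test-and-set lock. A hedged sketch of how a caller might use these hooks; the function name and surrounding logic are assumptions, not the actual slow-path code:

/*
 * Sketch only: divert to the TAS fallback when running as a paravirt
 * guest, where queueing behind a preempted vCPU could stall.
 */
static int example_res_slowpath(rqspinlock_t *lock)
{
        if (resilient_virt_spin_lock_enabled())
                return resilient_virt_spin_lock(lock);

        return 0; /* placeholder: the real queued rqspinlock slow path runs here */
}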

include/asm-generic/Kbuild

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ mandatory-y += pci.h
 mandatory-y += percpu.h
 mandatory-y += pgalloc.h
 mandatory-y += preempt.h
+mandatory-y += rqspinlock.h
 mandatory-y += runtime-const.h
 mandatory-y += rwonce.h
 mandatory-y += sections.h

include/asm-generic/mcs_spinlock.h

Lines changed: 6 additions & 0 deletions
@@ -1,6 +1,12 @@
 #ifndef __ASM_MCS_SPINLOCK_H
 #define __ASM_MCS_SPINLOCK_H

+struct mcs_spinlock {
+        struct mcs_spinlock *next;
+        int locked; /* 1 if lock acquired */
+        int count;  /* nesting count, see qspinlock.c */
+};
+
 /*
  * Architectures can define their own:
  *
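
Note (illustrative, not part of the diff): the structure above is the per-waiter queue node used by the MCS-style locks (qspinlock builds its wait queue out of these; per its own comment, 'count' tracks nesting, see qspinlock.c). As a simplified illustration of how the fields are used, and not the kernel's actual mcs_spin_lock(), an acquire looks roughly like this:

/*
 * Simplified sketch: each waiter appends its node to the queue tail and
 * spins only on its own ->locked flag until the predecessor hands the
 * lock over. Unlock and the real implementation's subtleties are omitted.
 */
static void example_mcs_lock(struct mcs_spinlock **tail, struct mcs_spinlock *node)
{
        struct mcs_spinlock *prev;

        node->next = NULL;
        node->locked = 0;

        prev = xchg(tail, node);                /* atomically become the new tail */
        if (!prev)
                return;                         /* queue was empty: lock is ours */

        WRITE_ONCE(prev->next, node);           /* link in behind the predecessor */
        smp_cond_load_acquire(&node->locked, VAL);      /* wait for the handoff */
}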
