
Commit 337ffea

kkdwivedi authored and Alexei Starovoitov committed
rqspinlock: Protect pending bit owners from stalls
The pending bit is used to avoid queueing in case the lock is uncontended, and has demonstrated benefits for the 2 contender scenario, esp. on x86. In case the pending bit is acquired and we wait for the locked bit to disappear, we may get stuck due to the lock owner not making progress. Hence, this waiting loop must be protected with a timeout check.

To perform a graceful recovery once we decide to abort our lock acquisition attempt in this case, we must unset the pending bit since we own it. All waiters undoing their changes and exiting gracefully allows the lock word to be restored to the unlocked state once all participants (owner, waiters) have been recovered, and the lock remains usable. Hence, set the pending bit back to zero before returning to the caller.

Introduce a lockevent (rqspinlock_lock_timeout) to capture timeout event statistics.

Reviewed-by: Barret Rhoden <brho@google.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20250316040541.108729-10-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent (ebababc) · commit 337ffea
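For readers following along outside the kernel tree, the recovery described above can be modeled in plain C11. The sketch below is illustrative only: the lock-word layout, pending_wait(), and the timing helpers are simplified stand-ins, not the kernel's res_smp_cond_load_acquire()/RES_CHECK_TIMEOUT() machinery. It shows the two steps that matter here: spin on the locked byte while holding the pending bit, and on timeout hand the pending bit back so the lock word can reach the unlocked state again.

/* Hypothetical userspace model of the pending-waiter timeout recovery.
 * Not kernel code: the bit layout and helpers are simplified stand-ins. */
#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define LOCKED_BIT      0x01u
#define PENDING_BIT     0x100u

static uint64_t mono_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Returns 0 on success, -ETIMEDOUT if the owner never released the lock. */
static int pending_wait(atomic_uint *lock, uint64_t timeout_ns)
{
        uint64_t deadline = mono_ns() + timeout_ns;

        /* We already own the pending bit; wait for the owner to drop LOCKED. */
        while (atomic_load_explicit(lock, memory_order_acquire) & LOCKED_BIT) {
                if (mono_ns() > deadline) {
                        /*
                         * Graceful unwind: clear the pending bit we own so the
                         * word can return to the unlocked state (*,1,* -> *,0,*).
                         */
                        atomic_fetch_and_explicit(lock, ~PENDING_BIT,
                                                  memory_order_relaxed);
                        return -ETIMEDOUT;
                }
        }
        return 0;
}

int main(void)
{
        /* Model a stuck owner: LOCKED never clears, and we hold PENDING. */
        atomic_uint lock = LOCKED_BIT | PENDING_BIT;

        if (pending_wait(&lock, 250000000ull))  /* 0.25 s, the default timeout */
                printf("timed out, lock word now %#x\n", atomic_load(&lock));
        return 0;
}

In this single-threaded model the wait always times out after 0.25 seconds, leaving only the locked bit set; in the kernel, the same unwind lets the lock word become fully unlocked once every participant (owner and waiters) has been recovered.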

File tree: 3 files changed (+33, −6 lines)

include/asm-generic/rqspinlock.h

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 struct qspinlock;
 typedef struct qspinlock rqspinlock_t;
 
-extern void resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val);
+extern int resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val);
 
 /*
  * Default timeout for waiting loops is 0.25 seconds

kernel/bpf/rqspinlock.c

Lines changed: 27 additions & 5 deletions
@@ -138,6 +138,10 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
  * @lock: Pointer to queued spinlock structure
  * @val: Current value of the queued spinlock 32-bit word
  *
+ * Return:
+ * * 0          - Lock was acquired successfully.
+ * * -ETIMEDOUT - Lock acquisition failed because of timeout.
+ *
  * (queue tail, pending bit, lock value)
  *
  * fast     :    slow                                  :    unlock
@@ -154,12 +158,12 @@ static DEFINE_PER_CPU_ALIGNED(struct qnode, rqnodes[_Q_MAX_NODES]);
  *   contended : (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
  *   queue     :         ^--'                          :
  */
-void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
+int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 {
        struct mcs_spinlock *prev, *next, *node;
        struct rqspinlock_timeout ts;
+       int idx, ret = 0;
        u32 old, tail;
-       int idx;
 
        BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
@@ -217,8 +221,25 @@ void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
         * clear_pending_set_locked() implementations imply full
         * barriers.
         */
-       if (val & _Q_LOCKED_MASK)
-               smp_cond_load_acquire(&lock->locked, !VAL);
+       if (val & _Q_LOCKED_MASK) {
+               RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
+               res_smp_cond_load_acquire(&lock->locked, !VAL || RES_CHECK_TIMEOUT(ts, ret));
+       }
+
+       if (ret) {
+               /*
+                * We waited for the locked bit to go back to 0, as the pending
+                * waiter, but timed out. We need to clear the pending bit since
+                * we own it. Once a stuck owner has been recovered, the lock
+                * must be restored to a valid state, hence removing the pending
+                * bit is necessary.
+                *
+                * *,1,* -> *,0,*
+                */
+               clear_pending(lock);
+               lockevent_inc(rqspinlock_lock_timeout);
+               return ret;
+       }
 
        /*
         * take ownership and clear the pending bit.
@@ -227,7 +248,7 @@ void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
         */
        clear_pending_set_locked(lock);
        lockevent_inc(lock_pending);
-       return;
+       return 0;
 
        /*
         * End of pending bit optimistic spinning and beginning of MCS
@@ -378,5 +399,6 @@ void __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
         * release the node
         */
        __this_cpu_dec(rqnodes[0].mcs.count);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(resilient_queued_spin_lock_slowpath);
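The hunks above bound the pending waiter's spin with RES_RESET_TIMEOUT()/RES_CHECK_TIMEOUT(). Those helpers are not part of this diff; the sketch below only illustrates one common way to structure such a check in userspace C, amortizing the clock read over a batch of iterations so the hot loop usually pays for a single counter increment. The names (struct spin_timeout, check_timeout()) and the 64K batch size are assumptions for illustration, not the kernel implementation.

/* Illustrative timeout-check pattern (not the kernel's RES_CHECK_TIMEOUT):
 * read the clock only once per batch of spins. */
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct spin_timeout {
        uint64_t deadline_ns;   /* absolute deadline */
        uint32_t spins;         /* iterations since the last clock read */
};

static uint64_t mono_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static void timeout_reset(struct spin_timeout *t, uint64_t duration_ns)
{
        t->deadline_ns = mono_ns() + duration_ns;
        t->spins = 0;
}

/* Returns true once the deadline has passed; reads the clock every 64K spins. */
static bool check_timeout(struct spin_timeout *t)
{
        if ((++t->spins & 0xffff) != 0)
                return false;
        return mono_ns() > t->deadline_ns;
}

int main(void)
{
        struct spin_timeout t;

        timeout_reset(&t, 250000000ull);        /* 0.25 s */
        while (!check_timeout(&t))
                ;                               /* stand-in for the waiting loop body */
        return 0;
}

In the diff, the predicate handed to res_smp_cond_load_acquire() terminates the wait either when the locked byte clears or when the timeout check trips and sets ret, after which the pending bit is handed back and -ETIMEDOUT propagates to the caller.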

kernel/locking/lock_events_list.h

Lines changed: 5 additions & 0 deletions
@@ -49,6 +49,11 @@ LOCK_EVENT(lock_use_node4)     /* # of locking ops that use 4th percpu node */
 LOCK_EVENT(lock_no_node)       /* # of locking ops w/o using percpu node */
 #endif /* CONFIG_QUEUED_SPINLOCKS */
 
+/*
+ * Locking events for Resilient Queued Spin Lock
+ */
+LOCK_EVENT(rqspinlock_lock_timeout)    /* # of locking ops that timeout */
+
 /*
  * Locking events for rwsem
  */
