Skip to content

Commit 165d05d

Browse files
committed
Merge tag 'locking_urgent_for_v5.15_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking fixes from Borislav Petkov:

 - Fix the futex PI requeue machinery to not return to userspace in
   inconsistent state

 - Avoid a potential null pointer dereference in the ww_mutex deadlock
   check

 - Other smaller cleanups and optimizations

* tag 'locking_urgent_for_v5.15_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  locking/rtmutex: Fix ww_mutex deadlock check
  futex: Remove unused variable 'vpid' in futex_proxy_trylock_atomic()
  futex: Avoid redundant task lookup
  futex: Clarify comment for requeue_pi_wake_futex()
  futex: Prevent inconsistent state and exit race
  futex: Return error code instead of assigning it without effect
  locking/rwsem: Add missing __init_rwsem() for PREEMPT_RT
2 parents 7bf3142 + e548057 commit 165d05d

File tree

4 files changed

+120
-94
lines changed

4 files changed

+120
-94
lines changed

include/linux/rwsem.h

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -142,22 +142,14 @@ struct rw_semaphore {
142142
#define DECLARE_RWSEM(lockname) \
143143
struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
144144

145-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
146-
extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
145+
extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name,
147146
struct lock_class_key *key);
148-
#else
149-
static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
150-
struct lock_class_key *key)
151-
{
152-
}
153-
#endif
154147

155148
#define init_rwsem(sem) \
156149
do { \
157150
static struct lock_class_key __key; \
158151
\
159-
init_rwbase_rt(&(sem)->rwbase); \
160-
__rwsem_init((sem), #sem, &__key); \
152+
__init_rwsem((sem), #sem, &__key); \
161153
} while (0)
162154

163155
static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)

kernel/futex.c

Lines changed: 111 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1263,6 +1263,36 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
12631263
return -ESRCH;
12641264
}
12651265

1266+
static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
1267+
struct futex_pi_state **ps)
1268+
{
1269+
/*
1270+
* No existing pi state. First waiter. [2]
1271+
*
1272+
* This creates pi_state, we have hb->lock held, this means nothing can
1273+
* observe this state, wait_lock is irrelevant.
1274+
*/
1275+
struct futex_pi_state *pi_state = alloc_pi_state();
1276+
1277+
/*
1278+
* Initialize the pi_mutex in locked state and make @p
1279+
* the owner of it:
1280+
*/
1281+
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
1282+
1283+
/* Store the key for possible exit cleanups: */
1284+
pi_state->key = *key;
1285+
1286+
WARN_ON(!list_empty(&pi_state->list));
1287+
list_add(&pi_state->list, &p->pi_state_list);
1288+
/*
1289+
* Assignment without holding pi_state->pi_mutex.wait_lock is safe
1290+
* because there is no concurrency as the object is not published yet.
1291+
*/
1292+
pi_state->owner = p;
1293+
1294+
*ps = pi_state;
1295+
}
12661296
/*
12671297
* Lookup the task for the TID provided from user space and attach to
12681298
* it after doing proper sanity checks.
@@ -1272,7 +1302,6 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
12721302
struct task_struct **exiting)
12731303
{
12741304
pid_t pid = uval & FUTEX_TID_MASK;
1275-
struct futex_pi_state *pi_state;
12761305
struct task_struct *p;
12771306

12781307
/*
@@ -1324,36 +1353,11 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
13241353
return ret;
13251354
}
13261355

1327-
/*
1328-
* No existing pi state. First waiter. [2]
1329-
*
1330-
* This creates pi_state, we have hb->lock held, this means nothing can
1331-
* observe this state, wait_lock is irrelevant.
1332-
*/
1333-
pi_state = alloc_pi_state();
1334-
1335-
/*
1336-
* Initialize the pi_mutex in locked state and make @p
1337-
* the owner of it:
1338-
*/
1339-
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
1340-
1341-
/* Store the key for possible exit cleanups: */
1342-
pi_state->key = *key;
1343-
1344-
WARN_ON(!list_empty(&pi_state->list));
1345-
list_add(&pi_state->list, &p->pi_state_list);
1346-
/*
1347-
* Assignment without holding pi_state->pi_mutex.wait_lock is safe
1348-
* because there is no concurrency as the object is not published yet.
1349-
*/
1350-
pi_state->owner = p;
1356+
__attach_to_pi_owner(p, key, ps);
13511357
raw_spin_unlock_irq(&p->pi_lock);
13521358

13531359
put_task_struct(p);
13541360

1355-
*ps = pi_state;
1356-
13571361
return 0;
13581362
}
13591363

@@ -1454,8 +1458,26 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
14541458
newval |= FUTEX_WAITERS;
14551459

14561460
ret = lock_pi_update_atomic(uaddr, uval, newval);
1457-
/* If the take over worked, return 1 */
1458-
return ret < 0 ? ret : 1;
1461+
if (ret)
1462+
return ret;
1463+
1464+
/*
1465+
* If the waiter bit was requested the caller also needs PI
1466+
* state attached to the new owner of the user space futex.
1467+
*
1468+
* @task is guaranteed to be alive and it cannot be exiting
1469+
* because it is either sleeping or waiting in
1470+
* futex_requeue_pi_wakeup_sync().
1471+
*
1472+
* No need to do the full attach_to_pi_owner() exercise
1473+
* because @task is known and valid.
1474+
*/
1475+
if (set_waiters) {
1476+
raw_spin_lock_irq(&task->pi_lock);
1477+
__attach_to_pi_owner(task, key, ps);
1478+
raw_spin_unlock_irq(&task->pi_lock);
1479+
}
1480+
return 1;
14591481
}
14601482

14611483
/*
@@ -1939,12 +1961,26 @@ static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
19391961
* @hb: the hash_bucket of the requeue target futex
19401962
*
19411963
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
1942-
* target futex if it is uncontended or via a lock steal. Set the futex_q key
1943-
* to the requeue target futex so the waiter can detect the wakeup on the right
1944-
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
1945-
* atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1946-
* to protect access to the pi_state to fixup the owner later. Must be called
1947-
* with both q->lock_ptr and hb->lock held.
1964+
* target futex if it is uncontended or via a lock steal.
1965+
*
1966+
* 1) Set @q::key to the requeue target futex key so the waiter can detect
1967+
* the wakeup on the right futex.
1968+
*
1969+
* 2) Dequeue @q from the hash bucket.
1970+
*
1971+
* 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
1972+
* acquisition.
1973+
*
1974+
* 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
1975+
* the waiter has to fixup the pi state.
1976+
*
1977+
* 5) Complete the requeue state so the waiter can make progress. After
1978+
* this point the waiter task can return from the syscall immediately in
1979+
* case that the pi state does not have to be fixed up.
1980+
*
1981+
* 6) Wake the waiter task.
1982+
*
1983+
* Must be called with both q->lock_ptr and hb->lock held.
19481984
*/
19491985
static inline
19501986
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
@@ -1998,7 +2034,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
19982034
{
19992035
struct futex_q *top_waiter = NULL;
20002036
u32 curval;
2001-
int ret, vpid;
2037+
int ret;
20022038

20032039
if (get_futex_value_locked(&curval, pifutex))
20042040
return -EFAULT;
@@ -2025,7 +2061,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
20252061
* and waiting on the 'waitqueue' futex which is always !PI.
20262062
*/
20272063
if (!top_waiter->rt_waiter || top_waiter->pi_state)
2028-
ret = -EINVAL;
2064+
return -EINVAL;
20292065

20302066
/* Ensure we requeue to the expected futex. */
20312067
if (!match_futex(top_waiter->requeue_pi_key, key2))
@@ -2036,17 +2072,23 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
20362072
return -EAGAIN;
20372073

20382074
/*
2039-
* Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
2040-
* the contended case or if set_waiters is 1. The pi_state is returned
2041-
* in ps in contended cases.
2075+
* Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
2076+
* in the contended case or if @set_waiters is true.
2077+
*
2078+
* In the contended case PI state is attached to the lock owner. If
2079+
* the user space lock can be acquired then PI state is attached to
2080+
* the new owner (@top_waiter->task) when @set_waiters is true.
20422081
*/
2043-
vpid = task_pid_vnr(top_waiter->task);
20442082
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
20452083
exiting, set_waiters);
20462084
if (ret == 1) {
2047-
/* Dequeue, wake up and update top_waiter::requeue_state */
2085+
/*
2086+
* Lock was acquired in user space and PI state was
2087+
* attached to @top_waiter->task. That means state is fully
2088+
* consistent and the waiter can return to user space
2089+
* immediately after the wakeup.
2090+
*/
20482091
requeue_pi_wake_futex(top_waiter, key2, hb2);
2049-
return vpid;
20502092
} else if (ret < 0) {
20512093
/* Rewind top_waiter::requeue_state */
20522094
futex_requeue_pi_complete(top_waiter, ret);
@@ -2208,19 +2250,26 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
22082250
&exiting, nr_requeue);
22092251

22102252
/*
2211-
* At this point the top_waiter has either taken uaddr2 or is
2212-
* waiting on it. If the former, then the pi_state will not
2213-
* exist yet, look it up one more time to ensure we have a
2214-
* reference to it. If the lock was taken, @ret contains the
2215-
* VPID of the top waiter task.
2216-
* If the lock was not taken, we have pi_state and an initial
2217-
* refcount on it. In case of an error we have nothing.
2253+
* At this point the top_waiter has either taken uaddr2 or
2254+
* is waiting on it. In both cases pi_state has been
2255+
* established and an initial refcount on it. In case of an
2256+
* error there's nothing.
22182257
*
22192258
* The top waiter's requeue_state is up to date:
22202259
*
2221-
* - If the lock was acquired atomically (ret > 0), then
2260+
* - If the lock was acquired atomically (ret == 1), then
22222261
* the state is Q_REQUEUE_PI_LOCKED.
22232262
*
2263+
* The top waiter has been dequeued and woken up and can
2264+
* return to user space immediately. The kernel/user
2265+
* space state is consistent. In case that there must be
2266+
* more waiters requeued the WAITERS bit in the user
2267+
* space futex is set so the top waiter task has to go
2268+
* into the syscall slowpath to unlock the futex. This
2269+
* will block until this requeue operation has been
2270+
* completed and the hash bucket locks have been
2271+
* dropped.
2272+
*
22242273
* - If the trylock failed with an error (ret < 0) then
22252274
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
22262275
* happened", or Q_REQUEUE_PI_IGNORE when there was an
@@ -2234,36 +2283,20 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
22342283
* the same sanity checks for requeue_pi as the loop
22352284
* below does.
22362285
*/
2237-
if (ret > 0) {
2238-
WARN_ON(pi_state);
2239-
task_count++;
2240-
/*
2241-
* If futex_proxy_trylock_atomic() acquired the
2242-
* user space futex, then the user space value
2243-
* @uaddr2 has been set to the @hb1's top waiter
2244-
* task VPID. This task is guaranteed to be alive
2245-
* and cannot be exiting because it is either
2246-
* sleeping or blocked on @hb2 lock.
2247-
*
2248-
* The @uaddr2 futex cannot have waiters either as
2249-
* otherwise futex_proxy_trylock_atomic() would not
2250-
* have succeeded.
2251-
*
2252-
* In order to requeue waiters to @hb2, pi state is
2253-
* required. Hand in the VPID value (@ret) and
2254-
* allocate PI state with an initial refcount on
2255-
* it.
2256-
*/
2257-
ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state,
2258-
&exiting);
2259-
WARN_ON(ret);
2260-
}
2261-
22622286
switch (ret) {
22632287
case 0:
22642288
/* We hold a reference on the pi state. */
22652289
break;
22662290

2291+
case 1:
2292+
/*
2293+
* futex_proxy_trylock_atomic() acquired the user space
2294+
* futex. Adjust task_count.
2295+
*/
2296+
task_count++;
2297+
ret = 0;
2298+
break;
2299+
22672300
/*
22682301
* If the above failed, then pi_state is NULL and
22692302
* waiter::requeue_state is correct.
@@ -2395,9 +2428,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
23952428
}
23962429

23972430
/*
2398-
* We took an extra initial reference to the pi_state either in
2399-
* futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
2400-
* to drop it here again.
2431+
* We took an extra initial reference to the pi_state in
2432+
* futex_proxy_trylock_atomic(). We need to drop it here again.
24012433
*/
24022434
put_pi_state(pi_state);
24032435

kernel/locking/rtmutex.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
753753
* other configuration and we fail to report; also, see
754754
* lockdep.
755755
*/
756-
if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx)
756+
if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
757757
ret = 0;
758758

759759
raw_spin_unlock(&lock->wait_lock);

kernel/locking/rwsem.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,15 +1376,17 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
13761376

13771377
#include "rwbase_rt.c"
13781378

1379-
#ifdef CONFIG_DEBUG_LOCK_ALLOC
1380-
void __rwsem_init(struct rw_semaphore *sem, const char *name,
1379+
void __init_rwsem(struct rw_semaphore *sem, const char *name,
13811380
struct lock_class_key *key)
13821381
{
1382+
init_rwbase_rt(&(sem)->rwbase);
1383+
1384+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
13831385
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
13841386
lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
1385-
}
1386-
EXPORT_SYMBOL(__rwsem_init);
13871387
#endif
1388+
}
1389+
EXPORT_SYMBOL(__init_rwsem);
13881390

13891391
static inline void __down_read(struct rw_semaphore *sem)
13901392
{

0 commit comments

Comments
 (0)