@@ -1263,6 +1263,36 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
1263
1263
return - ESRCH ;
1264
1264
}
1265
1265
1266
+ static void __attach_to_pi_owner (struct task_struct * p , union futex_key * key ,
1267
+ struct futex_pi_state * * ps )
1268
+ {
1269
+ /*
+ * __attach_to_pi_owner() - create a new PI state and attach it to @p
+ * @p:   task that becomes the owner of the new PI state
+ * @key: futex key; copied by value into the PI state
+ * @ps:  output, set to the newly created PI state
+ *
+ * The call sites in this change invoke it with @p->pi_lock held
+ * (futex_lock_pi_atomic() takes the lock around the call,
+ * attach_to_pi_owner() drops it right after the call).
+ *
+ * NOTE(review): the result of alloc_pi_state() is used without a
+ * NULL check; presumably it cannot fail here - confirm against
+ * alloc_pi_state().
+ *
1270
+ * No existing pi state. First waiter. [2]
1271
+ *
1272
+ * This creates pi_state, we have hb->lock held, this means nothing can
1273
+ * observe this state, wait_lock is irrelevant.
1274
+ */
1275
+ struct futex_pi_state * pi_state = alloc_pi_state ();
1276
+
1277
+ /*
1278
+ * Initialize the pi_mutex in locked state and make @p
1279
+ * the owner of it:
1280
+ */
1281
+ rt_mutex_init_proxy_locked (& pi_state -> pi_mutex , p );
1282
+
1283
+ /* Store the key for possible exit cleanups: */
1284
+ pi_state -> key = * key ;
1285
+
1286
+ WARN_ON (!list_empty (& pi_state -> list )); /* a fresh pi_state must not be linked anywhere yet */
1287
+ list_add (& pi_state -> list , & p -> pi_state_list ); /* track it on the owner task's list */
1288
+ /*
1289
+ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
1290
+ * because there is no concurrency as the object is not published yet.
1291
+ */
1292
+ pi_state -> owner = p ;
1293
+
1294
+ * ps = pi_state ; /* hand the new PI state back to the caller */
1295
+ }
1266
1296
/*
1267
1297
* Lookup the task for the TID provided from user space and attach to
1268
1298
* it after doing proper sanity checks.
@@ -1272,7 +1302,6 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
1272
1302
struct task_struct * * exiting )
1273
1303
{
1274
1304
pid_t pid = uval & FUTEX_TID_MASK ;
1275
- struct futex_pi_state * pi_state ;
1276
1305
struct task_struct * p ;
1277
1306
1278
1307
/*
@@ -1324,36 +1353,11 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
1324
1353
return ret ;
1325
1354
}
1326
1355
1327
- /*
1328
- * No existing pi state. First waiter. [2]
1329
- *
1330
- * This creates pi_state, we have hb->lock held, this means nothing can
1331
- * observe this state, wait_lock is irrelevant.
1332
- */
1333
- pi_state = alloc_pi_state ();
1334
-
1335
- /*
1336
- * Initialize the pi_mutex in locked state and make @p
1337
- * the owner of it:
1338
- */
1339
- rt_mutex_init_proxy_locked (& pi_state -> pi_mutex , p );
1340
-
1341
- /* Store the key for possible exit cleanups: */
1342
- pi_state -> key = * key ;
1343
-
1344
- WARN_ON (!list_empty (& pi_state -> list ));
1345
- list_add (& pi_state -> list , & p -> pi_state_list );
1346
- /*
1347
- * Assignment without holding pi_state->pi_mutex.wait_lock is safe
1348
- * because there is no concurrency as the object is not published yet.
1349
- */
1350
- pi_state -> owner = p ;
1356
+ __attach_to_pi_owner (p , key , ps );
1351
1357
raw_spin_unlock_irq (& p -> pi_lock );
1352
1358
1353
1359
put_task_struct (p );
1354
1360
1355
- * ps = pi_state ;
1356
-
1357
1361
return 0 ;
1358
1362
}
1359
1363
@@ -1454,8 +1458,26 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
1454
1458
newval |= FUTEX_WAITERS ;
1455
1459
1456
1460
ret = lock_pi_update_atomic (uaddr , uval , newval );
1457
- /* If the take over worked, return 1 */
1458
- return ret < 0 ? ret : 1 ;
1461
+ if (ret )
1462
+ return ret ;
1463
+
1464
+ /*
1465
+ * If the waiter bit was requested the caller also needs PI
1466
+ * state attached to the new owner of the user space futex.
1467
+ *
1468
+ * @task is guaranteed to be alive and it cannot be exiting
1469
+ * because it is either sleeping or waiting in
1470
+ * futex_requeue_pi_wakeup_sync().
1471
+ *
1472
+ * No need to do the full attach_to_pi_owner() exercise
1473
+ * because @task is known and valid.
1474
+ */
1475
+ if (set_waiters ) {
1476
+ raw_spin_lock_irq (& task -> pi_lock );
1477
+ __attach_to_pi_owner (task , key , ps );
1478
+ raw_spin_unlock_irq (& task -> pi_lock );
1479
+ }
1480
+ return 1 ;
1459
1481
}
1460
1482
1461
1483
/*
@@ -1939,12 +1961,26 @@ static inline int futex_requeue_pi_wakeup_sync(struct futex_q *q)
1939
1961
* @hb: the hash_bucket of the requeue target futex
1940
1962
*
1941
1963
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
1942
- * target futex if it is uncontended or via a lock steal. Set the futex_q key
1943
- * to the requeue target futex so the waiter can detect the wakeup on the right
1944
- * futex, but remove it from the hb and NULL the rt_waiter so it can detect
1945
- * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1946
- * to protect access to the pi_state to fixup the owner later. Must be called
1947
- * with both q->lock_ptr and hb->lock held.
1964
+ * target futex if it is uncontended or via a lock steal.
1965
+ *
1966
+ * 1) Set @q::key to the requeue target futex key so the waiter can detect
1967
+ * the wakeup on the right futex.
1968
+ *
1969
+ * 2) Dequeue @q from the hash bucket.
1970
+ *
1971
+ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
1972
+ * acquisition.
1973
+ *
1974
+ * 4) Set q->lock_ptr to the requeue target hb->lock for the case that
1975
+ * the waiter has to fix up the pi state.
1976
+ *
1977
+ * 5) Complete the requeue state so the waiter can make progress. After
1978
+ * this point the waiter task can return from the syscall immediately in
1979
+ * case that the pi state does not have to be fixed up.
1980
+ *
1981
+ * 6) Wake the waiter task.
1982
+ *
1983
+ * Must be called with both q->lock_ptr and hb->lock held.
1948
1984
*/
1949
1985
static inline
1950
1986
void requeue_pi_wake_futex (struct futex_q * q , union futex_key * key ,
@@ -1998,7 +2034,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
1998
2034
{
1999
2035
struct futex_q * top_waiter = NULL ;
2000
2036
u32 curval ;
2001
- int ret , vpid ;
2037
+ int ret ;
2002
2038
2003
2039
if (get_futex_value_locked (& curval , pifutex ))
2004
2040
return - EFAULT ;
@@ -2025,7 +2061,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
2025
2061
* and waiting on the 'waitqueue' futex which is always !PI.
2026
2062
*/
2027
2063
if (!top_waiter -> rt_waiter || top_waiter -> pi_state )
2028
- ret = - EINVAL ;
2064
+ return - EINVAL ;
2029
2065
2030
2066
/* Ensure we requeue to the expected futex. */
2031
2067
if (!match_futex (top_waiter -> requeue_pi_key , key2 ))
@@ -2036,17 +2072,23 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
2036
2072
return - EAGAIN ;
2037
2073
2038
2074
/*
2039
- * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
2040
- * the contended case or if set_waiters is 1. The pi_state is returned
2041
- * in ps in contended cases.
2075
+ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
2076
+ * in the contended case or if @set_waiters is true.
2077
+ *
2078
+ * In the contended case PI state is attached to the lock owner. If
2079
+ * the user space lock can be acquired then PI state is attached to
2080
+ * the new owner (@top_waiter->task) when @set_waiters is true.
2042
2081
*/
2043
- vpid = task_pid_vnr (top_waiter -> task );
2044
2082
ret = futex_lock_pi_atomic (pifutex , hb2 , key2 , ps , top_waiter -> task ,
2045
2083
exiting , set_waiters );
2046
2084
if (ret == 1 ) {
2047
- /* Dequeue, wake up and update top_waiter::requeue_state */
2085
+ /*
2086
+ * Lock was acquired in user space and PI state was
2087
+ * attached to @top_waiter->task. That means state is fully
2088
+ * consistent and the waiter can return to user space
2089
+ * immediately after the wakeup.
2090
+ */
2048
2091
requeue_pi_wake_futex (top_waiter , key2 , hb2 );
2049
- return vpid ;
2050
2092
} else if (ret < 0 ) {
2051
2093
/* Rewind top_waiter::requeue_state */
2052
2094
futex_requeue_pi_complete (top_waiter , ret );
@@ -2208,19 +2250,26 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
2208
2250
& exiting , nr_requeue );
2209
2251
2210
2252
/*
2211
- * At this point the top_waiter has either taken uaddr2 or is
2212
- * waiting on it. If the former, then the pi_state will not
2213
- * exist yet, look it up one more time to ensure we have a
2214
- * reference to it. If the lock was taken, @ret contains the
2215
- * VPID of the top waiter task.
2216
- * If the lock was not taken, we have pi_state and an initial
2217
- * refcount on it. In case of an error we have nothing.
2253
+ * At this point the top_waiter has either taken uaddr2 or
2254
+ * is waiting on it. In both cases pi_state has been
2255
+ * established and an initial refcount is held on it. In case of an
2256
+ * error there's nothing.
2218
2257
*
2219
2258
* The top waiter's requeue_state is up to date:
2220
2259
*
2221
- * - If the lock was acquired atomically (ret > 0 ), then
2260
+ * - If the lock was acquired atomically (ret == 1 ), then
2222
2261
* the state is Q_REQUEUE_PI_LOCKED.
2223
2262
*
2263
+ * The top waiter has been dequeued and woken up and can
2264
+ * return to user space immediately. The kernel/user
2265
+ * space state is consistent. In case that there must be
2266
+ * more waiters requeued the WAITERS bit in the user
2267
+ * space futex is set so the top waiter task has to go
2268
+ * into the syscall slowpath to unlock the futex. This
2269
+ * will block until this requeue operation has been
2270
+ * completed and the hash bucket locks have been
2271
+ * dropped.
2272
+ *
2224
2273
* - If the trylock failed with an error (ret < 0) then
2225
2274
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
2226
2275
* happened", or Q_REQUEUE_PI_IGNORE when there was an
@@ -2234,36 +2283,20 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
2234
2283
* the same sanity checks for requeue_pi as the loop
2235
2284
* below does.
2236
2285
*/
2237
- if (ret > 0 ) {
2238
- WARN_ON (pi_state );
2239
- task_count ++ ;
2240
- /*
2241
- * If futex_proxy_trylock_atomic() acquired the
2242
- * user space futex, then the user space value
2243
- * @uaddr2 has been set to the @hb1's top waiter
2244
- * task VPID. This task is guaranteed to be alive
2245
- * and cannot be exiting because it is either
2246
- * sleeping or blocked on @hb2 lock.
2247
- *
2248
- * The @uaddr2 futex cannot have waiters either as
2249
- * otherwise futex_proxy_trylock_atomic() would not
2250
- * have succeeded.
2251
- *
2252
- * In order to requeue waiters to @hb2, pi state is
2253
- * required. Hand in the VPID value (@ret) and
2254
- * allocate PI state with an initial refcount on
2255
- * it.
2256
- */
2257
- ret = attach_to_pi_owner (uaddr2 , ret , & key2 , & pi_state ,
2258
- & exiting );
2259
- WARN_ON (ret );
2260
- }
2261
-
2262
2286
switch (ret ) {
2263
2287
case 0 :
2264
2288
/* We hold a reference on the pi state. */
2265
2289
break ;
2266
2290
2291
+ case 1 :
2292
+ /*
2293
+ * futex_proxy_trylock_atomic() acquired the user space
2294
+ * futex. Adjust task_count.
2295
+ */
2296
+ task_count ++ ;
2297
+ ret = 0 ;
2298
+ break ;
2299
+
2267
2300
/*
2268
2301
* If the above failed, then pi_state is NULL and
2269
2302
* waiter::requeue_state is correct.
@@ -2395,9 +2428,8 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
2395
2428
}
2396
2429
2397
2430
/*
2398
- * We took an extra initial reference to the pi_state either in
2399
- * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
2400
- * to drop it here again.
2431
+ * We took an extra initial reference to the pi_state in
2432
+ * futex_proxy_trylock_atomic(). We need to drop it here again.
2401
2433
*/
2402
2434
put_pi_state (pi_state );
2403
2435
0 commit comments