@@ -333,21 +333,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
 	return prio;
 }
 
+/*
+ * Update the waiter->tree copy of the sort keys.
+ */
 static __always_inline void
 waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
 {
-	waiter->prio = __waiter_prio(task);
-	waiter->deadline = task->dl.deadline;
+	lockdep_assert_held(&waiter->lock->wait_lock);
+	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
+
+	waiter->tree.prio = __waiter_prio(task);
+	waiter->tree.deadline = task->dl.deadline;
+}
+
+/*
+ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
+ */
+static __always_inline void
+waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+{
+	lockdep_assert_held(&waiter->lock->wait_lock);
+	lockdep_assert_held(&task->pi_lock);
+	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
+
+	waiter->pi_tree.prio = waiter->tree.prio;
+	waiter->pi_tree.deadline = waiter->tree.deadline;
 }
 
 /*
- * Only use with rt_mutex_waiter_{less,equal}()
+ * Only use with rt_waiter_node_{less,equal}()
  */
+#define task_to_waiter_node(p)	\
+	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
 #define task_to_waiter(p)	\
-	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
 
-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
-						struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
+					       struct rt_waiter_node *right)
 {
 	if (left->prio < right->prio)
 		return 1;
@@ -364,8 +386,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
 	return 0;
 }
 
-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
-						 struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
+						struct rt_waiter_node *right)
 {
 	if (left->prio != right->prio)
 		return 0;
@@ -385,38 +407,38 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
 static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
 				  struct rt_mutex_waiter *top_waiter)
 {
-	if (rt_mutex_waiter_less(waiter, top_waiter))
+	if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
 		return true;
 
 #ifdef RT_MUTEX_BUILD_SPINLOCKS
 	/*
 	 * Note that RT tasks are excluded from same priority (lateral)
 	 * steals to prevent the introduction of an unbounded latency.
 	 */
-	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+	if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
 		return false;
 
-	return rt_mutex_waiter_equal(waiter, top_waiter);
+	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
 #else
 	return false;
 #endif
 }
 
 #define __node_2_waiter(node) \
-	rb_entry((node), struct rt_mutex_waiter, tree_entry)
+	rb_entry((node), struct rt_mutex_waiter, tree.entry)
 
 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
 {
 	struct rt_mutex_waiter *aw = __node_2_waiter(a);
 	struct rt_mutex_waiter *bw = __node_2_waiter(b);
 
-	if (rt_mutex_waiter_less(aw, bw))
+	if (rt_waiter_node_less(&aw->tree, &bw->tree))
 		return 1;
 
 	if (!build_ww_mutex())
 		return 0;
 
-	if (rt_mutex_waiter_less(bw, aw))
+	if (rt_waiter_node_less(&bw->tree, &aw->tree))
 		return 0;
 
 	/* NOTE: relies on waiter->ww_ctx being set before insertion */
@@ -434,48 +456,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
 static __always_inline void
 rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
 {
-	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
+	lockdep_assert_held(&lock->wait_lock);
+
+	rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
 }
 
 static __always_inline void
 rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
 {
-	if (RB_EMPTY_NODE(&waiter->tree_entry))
+	lockdep_assert_held(&lock->wait_lock);
+
+	if (RB_EMPTY_NODE(&waiter->tree.entry))
 		return;
 
-	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
-	RB_CLEAR_NODE(&waiter->tree_entry);
+	rb_erase_cached(&waiter->tree.entry, &lock->waiters);
+	RB_CLEAR_NODE(&waiter->tree.entry);
 }
 
-#define __node_2_pi_waiter(node) \
-	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
+#define __node_2_rt_node(node) \
+	rb_entry((node), struct rt_waiter_node, entry)
 
-static __always_inline bool
-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
 {
-	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
+	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
 }
 
 static __always_inline void
 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
-	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
+	lockdep_assert_held(&task->pi_lock);
+
+	rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
 }
 
 static __always_inline void
 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
-	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
+	lockdep_assert_held(&task->pi_lock);
+
+	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
 		return;
 
-	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
-	RB_CLEAR_NODE(&waiter->pi_tree_entry);
+	rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
+	RB_CLEAR_NODE(&waiter->pi_tree.entry);
 }
 
-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
+static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
+						 struct task_struct *p)
 {
 	struct task_struct *pi_task = NULL;
 
+	lockdep_assert_held(&lock->wait_lock);
+	lockdep_assert(rt_mutex_owner(lock) == p);
 	lockdep_assert_held(&p->pi_lock);
 
 	if (task_has_pi_waiters(p))
@@ -571,9 +603,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
  * Chain walk basics and protection scope
  *
  * [R] refcount on task
- * [P] task->pi_lock held
+ * [Pn] task->pi_lock held
  * [L] rtmutex->wait_lock held
  *
+ * Normal locking order:
+ *
+ *   rtmutex->wait_lock
+ *     task->pi_lock
+ *
  * Step	Description				Protected by
  *	function arguments:
  *	@task					[R]
@@ -588,27 +625,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
  * again:
  *	  loop_sanity_check();
  * retry:
- * [1]	  lock(task->pi_lock);			[R] acquire [P]
- * [2]	  waiter = task->pi_blocked_on;		[P]
- * [3]	  check_exit_conditions_1();		[P]
- * [4]	  lock = waiter->lock;			[P]
- * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
- *	    unlock(task->pi_lock);		release [P]
+ * [1]	  lock(task->pi_lock);			[R] acquire [P1]
+ * [2]	  waiter = task->pi_blocked_on;		[P1]
+ * [3]	  check_exit_conditions_1();		[P1]
+ * [4]	  lock = waiter->lock;			[P1]
+ * [5]	  if (!try_lock(lock->wait_lock)) {	[P1] try to acquire [L]
+ *	    unlock(task->pi_lock);		release [P1]
 *	    goto retry;
 *	  }
- * [6]	  check_exit_conditions_2();		[P] + [L]
- * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
- * [8]	  unlock(task->pi_lock);		release [P]
+ * [6]	  check_exit_conditions_2();		[P1] + [L]
+ * [7]	  requeue_lock_waiter(lock, waiter);	[P1] + [L]
+ * [8]	  unlock(task->pi_lock);		release [P1]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
- *	  lock(task->pi_lock);			[L] acquire [P]
- * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
- * [12]	  check_exit_conditions_4();		[P] + [L]
- * [13]	  unlock(task->pi_lock);		release [P]
+ *	  lock(task->pi_lock);			[L] acquire [P2]
+ * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
+ * [12]	  check_exit_conditions_4();		[P2] + [L]
+ * [13]	  unlock(task->pi_lock);		release [P2]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
+ *
+ * Where P1 is the blocking task and P2 is the lock owner; going up one step
+ * the owner becomes the next blocked task etc..
+ *
+ *
 */
 static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 					      enum rtmutex_chainwalk chwalk,
@@ -756,21 +798,26 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * enabled we continue, but stop the requeueing in the chain
 	 * walk.
 	 */
-	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+	if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
 		if (!detect_deadlock)
 			goto out_unlock_pi;
 		else
 			requeue = false;
 	}
 
 	/*
-	 * [4] Get the next lock
+	 * [4] Get the next lock; per holding task->pi_lock we can't unblock
+	 *     and guarantee @lock's existence.
 	 */
 	lock = waiter->lock;
 	/*
 	 * [5] We need to trylock here as we are holding task->pi_lock,
 	 * which is the reverse lock order versus the other rtmutex
 	 * operations.
+	 *
+	 * Per the above, holding task->pi_lock guarantees lock exists, so
+	 * inverting this lock order is infeasible from a life-time
+	 * perspective.
 	 */
 	if (!raw_spin_trylock(&lock->wait_lock)) {
 		raw_spin_unlock_irq(&task->pi_lock);
@@ -874,17 +921,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * or
 	 *
 	 * DL CBS enforcement advancing the effective deadline.
-	 *
-	 * Even though pi_waiters also uses these fields, and that tree is only
-	 * updated in [11], we can do this here, since we hold [L], which
-	 * serializes all pi_waiters access and rb_erase() does not care about
-	 * the values of the node being removed.
 	 */
 	waiter_update_prio(waiter, task);
 
 	rt_mutex_enqueue(lock, waiter);
 
-	/* [8] Release the task */
+	/*
+	 * [8] Release the (blocking) task in preparation for
+	 * taking the owner task in [10].
+	 *
+	 * Since we hold lock->wait_lock, task cannot unblock, even if we
+	 * release task->pi_lock.
+	 */
 	raw_spin_unlock(&task->pi_lock);
 	put_task_struct(task);
 
@@ -908,7 +956,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		return 0;
 	}
 
-	/* [10] Grab the next task, i.e. the owner of @lock */
+	/*
+	 * [10] Grab the next task, i.e. the owner of @lock
+	 *
+	 * Per holding lock->wait_lock and checking for !owner above, there
+	 * must be an owner and it cannot go away.
+	 */
 	task = get_task_struct(rt_mutex_owner(lock));
 	raw_spin_lock(&task->pi_lock);
 
@@ -921,8 +974,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 * and adjust the priority of the owner.
 		 */
 		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+		waiter_clone_prio(waiter, task);
 		rt_mutex_enqueue_pi(task, waiter);
-		rt_mutex_adjust_prio(task);
+		rt_mutex_adjust_prio(lock, task);
 
 	} else if (prerequeue_top_waiter == waiter) {
 		/*
@@ -937,8 +991,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 */
 		rt_mutex_dequeue_pi(task, waiter);
 		waiter = rt_mutex_top_waiter(lock);
+		waiter_clone_prio(waiter, task);
 		rt_mutex_enqueue_pi(task, waiter);
-		rt_mutex_adjust_prio(task);
+		rt_mutex_adjust_prio(lock, task);
 	} else {
 		/*
 		 * Nothing changed. No need to do any priority
@@ -1154,6 +1209,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 	waiter->task = task;
 	waiter->lock = lock;
 	waiter_update_prio(waiter, task);
+	waiter_clone_prio(waiter, task);
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
@@ -1187,7 +1243,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 		rt_mutex_dequeue_pi(owner, top_waiter);
 		rt_mutex_enqueue_pi(owner, waiter);
 
-		rt_mutex_adjust_prio(owner);
+		rt_mutex_adjust_prio(lock, owner);
 		if (owner->pi_blocked_on)
 			chain_walk = 1;
 	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1234,6 +1290,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
 {
 	struct rt_mutex_waiter *waiter;
 
+	lockdep_assert_held(&lock->wait_lock);
+
 	raw_spin_lock(&current->pi_lock);
 
 	waiter = rt_mutex_top_waiter(lock);
@@ -1246,7 +1304,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
 	 * task unblocks.
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
-	rt_mutex_adjust_prio(current);
+	rt_mutex_adjust_prio(lock, current);
 
 	/*
 	 * As we are waking up the top waiter, and the waiter stays
@@ -1482,7 +1540,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
 	if (rt_mutex_has_waiters(lock))
 		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
 
-	rt_mutex_adjust_prio(owner);
+	rt_mutex_adjust_prio(lock, owner);
 
 	/* Store the lock on which owner is blocked or NULL */
 	next_lock = task_blocked_on_lock(owner);
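
For context, the hunks above rely on a companion change (not shown here) that splits the waiter's single prio/deadline/rb-node set into two struct rt_waiter_node copies: waiter->tree, keyed into lock->waiters under lock->wait_lock, and waiter->pi_tree, keyed into the owner's pi_waiters under the owner's pi_lock. The standalone C sketch below only models that shape and the waiter_clone_prio() copy step with simplified types; the field layout and the omission of the rb_node/locking plumbing are assumptions for illustration, not the kernel definitions.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures; illustrative only. */
struct rt_waiter_node {
	/* struct rb_node entry; -- rbtree linkage omitted in this sketch */
	int      prio;
	uint64_t deadline;
};

struct rt_mutex_waiter {
	struct rt_waiter_node tree;     /* sort keys for lock->waiters (lock->wait_lock) */
	struct rt_waiter_node pi_tree;  /* sort keys for owner->pi_waiters (owner->pi_lock) */
};

/*
 * Mirrors the idea of waiter_clone_prio(): the pi_tree keys are only ever
 * refreshed from the tree copy, so the pi_waiters tree never observes a
 * half-updated sort key.
 */
static void clone_prio(struct rt_mutex_waiter *w)
{
	w->pi_tree.prio     = w->tree.prio;
	w->pi_tree.deadline = w->tree.deadline;
}

int main(void)
{
	struct rt_mutex_waiter w = { .tree = { .prio = 10, .deadline = 42 } };

	clone_prio(&w);
	printf("pi_tree: prio=%d deadline=%llu\n",
	       w.pi_tree.prio, (unsigned long long)w.pi_tree.deadline);
	return 0;
}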