Skip to content

Commit b55945c

Browse files
author
Peter Zijlstra
committed
sched: Fix pick_next_task_fair() vs try_to_wake_up() race
Syzkaller robot reported KCSAN tripping over the ASSERT_EXCLUSIVE_WRITER(p->on_rq) in __block_task().

The report noted that both pick_next_task_fair() and try_to_wake_up() were concurrently trying to write to the same p->on_rq, violating the assertion -- even though both paths hold rq->__lock.

The logical consequence is that both code paths end up holding a different rq->__lock. And looking through ttwu(), this is possible when the __block_task() 'p->on_rq = 0' store is visible to the ttwu() 'p->on_rq' load, which then assumes the task is not queued and continues to migrate it.

Rearrange things such that __block_task() releases @p with the store and no code thereafter will use @p again.

Fixes: 152e11f ("sched/fair: Implement delayed dequeue")
Reported-by: syzbot+0ec1e96c2cdf5c0e512a@syzkaller.appspotmail.com
Reported-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Link: https://lkml.kernel.org/r/20241023093641.GE16066@noisy.programming.kicks-ass.net
1 parent 42f7652 commit b55945c

File tree

2 files changed

+46
-9
lines changed

2 files changed

+46
-9
lines changed

kernel/sched/fair.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5625,8 +5625,9 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
56255625
struct sched_entity *se = pick_eevdf(cfs_rq);
56265626
if (se->sched_delayed) {
56275627
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
5628-
SCHED_WARN_ON(se->sched_delayed);
5629-
SCHED_WARN_ON(se->on_rq);
5628+
/*
5629+
* Must not reference @se again, see __block_task().
5630+
*/
56305631
return NULL;
56315632
}
56325633
return se;
@@ -7176,7 +7177,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
71767177
/* Fix-up what dequeue_task_fair() skipped */
71777178
hrtick_update(rq);
71787179

7179-
/* Fix-up what block_task() skipped. */
7180+
/*
7181+
* Fix-up what block_task() skipped.
7182+
*
7183+
* Must be last, @p might not be valid after this.
7184+
*/
71807185
__block_task(rq, p);
71817186
}
71827187

@@ -7193,12 +7198,14 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
71937198
if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
71947199
util_est_dequeue(&rq->cfs, p);
71957200

7196-
if (dequeue_entities(rq, &p->se, flags) < 0) {
7197-
util_est_update(&rq->cfs, p, DEQUEUE_SLEEP);
7201+
util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
7202+
if (dequeue_entities(rq, &p->se, flags) < 0)
71987203
return false;
7199-
}
72007204

7201-
util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
7205+
/*
7206+
* Must not reference @p after dequeue_entities(DEQUEUE_DELAYED).
7207+
*/
7208+
72027209
hrtick_update(rq);
72037210
return true;
72047211
}

kernel/sched/sched.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2769,15 +2769,45 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
27692769

27702770
static inline void __block_task(struct rq *rq, struct task_struct *p)
27712771
{
2772-
WRITE_ONCE(p->on_rq, 0);
2773-
ASSERT_EXCLUSIVE_WRITER(p->on_rq);
27742772
if (p->sched_contributes_to_load)
27752773
rq->nr_uninterruptible++;
27762774

27772775
if (p->in_iowait) {
27782776
atomic_inc(&rq->nr_iowait);
27792777
delayacct_blkio_start();
27802778
}
2779+
2780+
ASSERT_EXCLUSIVE_WRITER(p->on_rq);
2781+
2782+
/*
2783+
* The moment this write goes through, ttwu() can swoop in and migrate
2784+
* this task, rendering our rq->__lock ineffective.
2785+
*
2786+
* __schedule() try_to_wake_up()
2787+
* LOCK rq->__lock LOCK p->pi_lock
2788+
* pick_next_task()
2789+
* pick_next_task_fair()
2790+
* pick_next_entity()
2791+
* dequeue_entities()
2792+
* __block_task()
2793+
* RELEASE p->on_rq = 0 if (p->on_rq && ...)
2794+
* break;
2795+
*
2796+
* ACQUIRE (after ctrl-dep)
2797+
*
2798+
* cpu = select_task_rq();
2799+
* set_task_cpu(p, cpu);
2800+
* ttwu_queue()
2801+
* ttwu_do_activate()
2802+
* LOCK rq->__lock
2803+
* activate_task()
2804+
* STORE p->on_rq = 1
2805+
* UNLOCK rq->__lock
2806+
*
2807+
* Callers must ensure to not reference @p after this -- we no longer
2808+
* own it.
2809+
*/
2810+
smp_store_release(&p->on_rq, 0);
27812811
}
27822812

27832813
extern void activate_task(struct rq *rq, struct task_struct *p, int flags);

0 commit comments

Comments
 (0)