Skip to content

Commit 04193d5

Browse files
author
Peter Zijlstra
committed
sched: Fix balance_push() vs __sched_setscheduler()
The purpose of balance_push() is to act as a filter on task selection in the case of CPU hotplug, specifically when taking the CPU out. It does this by (ab)using the balance callback infrastructure, with the express purpose of keeping all the unlikely/odd cases in a single place. In order to serve its purpose, the balance_push_callback needs to be (exclusively) on the callback list at all times (noting that the callback always places itself back on the list the moment it runs, also noting that when the CPU goes down, regular balancing concerns are moot, so ignoring them is fine). And herein lies the problem: __sched_setscheduler()'s use of splice_balance_callbacks() takes the callbacks off the list across a lock-break, making it possible for an interleaving __schedule() to see an empty list and not get filtered. Fixes: ae79270 ("sched: Optimize finish_lock_switch()") Reported-by: Jing-Ting Wu <jing-ting.wu@mediatek.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Jing-Ting Wu <jing-ting.wu@mediatek.com> Link: https://lkml.kernel.org/r/20220519134706.GH2578@worktop.programming.kicks-ass.net
1 parent b13bacc commit 04193d5

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

kernel/sched/core.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4798,25 +4798,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
47984798

47994799
static void balance_push(struct rq *rq);

/*
 * balance_push_callback is a right abuse of the callback interface and plays
 * by significantly different rules.
 *
 * Where the normal balance_callback's purpose is to be run in the same context
 * that queued it (only later, when it's safe to drop rq->lock again),
 * balance_push_callback is specifically targeted at __schedule().
 *
 * This abuse is tolerated because it places all the unlikely/odd cases behind
 * a single test, namely: rq->balance_callback == NULL.
 */
struct callback_head balance_push_callback = {
	.next = NULL,
	/* Cast: balance_push() takes a struct rq *, not a struct callback_head *. */
	.func = (void (*)(struct callback_head *))balance_push,
};
48054816

4806-
static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4817+
static inline struct callback_head *
4818+
__splice_balance_callbacks(struct rq *rq, bool split)
48074819
{
48084820
struct callback_head *head = rq->balance_callback;
48094821

4822+
if (likely(!head))
4823+
return NULL;
4824+
48104825
lockdep_assert_rq_held(rq);
4811-
if (head)
4826+
/*
4827+
* Must not take balance_push_callback off the list when
4828+
* splice_balance_callbacks() and balance_callbacks() are not
4829+
* in the same rq->lock section.
4830+
*
4831+
* In that case it would be possible for __schedule() to interleave
4832+
* and observe the list empty.
4833+
*/
4834+
if (split && head == &balance_push_callback)
4835+
head = NULL;
4836+
else
48124837
rq->balance_callback = NULL;
48134838

48144839
return head;
48154840
}
48164841

4842+
static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
4843+
{
4844+
return __splice_balance_callbacks(rq, true);
4845+
}
4846+
48174847
static void __balance_callbacks(struct rq *rq)
48184848
{
4819-
do_balance_callbacks(rq, splice_balance_callbacks(rq));
4849+
do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
48204850
}
48214851

48224852
static inline void balance_callbacks(struct rq *rq, struct callback_head *head)

kernel/sched/sched.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1693,6 +1693,11 @@ queue_balance_callback(struct rq *rq,
16931693
{
16941694
lockdep_assert_rq_held(rq);
16951695

1696+
/*
1697+
* Don't (re)queue an already queued item; nor queue anything when
1698+
* balance_push() is active, see the comment with
1699+
* balance_push_callback.
1700+
*/
16961701
if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
16971702
return;
16981703

0 commit comments

Comments
 (0)