Commit 5b6547e

Author: Peter Zijlstra

sched/core: Fix forceidle balancing
Steve reported that ChromeOS encounters the forceidle balancer being
run from rt_mutex_setprio()'s balance_callback() invocation and
explodes.

Now, the forceidle balancer gets queued every time the idle task gets
selected, i.e. from set_next_task(), which is strictly too often.

rt_mutex_setprio() also uses set_next_task() in the 'change' pattern:

        queued = task_on_rq_queued(p); /* p->on_rq == TASK_ON_RQ_QUEUED */
        running = task_current(rq, p); /* rq->curr == p */

        if (queued)
                dequeue_task(...);
        if (running)
                put_prev_task(...);

        /* change task properties */

        if (queued)
                enqueue_task(...);
        if (running)
                set_next_task(...);

However, rt_mutex_setprio() will explicitly not run this pattern on
the idle task (since priority-boosting the idle task is quite insane).
Most other 'change' pattern users are pidhash based and would also not
apply to idle.

Also, the change pattern doesn't contain a __balance_callback()
invocation, so we could end up with an out-of-band balance callback,
which *should* trigger the WARN in rq_pin_lock() (which guards against
this exact anti-pattern).

So while none of that explains how this happens, it does indicate that
having it in set_next_task() might not be the most robust option.

Instead, explicitly queue the forceidle balancer from pick_next_task()
when it does indeed result in forceidle selection. Having it here
ensures it can only be triggered under the __schedule() rq->lock
instance, and hence must be run from that context.

This also happens to clean up the code a little, so win-win.

Fixes: d2dfa17 ("sched: Trivial forced-newidle balancer")
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: T.J. Alumbaugh <talumbau@chromium.org>
Link: https://lkml.kernel.org/r/20220330160535.GN8939@worktop.programming.kicks-ass.net
1 parent 3123109 commit 5b6547e
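
For context on what "queueing the forceidle balancer" means here:
queue_core_balance() installs a balance callback that runs
sched_core_balance() once the rq lock is about to be released. A rough
sketch, assuming the mainline queue_balance_callback() helper and the
per-CPU core_balance_head shown in the diff below (the extra
early-return checks inside queue_core_balance() are elided):

        static DEFINE_PER_CPU(struct callback_head, core_balance_head);

        static void queue_core_balance(struct rq *rq)
        {
                if (!sched_core_enabled(rq))
                        return;

                /* further guards elided: only queue when this CPU was forced idle */

                queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu),
                                       sched_core_balance);
        }

The bug report amounts to this callback firing from rt_mutex_setprio()'s
balance-callback flush rather than from __schedule(); the patch below
rules that out by construction.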

3 files changed: +10, -11 lines

kernel/sched/core.c

Lines changed: 10 additions & 4 deletions

@@ -5752,6 +5752,8 @@ static inline struct task_struct *pick_task(struct rq *rq)
 
 extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
 
+static void queue_core_balance(struct rq *rq);
+
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -5801,7 +5803,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                 }
 
                 rq->core_pick = NULL;
-                return next;
+                goto out;
         }
 
         put_prev_task_balance(rq, prev, rf);
@@ -5851,7 +5853,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                          */
                         WARN_ON_ONCE(fi_before);
                         task_vruntime_update(rq, next, false);
-                        goto done;
+                        goto out_set_next;
                 }
         }
 
@@ -5970,8 +5972,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                         resched_curr(rq_i);
         }
 
-done:
+out_set_next:
         set_next_task(rq, next);
+out:
+        if (rq->core->core_forceidle_count && next == rq->idle)
+                queue_core_balance(rq);
+
         return next;
 }
 
@@ -6066,7 +6072,7 @@ static void sched_core_balance(struct rq *rq)
 
 static DEFINE_PER_CPU(struct callback_head, core_balance_head);
 
-void queue_core_balance(struct rq *rq)
+static void queue_core_balance(struct rq *rq)
 {
         if (!sched_core_enabled(rq))
                 return;
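
Taken together, these hunks funnel both early exits ('goto out' and
'goto out_set_next') into a single tail of pick_next_task(), so the
balancer can only be queued right here, under __schedule()'s rq->lock.
Condensed from the hunks above:

        out_set_next:
                set_next_task(rq, next);
        out:
                /* queue the forced-idle balancer only when this pick chose idle */
                if (rq->core->core_forceidle_count && next == rq->idle)
                        queue_core_balance(rq);

                return next;
        }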

kernel/sched/idle.c

Lines changed: 0 additions & 1 deletion

@@ -434,7 +434,6 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
 {
         update_idle_core(rq);
         schedstat_inc(rq->sched_goidle);
-        queue_core_balance(rq);
 }
 
 #ifdef CONFIG_SMP
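
With the call removed, picking the idle task no longer has a queueing
side effect, so 'change'-pattern callers such as rt_mutex_setprio()
that reach set_next_task() can no longer queue the balancer out of
band. The function reduces to (reconstructed from the hunk above):

        static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
        {
                update_idle_core(rq);
                schedstat_inc(rq->sched_goidle);
        }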

kernel/sched/sched.h

Lines changed: 0 additions & 6 deletions

@@ -1232,8 +1232,6 @@ static inline bool sched_group_cookie_match(struct rq *rq,
         return false;
 }
 
-extern void queue_core_balance(struct rq *rq);
-
 static inline bool sched_core_enqueued(struct task_struct *p)
 {
         return !RB_EMPTY_NODE(&p->core_node);
@@ -1267,10 +1265,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
         return &rq->__lock;
 }
 
-static inline void queue_core_balance(struct rq *rq)
-{
-}
-
 static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
 {
         return true;
