
Commit 2e3f309

Merge tag 'sched_ext-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fixes from Tejun Heo:

 - Fix corner case bug where ops.dispatch() couldn't extend the execution of the current task if SCX_OPS_ENQ_LAST is set.

 - Fix ops.cpu_release() not being called when an SCX task is preempted by a higher priority sched class task.

 - Fix builtin idle mask being incorrectly left as busy after an idle CPU is picked and kicked.

 - scx_ops_bypass() was unnecessarily using rq_lock(), which comes with rq pinning related sanity checks that could trigger spuriously. Switch to raw_spin_rq_lock().

* tag 'sched_ext-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  sched_ext: idle: Refresh idle masks during idle-to-idle transitions
  sched_ext: switch class when preempted by higher priority scheduler
  sched_ext: Replace rq_lock() to raw_spin_rq_lock() in scx_ops_bypass()
  sched_ext: keep running prev when prev->scx.slice != 0
2 parents 58624e4 + a2a3374 commit 2e3f309

3 files changed, +74 -26 lines

kernel/sched/ext.c

Lines changed: 67 additions & 20 deletions
@@ -2747,6 +2747,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 {
 	struct scx_dsp_ctx *dspc = this_cpu_ptr(scx_dsp_ctx);
 	bool prev_on_scx = prev->sched_class == &ext_sched_class;
+	bool prev_on_rq = prev->scx.flags & SCX_TASK_QUEUED;
 	int nr_loops = SCX_DSP_MAX_LOOPS;
 
 	lockdep_assert_rq_held(rq);
@@ -2779,8 +2780,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	 * See scx_ops_disable_workfn() for the explanation on the
 	 * bypassing test.
 	 */
-	if ((prev->scx.flags & SCX_TASK_QUEUED) &&
-	    prev->scx.slice && !scx_rq_bypassing(rq)) {
+	if (prev_on_rq && prev->scx.slice && !scx_rq_bypassing(rq)) {
 		rq->scx.flags |= SCX_RQ_BAL_KEEP;
 		goto has_tasks;
 	}
@@ -2813,6 +2813,10 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 
 	flush_dispatch_buf(rq);
 
+	if (prev_on_rq && prev->scx.slice) {
+		rq->scx.flags |= SCX_RQ_BAL_KEEP;
+		goto has_tasks;
+	}
 	if (rq->scx.local_dsq.nr)
 		goto has_tasks;
 	if (consume_global_dsq(rq))
@@ -2838,8 +2842,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
 	 * Didn't find another task to run. Keep running @prev unless
 	 * %SCX_OPS_ENQ_LAST is in effect.
 	 */
-	if ((prev->scx.flags & SCX_TASK_QUEUED) &&
-	    (!static_branch_unlikely(&scx_ops_enq_last) ||
+	if (prev_on_rq && (!static_branch_unlikely(&scx_ops_enq_last) ||
 	     scx_rq_bypassing(rq))) {
 		rq->scx.flags |= SCX_RQ_BAL_KEEP;
 		goto has_tasks;
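
The two balance_one() hunks above are what let a BPF scheduler extend the running task again: when ops.dispatch() refills prev->scx.slice, the kernel now keeps prev even with SCX_OPS_ENQ_LAST set. A minimal sketch of such a dispatch callback in the style of the tools/sched_ext example schedulers; the callback name, SHARED_DSQ, and the exact kfunc used to pull from it are illustrative assumptions, not part of this patch:

void BPF_STRUCT_OPS(example_dispatch, s32 cpu, struct task_struct *prev)
{
	/* illustrative: try to pull a task queued on a custom DSQ first */
	if (scx_bpf_dsq_move_to_local(SHARED_DSQ))
		return;

	/*
	 * Nothing else to run: refill @prev's slice so it keeps running.
	 * Before this fix, with SCX_OPS_ENQ_LAST set, balance_one() did
	 * not honor the refreshed slice at this point.
	 */
	if (prev && (prev->scx.flags & SCX_TASK_QUEUED))
		prev->scx.slice = SCX_SLICE_DFL;
}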
@@ -3034,7 +3037,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 	 */
 	if (p->scx.slice && !scx_rq_bypassing(rq)) {
 		dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
-		return;
+		goto switch_class;
 	}
 
 	/*
@@ -3051,6 +3054,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
 		}
 	}
 
+switch_class:
 	if (next && next->sched_class != &ext_sched_class)
 		switch_class(rq, next);
 }
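
These two hunks are the ops.cpu_release() fix: previously, when @p still had slice left and was requeued at the head of the local DSQ, put_prev_task_scx() returned before switch_class() ran, so a preemption by a higher priority sched class never reached the BPF scheduler. A typical cpu_release callback that depends on this notification looks roughly like the following; the callback name is illustrative, and scx_bpf_reenqueue_local() is how the example schedulers usually hand back tasks on release:

void BPF_STRUCT_OPS(example_cpu_release, s32 cpu,
		    struct scx_cpu_release_args *args)
{
	/*
	 * The CPU is being taken over by a higher priority sched class;
	 * push the tasks still sitting in its local DSQ back through
	 * ops.enqueue() so the scheduler can place them elsewhere.
	 */
	scx_bpf_reenqueue_local();
}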
@@ -3586,16 +3590,8 @@ static void reset_idle_masks(void)
 	cpumask_copy(idle_masks.smt, cpu_online_mask);
 }
 
-void __scx_update_idle(struct rq *rq, bool idle)
+static void update_builtin_idle(int cpu, bool idle)
 {
-	int cpu = cpu_of(rq);
-
-	if (SCX_HAS_OP(update_idle) && !scx_rq_bypassing(rq)) {
-		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
-		if (!static_branch_unlikely(&scx_builtin_idle_enabled))
-			return;
-	}
-
 	if (idle)
 		cpumask_set_cpu(cpu, idle_masks.cpu);
 	else
@@ -3622,6 +3618,57 @@ void __scx_update_idle(struct rq *rq, bool idle)
 #endif
 }
 
+/*
+ * Update the idle state of a CPU to @idle.
+ *
+ * If @do_notify is true, ops.update_idle() is invoked to notify the scx
+ * scheduler of an actual idle state transition (idle to busy or vice
+ * versa). If @do_notify is false, only the idle state in the idle masks is
+ * refreshed without invoking ops.update_idle().
+ *
+ * This distinction is necessary, because an idle CPU can be "reserved" and
+ * awakened via scx_bpf_pick_idle_cpu() + scx_bpf_kick_cpu(), marking it as
+ * busy even if no tasks are dispatched. In this case, the CPU may return
+ * to idle without a true state transition. Refreshing the idle masks
+ * without invoking ops.update_idle() ensures accurate idle state tracking
+ * while avoiding unnecessary updates and maintaining balanced state
+ * transitions.
+ */
+void __scx_update_idle(struct rq *rq, bool idle, bool do_notify)
+{
+	int cpu = cpu_of(rq);
+
+	lockdep_assert_rq_held(rq);
+
+	/*
+	 * Trigger ops.update_idle() only when transitioning from a task to
+	 * the idle thread and vice versa.
+	 *
+	 * Idle transitions are indicated by do_notify being set to true,
+	 * managed by put_prev_task_idle()/set_next_task_idle().
+	 */
+	if (SCX_HAS_OP(update_idle) && do_notify && !scx_rq_bypassing(rq))
+		SCX_CALL_OP(SCX_KF_REST, update_idle, cpu_of(rq), idle);
+
+	/*
+	 * Update the idle masks:
+	 * - for real idle transitions (do_notify == true)
+	 * - for idle-to-idle transitions (indicated by the previous task
+	 *   being the idle thread, managed by pick_task_idle())
+	 *
+	 * Skip updating idle masks if the previous task is not the idle
+	 * thread, since set_next_task_idle() has already handled it when
+	 * transitioning from a task to the idle thread (calling this
+	 * function with do_notify == true).
+	 *
+	 * In this way we can avoid updating the idle masks twice,
+	 * unnecessarily.
+	 */
+	if (static_branch_likely(&scx_builtin_idle_enabled))
+		if (do_notify || is_idle_task(rq->curr))
+			update_builtin_idle(cpu, idle);
+}
+
 static void handle_hotplug(struct rq *rq, bool online)
 {
 	int cpu = cpu_of(rq);
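
With @do_notify, ops.update_idle() fires only on genuine task-to-idle (and back) transitions, while the builtin idle masks are still refreshed when a CPU that was reserved via scx_bpf_pick_idle_cpu() + scx_bpf_kick_cpu() goes straight back to idle. A sketch of what that guarantee means for an ops.update_idle() implementation; the callback and counter names are illustrative:

u64 nr_idle_transitions;	/* counts only real busy -> idle switches */

void BPF_STRUCT_OPS(example_update_idle, s32 cpu, bool idle)
{
	/*
	 * Invoked with do_notify == true only, so a CPU that was merely
	 * reserved and kicked but never ran a task does not inflate the
	 * count with a spurious idle notification.
	 */
	if (idle)
		__sync_fetch_and_add(&nr_idle_transitions, 1);
}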
@@ -4744,10 +4791,9 @@ static void scx_ops_bypass(bool bypass)
 	 */
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
-		struct rq_flags rf;
 		struct task_struct *p, *n;
 
-		rq_lock(rq, &rf);
+		raw_spin_rq_lock(rq);
 
 		if (bypass) {
 			WARN_ON_ONCE(rq->scx.flags & SCX_RQ_BYPASSING);
@@ -4763,7 +4809,7 @@ static void scx_ops_bypass(bool bypass)
 		 * sees scx_rq_bypassing() before moving tasks to SCX.
 		 */
 		if (!scx_enabled()) {
-			rq_unlock(rq, &rf);
+			raw_spin_rq_unlock(rq);
 			continue;
 		}
 
@@ -4783,10 +4829,11 @@ static void scx_ops_bypass(bool bypass)
 			sched_enq_and_set_task(&ctx);
 		}
 
-		rq_unlock(rq, &rf);
-
 		/* resched to restore ticks and idle state */
-		resched_cpu(cpu);
+		if (cpu_online(cpu) || cpu == smp_processor_id())
+			resched_curr(rq);
+
+		raw_spin_rq_unlock(rq);
 	}
 
 	atomic_dec(&scx_ops_breather_depth);
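
The lock switch in scx_ops_bypass() works because rq_lock() is not just the raw lock: roughly, per kernel/sched/sched.h, it also pins the rq via rq_pin_lock(), which registers a lockdep pin and arms rq clock-update sanity checks, and those are what could fire spuriously from the bypass path; raw_spin_rq_lock() takes only the lock. Paraphrased for illustration, not part of this patch:

static inline void rq_lock(struct rq *rq, struct rq_flags *rf)
	__acquires(rq->lock)
{
	raw_spin_rq_lock(rq);
	rq_pin_lock(rq, rf);	/* lockdep pin + clock-update debug state */
}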

kernel/sched/ext.h

Lines changed: 4 additions & 4 deletions
@@ -57,15 +57,15 @@ static inline void init_sched_ext_class(void) {}
 #endif /* CONFIG_SCHED_CLASS_EXT */
 
 #if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
-void __scx_update_idle(struct rq *rq, bool idle);
+void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);
 
-static inline void scx_update_idle(struct rq *rq, bool idle)
+static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
 {
 	if (scx_enabled())
-		__scx_update_idle(rq, idle);
+		__scx_update_idle(rq, idle, do_notify);
 }
 #else
-static inline void scx_update_idle(struct rq *rq, bool idle) {}
+static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {}
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED

kernel/sched/idle.c

Lines changed: 3 additions & 2 deletions
@@ -452,19 +452,20 @@ static void wakeup_preempt_idle(struct rq *rq, struct task_struct *p, int flags)
 static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct task_struct *next)
 {
 	dl_server_update_idle_time(rq, prev);
-	scx_update_idle(rq, false);
+	scx_update_idle(rq, false, true);
 }
 
 static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first)
 {
 	update_idle_core(rq);
-	scx_update_idle(rq, true);
+	scx_update_idle(rq, true, true);
 	schedstat_inc(rq->sched_goidle);
 	next->se.exec_start = rq_clock_task(rq);
 }
 
 struct task_struct *pick_task_idle(struct rq *rq)
 {
+	scx_update_idle(rq, true, false);
 	return rq->idle;
 }