Skip to content

Commit 88c56cf

Browse files
auldpPeter Zijlstra
authored and committed
sched/fair: Block nohz tick_stop when cfs bandwidth in use
CFS bandwidth limits and NOHZ full don't play well together. Tasks can easily run well past their quotas before a remote tick does accounting. This leads to long, multi-period stalls before such tasks can run again.

Currently, when presented with these conflicting requirements, the scheduler is favoring nohz_full and letting the tick be stopped. However, nohz tick stopping is already best-effort; there are a number of conditions that can prevent it, whereas cfs runtime bandwidth is expected to be enforced.

Make the scheduler favor bandwidth over stopping the tick by setting TICK_DEP_BIT_SCHED when the only running task is a cfs task with runtime limit enabled. We use cfs_b->hierarchical_quota to determine if the task requires the tick.

Add check in pick_next_task_fair() as well since that is where we have a handle on the task that is actually going to be running.

Add check in sched_can_stop_tick() to cover some edge cases such as nr_running going from 2->1 and the 1 remains the running task.

Reviewed-By: Ben Segall <bsegall@google.com>
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230712133357.381137-3-pauld@redhat.com
1 parent c98c182 commit 88c56cf

File tree

4 files changed

+81
-1
lines changed

4 files changed

+81
-1
lines changed

kernel/sched/core.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,20 @@ static void nohz_csd_func(void *info)
11941194
#endif /* CONFIG_NO_HZ_COMMON */
11951195

11961196
#ifdef CONFIG_NO_HZ_FULL
1197+
/*
 * Decide whether @p on @rq is a candidate for the CFS bandwidth check
 * done in sched_can_stop_tick().
 *
 * Returns true only when all of the following hold:
 *  - @rq has exactly one running task,
 *  - @p belongs to the fair scheduling class,
 *  - @p is queued on a runqueue.
 *
 * This does not look at bandwidth limits itself; the caller follows up
 * with cfs_task_bw_constrained().
 */
static inline bool __need_bw_check(struct rq *rq, struct task_struct *p)
1198+
{
1199+
/* Only the single-runnable-task case is of interest here. */
if (rq->nr_running != 1)
1200+
return false;
1201+
1202+
/* Bandwidth limits being checked are CFS ones; skip other classes. */
if (p->sched_class != &fair_sched_class)
1203+
return false;
1204+
1205+
/* The task must still be queued. */
if (!task_on_rq_queued(p))
1206+
return false;
1207+
1208+
return true;
1209+
}
1210+
11971211
bool sched_can_stop_tick(struct rq *rq)
11981212
{
11991213
int fifo_nr_running;
@@ -1229,6 +1243,18 @@ bool sched_can_stop_tick(struct rq *rq)
12291243
if (rq->nr_running > 1)
12301244
return false;
12311245

1246+
/*
1247+
* If there is one task and it has CFS runtime bandwidth constraints
1248+
* and it's on the cpu now we don't want to stop the tick.
1249+
* This check prevents clearing the bit if a newly enqueued task here is
1250+
* dequeued by migrating while the constrained task continues to run.
1251+
* E.g. going from 2->1 without going through pick_next_task().
1252+
*/
1253+
if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
1254+
if (cfs_task_bw_constrained(rq->curr))
1255+
return false;
1256+
}
1257+
12321258
return true;
12331259
}
12341260
#endif /* CONFIG_NO_HZ_FULL */

kernel/sched/fair.c

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6189,6 +6189,46 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
61896189
rq_clock_stop_loop_update(rq);
61906190
}
61916191

6192+
/*
 * Report whether task @p is subject to a CFS runtime bandwidth limit.
 *
 * Returns false when CFS bandwidth control is not in use at all.
 * Otherwise returns true if the task's cfs_rq has runtime_enabled set, or
 * if the hierarchical_quota of its task group's cfs_bandwidth is anything
 * other than RUNTIME_INF; returns false in all remaining cases.
 */
bool cfs_task_bw_constrained(struct task_struct *p)
6193+
{
6194+
struct cfs_rq *cfs_rq = task_cfs_rq(p);
6195+
6196+
/* Nothing can be constrained if bandwidth control is unused. */
if (!cfs_bandwidth_used())
6197+
return false;
6198+
6199+
/*
 * Constrained either directly on this cfs_rq or via a finite
 * hierarchical quota on the group.
 * NOTE(review): hierarchical_quota presumably reflects limits set on
 * ancestor groups as well -- confirm against where it is computed.
 */
if (cfs_rq->runtime_enabled ||
6200+
tg_cfs_bandwidth(cfs_rq->tg)->hierarchical_quota != RUNTIME_INF)
6201+
return true;
6202+
6203+
return false;
6204+
}
6205+
6206+
#ifdef CONFIG_NO_HZ_FULL
6207+
/*
 * Called from pick_next_task_fair() with the task @p that was just picked
 * to run on @rq.
 *
 * Sets the TICK_DEP_BIT_SCHED tick dependency on @rq's CPU when @p is the
 * only running task and is CFS bandwidth constrained, so that the tick is
 * kept for bandwidth accounting. Does nothing if the HZ_BW scheduler
 * feature is off, CFS bandwidth control is unused, the CPU is not a
 * nohz_full CPU, or more than one task is running.
 */
static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
6209+
{
6210+
int cpu = cpu_of(rq);
6211+
6212+
/* Feature disabled or no bandwidth limits anywhere: nothing to do. */
if (!sched_feat(HZ_BW) || !cfs_bandwidth_used())
6213+
return;
6214+
6215+
/* Only nohz_full CPUs are handled here. */
if (!tick_nohz_full_cpu(cpu))
6216+
return;
6217+
6218+
/* This path only handles the single-running-task case. */
if (rq->nr_running != 1)
6219+
return;
6220+
6221+
/*
6222+
* We know there is only one task runnable and we've just picked it. The
6223+
* normal enqueue path will have cleared TICK_DEP_BIT_SCHED if we will
6224+
* be otherwise able to stop the tick. Just need to check if we are using
6225+
* bandwidth control.
6226+
*/
6227+
if (cfs_task_bw_constrained(p))
6228+
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
6229+
}
6230+
#endif
6231+
61926232
#else /* CONFIG_CFS_BANDWIDTH */
61936233

61946234
static inline bool cfs_bandwidth_used(void)
@@ -6231,9 +6271,18 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
62316271
static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
62326272
static inline void update_runtime_enabled(struct rq *rq) {}
62336273
static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
6234-
6274+
#ifdef CONFIG_CGROUP_SCHED
6275+
/*
 * Stub used when CONFIG_CFS_BANDWIDTH is not set (but CONFIG_CGROUP_SCHED
 * is): no task is ever reported as bandwidth constrained.
 */
bool cfs_task_bw_constrained(struct task_struct *p)
6276+
{
6277+
return false;
6278+
}
6279+
#endif
62356280
#endif /* CONFIG_CFS_BANDWIDTH */
62366281

6282+
#if !defined(CONFIG_CFS_BANDWIDTH) || !defined(CONFIG_NO_HZ_FULL)
6283+
/* No-op stub for builds lacking CONFIG_CFS_BANDWIDTH or CONFIG_NO_HZ_FULL. */
static inline void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p) {}
6284+
#endif
6285+
62376286
/**************************************************
62386287
* CFS operations on tasks:
62396288
*/
@@ -8201,6 +8250,7 @@ done: __maybe_unused;
82018250
hrtick_start_fair(rq, p);
82028251

82038252
update_misfit_status(p, rq);
8253+
sched_fair_update_stop_tick(rq, p);
82048254

82058255
return p;
82068256

kernel/sched/features.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,3 +101,5 @@ SCHED_FEAT(LATENCY_WARN, false)
101101

102102
SCHED_FEAT(ALT_PERIOD, true)
103103
SCHED_FEAT(BASE_SLICE, true)
104+
105+
/*
 * HZ_BW: gates the checks that keep the tick from being stopped while the
 * only running task is a CFS task with a runtime bandwidth limit.
 * Enabled by default.
 */
SCHED_FEAT(HZ_BW, true)

kernel/sched/sched.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,7 @@ extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth
459459
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
460460
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
461461
extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
462+
extern bool cfs_task_bw_constrained(struct task_struct *p);
462463

463464
extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
464465
struct sched_rt_entity *rt_se, int cpu,
@@ -494,6 +495,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
494495
#else /* CONFIG_CGROUP_SCHED */
495496

496497
struct cfs_bandwidth { };
498+
/* Without CONFIG_CGROUP_SCHED no task is reported as bandwidth constrained. */
static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; }
497499

498500
#endif /* CONFIG_CGROUP_SCHED */
499501

0 commit comments

Comments
 (0)