Skip to content

Commit c98c182

Browse files
auldp authored and Peter Zijlstra committed
sched, cgroup: Restore meaning to hierarchical_quota
In cgroupv2 cfs_b->hierarchical_quota is set to -1 for all task groups due to the previous fix simply taking the min. It should reflect a limit imposed at that level or by an ancestor. Even though cgroupv2 does not require child quota to be less than or equal to that of its ancestors the task group will still be constrained by such a quota so this should be shown here. Cgroupv1 continues to set this correctly.

In both cases, add initialization when a new task group is created based on the current parent's value (or RUNTIME_INF in the case of root_task_group). Otherwise, the field is wrong until a quota is changed after creation and __cfs_schedulable() is called.

Fixes: c53593e ("sched, cgroup: Don't reject lower cpu.max on ancestors")
Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230714125746.812891-1-pauld@redhat.com
1 parent 113d0a6 commit c98c182

File tree

3 files changed

+14
-8
lines changed

3 files changed

+14
-8
lines changed

kernel/sched/core.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9953,7 +9953,7 @@ void __init sched_init(void)
99539953
ptr += nr_cpu_ids * sizeof(void **);
99549954

99559955
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
9956-
init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
9956+
init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
99579957
#endif /* CONFIG_FAIR_GROUP_SCHED */
99589958
#ifdef CONFIG_RT_GROUP_SCHED
99599959
root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -11087,11 +11087,16 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
1108711087

1108811088
/*
1108911089
* Ensure max(child_quota) <= parent_quota. On cgroup2,
11090-
* always take the min. On cgroup1, only inherit when no
11091-
* limit is set:
11090+
* always take the non-RUNTIME_INF min. On cgroup1, only
11091+
* inherit when no limit is set. In both cases this is used
11092+
* by the scheduler to determine if a given CFS task has a
11093+
* bandwidth constraint at some higher level.
1109211094
*/
1109311095
if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
11094-
quota = min(quota, parent_quota);
11096+
if (quota == RUNTIME_INF)
11097+
quota = parent_quota;
11098+
else if (parent_quota != RUNTIME_INF)
11099+
quota = min(quota, parent_quota);
1109511100
} else {
1109611101
if (quota == RUNTIME_INF)
1109711102
quota = parent_quota;

kernel/sched/fair.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6045,13 +6045,14 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
60456045
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
60466046
}
60476047

6048-
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
6048+
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
60496049
{
60506050
raw_spin_lock_init(&cfs_b->lock);
60516051
cfs_b->runtime = 0;
60526052
cfs_b->quota = RUNTIME_INF;
60536053
cfs_b->period = ns_to_ktime(default_cfs_period());
60546054
cfs_b->burst = 0;
6055+
cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
60556056

60566057
INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
60576058
hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
@@ -6217,7 +6218,7 @@ static inline int throttled_lb_pair(struct task_group *tg,
62176218
return 0;
62186219
}
62196220

6220-
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
6221+
void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent) {}
62216222

62226223
#ifdef CONFIG_FAIR_GROUP_SCHED
62236224
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -12599,7 +12600,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
1259912600

1260012601
tg->shares = NICE_0_LOAD;
1260112602

12602-
init_cfs_bandwidth(tg_cfs_bandwidth(tg));
12603+
init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));
1260312604

1260412605
for_each_possible_cpu(i) {
1260512606
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ extern void unregister_fair_sched_group(struct task_group *tg);
454454
extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
455455
struct sched_entity *se, int cpu,
456456
struct sched_entity *parent);
457-
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
457+
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent);
458458

459459
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
460460
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);

0 commit comments

Comments
 (0)