
Commit 89c572e

Merge tag 'sched-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - Fix inconsistency in misfit task load-balancing

 - Fix CPU isolation bugs in the task-wakeup logic

 - Rework and unify the sched_use_asym_prio() and sched_asym_prefer() logic

 - Clean up and simplify ->avg_* accesses

 - Misc cleanups and fixes

* tag 'sched-core-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/topology: Rename SD_SHARE_PKG_RESOURCES to SD_SHARE_LLC
  sched/fair: Check the SD_ASYM_PACKING flag in sched_use_asym_prio()
  sched/fair: Rework sched_use_asym_prio() and sched_asym_prefer()
  sched/fair: Remove unused parameter from sched_asym()
  sched/topology: Remove duplicate descriptions from TOPOLOGY_SD_FLAGS
  sched/fair: Simplify the update_sd_pick_busiest() logic
  sched/fair: Do strict inequality check for busiest misfit task group
  sched/fair: Remove unnecessary goto in update_sd_lb_stats()
  sched/fair: Take the scheduling domain into account in select_idle_core()
  sched/fair: Take the scheduling domain into account in select_idle_smt()
  sched/fair: Add READ_ONCE() and use existing helper function to access ->avg_irq
  sched/fair: Use existing helper functions to access ->avg_rt and ->avg_dl
  sched/core: Simplify code by removing duplicate #ifdefs
2 parents: a5b1a01 + 54de442
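The core of the asym_packing rework is visible in the kernel/sched/fair.c hunks below: the SD_ASYM_PACKING flag test moves into sched_use_asym_prio(), and a new sched_asym() helper wraps the priority comparison, so find_busiest_queue(), asym_active_balance() and nohz_balancer_kick() no longer open-code the same checks. A condensed sketch of the resulting pair, extracted from the diff (not standalone code; is_core_idle() and sched_asym_prefer() are defined elsewhere in fair.c):

static bool sched_use_asym_prio(struct sched_domain *sd, int cpu)
{
	/* The flag check now lives here, so callers can drop it. */
	if (!(sd->flags & SD_ASYM_PACKING))
		return false;

	if (!sched_smt_active())
		return true;

	return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu);
}

static inline bool sched_asym(struct sched_domain *sd, int dst_cpu, int src_cpu)
{
	/* Balance toward @dst_cpu only if it has higher priority than @src_cpu. */
	return sched_use_asym_prio(sd, dst_cpu) &&
	       sched_asym_prefer(dst_cpu, src_cpu);
}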

7 files changed: +74 additions, -93 deletions


arch/powerpc/kernel/smp.c
Lines changed: 3 additions & 3 deletions

@@ -984,7 +984,7 @@ static bool shared_caches __ro_after_init;
 /* cpumask of CPUs with asymmetric SMT dependency */
 static int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
 
 	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
@@ -1010,9 +1010,9 @@ static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
 static int powerpc_shared_cache_flags(void)
 {
 	if (static_branch_unlikely(&splpar_asym_pack))
-		return SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
+		return SD_SHARE_LLC | SD_ASYM_PACKING;
 
-	return SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_LLC;
 }
 
 static int powerpc_shared_proc_flags(void)

include/linux/sched/sd_flags.h
Lines changed: 2 additions & 2 deletions

@@ -117,13 +117,13 @@ SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
 SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS)
 
 /*
- * Domain members share CPU package resources (i.e. caches)
+ * Domain members share CPU Last Level Caches
  *
  * SHARED_CHILD: Set from the base domain up until spanned CPUs no longer share
  *               the same cache(s).
  * NEEDS_GROUPS: Caches are shared between groups.
  */
-SD_FLAG(SD_SHARE_PKG_RESOURCES, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
+SD_FLAG(SD_SHARE_LLC, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
 
 /*
  * Only a single load balancing instance

include/linux/sched/topology.h
Lines changed: 3 additions & 3 deletions

@@ -38,21 +38,21 @@ extern const struct sd_flag_debug sd_flag_debug[];
 #ifdef CONFIG_SCHED_SMT
 static inline int cpu_smt_flags(void)
 {
-	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
 }
 #endif
 
 #ifdef CONFIG_SCHED_CLUSTER
 static inline int cpu_cluster_flags(void)
 {
-	return SD_CLUSTER | SD_SHARE_PKG_RESOURCES;
+	return SD_CLUSTER | SD_SHARE_LLC;
 }
 #endif
 
 #ifdef CONFIG_SCHED_MC
 static inline int cpu_core_flags(void)
 {
-	return SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_LLC;
 }
 #endif
 
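The cpu_smt_flags()/cpu_cluster_flags()/cpu_core_flags() helpers above are per-level flag callbacks consumed when the scheduler builds its domain hierarchy. A minimal sketch of how such a callback plugs into a topology table, modeled on default_topology[] in kernel/sched/topology.c (the exact field order and level names are an assumption of this sketch, not taken from this commit):

/* Sketch of a sched_domain_topology_level table; illustrative only. */
static struct sched_domain_topology_level example_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },	/* SD_SHARE_LLC set here */
#endif
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(PKG) },	/* no LLC sharing at package level */
	{ NULL, },
};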

kernel/sched/core.c
Lines changed: 1 addition & 3 deletions

@@ -1792,7 +1792,6 @@ static void cpu_util_update_eff(struct cgroup_subsys_state *css);
 #endif
 
 #ifdef CONFIG_SYSCTL
-#ifdef CONFIG_UCLAMP_TASK
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 static void uclamp_update_root_tg(void)
 {
@@ -1898,7 +1897,6 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
 	return result;
 }
 #endif
-#endif
 
 static int uclamp_validate(struct task_struct *p,
 			   const struct sched_attr *attr)
@@ -2065,7 +2063,7 @@ static void __init init_uclamp(void)
 	}
 }
 
-#else /* CONFIG_UCLAMP_TASK */
+#else /* !CONFIG_UCLAMP_TASK */
 static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
 static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
 static inline int uclamp_validate(struct task_struct *p,
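The removed #ifdef CONFIG_UCLAMP_TASK guard was redundant because this region already sits inside an enclosing #ifdef CONFIG_UCLAMP_TASK block, whose matching #else /* !CONFIG_UCLAMP_TASK */ is visible in the third hunk above. An illustrative reduction of the guard structure (not the exact surrounding code):

#ifdef CONFIG_UCLAMP_TASK		/* enclosing block */
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_UCLAMP_TASK		/* duplicate: always true here; now removed */
/* ... sysctl_sched_uclamp_handler() and friends ... */
#endif					/* now removed */
#endif /* CONFIG_SYSCTL */
#else /* !CONFIG_UCLAMP_TASK */
/* ... uclamp stubs ... */
#endif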

kernel/sched/fair.c
Lines changed: 46 additions & 64 deletions

@@ -7289,15 +7289,15 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 		if (!available_idle_cpu(cpu)) {
 			idle = false;
 			if (*idle_cpu == -1) {
-				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
+				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
 					*idle_cpu = cpu;
 					break;
 				}
 				continue;
 			}
 			break;
 		}
-		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus))
 			*idle_cpu = cpu;
 	}
 
@@ -7311,13 +7311,19 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 /*
  * Scan the local SMT mask for idle CPUs.
  */
-static int select_idle_smt(struct task_struct *p, int target)
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	int cpu;
 
 	for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
 		if (cpu == target)
 			continue;
+		/*
+		 * Check if the CPU is in the LLC scheduling domain of @target.
+		 * Due to isolcpus, there is no guarantee that all the siblings are in the domain.
+		 */
+		if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+			continue;
 		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
 			return cpu;
 	}
@@ -7341,7 +7347,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
 	return __select_idle_cpu(core, p);
 }
 
-static inline int select_idle_smt(struct task_struct *p, int target)
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	return -1;
 }
@@ -7591,7 +7597,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		has_idle_core = test_idle_cores(target);
 
 		if (!has_idle_core && cpus_share_cache(prev, target)) {
-			i = select_idle_smt(p, prev);
+			i = select_idle_smt(p, sd, prev);
 			if ((unsigned int)i < nr_cpumask_bits)
 				return i;
 		}
@@ -9237,19 +9243,17 @@ static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
 
 static inline bool others_have_blocked(struct rq *rq)
 {
-	if (READ_ONCE(rq->avg_rt.util_avg))
+	if (cpu_util_rt(rq))
 		return true;
 
-	if (READ_ONCE(rq->avg_dl.util_avg))
+	if (cpu_util_dl(rq))
 		return true;
 
 	if (thermal_load_avg(rq))
 		return true;
 
-#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
-	if (READ_ONCE(rq->avg_irq.util_avg))
+	if (cpu_util_irq(rq))
 		return true;
-#endif
 
 	return false;
 }
@@ -9506,8 +9510,8 @@ static unsigned long scale_rt_capacity(int cpu)
 	 * avg_thermal.load_avg tracks thermal pressure and the weighted
 	 * average uses the actual delta max capacity(load).
 	 */
-	used = READ_ONCE(rq->avg_rt.util_avg);
-	used += READ_ONCE(rq->avg_dl.util_avg);
+	used = cpu_util_rt(rq);
+	used += cpu_util_dl(rq);
 	used += thermal_load_avg(rq);
 
 	if (unlikely(used >= max))
@@ -9740,51 +9744,49 @@ group_type group_classify(unsigned int imbalance_pct,
  */
 static bool sched_use_asym_prio(struct sched_domain *sd, int cpu)
 {
+	if (!(sd->flags & SD_ASYM_PACKING))
+		return false;
+
 	if (!sched_smt_active())
 		return true;
 
 	return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu);
 }
 
+static inline bool sched_asym(struct sched_domain *sd, int dst_cpu, int src_cpu)
+{
+	/*
+	 * First check if @dst_cpu can do asym_packing load balance. Only do it
+	 * if it has higher priority than @src_cpu.
+	 */
+	return sched_use_asym_prio(sd, dst_cpu) &&
+	       sched_asym_prefer(dst_cpu, src_cpu);
+}
+
 /**
- * sched_asym - Check if the destination CPU can do asym_packing load balance
+ * sched_group_asym - Check if the destination CPU can do asym_packing balance
  * @env: The load balancing environment
- * @sds: Load-balancing data with statistics of the local group
  * @sgs: Load-balancing statistics of the candidate busiest group
  * @group: The candidate busiest group
  *
 * @env::dst_cpu can do asym_packing if it has higher priority than the
 * preferred CPU of @group.
  *
- * SMT is a special case. If we are balancing load between cores, @env::dst_cpu
- * can do asym_packing balance only if all its SMT siblings are idle. Also, it
- * can only do it if @group is an SMT group and has exactly on busy CPU. Larger
- * imbalances in the number of CPUS are dealt with in find_busiest_group().
- *
- * If we are balancing load within an SMT core, or at PKG domain level, always
- * proceed.
- *
 * Return: true if @env::dst_cpu can do with asym_packing load balance. False
 * otherwise.
  */
 static inline bool
-sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
-	   struct sched_group *group)
+sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group *group)
 {
-	/* Ensure that the whole local core is idle, if applicable. */
-	if (!sched_use_asym_prio(env->sd, env->dst_cpu))
-		return false;
-
 	/*
-	 * CPU priorities does not make sense for SMT cores with more than one
+	 * CPU priorities do not make sense for SMT cores with more than one
 	 * busy sibling.
 	 */
-	if (group->flags & SD_SHARE_CPUCAPACITY) {
-		if (sgs->group_weight - sgs->idle_cpus != 1)
-			return false;
-	}
+	if ((group->flags & SD_SHARE_CPUCAPACITY) &&
+	    (sgs->group_weight - sgs->idle_cpus != 1))
		return false;
 
-	return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
+	return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
 }
 
 /* One group has more than one SMT CPU while the other group does not */
@@ -9938,11 +9940,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	sgs->group_weight = group->group_weight;
 
 	/* Check if dst CPU is idle and preferred to this group */
-	if (!local_group && env->sd->flags & SD_ASYM_PACKING &&
-	    env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
-	    sched_asym(env, sds, sgs, group)) {
+	if (!local_group && env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
+	    sched_group_asym(env, sgs, group))
 		sgs->group_asym_packing = 1;
-	}
 
 	/* Check for loaded SMT group to be balanced to dst CPU */
 	if (!local_group && smt_balance(env, sgs, group))
@@ -10006,9 +10006,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	switch (sgs->group_type) {
 	case group_overloaded:
 		/* Select the overloaded group with highest avg_load. */
-		if (sgs->avg_load <= busiest->avg_load)
-			return false;
-		break;
+		return sgs->avg_load > busiest->avg_load;
 
 	case group_imbalanced:
 		/*
@@ -10019,18 +10017,14 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
 	case group_asym_packing:
 		/* Prefer to move from lowest priority CPU's work */
-		if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
-			return false;
-		break;
+		return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
 
 	case group_misfit_task:
 		/*
 		 * If we have more than one misfit sg go with the biggest
 		 * misfit.
 		 */
-		if (sgs->group_misfit_task_load < busiest->group_misfit_task_load)
-			return false;
-		break;
+		return sgs->group_misfit_task_load > busiest->group_misfit_task_load;
 
 	case group_smt_balance:
 		/*
@@ -10182,10 +10176,8 @@ static int idle_cpu_without(int cpu, struct task_struct *p)
	 * be computed and tested before calling idle_cpu_without().
	 */
 
-#ifdef CONFIG_SMP
 	if (rq->ttwu_pending)
 		return 0;
-#endif
 
 	return 1;
 }
@@ -10578,16 +10570,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
 		update_sg_lb_stats(env, sds, sg, sgs, &sg_status);
 
-		if (local_group)
-			goto next_group;
-
-
-		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
+		if (!local_group && update_sd_pick_busiest(env, sds, sg, sgs)) {
 			sds->busiest = sg;
 			sds->busiest_stat = *sgs;
 		}
 
-next_group:
 		/* Now, start updating sd_lb_stats */
 		sds->total_load += sgs->group_load;
 		sds->total_capacity += sgs->group_capacity;
@@ -10691,7 +10678,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
	 */
 	if (local->group_type == group_has_spare) {
 		if ((busiest->group_type > group_fully_busy) &&
-		    !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
+		    !(env->sd->flags & SD_SHARE_LLC)) {
 			/*
 			 * If busiest is overloaded, try to fill spare
 			 * capacity. This might end up creating spare capacity
@@ -11038,10 +11025,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		 * If balancing between cores, let lower priority CPUs help
 		 * SMT cores with more than one busy sibling.
 		 */
-		if ((env->sd->flags & SD_ASYM_PACKING) &&
-		    sched_use_asym_prio(env->sd, i) &&
-		    sched_asym_prefer(i, env->dst_cpu) &&
-		    nr_running == 1)
+		if (sched_asym(env->sd, i, env->dst_cpu) && nr_running == 1)
 			continue;
 
 		switch (env->migration_type) {
@@ -11137,8 +11121,7 @@ asym_active_balance(struct lb_env *env)
	 * the lower priority @env::dst_cpu help it. Do not follow
	 * CPU priority.
	 */
-	return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
-	       sched_use_asym_prio(env->sd, env->dst_cpu) &&
+	return env->idle != CPU_NOT_IDLE && sched_use_asym_prio(env->sd, env->dst_cpu) &&
 	       (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
 		!sched_use_asym_prio(env->sd, env->src_cpu));
 }
@@ -11910,8 +11893,7 @@ static void nohz_balancer_kick(struct rq *rq)
			 * preferred CPU must be idle.
			 */
			for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
-				if (sched_use_asym_prio(sd, i) &&
-				    sched_asym_prefer(i, cpu)) {
+				if (sched_asym(sd, i, cpu)) {
					flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
					goto unlock;
				}
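One wakeup-path change above benefits from a worked illustration: the sched_domain_span() check added to select_idle_smt() (with the sd argument threaded through from select_idle_sibling()) exists because an SMT sibling can appear in cpu_smt_mask() yet belong to no scheduler domain. A hypothetical scenario, with invented CPU numbers:

/*
 * Hypothetical: a 4-core/8-thread machine booted with isolcpus=7.
 * CPUs 6 and 7 are hardware SMT siblings, so cpu_smt_mask(6) still
 * contains CPU 7, but CPU 7 is absent from every scheduler domain,
 * including the LLC domain used here. Scanning cpu_smt_mask() alone
 * could hand the wakeup to the isolated CPU; the span check filters it:
 */
if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
	continue;	/* sibling exists in hardware, but is isolated */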

kernel/sched/sched.h
Lines changed: 1 addition & 1 deletion

@@ -3136,7 +3136,7 @@ static inline bool uclamp_rq_is_idle(struct rq *rq)
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 static inline unsigned long cpu_util_irq(struct rq *rq)
 {
-	return rq->avg_irq.util_avg;
+	return READ_ONCE(rq->avg_irq.util_avg);
 }
 
 static inline
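cpu_util_irq() gains a READ_ONCE() because ->avg_irq.util_avg is updated locklessly; the annotation forces a single, untorn load in the reader. A user-space sketch of what the kernel macro (include/asm-generic/rwonce.h) essentially does for a scalar field — this simplification is an assumption of the sketch; the real macro also performs type and size checks:

/* Minimal stand-in for the kernel's READ_ONCE() on a scalar. */
#define READ_ONCE(x)	(*(const volatile __typeof__(x) *)&(x))

static unsigned long read_util_avg(const unsigned long *util_avg)
{
	/* One volatile load: the compiler may not tear, refetch, or hoist it. */
	return READ_ONCE(*util_avg);
}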
