@@ -7289,15 +7289,15 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 		if (!available_idle_cpu(cpu)) {
 			idle = false;
 			if (*idle_cpu == -1) {
-				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
+				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
 					*idle_cpu = cpu;
 					break;
 				}
 				continue;
 			}
 			break;
 		}
-		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, cpus))
 			*idle_cpu = cpu;
 	}
 
@@ -7311,13 +7311,19 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 /*
  * Scan the local SMT mask for idle CPUs.
  */
-static int select_idle_smt(struct task_struct *p, int target)
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	int cpu;
 
 	for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) {
 		if (cpu == target)
 			continue;
+		/*
+		 * Check if the CPU is in the LLC scheduling domain of @target.
+		 * Due to isolcpus, there is no guarantee that all the siblings are in the domain.
+		 */
+		if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+			continue;
 		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
 			return cpu;
 	}
@@ -7341,7 +7347,7 @@ static inline int select_idle_core(struct task_struct *p, int core, struct cpuma
 	return __select_idle_cpu(core, p);
 }
 
-static inline int select_idle_smt(struct task_struct *p, int target)
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	return -1;
 }
@@ -7591,7 +7597,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		has_idle_core = test_idle_cores(target);
 
 		if (!has_idle_core && cpus_share_cache(prev, target)) {
-			i = select_idle_smt(p, prev);
+			i = select_idle_smt(p, sd, prev);
 			if ((unsigned int)i < nr_cpumask_bits)
 				return i;
 		}
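For context (not part of the hunk above): the @sd now threaded into select_idle_smt() is the LLC sched_domain of @target, which select_idle_sibling() looks up earlier via the per-CPU sd_llc pointer; that is why sched_domain_span(sd) filters out SMT siblings excluded by isolcpus. A minimal sketch of that pattern, with example_pick_smt_sibling() being a hypothetical wrapper rather than a function in fair.c:

/*
 * Sketch only: illustrates where the LLC domain passed to
 * select_idle_smt() is expected to come from.
 */
static int example_pick_smt_sibling(struct task_struct *p, int prev, int target)
{
	struct sched_domain *sd;

	/* sd_llc is the per-CPU cached pointer to the LLC sched_domain. */
	sd = rcu_dereference(per_cpu(sd_llc, target));
	if (!sd)
		return -1;

	/* Only siblings inside sched_domain_span(sd) are considered now. */
	return select_idle_smt(p, sd, prev);
}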
@@ -9237,19 +9243,17 @@ static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
 
 static inline bool others_have_blocked(struct rq *rq)
 {
-	if (READ_ONCE(rq->avg_rt.util_avg))
+	if (cpu_util_rt(rq))
 		return true;
 
-	if (READ_ONCE(rq->avg_dl.util_avg))
+	if (cpu_util_dl(rq))
 		return true;
 
 	if (thermal_load_avg(rq))
 		return true;
 
-#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
-	if (READ_ONCE(rq->avg_irq.util_avg))
+	if (cpu_util_irq(rq))
 		return true;
-#endif
 
 	return false;
 }
@@ -9506,8 +9510,8 @@ static unsigned long scale_rt_capacity(int cpu)
 	 * avg_thermal.load_avg tracks thermal pressure and the weighted
 	 * average uses the actual delta max capacity(load).
 	 */
-	used = READ_ONCE(rq->avg_rt.util_avg);
-	used += READ_ONCE(rq->avg_dl.util_avg);
+	used = cpu_util_rt(rq);
+	used += cpu_util_dl(rq);
 	used += thermal_load_avg(rq);
 
 	if (unlikely(used >= max))
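The two hunks above replace open-coded READ_ONCE() reads of the PELT signals with the cpu_util_rt()/cpu_util_dl()/cpu_util_irq() accessors, which is also why the CONFIG_HAVE_SCHED_AVG_IRQ guard can be dropped in others_have_blocked(): the IRQ accessor hides that config dependency. For reference, the definitions in kernel/sched/sched.h are expected to look roughly like the sketch below (not copied verbatim from the tree):

/* Sketch: thin accessors over the rq PELT averages. */
static inline unsigned long cpu_util_rt(struct rq *rq)
{
	return READ_ONCE(rq->avg_rt.util_avg);
}

static inline unsigned long cpu_util_dl(struct rq *rq)
{
	return READ_ONCE(rq->avg_dl.util_avg);
}

#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
static inline unsigned long cpu_util_irq(struct rq *rq)
{
	return READ_ONCE(rq->avg_irq.util_avg);
}
#else
static inline unsigned long cpu_util_irq(struct rq *rq)
{
	/* No IRQ time accounting: contributes nothing to blocked load. */
	return 0;
}
#endif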
@@ -9740,51 +9744,49 @@ group_type group_classify(unsigned int imbalance_pct,
  */
 static bool sched_use_asym_prio(struct sched_domain *sd, int cpu)
 {
+	if (!(sd->flags & SD_ASYM_PACKING))
+		return false;
+
 	if (!sched_smt_active())
 		return true;
 
 	return sd->flags & SD_SHARE_CPUCAPACITY || is_core_idle(cpu);
 }
 
+static inline bool sched_asym(struct sched_domain *sd, int dst_cpu, int src_cpu)
+{
+	/*
+	 * First check if @dst_cpu can do asym_packing load balance. Only do it
+	 * if it has higher priority than @src_cpu.
+	 */
+	return sched_use_asym_prio(sd, dst_cpu) &&
+	       sched_asym_prefer(dst_cpu, src_cpu);
+}
+
 /**
- * sched_asym - Check if the destination CPU can do asym_packing load balance
+ * sched_group_asym - Check if the destination CPU can do asym_packing balance
  * @env: The load balancing environment
- * @sds: Load-balancing data with statistics of the local group
  * @sgs: Load-balancing statistics of the candidate busiest group
  * @group: The candidate busiest group
 *
 * @env::dst_cpu can do asym_packing if it has higher priority than the
 * preferred CPU of @group.
 *
- * SMT is a special case. If we are balancing load between cores, @env::dst_cpu
- * can do asym_packing balance only if all its SMT siblings are idle. Also, it
- * can only do it if @group is an SMT group and has exactly on busy CPU. Larger
- * imbalances in the number of CPUS are dealt with in find_busiest_group().
- *
- * If we are balancing load within an SMT core, or at PKG domain level, always
- * proceed.
- *
 * Return: true if @env::dst_cpu can do with asym_packing load balance. False
 * otherwise.
 */
 static inline bool
-sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs,
-	   struct sched_group *group)
+sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group *group)
 {
-	/* Ensure that the whole local core is idle, if applicable. */
-	if (!sched_use_asym_prio(env->sd, env->dst_cpu))
-		return false;
-
 	/*
-	 * CPU priorities does not make sense for SMT cores with more than one
+	 * CPU priorities do not make sense for SMT cores with more than one
 	 * busy sibling.
 	 */
-	if (group->flags & SD_SHARE_CPUCAPACITY) {
-		if (sgs->group_weight - sgs->idle_cpus != 1)
-			return false;
-	}
+	if ((group->flags & SD_SHARE_CPUCAPACITY) &&
+	    (sgs->group_weight - sgs->idle_cpus != 1))
+		return false;
 
-	return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
+	return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
 }
 
 /* One group has more than one SMT CPU while the other group does not */
@@ -9938,11 +9940,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	sgs->group_weight = group->group_weight;
 
 	/* Check if dst CPU is idle and preferred to this group */
-	if (!local_group && env->sd->flags & SD_ASYM_PACKING &&
-	    env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
-	    sched_asym(env, sds, sgs, group)) {
+	if (!local_group && env->idle != CPU_NOT_IDLE && sgs->sum_h_nr_running &&
+	    sched_group_asym(env, sgs, group))
 		sgs->group_asym_packing = 1;
-	}
 
 	/* Check for loaded SMT group to be balanced to dst CPU */
 	if (!local_group && smt_balance(env, sgs, group))
@@ -10006,9 +10006,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 	switch (sgs->group_type) {
 	case group_overloaded:
 		/* Select the overloaded group with highest avg_load. */
-		if (sgs->avg_load <= busiest->avg_load)
-			return false;
-		break;
+		return sgs->avg_load > busiest->avg_load;
 
 	case group_imbalanced:
 		/*
@@ -10019,18 +10017,14 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
 	case group_asym_packing:
 		/* Prefer to move from lowest priority CPU's work */
-		if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
-			return false;
-		break;
+		return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
 
 	case group_misfit_task:
 		/*
 		 * If we have more than one misfit sg go with the biggest
 		 * misfit.
 		 */
-		if (sgs->group_misfit_task_load < busiest->group_misfit_task_load)
-			return false;
-		break;
+		return sgs->group_misfit_task_load > busiest->group_misfit_task_load;
 
 	case group_smt_balance:
 		/*
@@ -10182,10 +10176,8 @@ static int idle_cpu_without(int cpu, struct task_struct *p)
 	 * be computed and tested before calling idle_cpu_without().
 	 */
 
-#ifdef CONFIG_SMP
 	if (rq->ttwu_pending)
 		return 0;
-#endif
 
 	return 1;
 }
@@ -10578,16 +10570,11 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
 		update_sg_lb_stats(env, sds, sg, sgs, &sg_status);
 
-		if (local_group)
-			goto next_group;
-
-
-		if (update_sd_pick_busiest(env, sds, sg, sgs)) {
+		if (!local_group && update_sd_pick_busiest(env, sds, sg, sgs)) {
 			sds->busiest = sg;
 			sds->busiest_stat = *sgs;
 		}
 
-next_group:
 		/* Now, start updating sd_lb_stats */
 		sds->total_load += sgs->group_load;
 		sds->total_capacity += sgs->group_capacity;
@@ -10691,7 +10678,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	 */
 	if (local->group_type == group_has_spare) {
 		if ((busiest->group_type > group_fully_busy) &&
-		    !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
+		    !(env->sd->flags & SD_SHARE_LLC)) {
 			/*
 			 * If busiest is overloaded, try to fill spare
 			 * capacity. This might end up creating spare capacity
@@ -11038,10 +11025,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 		 * If balancing between cores, let lower priority CPUs help
 		 * SMT cores with more than one busy sibling.
 		 */
-		if ((env->sd->flags & SD_ASYM_PACKING) &&
-		    sched_use_asym_prio(env->sd, i) &&
-		    sched_asym_prefer(i, env->dst_cpu) &&
-		    nr_running == 1)
+		if (sched_asym(env->sd, i, env->dst_cpu) && nr_running == 1)
 			continue;
 
 		switch (env->migration_type) {
@@ -11137,8 +11121,7 @@ asym_active_balance(struct lb_env *env)
 	 * the lower priority @env::dst_cpu help it. Do not follow
 	 * CPU priority.
 	 */
-	return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
-	       sched_use_asym_prio(env->sd, env->dst_cpu) &&
+	return env->idle != CPU_NOT_IDLE && sched_use_asym_prio(env->sd, env->dst_cpu) &&
 	       (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
 		!sched_use_asym_prio(env->sd, env->src_cpu));
 }
@@ -11910,8 +11893,7 @@ static void nohz_balancer_kick(struct rq *rq)
 		 * preferred CPU must be idle.
 		 */
 		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
-			if (sched_use_asym_prio(sd, i) &&
-			    sched_asym_prefer(i, cpu)) {
+			if (sched_asym(sd, i, cpu)) {
 				flags = NOHZ_STATS_KICK | NOHZ_BALANCE_KICK;
 				goto unlock;
 			}