@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 #endif
 }
 
+static inline bool entity_is_long_sleeper(struct sched_entity *se)
+{
+	struct cfs_rq *cfs_rq;
+	u64 sleep_time;
+
+	if (se->exec_start == 0)
+		return false;
+
+	cfs_rq = cfs_rq_of(se);
+
+	sleep_time = rq_clock_task(rq_of(cfs_rq));
+
+	/* Happen while migrating because of clock task divergence */
+	if (sleep_time <= se->exec_start)
+		return false;
+
+	sleep_time -= se->exec_start;
+	if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
+		return true;
+
+	return false;
+}
+
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime = cfs_rq->min_vruntime;
-	u64 sleep_time;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
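
Note on why a cutoff is needed at all: max_vruntime() picks the newer of two values via a signed difference, so once se->vruntime and min_vruntime drift more than 2^63 apart the sign flips and the stale value wins. A minimal user-space sketch of that inversion (not kernel code; the max_vruntime() copy below only mirrors the signed-delta idiom from fair.c):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the signed-delta comparison style used by max_vruntime() in fair.c. */
static uint64_t max_vruntime(uint64_t max_vruntime, uint64_t vruntime)
{
	int64_t delta = (int64_t)(vruntime - max_vruntime);

	if (delta > 0)
		max_vruntime = vruntime;
	return max_vruntime;
}

int main(void)
{
	uint64_t stale = 100;				/* vruntime of the long sleeper */
	uint64_t base = stale + (1ULL << 63) + 1;	/* min_vruntime after a huge advance */

	/* Prints 100: the wrapped signed delta is negative, so the stale value wins. */
	printf("%llu\n", (unsigned long long)max_vruntime(stale, base));
	return 0;
}
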
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	/*
 	 * Pull vruntime of the entity being placed to the base level of
-	 * cfs_rq, to prevent boosting it if placed backwards. If the entity
-	 * slept for a long time, don't even try to compare its vruntime with
-	 * the base as it may be too far off and the comparison may get
-	 * inversed due to s64 overflow.
-	 */
-	sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
-	if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
+	 * cfs_rq, to prevent boosting it if placed backwards.
+	 * However, min_vruntime can advance much faster than real time, with
+	 * the extreme being when an entity with the minimal weight always runs
+	 * on the cfs_rq. If the waking entity slept for a long time, its
+	 * vruntime difference from min_vruntime may overflow s64 and their
+	 * comparison may get inversed, so ignore the entity's original
+	 * vruntime in that case.
+	 * The maximal vruntime speedup is given by the ratio of normal to
+	 * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
+	 * When placing a migrated waking entity, its exec_start has been set
+	 * from a different rq. In order to take into account a possible
+	 * divergence between new and prev rq's clocks task because of irq and
+	 * stolen time, we take an additional margin.
+	 * So, cutting off on the sleep time of
+	 *	2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
+	 * should be safe.
+	 */
+	if (entity_is_long_sleeper(se))
 		se->vruntime = vruntime;
 	else
 		se->vruntime = max_vruntime(se->vruntime, vruntime);
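
Quick sanity check of the "~ 104 days" figure in the comment above: scale_load_down(NICE_0_LOAD) works out to 1024 (the nice-0 weight with the fixed-point shift scaled back out), so the cutoff is 2^63 / 1024 = 2^53 ns. A tiny user-space calculation, assuming that 1024 value (not kernel code):

#include <stdio.h>

int main(void)
{
	/* Assumes scale_load_down(NICE_0_LOAD) == 1024. */
	unsigned long long cutoff_ns = (1ULL << 63) / 1024;	/* == 2^53 ns */
	double days = (double)cutoff_ns / 1e9 / 60 / 60 / 24;

	printf("cutoff = %llu ns ~= %.1f days\n", cutoff_ns, days);
	return 0;
}
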
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
+	/* Entity has migrated, no longer consider this task hot */
+	if (flags & ENQUEUE_MIGRATED)
+		se->exec_start = 0;
 
 	check_schedstat_required();
 	update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 	/* Tell new CPU we are migrated */
 	se->avg.last_update_time = 0;
 
-	/* We have migrated, no longer consider this task hot */
-	se->exec_start = 0;
-
 	update_scan_period(p, new_cpu);
 }
 