Commit 2679a83

Hao Jia authored and Peter Zijlstra committed
sched/core: Avoid obvious double update_rq_clock warning
When we use raw_spin_rq_lock() to acquire the rq lock and have to
update the rq clock while holding the lock, the kernel may issue
a WARN_DOUBLE_CLOCK warning.

Since we directly use raw_spin_rq_lock() to acquire the rq lock instead
of rq_lock(), there is no corresponding change to rq->clock_update_flags.
In particular, when we have obtained the rq lock of another CPU, that
rq's clock_update_flags may already contain RQCF_UPDATED, and a
subsequent update_rq_clock() on it will trigger the WARN_DOUBLE_CLOCK
warning. So we need to clear RQCF_UPDATED from rq->clock_update_flags
to avoid the warning.

For the sched_rt_period_timer() and migrate_task_rq_dl() cases
we simply replace raw_spin_rq_lock()/raw_spin_rq_unlock() with
rq_lock()/rq_unlock().

For the {pull,push}_{rt,dl}_task() cases, we add the
double_rq_clock_clear_update() function to clear RQCF_UPDATED from
rq->clock_update_flags, and call it before double_lock_balance()/
double_rq_lock() returns.

Some call trace reports:

Call Trace 1:
 <IRQ>
 sched_rt_period_timer+0x10f/0x3a0
 ? enqueue_top_rt_rq+0x110/0x110
 __hrtimer_run_queues+0x1a9/0x490
 hrtimer_interrupt+0x10b/0x240
 __sysvec_apic_timer_interrupt+0x8a/0x250
 sysvec_apic_timer_interrupt+0x9a/0xd0
 </IRQ>
 <TASK>
 asm_sysvec_apic_timer_interrupt+0x12/0x20

Call Trace 2:
 <TASK>
 activate_task+0x8b/0x110
 push_rt_task.part.108+0x241/0x2c0
 push_rt_tasks+0x15/0x30
 finish_task_switch+0xaa/0x2e0
 ? __switch_to+0x134/0x420
 __schedule+0x343/0x8e0
 ? hrtimer_start_range_ns+0x101/0x340
 schedule+0x4e/0xb0
 do_nanosleep+0x8e/0x160
 hrtimer_nanosleep+0x89/0x120
 ? hrtimer_init_sleeper+0x90/0x90
 __x64_sys_nanosleep+0x96/0xd0
 do_syscall_64+0x34/0x90
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Call Trace 3:
 <TASK>
 deactivate_task+0x93/0xe0
 pull_rt_task+0x33e/0x400
 balance_rt+0x7e/0x90
 __schedule+0x62f/0x8e0
 do_task_dead+0x3f/0x50
 do_exit+0x7b8/0xbb0
 do_group_exit+0x2d/0x90
 get_signal+0x9df/0x9e0
 ? preempt_count_add+0x56/0xa0
 ? __remove_hrtimer+0x35/0x70
 arch_do_signal_or_restart+0x36/0x720
 ? nanosleep_copyout+0x39/0x50
 ? do_nanosleep+0x131/0x160
 ? audit_filter_inodes+0xf5/0x120
 exit_to_user_mode_prepare+0x10f/0x1e0
 syscall_exit_to_user_mode+0x17/0x30
 do_syscall_64+0x40/0x90
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Call Trace 4:
 update_rq_clock+0x128/0x1a0
 migrate_task_rq_dl+0xec/0x310
 set_task_cpu+0x84/0x1e4
 try_to_wake_up+0x1d8/0x5c0
 wake_up_process+0x1c/0x30
 hrtimer_wakeup+0x24/0x3c
 __hrtimer_run_queues+0x114/0x270
 hrtimer_interrupt+0xe8/0x244
 arch_timer_handler_phys+0x30/0x50
 handle_percpu_devid_irq+0x88/0x140
 generic_handle_domain_irq+0x40/0x60
 gic_handle_irq+0x48/0xe0
 call_on_irq_stack+0x2c/0x60
 do_interrupt_handler+0x80/0x84

Steps to reproduce:
1. Enable CONFIG_SCHED_DEBUG when compiling the kernel
2. echo 1 > /sys/kernel/debug/clear_warn_once
   echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched/features
   echo "NO_RT_PUSH_IPI" > /sys/kernel/debug/sched/features
3. Run some rt/dl tasks that periodically work and sleep, e.g. create
   2*n rt or dl (90% running) tasks via rt-app on a system with n CPUs.
   Dietmar Eggemann reports Call Trace 4 when running on a PREEMPT_RT
   kernel.

Signed-off-by: Hao Jia <jiahao.os@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20220430085843.62939-2-jiahao.os@bytedance.com
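For context, the check that fires lives in update_rq_clock(). A minimal, abridged sketch of the v5.18-era code (not part of this patch; details may vary between kernel versions):

	/* kernel/sched/core.c -- abridged sketch, for context only */
	void update_rq_clock(struct rq *rq)
	{
		s64 delta;

		lockdep_assert_rq_held(rq);

		if (rq->clock_update_flags & RQCF_ACT_SKIP)
			return;

	#ifdef CONFIG_SCHED_DEBUG
		/* Warn if the clock was already updated under this lock hold. */
		if (sched_feat(WARN_DOUBLE_CLOCK))
			SCHED_WARN_ON(rq->clock_update_flags & RQCF_UPDATED);
		rq->clock_update_flags |= RQCF_UPDATED;
	#endif

		delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
		if (delta < 0)
			return;
		rq->clock += delta;
		update_rq_clock_task(rq, delta);
	}

RQCF_UPDATED is only cleared when the lock is taken via the rq_lock() family (which pins the lock), so a bare raw_spin_rq_lock() can observe a stale RQCF_UPDATED left behind by the remote CPU.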
1 parent d70522f · commit 2679a83

File tree: 4 files changed, +33 -11 lines

  kernel/sched/core.c
  kernel/sched/deadline.c
  kernel/sched/rt.c
  kernel/sched/sched.h

kernel/sched/core.c

Lines changed: 3 additions & 3 deletions
@@ -613,10 +613,10 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
 		swap(rq1, rq2);
 
 	raw_spin_rq_lock(rq1);
-	if (__rq_lockp(rq1) == __rq_lockp(rq2))
-		return;
+	if (__rq_lockp(rq1) != __rq_lockp(rq2))
+		raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
 
-	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+	double_rq_clock_clear_update(rq1, rq2);
 }
 #endif

kernel/sched/deadline.c

Lines changed: 3 additions & 2 deletions
@@ -1830,6 +1830,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
 
 static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
 {
+	struct rq_flags rf;
 	struct rq *rq;
 
 	if (READ_ONCE(p->__state) != TASK_WAKING)
@@ -1841,7 +1842,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
 	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
 	 * rq->lock is not... So, lock it
 	 */
-	raw_spin_rq_lock(rq);
+	rq_lock(rq, &rf);
 	if (p->dl.dl_non_contending) {
 		update_rq_clock(rq);
 		sub_running_bw(&p->dl, &rq->dl);
@@ -1857,7 +1858,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
 		put_task_struct(p);
 	}
 	sub_rq_bw(&p->dl, &rq->dl);
-	raw_spin_rq_unlock(rq);
+	rq_unlock(rq, &rf);
 }
 
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
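Why swapping raw_spin_rq_lock() for rq_lock() is sufficient here: rq_lock() additionally pins the lock, and rq_pin_lock() resets rq->clock_update_flags so the next update_rq_clock() cannot observe a stale RQCF_UPDATED. An abridged sketch of the helpers from kernel/sched/sched.h of this era (details may vary between versions); the rt.c change below follows the same pattern:

	static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
	{
		rf->cookie = lockdep_pin_lock(__rq_lockp(rq));
	#ifdef CONFIG_SCHED_DEBUG
		/* Drop RQCF_UPDATED, keep only the skip flags. */
		rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
		rf->clock_update_flags = 0;
	#endif
	}

	static inline void rq_lock(struct rq *rq, struct rq_flags *rf)
		__acquires(rq->lock)
	{
		raw_spin_rq_lock(rq);
		rq_pin_lock(rq, rf);
	}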

kernel/sched/rt.c

Lines changed: 3 additions & 2 deletions
@@ -871,6 +871,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+		struct rq_flags rf;
 		int skip;
 
 		/*
@@ -885,7 +886,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		if (skip)
 			continue;
 
-		raw_spin_rq_lock(rq);
+		rq_lock(rq, &rf);
 		update_rq_clock(rq);
 
 		if (rt_rq->rt_time) {
@@ -923,7 +924,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
-		raw_spin_rq_unlock(rq);
+		rq_unlock(rq, &rf);
 	}
 
 	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))

kernel/sched/sched.h

Lines changed: 24 additions & 4 deletions
@@ -2474,6 +2474,24 @@ unsigned long arch_scale_freq_capacity(int cpu)
 }
 #endif
 
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
+ * acquire rq lock instead of rq_lock(). So at the end of these two functions
+ * we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
+ * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
+ */
+static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
+{
+	rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+	/* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
+#ifdef CONFIG_SMP
+	rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+#endif
+}
+#else
+static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
+#endif
 
 #ifdef CONFIG_SMP
 
@@ -2539,14 +2557,15 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
-	if (__rq_lockp(this_rq) == __rq_lockp(busiest))
-		return 0;
-
-	if (likely(raw_spin_rq_trylock(busiest)))
+	if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
+	    likely(raw_spin_rq_trylock(busiest))) {
+		double_rq_clock_clear_update(this_rq, busiest);
 		return 0;
+	}
 
 	if (rq_order_less(this_rq, busiest)) {
 		raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
+		double_rq_clock_clear_update(this_rq, busiest);
 		return 0;
 	}
 
@@ -2640,6 +2659,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
 	BUG_ON(rq1 != rq2);
 	raw_spin_rq_lock(rq1);
 	__acquire(rq2->lock); /* Fake it out ;) */
+	double_rq_clock_clear_update(rq1, rq2);
 }
 
 /*