
Commit 49a1763

Sebastian Andrzej Siewior authored, KAGA-KOKO committed
softirq: Use a dedicated thread for timer wakeups on PREEMPT_RT.
The timer and hrtimer soft interrupts are raised in hard interrupt context. With threaded interrupts force enabled, or on PREEMPT_RT, this leads to waking ksoftirqd for the processing of the soft interrupt. ksoftirqd runs as a SCHED_OTHER task, which means it competes with other tasks for CPU resources. This can introduce long delays for timer processing on heavily loaded systems and is not desired.

Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated timers thread and let it run at the lowest SCHED_FIFO priority. Wake-ups for RT tasks happen from hardirq context, so only timer_list timers and hrtimers for "regular" tasks are processed here. The higher priority ensures that wakeups are performed before any SCHED_OTHER task is scheduled.

Using a dedicated variable to store the pending softirq bits ensures that the timers are not accidentally picked up by ksoftirqd or other threaded interrupts. They shouldn't be picked up by ksoftirqd, since it runs at lower priority. However, if ksoftirqd is already running while a timer fires, it will be PI-boosted to the ktimers thread's priority due to the BH-lock. The timer thread can pick up pending softirqs from ksoftirqd, but only if the softirq load is high. Processing the picked-up softirqs at SCHED_FIFO priority under high softirq load is not desired, but this can already happen through a PI-boost by a force-threaded interrupt.

[ frederic@kernel.org: rcutorture.c fixes, storm fix by introduction of local_timers_pending() for tick_nohz_next_event() ]
[ junxiao.chang@intel.com: Ensure ktimersd gets woken up even if a softirq is currently served. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org> [rcutorture]
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20241106150419.2593080-4-bigeasy@linutronix.de
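To make the latency problem concrete, a minimal userspace probe such as the following can measure clock_nanosleep() wakeup latency for a SCHED_OTHER task, the wakeup path that is served through ksoftirqd before this change and through the ktimers thread after it. This sketch is purely illustrative and not part of the commit; the file name, 1 ms period, and iteration count are arbitrary. Run it under heavy CPU load on a PREEMPT_RT kernel to observe the delays described above.

/* wakeup.c: report worst-case clock_nanosleep() wakeup latency.
 * Illustrative probe only. Build: gcc -O2 -o wakeup wakeup.c */
#include <stdio.h>
#include <stdint.h>
#include <time.h>

static int64_t delta_ns(const struct timespec *a, const struct timespec *b)
{
	return (b->tv_sec - a->tv_sec) * 1000000000LL +
	       (b->tv_nsec - a->tv_nsec);
}

int main(void)
{
	int64_t d, worst = 0;
	struct timespec target, now;

	for (int i = 0; i < 1000; i++) {
		clock_gettime(CLOCK_MONOTONIC, &target);
		/* Arm an absolute timer 1ms into the future. */
		target.tv_nsec += 1000000;
		if (target.tv_nsec >= 1000000000L) {
			target.tv_sec++;
			target.tv_nsec -= 1000000000L;
		}
		clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &target, NULL);
		clock_gettime(CLOCK_MONOTONIC, &now);
		/* Anything beyond the programmed expiry is wakeup latency. */
		d = delta_ns(&target, &now);
		if (d > worst)
			worst = d;
	}
	printf("worst wakeup latency: %lld ns\n", (long long)worst);
	return 0;
}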
1 parent a02976c commit 49a1763

File tree: 6 files changed, +127 −5 lines

include/linux/interrupt.h

Lines changed: 47 additions & 0 deletions
@@ -616,6 +616,53 @@ extern void __raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq_irqoff(unsigned int nr);
 extern void raise_softirq(unsigned int nr);
 
+/*
+ * With forced-threaded interrupts enabled a raised softirq is deferred to
+ * ksoftirqd unless it can be handled within the threaded interrupt. This
+ * affects timer_list timers and hrtimers which are explicitly marked with
+ * HRTIMER_MODE_SOFT.
+ * With PREEMPT_RT enabled more hrtimers are moved to softirq for processing
+ * which includes all timers which are not explicitly marked HRTIMER_MODE_HARD.
+ * Userspace controlled timers (like the clock_nanosleep() interface) are
+ * divided into two categories: Tasks with elevated scheduling policy including
+ * SCHED_{FIFO|RR|DL} and the remaining scheduling policy. The tasks with the
+ * elevated scheduling policy are woken up directly from the HARDIRQ while all
+ * other wake ups are delayed to softirq and so to ksoftirqd.
+ *
+ * The ksoftirqd runs at SCHED_OTHER policy at which it should remain since it
+ * handles the softirq in an overloaded situation (not handled everything
+ * within its last run).
+ * If the timers are handled at SCHED_OTHER priority then they compete with all
+ * other SCHED_OTHER tasks for CPU resources and are possibly delayed.
+ * Moving timer softirqs to a low priority SCHED_FIFO thread instead ensures
+ * that timers are performed before scheduling any SCHED_OTHER thread.
+ */
+DECLARE_PER_CPU(struct task_struct *, ktimerd);
+DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
+void raise_ktimers_thread(unsigned int nr);
+
+static inline unsigned int local_timers_pending_force_th(void)
+{
+	return __this_cpu_read(pending_timer_softirq);
+}
+
+static inline void raise_timer_softirq(unsigned int nr)
+{
+	lockdep_assert_in_irq();
+	if (force_irqthreads())
+		raise_ktimers_thread(nr);
+	else
+		__raise_softirq_irqoff(nr);
+}
+
+static inline unsigned int local_timers_pending(void)
+{
+	if (force_irqthreads())
+		return local_timers_pending_force_th();
+	else
+		return local_softirq_pending();
+}
+
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
 
 static inline struct task_struct *this_cpu_ksoftirqd(void)
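As a companion to the declarations above, the following standalone C11 model shows the handshake these helpers implement: the producer ORs BIT(nr) into a private pending word instead of the shared softirq mask, and the consumer fetches and clears it in one step, so ksoftirqd never sees those bits. This is an illustrative userspace sketch only; in the kernel, the per-CPU accessors run with interrupts disabled, so no atomics are needed there.

/* Userspace model of the ktimerd pending-bits handshake. */
#include <stdatomic.h>
#include <stdio.h>

#define BIT(nr)	(1UL << (nr))

static atomic_ulong pending_timer_softirq;	/* models the per-CPU word */

/* Producer side, models raise_ktimers_thread(). */
static void raise_ktimers_thread(unsigned int nr)
{
	atomic_fetch_or(&pending_timer_softirq, BIT(nr));
}

/* Consumer side, models the fetch-and-clear in run_ktimerd(). */
static unsigned long consume_timer_softirqs(void)
{
	return atomic_exchange(&pending_timer_softirq, 0);
}

int main(void)
{
	raise_ktimers_thread(1);	/* TIMER_SOFTIRQ */
	raise_ktimers_thread(8);	/* HRTIMER_SOFTIRQ */
	printf("pending bits: %#lx\n", consume_timer_softirqs());
	printf("after clear:  %#lx\n", consume_timer_softirqs());
	return 0;
}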

kernel/rcu/rcutorture.c

Lines changed: 8 additions & 0 deletions
@@ -2440,6 +2440,14 @@ static int rcutorture_booster_init(unsigned int cpu)
 		WARN_ON_ONCE(!t);
 		sp.sched_priority = 2;
 		sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+#ifdef CONFIG_IRQ_FORCED_THREADING
+		if (force_irqthreads()) {
+			t = per_cpu(ktimerd, cpu);
+			WARN_ON_ONCE(!t);
+			sp.sched_priority = 2;
+			sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
+		}
+#endif
 	}
 
 	/* Don't allow time recalculation while creating a new task. */

kernel/softirq.c

Lines changed: 68 additions & 1 deletion
@@ -624,6 +624,24 @@ static inline void tick_irq_exit(void)
 #endif
 }
 
+#ifdef CONFIG_IRQ_FORCED_THREADING
+DEFINE_PER_CPU(struct task_struct *, ktimerd);
+DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
+
+static void wake_timersd(void)
+{
+	struct task_struct *tsk = __this_cpu_read(ktimerd);
+
+	if (tsk)
+		wake_up_process(tsk);
+}
+
+#else
+
+static inline void wake_timersd(void) { }
+
+#endif
+
 static inline void __irq_exit_rcu(void)
 {
 #ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -636,6 +654,10 @@ static inline void __irq_exit_rcu(void)
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
 
+	if (IS_ENABLED(CONFIG_IRQ_FORCED_THREADING) && force_irqthreads() &&
+	    local_timers_pending_force_th() && !(in_nmi() | in_hardirq()))
+		wake_timersd();
+
 	tick_irq_exit();
 }
 
@@ -971,12 +993,57 @@ static struct smp_hotplug_thread softirq_threads = {
 	.thread_comm		= "ksoftirqd/%u",
 };
 
+#ifdef CONFIG_IRQ_FORCED_THREADING
+static void ktimerd_setup(unsigned int cpu)
+{
+	/* Above SCHED_NORMAL to handle timers before regular tasks. */
+	sched_set_fifo_low(current);
+}
+
+static int ktimerd_should_run(unsigned int cpu)
+{
+	return local_timers_pending_force_th();
+}
+
+void raise_ktimers_thread(unsigned int nr)
+{
+	trace_softirq_raise(nr);
+	__this_cpu_or(pending_timer_softirq, BIT(nr));
+}
+
+static void run_ktimerd(unsigned int cpu)
+{
+	unsigned int timer_si;
+
+	ksoftirqd_run_begin();
+
+	timer_si = local_timers_pending_force_th();
+	__this_cpu_write(pending_timer_softirq, 0);
+	or_softirq_pending(timer_si);
+
+	__do_softirq();
+
+	ksoftirqd_run_end();
+}
+
+static struct smp_hotplug_thread timer_thread = {
+	.store			= &ktimerd,
+	.setup			= ktimerd_setup,
+	.thread_should_run	= ktimerd_should_run,
+	.thread_fn		= run_ktimerd,
+	.thread_comm		= "ktimers/%u",
+};
+#endif
+
 static __init int spawn_ksoftirqd(void)
 {
 	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
 				  takeover_tasklets);
 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
+#ifdef CONFIG_IRQ_FORCED_THREADING
+	if (force_irqthreads())
+		BUG_ON(smpboot_register_percpu_thread(&timer_thread));
+#endif
 	return 0;
 }
 early_initcall(spawn_ksoftirqd);
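ktimerd_setup() above uses sched_set_fifo_low(), which on Linux corresponds to RT priority 1: above every SCHED_OTHER task but below all other RT tasks. A userspace sketch of the equivalent policy change (assumption: a Linux system where the caller has CAP_SYS_NICE; the file name is hypothetical):

/* fifo_low.c: put the calling thread at the lowest SCHED_FIFO
 * priority, a userspace analogue of sched_set_fifo_low().
 * Build: gcc -O2 -o fifo_low fifo_low.c */
#include <sched.h>
#include <stdio.h>

int main(void)
{
	struct sched_param sp = {
		/* 1 on Linux: above SCHED_OTHER, below other RT tasks. */
		.sched_priority = sched_get_priority_min(SCHED_FIFO),
	};

	/* pid 0 applies the policy to the calling thread. */
	if (sched_setscheduler(0, SCHED_FIFO, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}
	printf("now SCHED_FIFO prio %d\n", sp.sched_priority);
	return 0;
}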

kernel/time/hrtimer.c

Lines changed: 2 additions & 2 deletions
@@ -1811,7 +1811,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
 		cpu_base->softirq_expires_next = KTIME_MAX;
 		cpu_base->softirq_activated = 1;
-		__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+		raise_timer_softirq(HRTIMER_SOFTIRQ);
 	}
 
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1906,7 +1906,7 @@ void hrtimer_run_queues(void)
 	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
 		cpu_base->softirq_expires_next = KTIME_MAX;
 		cpu_base->softirq_activated = 1;
-		__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+		raise_timer_softirq(HRTIMER_SOFTIRQ);
 	}
 
 	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);

kernel/time/tick-sched.c

Lines changed: 1 addition & 1 deletion
@@ -859,7 +859,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 static inline bool local_timer_softirq_pending(void)
 {
-	return local_softirq_pending() & BIT(TIMER_SOFTIRQ);
+	return local_timers_pending() & BIT(TIMER_SOFTIRQ);
 }
 
 /*

kernel/time/timer.c

Lines changed: 1 addition & 1 deletion
@@ -2499,7 +2499,7 @@ static void run_local_timers(void)
 	 */
 	if (time_after_eq(jiffies, READ_ONCE(base->next_expiry)) ||
 	    (i == BASE_DEF && tmigr_requires_handle_remote())) {
-		__raise_softirq_irqoff(TIMER_SOFTIRQ);
+		raise_timer_softirq(TIMER_SOFTIRQ);
 		return;
 	}
 }
