Skip to content

Commit 56cabb9

Browse files
dianders and rafaeljw
authored and committed
PM: sleep: Allow configuring the DPM watchdog to warn earlier than panic
Allow configuring the DPM watchdog to warn about slow suspend/resume functions without causing a system panic(). This allows you to set DPM_WATCHDOG_WARNING_TIMEOUT to something like 5 or 10 seconds to get warnings about slow suspend/resume functions that eventually succeed.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Link: https://patch.msgid.link/20250109125957.v2.1.I4554f931b8da97948f308ecc651b124338ee9603@changeid
[ rjw: Subject edit ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1 parent 96484d2 commit 56cabb9

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

drivers/base/power/main.c

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -496,6 +496,7 @@ struct dpm_watchdog {
496496
struct device *dev;
497497
struct task_struct *tsk;
498498
struct timer_list timer;
499+
bool fatal;
499500
};
500501

501502
#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
@@ -512,11 +513,23 @@ struct dpm_watchdog {
512513
static void dpm_watchdog_handler(struct timer_list *t)
513514
{
514515
struct dpm_watchdog *wd = from_timer(wd, t, timer);
516+
struct timer_list *timer = &wd->timer;
517+
unsigned int time_left;
518+
519+
if (wd->fatal) {
520+
dev_emerg(wd->dev, "**** DPM device timeout ****\n");
521+
show_stack(wd->tsk, NULL, KERN_EMERG);
522+
panic("%s %s: unrecoverable failure\n",
523+
dev_driver_string(wd->dev), dev_name(wd->dev));
524+
}
525+
526+
time_left = CONFIG_DPM_WATCHDOG_TIMEOUT - CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
527+
dev_warn(wd->dev, "**** DPM device timeout after %u seconds; %u seconds until panic ****\n",
528+
CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT, time_left);
529+
show_stack(wd->tsk, NULL, KERN_WARNING);
515530

516-
dev_emerg(wd->dev, "**** DPM device timeout ****\n");
517-
show_stack(wd->tsk, NULL, KERN_EMERG);
518-
panic("%s %s: unrecoverable failure\n",
519-
dev_driver_string(wd->dev), dev_name(wd->dev));
531+
wd->fatal = true;
532+
mod_timer(timer, jiffies + HZ * time_left);
520533
}
521534

522535
/**
@@ -530,10 +543,11 @@ static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
530543

531544
wd->dev = dev;
532545
wd->tsk = current;
546+
wd->fatal = CONFIG_DPM_WATCHDOG_TIMEOUT == CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
533547

534548
timer_setup_on_stack(timer, dpm_watchdog_handler, 0);
535549
/* use same timeout value for both suspend and resume */
536-
timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
550+
timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_WARNING_TIMEOUT;
537551
add_timer(timer);
538552
}
539553

kernel/power/Kconfig

Lines changed: 20 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -257,11 +257,30 @@ config DPM_WATCHDOG
257257
boot session.
258258

259259
config DPM_WATCHDOG_TIMEOUT
260-
int "Watchdog timeout in seconds"
260+
int "Watchdog timeout to panic in seconds"
261261
range 1 120
262262
default 120
263263
depends on DPM_WATCHDOG
264264

265+
config DPM_WATCHDOG_WARNING_TIMEOUT
266+
int "Watchdog timeout to warn in seconds"
267+
range 1 DPM_WATCHDOG_TIMEOUT
268+
default DPM_WATCHDOG_TIMEOUT
269+
depends on DPM_WATCHDOG
270+
help
271+
If the DPM watchdog warning timeout and main timeout are
272+
different then a non-fatal warning (with a stack trace of
273+
the stuck suspend routine) will be printed when the warning
274+
timeout expires. If the suspend routine gets un-stuck
275+
before the main timeout expires then no other action is
276+
taken. If the routine continues to be stuck and the main
277+
timeout expires then an emergency-level message and stack
278+
trace will be printed and the system will panic.
279+
280+
If the warning timeout is equal to the main timeout (the
281+
default) then the warning will never happen and the system
282+
will jump straight to panic when the main timeout expires.
283+
265284
config PM_TRACE
266285
bool
267286
help

0 commit comments

Comments
 (0)