Skip to content

Commit 7839d00

Browse files
committed
PM: sleep: Fix possible deadlocks in core system-wide PM code
It is reported that in low-memory situations the system-wide resume core code deadlocks, because async_schedule_dev() executes its argument function synchronously if it cannot allocate memory (and not only in that case) and that function attempts to acquire a mutex that is already held. Executing the argument function synchronously from within dpm_async_fn() may also be problematic for ordering reasons (it may cause a consumer device's resume callback to be invoked before a requisite supplier device's one, for example). Address this by changing the code in question to use async_schedule_dev_nocall() for scheduling the asynchronous execution of device suspend and resume functions and to directly run them synchronously if async_schedule_dev_nocall() returns false. Link: https://lore.kernel.org/linux-pm/ZYvjiqX6EsL15moe@perf/ Reported-by: Youngmin Nam <youngmin.nam@samsung.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> Tested-by: Youngmin Nam <youngmin.nam@samsung.com> Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org> Cc: 5.7+ <stable@vger.kernel.org> # 5.7+: 6aa09a5 async: Split async_schedule_node_domain() Cc: 5.7+ <stable@vger.kernel.org> # 5.7+: 7d4b5d7 async: Introduce async_schedule_dev_nocall() Cc: 5.7+ <stable@vger.kernel.org> # 5.7+
1 parent 7d4b5d7 commit 7839d00

File tree

1 file changed

+68
-80
lines changed

1 file changed

+68
-80
lines changed

drivers/base/power/main.c

Lines changed: 68 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -579,15 +579,15 @@ bool dev_pm_skip_resume(struct device *dev)
579579
}
580580

581581
/**
582-
* device_resume_noirq - Execute a "noirq resume" callback for given device.
582+
* __device_resume_noirq - Execute a "noirq resume" callback for given device.
583583
* @dev: Device to handle.
584584
* @state: PM transition of the system being carried out.
585585
* @async: If true, the device is being resumed asynchronously.
586586
*
587587
* The driver of @dev will not receive interrupts while this function is being
588588
* executed.
589589
*/
590-
static int device_resume_noirq(struct device *dev, pm_message_t state, bool async)
590+
static void __device_resume_noirq(struct device *dev, pm_message_t state, bool async)
591591
{
592592
pm_callback_t callback = NULL;
593593
const char *info = NULL;
@@ -655,7 +655,13 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
655655
Out:
656656
complete_all(&dev->power.completion);
657657
TRACE_RESUME(error);
658-
return error;
658+
659+
if (error) {
660+
suspend_stats.failed_resume_noirq++;
661+
dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
662+
dpm_save_failed_dev(dev_name(dev));
663+
pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
664+
}
659665
}
660666

661667
static bool is_async(struct device *dev)
@@ -668,27 +674,35 @@ static bool dpm_async_fn(struct device *dev, async_func_t func)
668674
{
669675
reinit_completion(&dev->power.completion);
670676

671-
if (is_async(dev)) {
672-
get_device(dev);
673-
async_schedule_dev(func, dev);
677+
if (!is_async(dev))
678+
return false;
679+
680+
get_device(dev);
681+
682+
if (async_schedule_dev_nocall(func, dev))
674683
return true;
675-
}
684+
685+
put_device(dev);
676686

677687
return false;
678688
}
679689

680690
static void async_resume_noirq(void *data, async_cookie_t cookie)
681691
{
682692
struct device *dev = data;
683-
int error;
684-
685-
error = device_resume_noirq(dev, pm_transition, true);
686-
if (error)
687-
pm_dev_err(dev, pm_transition, " async", error);
688693

694+
__device_resume_noirq(dev, pm_transition, true);
689695
put_device(dev);
690696
}
691697

698+
static void device_resume_noirq(struct device *dev)
699+
{
700+
if (dpm_async_fn(dev, async_resume_noirq))
701+
return;
702+
703+
__device_resume_noirq(dev, pm_transition, false);
704+
}
705+
692706
static void dpm_noirq_resume_devices(pm_message_t state)
693707
{
694708
struct device *dev;
@@ -698,32 +712,14 @@ static void dpm_noirq_resume_devices(pm_message_t state)
698712
mutex_lock(&dpm_list_mtx);
699713
pm_transition = state;
700714

701-
/*
702-
* Advanced the async threads upfront,
703-
* in case the starting of async threads is
704-
* delayed by non-async resuming devices.
705-
*/
706-
list_for_each_entry(dev, &dpm_noirq_list, power.entry)
707-
dpm_async_fn(dev, async_resume_noirq);
708-
709715
while (!list_empty(&dpm_noirq_list)) {
710716
dev = to_device(dpm_noirq_list.next);
711717
get_device(dev);
712718
list_move_tail(&dev->power.entry, &dpm_late_early_list);
713719

714720
mutex_unlock(&dpm_list_mtx);
715721

716-
if (!is_async(dev)) {
717-
int error;
718-
719-
error = device_resume_noirq(dev, state, false);
720-
if (error) {
721-
suspend_stats.failed_resume_noirq++;
722-
dpm_save_failed_step(SUSPEND_RESUME_NOIRQ);
723-
dpm_save_failed_dev(dev_name(dev));
724-
pm_dev_err(dev, state, " noirq", error);
725-
}
726-
}
722+
device_resume_noirq(dev);
727723

728724
put_device(dev);
729725

@@ -751,14 +747,14 @@ void dpm_resume_noirq(pm_message_t state)
751747
}
752748

753749
/**
754-
* device_resume_early - Execute an "early resume" callback for given device.
750+
* __device_resume_early - Execute an "early resume" callback for given device.
755751
* @dev: Device to handle.
756752
* @state: PM transition of the system being carried out.
757753
* @async: If true, the device is being resumed asynchronously.
758754
*
759755
* Runtime PM is disabled for @dev while this function is being executed.
760756
*/
761-
static int device_resume_early(struct device *dev, pm_message_t state, bool async)
757+
static void __device_resume_early(struct device *dev, pm_message_t state, bool async)
762758
{
763759
pm_callback_t callback = NULL;
764760
const char *info = NULL;
@@ -811,21 +807,31 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
811807

812808
pm_runtime_enable(dev);
813809
complete_all(&dev->power.completion);
814-
return error;
810+
811+
if (error) {
812+
suspend_stats.failed_resume_early++;
813+
dpm_save_failed_step(SUSPEND_RESUME_EARLY);
814+
dpm_save_failed_dev(dev_name(dev));
815+
pm_dev_err(dev, state, async ? " async early" : " early", error);
816+
}
815817
}
816818

817819
static void async_resume_early(void *data, async_cookie_t cookie)
818820
{
819821
struct device *dev = data;
820-
int error;
821-
822-
error = device_resume_early(dev, pm_transition, true);
823-
if (error)
824-
pm_dev_err(dev, pm_transition, " async", error);
825822

823+
__device_resume_early(dev, pm_transition, true);
826824
put_device(dev);
827825
}
828826

827+
static void device_resume_early(struct device *dev)
828+
{
829+
if (dpm_async_fn(dev, async_resume_early))
830+
return;
831+
832+
__device_resume_early(dev, pm_transition, false);
833+
}
834+
829835
/**
830836
* dpm_resume_early - Execute "early resume" callbacks for all devices.
831837
* @state: PM transition of the system being carried out.
@@ -839,32 +845,14 @@ void dpm_resume_early(pm_message_t state)
839845
mutex_lock(&dpm_list_mtx);
840846
pm_transition = state;
841847

842-
/*
843-
* Advanced the async threads upfront,
844-
* in case the starting of async threads is
845-
* delayed by non-async resuming devices.
846-
*/
847-
list_for_each_entry(dev, &dpm_late_early_list, power.entry)
848-
dpm_async_fn(dev, async_resume_early);
849-
850848
while (!list_empty(&dpm_late_early_list)) {
851849
dev = to_device(dpm_late_early_list.next);
852850
get_device(dev);
853851
list_move_tail(&dev->power.entry, &dpm_suspended_list);
854852

855853
mutex_unlock(&dpm_list_mtx);
856854

857-
if (!is_async(dev)) {
858-
int error;
859-
860-
error = device_resume_early(dev, state, false);
861-
if (error) {
862-
suspend_stats.failed_resume_early++;
863-
dpm_save_failed_step(SUSPEND_RESUME_EARLY);
864-
dpm_save_failed_dev(dev_name(dev));
865-
pm_dev_err(dev, state, " early", error);
866-
}
867-
}
855+
device_resume_early(dev);
868856

869857
put_device(dev);
870858

@@ -888,12 +876,12 @@ void dpm_resume_start(pm_message_t state)
888876
EXPORT_SYMBOL_GPL(dpm_resume_start);
889877

890878
/**
891-
* device_resume - Execute "resume" callbacks for given device.
879+
* __device_resume - Execute "resume" callbacks for given device.
892880
* @dev: Device to handle.
893881
* @state: PM transition of the system being carried out.
894882
* @async: If true, the device is being resumed asynchronously.
895883
*/
896-
static int device_resume(struct device *dev, pm_message_t state, bool async)
884+
static void __device_resume(struct device *dev, pm_message_t state, bool async)
897885
{
898886
pm_callback_t callback = NULL;
899887
const char *info = NULL;
@@ -975,20 +963,30 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
975963

976964
TRACE_RESUME(error);
977965

978-
return error;
966+
if (error) {
967+
suspend_stats.failed_resume++;
968+
dpm_save_failed_step(SUSPEND_RESUME);
969+
dpm_save_failed_dev(dev_name(dev));
970+
pm_dev_err(dev, state, async ? " async" : "", error);
971+
}
979972
}
980973

981974
static void async_resume(void *data, async_cookie_t cookie)
982975
{
983976
struct device *dev = data;
984-
int error;
985977

986-
error = device_resume(dev, pm_transition, true);
987-
if (error)
988-
pm_dev_err(dev, pm_transition, " async", error);
978+
__device_resume(dev, pm_transition, true);
989979
put_device(dev);
990980
}
991981

982+
static void device_resume(struct device *dev)
983+
{
984+
if (dpm_async_fn(dev, async_resume))
985+
return;
986+
987+
__device_resume(dev, pm_transition, false);
988+
}
989+
992990
/**
993991
* dpm_resume - Execute "resume" callbacks for non-sysdev devices.
994992
* @state: PM transition of the system being carried out.
@@ -1008,27 +1006,17 @@ void dpm_resume(pm_message_t state)
10081006
pm_transition = state;
10091007
async_error = 0;
10101008

1011-
list_for_each_entry(dev, &dpm_suspended_list, power.entry)
1012-
dpm_async_fn(dev, async_resume);
1013-
10141009
while (!list_empty(&dpm_suspended_list)) {
10151010
dev = to_device(dpm_suspended_list.next);
1011+
10161012
get_device(dev);
1017-
if (!is_async(dev)) {
1018-
int error;
10191013

1020-
mutex_unlock(&dpm_list_mtx);
1014+
mutex_unlock(&dpm_list_mtx);
1015+
1016+
device_resume(dev);
10211017

1022-
error = device_resume(dev, state, false);
1023-
if (error) {
1024-
suspend_stats.failed_resume++;
1025-
dpm_save_failed_step(SUSPEND_RESUME);
1026-
dpm_save_failed_dev(dev_name(dev));
1027-
pm_dev_err(dev, state, "", error);
1028-
}
1018+
mutex_lock(&dpm_list_mtx);
10291019

1030-
mutex_lock(&dpm_list_mtx);
1031-
}
10321020
if (!list_empty(&dev->power.entry))
10331021
list_move_tail(&dev->power.entry, &dpm_prepared_list);
10341022

Comments (0)