Skip to content

Commit 6aa0365

Browse files
committed
ata: libata-scsi: Avoid deadlock on rescan after device resume
When an ATA port is resumed from sleep, the port is reset and a power management request is issued to libata EH to reset the port and rescan the device(s) attached to the port. Device rescanning is done by scheduling an ata_scsi_dev_rescan() work, which will execute scsi_rescan_device(). However, scsi_rescan_device() takes the generic device lock, which is also taken by dpm_resume() when the SCSI device is resumed as well. If a device rescan execution starts before the completion of the SCSI device resume, the RCU locking used to refresh the cached VPD pages of the device, combined with the generic device locking from scsi_rescan_device() and from dpm_resume(), can cause a deadlock. Avoid this situation by changing struct ata_port scsi_rescan_task to be a delayed work instead of a simple work_struct. ata_scsi_dev_rescan() is modified to check if the SCSI device associated with the ATA device that must be rescanned is not suspended. If the SCSI device is still suspended, ata_scsi_dev_rescan() returns early and reschedules itself for execution after an arbitrary delay of 5ms. Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com> Reported-by: Joe Breuer <linux-kernel@jmbreuer.net> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217530 Fixes: a19a93e ("scsi: core: pm: Rely on the device driver core for async power management") Signed-off-by: Damien Le Moal <dlemoal@kernel.org> Reviewed-by: Hannes Reinecke <hare@suse.de> Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com> Tested-by: Joe Breuer <linux-kernel@jmbreuer.net>
1 parent 7f87585 commit 6aa0365

File tree

4 files changed

+25
-4
lines changed

4 files changed

+25
-4
lines changed

drivers/ata/libata-core.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5348,7 +5348,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
53485348

53495349
mutex_init(&ap->scsi_scan_mutex);
53505350
INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
5351-
INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
5351+
INIT_DELAYED_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
53525352
INIT_LIST_HEAD(&ap->eh_done_q);
53535353
init_waitqueue_head(&ap->eh_wait_q);
53545354
init_completion(&ap->park_req_pending);
@@ -5954,6 +5954,7 @@ static void ata_port_detach(struct ata_port *ap)
59545954
WARN_ON(!(ap->pflags & ATA_PFLAG_UNLOADED));
59555955

59565956
cancel_delayed_work_sync(&ap->hotplug_task);
5957+
cancel_delayed_work_sync(&ap->scsi_rescan_task);
59575958

59585959
skip_eh:
59595960
/* clean up zpodd on port removal */

drivers/ata/libata-eh.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2984,7 +2984,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
29842984
ehc->i.flags |= ATA_EHI_SETMODE;
29852985

29862986
/* schedule the scsi_rescan_device() here */
2987-
schedule_work(&(ap->scsi_rescan_task));
2987+
schedule_delayed_work(&ap->scsi_rescan_task, 0);
29882988
} else if (dev->class == ATA_DEV_UNKNOWN &&
29892989
ehc->tries[dev->devno] &&
29902990
ata_class_enabled(ehc->classes[dev->devno])) {

drivers/ata/libata-scsi.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4597,10 +4597,11 @@ int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
45974597
void ata_scsi_dev_rescan(struct work_struct *work)
45984598
{
45994599
struct ata_port *ap =
4600-
container_of(work, struct ata_port, scsi_rescan_task);
4600+
container_of(work, struct ata_port, scsi_rescan_task.work);
46014601
struct ata_link *link;
46024602
struct ata_device *dev;
46034603
unsigned long flags;
4604+
bool delay_rescan = false;
46044605

46054606
mutex_lock(&ap->scsi_scan_mutex);
46064607
spin_lock_irqsave(ap->lock, flags);
@@ -4614,6 +4615,21 @@ void ata_scsi_dev_rescan(struct work_struct *work)
46144615
if (scsi_device_get(sdev))
46154616
continue;
46164617

4618+
/*
4619+
* If the rescan work was scheduled because of a resume
4620+
* event, the port is already fully resumed, but the
4621+
* SCSI device may not yet be fully resumed. In such
4622+
* case, executing scsi_rescan_device() may cause a
4623+
* deadlock with the PM code on device_lock(). Prevent
4624+
* this by giving up and retrying rescan after a short
4625+
* delay.
4626+
*/
4627+
delay_rescan = sdev->sdev_gendev.power.is_suspended;
4628+
if (delay_rescan) {
4629+
scsi_device_put(sdev);
4630+
break;
4631+
}
4632+
46174633
spin_unlock_irqrestore(ap->lock, flags);
46184634
scsi_rescan_device(&(sdev->sdev_gendev));
46194635
scsi_device_put(sdev);
@@ -4623,4 +4639,8 @@ void ata_scsi_dev_rescan(struct work_struct *work)
46234639

46244640
spin_unlock_irqrestore(ap->lock, flags);
46254641
mutex_unlock(&ap->scsi_scan_mutex);
4642+
4643+
if (delay_rescan)
4644+
schedule_delayed_work(&ap->scsi_rescan_task,
4645+
msecs_to_jiffies(5));
46264646
}

include/linux/libata.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -836,7 +836,7 @@ struct ata_port {
836836

837837
struct mutex scsi_scan_mutex;
838838
struct delayed_work hotplug_task;
839-
struct work_struct scsi_rescan_task;
839+
struct delayed_work scsi_rescan_task;
840840

841841
unsigned int hsm_task_state;
842842

0 commit comments

Comments
 (0)