Skip to content

Commit 71a5bb1

Browse files
committed
nvme: ensure disabling pairs with unquiesce
If any error handling that disables the controller fails to queue the reset work, for example because the state changed to disconnected in between, then the failed teardown needs to unquiesce the queues since it's no longer paired with reset_work. Just make sure that the controller can be put into a resetting state prior to starting the disable so that no other handling can change the queue states while recovery is happening.

Reported-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
1 parent ee6fdc5 commit 71a5bb1

File tree

1 file changed

+17
-8
lines changed

1 file changed

+17
-8
lines changed

drivers/nvme/host/pci.c

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1298,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
12981298
*/
12991299
if (nvme_should_reset(dev, csts)) {
13001300
nvme_warn_reset(dev, csts);
1301-
nvme_dev_disable(dev, false);
1302-
nvme_reset_ctrl(&dev->ctrl);
1303-
return BLK_EH_DONE;
1301+
goto disable;
13041302
}
13051303

13061304
/*
@@ -1351,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13511349
"I/O %d QID %d timeout, reset controller\n",
13521350
req->tag, nvmeq->qid);
13531351
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
1354-
nvme_dev_disable(dev, false);
1355-
nvme_reset_ctrl(&dev->ctrl);
1356-
1357-
return BLK_EH_DONE;
1352+
goto disable;
13581353
}
13591354

13601355
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1391,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13911386
* as the device then is in a faulty state.
13921387
*/
13931388
return BLK_EH_RESET_TIMER;
1389+
1390+
disable:
1391+
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
1392+
return BLK_EH_DONE;
1393+
1394+
nvme_dev_disable(dev, false);
1395+
if (nvme_try_sched_reset(&dev->ctrl))
1396+
nvme_unquiesce_io_queues(&dev->ctrl);
1397+
return BLK_EH_DONE;
13941398
}
13951399

13961400
static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -3278,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
32783282
case pci_channel_io_frozen:
32793283
dev_warn(dev->ctrl.device,
32803284
"frozen state error detected, reset controller\n");
3285+
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
3286+
nvme_dev_disable(dev, true);
3287+
return PCI_ERS_RESULT_DISCONNECT;
3288+
}
32813289
nvme_dev_disable(dev, false);
32823290
return PCI_ERS_RESULT_NEED_RESET;
32833291
case pci_channel_io_perm_failure:
@@ -3294,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
32943302

32953303
dev_info(dev->ctrl.device, "restart after slot reset\n");
32963304
pci_restore_state(pdev);
3297-
nvme_reset_ctrl(&dev->ctrl);
3305+
if (!nvme_try_sched_reset(&dev->ctrl))
3306+
nvme_unquiesce_io_queues(&dev->ctrl);
32983307
return PCI_ERS_RESULT_RECOVERED;
32993308
}
33003309

0 commit comments

Comments
 (0)