
Commit de7007e

Merge tag 'nvme-6.12-2024-10-18' of git://git.infradead.org/nvme into block-6.12
Pull NVMe fixes from Keith:

 "nvme fixes for Linux 6.12

  - Fix target passthrough identifier (Nilay)
  - Fix tcp locking (Hannes)
  - Replace list with sbitmap for tracking RDMA rsp tags (Guixen)
  - Remove unnecessary fallthrough statements (Tokunori)
  - Remove ready-without-media support (Greg)
  - Fix multipath partition scan deadlock (Keith)
  - Fix concurrent PCI reset and remove queue mapping (Maurizio)
  - Fabrics shutdown fixes (Nilay)"

* tag 'nvme-6.12-2024-10-18' of git://git.infradead.org/nvme:
  nvme: use helper nvme_ctrl_state in nvme_keep_alive_finish function
  nvme: make keep-alive synchronous operation
  nvme-loop: flush off pending I/O while shutting down loop controller
  nvme-pci: fix race condition between reset and nvme_dev_disable()
  nvme-multipath: defer partition scanning
  nvme: disable CC.CRIME (NVME_CC_CRIME)
  nvme: delete unnecessary fallthru comment
  nvmet-rdma: use sbitmap to replace rsp free list
  nvme: tcp: avoid race between queue_lock lock and destroy
  nvmet-passthru: clear EUID/NGUID/UUID while using loop target
  block: fix blk_rq_map_integrity_sg kernel-doc
2 parents: 42aafd8 + 599d9f3

8 files changed: +113 lines, -70 lines


drivers/nvme/host/core.c

Lines changed: 17 additions & 24 deletions
@@ -1292,14 +1292,12 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 	queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
-static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
-						 blk_status_t status)
+static void nvme_keep_alive_finish(struct request *rq,
+		blk_status_t status, struct nvme_ctrl *ctrl)
 {
-	struct nvme_ctrl *ctrl = rq->end_io_data;
-	unsigned long flags;
-	bool startka = false;
 	unsigned long rtt = jiffies - (rq->deadline - rq->timeout);
 	unsigned long delay = nvme_keep_alive_work_period(ctrl);
+	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
 
 	/*
 	 * Subtract off the keepalive RTT so nvme_keep_alive_work runs
@@ -1313,25 +1311,17 @@ static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
 		delay = 0;
 	}
 
-	blk_mq_free_request(rq);
-
 	if (status) {
 		dev_err(ctrl->device,
 			"failed nvme_keep_alive_end_io error=%d\n",
 			status);
-		return RQ_END_IO_NONE;
+		return;
 	}
 
 	ctrl->ka_last_check_time = jiffies;
 	ctrl->comp_seen = false;
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (ctrl->state == NVME_CTRL_LIVE ||
-	    ctrl->state == NVME_CTRL_CONNECTING)
-		startka = true;
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-	if (startka)
+	if (state == NVME_CTRL_LIVE || state == NVME_CTRL_CONNECTING)
 		queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
-	return RQ_END_IO_NONE;
 }
 
 static void nvme_keep_alive_work(struct work_struct *work)
@@ -1340,6 +1330,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
 			struct nvme_ctrl, ka_work);
 	bool comp_seen = ctrl->comp_seen;
 	struct request *rq;
+	blk_status_t status;
 
 	ctrl->ka_last_check_time = jiffies;
 
@@ -1362,9 +1353,9 @@ static void nvme_keep_alive_work(struct work_struct *work)
 	nvme_init_request(rq, &ctrl->ka_cmd);
 
 	rq->timeout = ctrl->kato * HZ;
-	rq->end_io = nvme_keep_alive_end_io;
-	rq->end_io_data = ctrl;
-	blk_execute_rq_nowait(rq, false);
+	status = blk_execute_rq(rq, false);
+	nvme_keep_alive_finish(rq, status, ctrl);
+	blk_mq_free_request(rq);
 }
 
 static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -2458,8 +2449,13 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 	else
 		ctrl->ctrl_config = NVME_CC_CSS_NVM;
 
-	if (ctrl->cap & NVME_CAP_CRMS_CRWMS && ctrl->cap & NVME_CAP_CRMS_CRIMS)
-		ctrl->ctrl_config |= NVME_CC_CRIME;
+	/*
+	 * Setting CRIME results in CSTS.RDY before the media is ready. This
+	 * makes it possible for media related commands to return the error
+	 * NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY. Until the driver is
+	 * restructured to handle retries, disable CC.CRIME.
+	 */
+	ctrl->ctrl_config &= ~NVME_CC_CRIME;
 
 	ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
 	ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
@@ -2489,10 +2485,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
 	 * devices are known to get this wrong. Use the larger of the
 	 * two values.
 	 */
-	if (ctrl->ctrl_config & NVME_CC_CRIME)
-		ready_timeout = NVME_CRTO_CRIMT(crto);
-	else
-		ready_timeout = NVME_CRTO_CRWMT(crto);
+	ready_timeout = NVME_CRTO_CRWMT(crto);
 
 	if (ready_timeout < timeout)
 		dev_warn_once(ctrl->device, "bad crto:%x cap:%llx\n",
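Note on the keep-alive restructuring above: the command no longer completes through an asynchronous end_io callback; it is executed synchronously and the result is handled by a separate finish helper, with the request freed by the caller. Below is a minimal userspace C analogy of that control flow, not the kernel code itself; the names execute_request() and keep_alive_finish() are hypothetical stand-ins for blk_execute_rq() and nvme_keep_alive_finish().

/*
 * Userspace analogy of the synchronous keep-alive flow (hypothetical
 * names): execute synchronously, handle the result in a "finish"
 * helper, then free the request in the caller.
 */
#include <stdio.h>
#include <stdlib.h>

struct ka_request { int id; };

/* Pretend to send the keep-alive and wait for its completion. */
static int execute_request(struct ka_request *rq)
{
	printf("keep-alive %d sent\n", rq->id);
	return 0;			/* 0 == success */
}

/* Handle the completion; re-arm the next keep-alive only on success. */
static void keep_alive_finish(struct ka_request *rq, int status, int live)
{
	if (status) {
		fprintf(stderr, "keep-alive %d failed: %d\n", rq->id, status);
		return;
	}
	if (live)
		printf("scheduling next keep-alive\n");
}

int main(void)
{
	struct ka_request *rq = malloc(sizeof(*rq));

	rq->id = 1;
	keep_alive_finish(rq, execute_request(rq), 1);
	free(rq);			/* caller frees, as in the new code */
	return 0;
}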

drivers/nvme/host/multipath.c

Lines changed: 33 additions & 7 deletions
@@ -431,7 +431,6 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 	case NVME_CTRL_LIVE:
 	case NVME_CTRL_RESETTING:
 	case NVME_CTRL_CONNECTING:
-		/* fallthru */
 		return true;
 	default:
 		break;
@@ -580,6 +579,20 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
 	return ret;
 }
 
+static void nvme_partition_scan_work(struct work_struct *work)
+{
+	struct nvme_ns_head *head =
+		container_of(work, struct nvme_ns_head, partition_scan_work);
+
+	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
+					     &head->disk->state)))
+		return;
+
+	mutex_lock(&head->disk->open_mutex);
+	bdev_disk_changed(head->disk, false);
+	mutex_unlock(&head->disk->open_mutex);
+}
+
 static void nvme_requeue_work(struct work_struct *work)
 {
 	struct nvme_ns_head *head =
@@ -606,6 +619,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	bio_list_init(&head->requeue_list);
 	spin_lock_init(&head->requeue_lock);
 	INIT_WORK(&head->requeue_work, nvme_requeue_work);
+	INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
 
 	/*
 	 * Add a multipath node if the subsystems supports multiple controllers.
@@ -629,6 +643,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 		return PTR_ERR(head->disk);
 	head->disk->fops = &nvme_ns_head_ops;
 	head->disk->private_data = head;
+
+	/*
+	 * We need to suppress the partition scan from occuring within the
+	 * controller's scan_work context. If a path error occurs here, the IO
+	 * will wait until a path becomes available or all paths are torn down,
+	 * but that action also occurs within scan_work, so it would deadlock.
+	 * Defer the partion scan to a different context that does not block
+	 * scan_work.
+	 */
+	set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
 	sprintf(head->disk->disk_name, "nvme%dn%d",
 		ctrl->subsys->instance, head->instance);
 	return 0;
@@ -655,6 +679,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 			return;
 		}
 		nvme_add_ns_head_cdev(head);
+		kblockd_schedule_work(&head->partition_scan_work);
 	}
 
 	mutex_lock(&head->lock);
@@ -974,14 +999,14 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 		return;
 	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		nvme_cdev_del(&head->cdev, &head->cdev_device);
+		/*
+		 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+		 * to allow multipath to fail all I/O.
+		 */
+		synchronize_srcu(&head->srcu);
+		kblockd_schedule_work(&head->requeue_work);
 		del_gendisk(head->disk);
 	}
-	/*
-	 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
-	 * to allow multipath to fail all I/O.
-	 */
-	synchronize_srcu(&head->srcu);
-	kblockd_schedule_work(&head->requeue_work);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -991,6 +1016,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
+	flush_work(&head->partition_scan_work);
 	put_disk(head->disk);
 }
 
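The comment added in nvme_mpath_alloc_disk() above is the heart of this fix: the partition scan must not run in the context that is responsible for making a path available, or it can deadlock. Below is a small userspace C sketch of the same deferral idea, using a pthread in place of a kernel work item; scan(), partition_scan_worker(), and the teardown join are hypothetical analogies for scan_work, partition_scan_work, and flush_work().

#include <pthread.h>
#include <stdio.h>

static pthread_t partition_scan_thread;

/* Analogue of nvme_partition_scan_work(): may block waiting for a path. */
static void *partition_scan_worker(void *arg)
{
	(void)arg;
	printf("partition scan running outside the scan context\n");
	return NULL;
}

/* Analogue of scan_work: defer the possibly-blocking step instead of
 * running it inline, so the scan context itself never waits on it. */
static void scan(void)
{
	printf("namespace discovered; deferring partition scan\n");
	pthread_create(&partition_scan_thread, NULL, partition_scan_worker, NULL);
}

int main(void)
{
	scan();
	/* Analogue of flush_work() at teardown: wait for the deferred scan. */
	pthread_join(partition_scan_thread, NULL);
	return 0;
}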

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 0 deletions
@@ -494,6 +494,7 @@ struct nvme_ns_head {
 	struct bio_list		requeue_list;
 	spinlock_t		requeue_lock;
 	struct work_struct	requeue_work;
+	struct work_struct	partition_scan_work;
 	struct mutex		lock;
 	unsigned long		flags;
 #define NVME_NSHEAD_DISK_LIVE	0

drivers/nvme/host/pci.c

Lines changed: 16 additions & 3 deletions
@@ -2506,17 +2506,29 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev)
 	return 1;
 }
 
-static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
+static bool nvme_pci_update_nr_queues(struct nvme_dev *dev)
 {
 	if (!dev->ctrl.tagset) {
 		nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops,
 				nvme_pci_nr_maps(dev), sizeof(struct nvme_iod));
-		return;
+		return true;
+	}
+
+	/* Give up if we are racing with nvme_dev_disable() */
+	if (!mutex_trylock(&dev->shutdown_lock))
+		return false;
+
+	/* Check if nvme_dev_disable() has been executed already */
+	if (!dev->online_queues) {
+		mutex_unlock(&dev->shutdown_lock);
+		return false;
 	}
 
 	blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
 	/* free previously allocated queues that are no longer usable */
 	nvme_free_queues(dev, dev->online_queues);
+	mutex_unlock(&dev->shutdown_lock);
+	return true;
 }
 
 static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2797,7 +2809,8 @@ static void nvme_reset_work(struct work_struct *work)
 		nvme_dbbuf_set(dev);
 		nvme_unquiesce_io_queues(&dev->ctrl);
 		nvme_wait_freeze(&dev->ctrl);
-		nvme_pci_update_nr_queues(dev);
+		if (!nvme_pci_update_nr_queues(dev))
+			goto out;
 		nvme_unfreeze(&dev->ctrl);
 	} else {
 		dev_warn(dev->ctrl.device, "IO queues lost\n");
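The reset path above avoids blocking on a lock the shutdown path may already hold: it uses mutex_trylock() and simply reports failure if it loses the race. A minimal userspace C sketch of the same trylock-and-bail pattern follows; the names update_nr_queues, shutdown_lock, and online_queues are hypothetical mirrors of the kernel ones.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t shutdown_lock = PTHREAD_MUTEX_INITIALIZER;
static int online_queues = 4;

static bool update_nr_queues(void)
{
	/* Give up if we are racing with the shutdown path. */
	if (pthread_mutex_trylock(&shutdown_lock) != 0)
		return false;

	/* Check whether shutdown has already torn everything down. */
	if (!online_queues) {
		pthread_mutex_unlock(&shutdown_lock);
		return false;
	}

	printf("updating hw queues to %d\n", online_queues - 1);
	pthread_mutex_unlock(&shutdown_lock);
	return true;
}

int main(void)
{
	if (!update_nr_queues())
		fprintf(stderr, "reset aborted: racing with shutdown\n");
	return 0;
}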

drivers/nvme/host/tcp.c

Lines changed: 4 additions & 3 deletions
@@ -2644,18 +2644,19 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 
 	len = nvmf_get_address(ctrl, buf, size);
 
+	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
+		return len;
+
 	mutex_lock(&queue->queue_lock);
 
-	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
-		goto done;
 	ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
 	if (ret > 0) {
 		if (len > 0)
 			len--; /* strip trailing newline */
 		len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
 				(len) ? "," : "", &src_addr);
 	}
-done:
+
 	mutex_unlock(&queue->queue_lock);
 
 	return len;

drivers/nvme/target/loop.c

Lines changed: 13 additions & 0 deletions
@@ -265,6 +265,13 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
 {
 	if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags))
 		return;
+	/*
+	 * It's possible that some requests might have been added
+	 * after admin queue is stopped/quiesced. So now start the
+	 * queue to flush these requests to the completion.
+	 */
+	nvme_unquiesce_admin_queue(&ctrl->ctrl);
+
 	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
 	nvme_remove_admin_tag_set(&ctrl->ctrl);
 }
@@ -297,6 +304,12 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
 	}
 	ctrl->ctrl.queue_count = 1;
+	/*
+	 * It's possible that some requests might have been added
+	 * after io queue is stopped/quiesced. So now start the
+	 * queue to flush these requests to the completion.
+	 */
+	nvme_unquiesce_io_queues(&ctrl->ctrl);
 }
 
 static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
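The loop-target change above restarts (unquiesces) a queue before destroying it, so requests queued after the quiesce are still flushed to completion rather than stranded. A toy userspace C sketch of the same drain-before-teardown idea is shown below; the loop_queue structure and helper names are hypothetical.

#include <stdbool.h>
#include <stdio.h>

#define QUEUE_DEPTH 8

struct loop_queue {
	bool quiesced;		/* new submissions are held, not processed */
	int pending[QUEUE_DEPTH];
	int count;
};

/* Requests may still be added after the queue has been quiesced. */
static void submit(struct loop_queue *q, int req)
{
	if (q->count < QUEUE_DEPTH)
		q->pending[q->count++] = req;
}

/* Analogue of unquiescing: process whatever is still pending. */
static void unquiesce_and_flush(struct loop_queue *q)
{
	q->quiesced = false;
	for (int i = 0; i < q->count; i++)
		printf("completing request %d\n", q->pending[i]);
	q->count = 0;
}

static void destroy_queue(struct loop_queue *q)
{
	/* Flush stragglers to completion before tearing the queue down. */
	unquiesce_and_flush(q);
	printf("queue destroyed\n");
}

int main(void)
{
	struct loop_queue q = { .quiesced = true };

	submit(&q, 1);		/* added after the quiesce */
	destroy_queue(&q);
	return 0;
}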

drivers/nvme/target/passthru.c

Lines changed: 2 additions & 4 deletions
@@ -535,10 +535,6 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
 		break;
 	case nvme_admin_identify:
 		switch (req->cmd->identify.cns) {
-		case NVME_ID_CNS_CTRL:
-			req->execute = nvmet_passthru_execute_cmd;
-			req->p.use_workqueue = true;
-			return NVME_SC_SUCCESS;
 		case NVME_ID_CNS_CS_CTRL:
 			switch (req->cmd->identify.csi) {
 			case NVME_CSI_ZNS:
@@ -547,7 +543,9 @@
 				return NVME_SC_SUCCESS;
 			}
 			return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
+		case NVME_ID_CNS_CTRL:
 		case NVME_ID_CNS_NS:
+		case NVME_ID_CNS_NS_DESC_LIST:
			req->execute = nvmet_passthru_execute_cmd;
 			req->p.use_workqueue = true;
 			return NVME_SC_SUCCESS;
