Skip to content

Commit 6fbb0e3

Browse files
igawgregkh
authored andcommitted
nvme-fc: rely on state transitions to handle connectivity loss
commit f13409b upstream. It's not possible to call nvme_state_ctrl_state with holding a spin lock, because nvme_state_ctrl_state calls cancel_delayed_work_sync when fastfail is enabled. Instead syncing the ASSOC_FLAG and state transitions using a lock, it's possible to only rely on the state machine transitions. That means nvme_fc_ctrl_connectivity_loss should unconditionally call nvme_reset_ctrl which avoids the read race on the ctrl state variable. Actually, it's not necessary to test in which state the ctrl is, the reset work will only scheduled when the state machine is in LIVE state. In nvme_fc_create_association, the LIVE state can only be entered if it was previously CONNECTING. If this is not possible then the reset handler got triggered. Thus just error out here. Fixes: ee59e38 ("nvme-fc: do not ignore connectivity loss during connecting") Closes: https://lore.kernel.org/all/denqwui6sl5erqmz2gvrwueyxakl5txzbbiu3fgebryzrfxunm@iwxuthct377m/ Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Daniel Wagner <wagi@kernel.org> Signed-off-by: Keith Busch <kbusch@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 18ab6b6 commit 6fbb0e3

File tree

1 file changed

+6
-61
lines changed
  • drivers/nvme/host

1 file changed

+6
-61
lines changed

drivers/nvme/host/fc.c

Lines changed: 6 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -782,61 +782,12 @@ nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
782782
static void
783783
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
784784
{
785-
enum nvme_ctrl_state state;
786-
unsigned long flags;
787-
788785
dev_info(ctrl->ctrl.device,
789786
"NVME-FC{%d}: controller connectivity lost. Awaiting "
790787
"Reconnect", ctrl->cnum);
791788

792-
spin_lock_irqsave(&ctrl->lock, flags);
793789
set_bit(ASSOC_FAILED, &ctrl->flags);
794-
state = nvme_ctrl_state(&ctrl->ctrl);
795-
spin_unlock_irqrestore(&ctrl->lock, flags);
796-
797-
switch (state) {
798-
case NVME_CTRL_NEW:
799-
case NVME_CTRL_LIVE:
800-
/*
801-
* Schedule a controller reset. The reset will terminate the
802-
* association and schedule the reconnect timer. Reconnects
803-
* will be attempted until either the ctlr_loss_tmo
804-
* (max_retries * connect_delay) expires or the remoteport's
805-
* dev_loss_tmo expires.
806-
*/
807-
if (nvme_reset_ctrl(&ctrl->ctrl)) {
808-
dev_warn(ctrl->ctrl.device,
809-
"NVME-FC{%d}: Couldn't schedule reset.\n",
810-
ctrl->cnum);
811-
nvme_delete_ctrl(&ctrl->ctrl);
812-
}
813-
break;
814-
815-
case NVME_CTRL_CONNECTING:
816-
/*
817-
* The association has already been terminated and the
818-
* controller is attempting reconnects. No need to do anything
819-
* futher. Reconnects will be attempted until either the
820-
* ctlr_loss_tmo (max_retries * connect_delay) expires or the
821-
* remoteport's dev_loss_tmo expires.
822-
*/
823-
break;
824-
825-
case NVME_CTRL_RESETTING:
826-
/*
827-
* Controller is already in the process of terminating the
828-
* association. No need to do anything further. The reconnect
829-
* step will kick in naturally after the association is
830-
* terminated.
831-
*/
832-
break;
833-
834-
case NVME_CTRL_DELETING:
835-
case NVME_CTRL_DELETING_NOIO:
836-
default:
837-
/* no action to take - let it delete */
838-
break;
839-
}
790+
nvme_reset_ctrl(&ctrl->ctrl);
840791
}
841792

842793
/**
@@ -3072,7 +3023,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
30723023
struct nvmefc_ls_rcv_op *disls = NULL;
30733024
unsigned long flags;
30743025
int ret;
3075-
bool changed;
30763026

30773027
++ctrl->ctrl.nr_reconnects;
30783028

@@ -3178,23 +3128,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
31783128
else
31793129
ret = nvme_fc_recreate_io_queues(ctrl);
31803130
}
3131+
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
3132+
ret = -EIO;
31813133
if (ret)
31823134
goto out_term_aen_ops;
31833135

3184-
spin_lock_irqsave(&ctrl->lock, flags);
3185-
if (!test_bit(ASSOC_FAILED, &ctrl->flags))
3186-
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
3187-
else
3136+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) {
31883137
ret = -EIO;
3189-
spin_unlock_irqrestore(&ctrl->lock, flags);
3190-
3191-
if (ret)
31923138
goto out_term_aen_ops;
3139+
}
31933140

31943141
ctrl->ctrl.nr_reconnects = 0;
3195-
3196-
if (changed)
3197-
nvme_start_ctrl(&ctrl->ctrl);
3142+
nvme_start_ctrl(&ctrl->ctrl);
31983143

31993144
return 0; /* Success */
32003145

0 commit comments

Comments
 (0)