Skip to content

Commit 03690d8

Browse files
mikechristie authored and martinkpetersen committed
scsi: iscsi: Fix unbound endpoint error handling
If a driver raises a connection error before the connection is bound, we can leave a cleanup_work queued that can later run and disconnect/stop a connection that is logged in. The problem is that drivers can call iscsi_conn_error_event for endpoints that are connected but not yet bound when something like the network port they are using is brought down. iscsi_cleanup_conn_work_fn will check for this and exit early, but if the cleanup_work is stuck behind other works, it might not get run until after userspace has done ep_disconnect. Because the endpoint is not yet bound there was no way for ep_disconnect to flush the work. The bug of leaving stop_conns queued was added in: Commit 23d6fef ("scsi: iscsi: Fix in-kernel conn failure handling") and: Commit 0ab7104 ("scsi: iscsi: Perform connection failure entirely in kernel space") was supposed to fix it, but left this case. This patch moves the conn state check to before we even queue the work so we can avoid queueing. Link: https://lore.kernel.org/r/20220408001314.5014-7-michael.christie@oracle.com Fixes: 0ab7104 ("scsi: iscsi: Perform connection failure entirely in kernel space") Tested-by: Manish Rangankar <mrangankar@marvell.com> Reviewed-by: Lee Duncan <lduncan@suse.com> Reviewed-by: Chris Leech <cleech@redhat.com> Signed-off-by: Mike Christie <michael.christie@oracle.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
1 parent 7c6e99c commit 03690d8

File tree

1 file changed

+36
-29
lines changed

1 file changed

+36
-29
lines changed

drivers/scsi/scsi_transport_iscsi.c

Lines changed: 36 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -2201,10 +2201,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
22012201

22022202
switch (flag) {
22032203
case STOP_CONN_RECOVER:
2204-
conn->state = ISCSI_CONN_FAILED;
2204+
WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
22052205
break;
22062206
case STOP_CONN_TERM:
2207-
conn->state = ISCSI_CONN_DOWN;
2207+
WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
22082208
break;
22092209
default:
22102210
iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
@@ -2222,7 +2222,7 @@ static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
22222222
struct iscsi_endpoint *ep;
22232223

22242224
ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
2225-
conn->state = ISCSI_CONN_FAILED;
2225+
WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
22262226

22272227
if (!conn->ep || !session->transport->ep_disconnect)
22282228
return;
@@ -2321,21 +2321,6 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
23212321
struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
23222322

23232323
mutex_lock(&conn->ep_mutex);
2324-
/*
2325-
* If we are not at least bound there is nothing for us to do. Userspace
2326-
* will do a ep_disconnect call if offload is used, but will not be
2327-
* doing a stop since there is nothing to clean up, so we have to clear
2328-
* the cleanup bit here.
2329-
*/
2330-
if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) {
2331-
ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n");
2332-
spin_lock_irq(&conn->lock);
2333-
clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
2334-
spin_unlock_irq(&conn->lock);
2335-
mutex_unlock(&conn->ep_mutex);
2336-
return;
2337-
}
2338-
23392324
/*
23402325
* Get a ref to the ep, so we don't release its ID until after
23412326
* userspace is done referencing it in iscsi_if_disconnect_bound_ep.
@@ -2391,7 +2376,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
23912376
INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
23922377
conn->transport = transport;
23932378
conn->cid = cid;
2394-
conn->state = ISCSI_CONN_DOWN;
2379+
WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
23952380

23962381
/* this is released in the dev's release function */
23972382
if (!get_device(&session->dev))
@@ -2590,10 +2575,30 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
25902575
struct iscsi_internal *priv;
25912576
int len = nlmsg_total_size(sizeof(*ev));
25922577
unsigned long flags;
2578+
int state;
25932579

25942580
spin_lock_irqsave(&conn->lock, flags);
2595-
if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
2596-
queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
2581+
/*
2582+
* Userspace will only do a stop call if we are at least bound. And, we
2583+
* only need to do the in kernel cleanup if in the UP state so cmds can
2584+
* be released to upper layers. If in other states just wait for
2585+
* userspace to avoid races that can leave the cleanup_work queued.
2586+
*/
2587+
state = READ_ONCE(conn->state);
2588+
switch (state) {
2589+
case ISCSI_CONN_BOUND:
2590+
case ISCSI_CONN_UP:
2591+
if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP,
2592+
&conn->flags)) {
2593+
queue_work(iscsi_conn_cleanup_workq,
2594+
&conn->cleanup_work);
2595+
}
2596+
break;
2597+
default:
2598+
ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n",
2599+
state);
2600+
break;
2601+
}
25972602
spin_unlock_irqrestore(&conn->lock, flags);
25982603

25992604
priv = iscsi_if_transport_lookup(conn->transport);
@@ -2944,7 +2949,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
29442949
char *data = (char*)ev + sizeof(*ev);
29452950
struct iscsi_cls_conn *conn;
29462951
struct iscsi_cls_session *session;
2947-
int err = 0, value = 0;
2952+
int err = 0, value = 0, state;
29482953

29492954
if (ev->u.set_param.len > PAGE_SIZE)
29502955
return -EINVAL;
@@ -2961,8 +2966,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
29612966
session->recovery_tmo = value;
29622967
break;
29632968
default:
2964-
if ((conn->state == ISCSI_CONN_BOUND) ||
2965-
(conn->state == ISCSI_CONN_UP)) {
2969+
state = READ_ONCE(conn->state);
2970+
if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
29662971
err = transport->set_param(conn, ev->u.set_param.param,
29672972
data, ev->u.set_param.len);
29682973
} else {
@@ -3758,7 +3763,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
37583763
ev->u.b_conn.transport_eph,
37593764
ev->u.b_conn.is_leading);
37603765
if (!ev->r.retcode)
3761-
conn->state = ISCSI_CONN_BOUND;
3766+
WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
37623767

37633768
if (ev->r.retcode || !transport->ep_connect)
37643769
break;
@@ -3777,7 +3782,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
37773782
case ISCSI_UEVENT_START_CONN:
37783783
ev->r.retcode = transport->start_conn(conn);
37793784
if (!ev->r.retcode)
3780-
conn->state = ISCSI_CONN_UP;
3785+
WRITE_ONCE(conn->state, ISCSI_CONN_UP);
3786+
37813787
break;
37823788
case ISCSI_UEVENT_SEND_PDU:
37833789
pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
@@ -4084,10 +4090,11 @@ static ssize_t show_conn_state(struct device *dev,
40844090
{
40854091
struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);
40864092
const char *state = "unknown";
4093+
int conn_state = READ_ONCE(conn->state);
40874094

4088-
if (conn->state >= 0 &&
4089-
conn->state < ARRAY_SIZE(connection_state_names))
4090-
state = connection_state_names[conn->state];
4095+
if (conn_state >= 0 &&
4096+
conn_state < ARRAY_SIZE(connection_state_names))
4097+
state = connection_state_names[conn_state];
40914098

40924099
return sysfs_emit(buf, "%s\n", state);
40934100
}

0 commit comments

Comments
 (0)