Skip to content

Commit e6e7f7a

Browse files
committed
nvme: ensure reset state check ordering
A different CPU may be setting the ctrl->state value, so ensure proper barriers to prevent optimizing to a stale state. Normally it isn't a problem to observe the wrong state as it is merely advisory to take a quicker path during initialization and error recovery, but seeing an old state can report unexpected ENETRESET errors when a reset request was in fact successful.

Reported-by: Minh Hoang <mh2022@meta.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Hannes Reinecke <hare@suse.de>
1 parent 5c687c2 commit e6e7f7a

File tree

5 files changed

+63
-49
lines changed

5 files changed

+63
-49
lines changed

drivers/nvme/host/core.c

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
131131
/*
132132
* Only new queue scan work when admin and IO queues are both alive
133133
*/
134-
if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
134+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
135135
queue_work(nvme_wq, &ctrl->scan_work);
136136
}
137137

@@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
143143
*/
144144
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
145145
{
146-
if (ctrl->state != NVME_CTRL_RESETTING)
146+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
147147
return -EBUSY;
148148
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
149149
return -EBUSY;
@@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
156156
struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
157157
struct nvme_ctrl, failfast_work);
158158

159-
if (ctrl->state != NVME_CTRL_CONNECTING)
159+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
160160
return;
161161

162162
set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
200200
ret = nvme_reset_ctrl(ctrl);
201201
if (!ret) {
202202
flush_work(&ctrl->reset_work);
203-
if (ctrl->state != NVME_CTRL_LIVE)
203+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
204204
ret = -ENETRESET;
205205
}
206206

@@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
499499

500500
spin_lock_irqsave(&ctrl->lock, flags);
501501

502-
old_state = ctrl->state;
502+
old_state = nvme_ctrl_state(ctrl);
503503
switch (new_state) {
504504
case NVME_CTRL_LIVE:
505505
switch (old_state) {
@@ -567,19 +567,19 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
567567
}
568568

569569
if (changed) {
570-
ctrl->state = new_state;
570+
WRITE_ONCE(ctrl->state, new_state);
571571
wake_up_all(&ctrl->state_wq);
572572
}
573573

574574
spin_unlock_irqrestore(&ctrl->lock, flags);
575575
if (!changed)
576576
return false;
577577

578-
if (ctrl->state == NVME_CTRL_LIVE) {
578+
if (new_state == NVME_CTRL_LIVE) {
579579
if (old_state == NVME_CTRL_CONNECTING)
580580
nvme_stop_failfast_work(ctrl);
581581
nvme_kick_requeue_lists(ctrl);
582-
} else if (ctrl->state == NVME_CTRL_CONNECTING &&
582+
} else if (new_state == NVME_CTRL_CONNECTING &&
583583
old_state == NVME_CTRL_RESETTING) {
584584
nvme_start_failfast_work(ctrl);
585585
}
@@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
592592
*/
593593
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
594594
{
595-
switch (ctrl->state) {
595+
switch (nvme_ctrl_state(ctrl)) {
596596
case NVME_CTRL_NEW:
597597
case NVME_CTRL_LIVE:
598598
case NVME_CTRL_RESETTING:
@@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
617617
wait_event(ctrl->state_wq,
618618
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
619619
nvme_state_terminal(ctrl));
620-
return ctrl->state == NVME_CTRL_RESETTING;
620+
return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
621621
}
622622
EXPORT_SYMBOL_GPL(nvme_wait_reset);
623623

@@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
704704
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
705705
struct request *rq)
706706
{
707-
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
708-
ctrl->state != NVME_CTRL_DELETING &&
709-
ctrl->state != NVME_CTRL_DEAD &&
707+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
708+
709+
if (state != NVME_CTRL_DELETING_NOIO &&
710+
state != NVME_CTRL_DELETING &&
711+
state != NVME_CTRL_DEAD &&
710712
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
711713
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
712714
return BLK_STS_RESOURCE;
@@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
736738
* command, which is require to set the queue live in the
737739
* appropinquate states.
738740
*/
739-
switch (ctrl->state) {
741+
switch (nvme_ctrl_state(ctrl)) {
740742
case NVME_CTRL_CONNECTING:
741743
if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
742744
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -2550,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
25502552

25512553
if (ctrl->ps_max_latency_us != latency) {
25522554
ctrl->ps_max_latency_us = latency;
2553-
if (ctrl->state == NVME_CTRL_LIVE)
2555+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
25542556
nvme_configure_apst(ctrl);
25552557
}
25562558
}
@@ -3238,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
32383240
struct nvme_ctrl *ctrl =
32393241
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
32403242

3241-
switch (ctrl->state) {
3243+
switch (nvme_ctrl_state(ctrl)) {
32423244
case NVME_CTRL_LIVE:
32433245
break;
32443246
default:
@@ -3924,7 +3926,7 @@ static void nvme_scan_work(struct work_struct *work)
39243926
int ret;
39253927

39263928
/* No tagset on a live ctrl means IO queues could not created */
3927-
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
3929+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
39283930
return;
39293931

39303932
/*
@@ -3994,7 +3996,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
39943996
* removing the namespaces' disks; fail all the queues now to avoid
39953997
* potentially having to clean up the failed sync later.
39963998
*/
3997-
if (ctrl->state == NVME_CTRL_DEAD)
3999+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
39984000
nvme_mark_namespaces_dead(ctrl);
39994001

40004002
/* this is a no-op when called from the controller reset handler */
@@ -4076,7 +4078,7 @@ static void nvme_async_event_work(struct work_struct *work)
40764078
* flushing ctrl async_event_work after changing the controller state
40774079
* from LIVE and before freeing the admin queue.
40784080
*/
4079-
if (ctrl->state == NVME_CTRL_LIVE)
4081+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
40804082
ctrl->ops->submit_async_event(ctrl);
40814083
}
40824084

@@ -4471,7 +4473,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
44714473
{
44724474
int ret;
44734475

4474-
ctrl->state = NVME_CTRL_NEW;
4476+
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
44754477
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
44764478
spin_lock_init(&ctrl->lock);
44774479
mutex_init(&ctrl->scan_lock);

drivers/nvme/host/fc.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
557557
static void
558558
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
559559
{
560-
switch (ctrl->ctrl.state) {
560+
switch (nvme_ctrl_state(&ctrl->ctrl)) {
561561
case NVME_CTRL_NEW:
562562
case NVME_CTRL_CONNECTING:
563563
/*
@@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
793793
"NVME-FC{%d}: controller connectivity lost. Awaiting "
794794
"Reconnect", ctrl->cnum);
795795

796-
switch (ctrl->ctrl.state) {
796+
switch (nvme_ctrl_state(&ctrl->ctrl)) {
797797
case NVME_CTRL_NEW:
798798
case NVME_CTRL_LIVE:
799799
/*
@@ -3319,7 +3319,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
33193319
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
33203320
bool recon = true;
33213321

3322-
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
3322+
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
33233323
return;
33243324

33253325
if (portptr->port_state == FC_OBJSTATE_ONLINE) {

drivers/nvme/host/pci.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
12331233
bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
12341234

12351235
/* If there is a reset/reinit ongoing, we shouldn't reset again. */
1236-
switch (dev->ctrl.state) {
1236+
switch (nvme_ctrl_state(&dev->ctrl)) {
12371237
case NVME_CTRL_RESETTING:
12381238
case NVME_CTRL_CONNECTING:
12391239
return false;
@@ -1321,7 +1321,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
13211321
* cancellation error. All outstanding requests are completed on
13221322
* shutdown, so we return BLK_EH_DONE.
13231323
*/
1324-
switch (dev->ctrl.state) {
1324+
switch (nvme_ctrl_state(&dev->ctrl)) {
13251325
case NVME_CTRL_CONNECTING:
13261326
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
13271327
fallthrough;
@@ -1593,7 +1593,7 @@ static int nvme_setup_io_queues_trylock(struct nvme_dev *dev)
15931593
/*
15941594
* Controller is in wrong state, fail early.
15951595
*/
1596-
if (dev->ctrl.state != NVME_CTRL_CONNECTING) {
1596+
if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_CONNECTING) {
15971597
mutex_unlock(&dev->shutdown_lock);
15981598
return -ENODEV;
15991599
}
@@ -2573,13 +2573,13 @@ static bool nvme_pci_ctrl_is_dead(struct nvme_dev *dev)
25732573

25742574
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
25752575
{
2576+
enum nvme_ctrl_state state = nvme_ctrl_state(&dev->ctrl);
25762577
struct pci_dev *pdev = to_pci_dev(dev->dev);
25772578
bool dead;
25782579

25792580
mutex_lock(&dev->shutdown_lock);
25802581
dead = nvme_pci_ctrl_is_dead(dev);
2581-
if (dev->ctrl.state == NVME_CTRL_LIVE ||
2582-
dev->ctrl.state == NVME_CTRL_RESETTING) {
2582+
if (state == NVME_CTRL_LIVE || state == NVME_CTRL_RESETTING) {
25832583
if (pci_is_enabled(pdev))
25842584
nvme_start_freeze(&dev->ctrl);
25852585
/*
@@ -2690,7 +2690,7 @@ static void nvme_reset_work(struct work_struct *work)
26902690
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
26912691
int result;
26922692

2693-
if (dev->ctrl.state != NVME_CTRL_RESETTING) {
2693+
if (nvme_ctrl_state(&dev->ctrl) != NVME_CTRL_RESETTING) {
26942694
dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
26952695
dev->ctrl.state);
26962696
result = -ENODEV;
@@ -3192,7 +3192,7 @@ static int nvme_suspend(struct device *dev)
31923192
nvme_wait_freeze(ctrl);
31933193
nvme_sync_queues(ctrl);
31943194

3195-
if (ctrl->state != NVME_CTRL_LIVE)
3195+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
31963196
goto unfreeze;
31973197

31983198
/*

drivers/nvme/host/rdma.c

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -984,10 +984,11 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
984984

985985
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
986986
{
987+
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
988+
987989
/* If we are resetting/deleting then do nothing */
988-
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
989-
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
990-
ctrl->ctrl.state == NVME_CTRL_LIVE);
990+
if (state != NVME_CTRL_CONNECTING) {
991+
WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
991992
return;
992993
}
993994

@@ -1059,8 +1060,10 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
10591060
* unless we're during creation of a new controller to
10601061
* avoid races with teardown flow.
10611062
*/
1062-
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
1063-
ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
1063+
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
1064+
1065+
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
1066+
state != NVME_CTRL_DELETING_NOIO);
10641067
WARN_ON_ONCE(new);
10651068
ret = -EINVAL;
10661069
goto destroy_io;
@@ -1129,8 +1132,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
11291132

11301133
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
11311134
/* state change failure is ok if we started ctrl delete */
1132-
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING &&
1133-
ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO);
1135+
enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl);
1136+
1137+
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
1138+
state != NVME_CTRL_DELETING_NOIO);
11341139
return;
11351140
}
11361141

@@ -1162,7 +1167,7 @@ static void nvme_rdma_wr_error(struct ib_cq *cq, struct ib_wc *wc,
11621167
struct nvme_rdma_queue *queue = wc->qp->qp_context;
11631168
struct nvme_rdma_ctrl *ctrl = queue->ctrl;
11641169

1165-
if (ctrl->ctrl.state == NVME_CTRL_LIVE)
1170+
if (nvme_ctrl_state(&ctrl->ctrl) == NVME_CTRL_LIVE)
11661171
dev_info(ctrl->ctrl.device,
11671172
"%s for CQE 0x%p failed with status %s (%d)\n",
11681173
op, wc->wr_cqe,
@@ -1945,7 +1950,7 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
19451950
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
19461951
rq->tag, nvme_rdma_queue_idx(queue));
19471952

1948-
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
1953+
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
19491954
/*
19501955
* If we are resetting, connecting or deleting we should
19511956
* complete immediately because we may block controller

drivers/nvme/host/tcp.c

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2152,10 +2152,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
21522152

21532153
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
21542154
{
2155+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
2156+
21552157
/* If we are resetting/deleting then do nothing */
2156-
if (ctrl->state != NVME_CTRL_CONNECTING) {
2157-
WARN_ON_ONCE(ctrl->state == NVME_CTRL_NEW ||
2158-
ctrl->state == NVME_CTRL_LIVE);
2158+
if (state != NVME_CTRL_CONNECTING) {
2159+
WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
21592160
return;
21602161
}
21612162

@@ -2215,8 +2216,10 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
22152216
* unless we're during creation of a new controller to
22162217
* avoid races with teardown flow.
22172218
*/
2218-
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2219-
ctrl->state != NVME_CTRL_DELETING_NOIO);
2219+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
2220+
2221+
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
2222+
state != NVME_CTRL_DELETING_NOIO);
22202223
WARN_ON_ONCE(new);
22212224
ret = -EINVAL;
22222225
goto destroy_io;
@@ -2280,8 +2283,10 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
22802283

22812284
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
22822285
/* state change failure is ok if we started ctrl delete */
2283-
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2284-
ctrl->state != NVME_CTRL_DELETING_NOIO);
2286+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
2287+
2288+
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
2289+
state != NVME_CTRL_DELETING_NOIO);
22852290
return;
22862291
}
22872292

@@ -2311,8 +2316,10 @@ static void nvme_reset_ctrl_work(struct work_struct *work)
23112316

23122317
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
23132318
/* state change failure is ok if we started ctrl delete */
2314-
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING &&
2315-
ctrl->state != NVME_CTRL_DELETING_NOIO);
2319+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
2320+
2321+
WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
2322+
state != NVME_CTRL_DELETING_NOIO);
23162323
return;
23172324
}
23182325

@@ -2430,7 +2437,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
24302437
nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
24312438
opc, nvme_opcode_str(qid, opc, fctype));
24322439

2433-
if (ctrl->state != NVME_CTRL_LIVE) {
2440+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
24342441
/*
24352442
* If we are resetting, connecting or deleting we should
24362443
* complete immediately because we may block controller

0 commit comments

Comments
 (0)