Skip to content

Commit 22b9a89

Browse files
committed
Merge tag 'nvme-6.7-2023-12-7' of git://git.infradead.org/nvme into block-6.7
Pull NVMe fixes from Keith:

"nvme fixes for Linux 6.7

 - Proper nvme ctrl state setting (Keith)
 - Passthrough command optimization (Keith)
 - Spectre fix (Nitesh)
 - Kconfig clarifications (Shin'ichiro)
 - Frozen state deadlock fix (Bitao)
 - Power setting quirk (Georg)"

* tag 'nvme-6.7-2023-12-7' of git://git.infradead.org/nvme:
  nvme-pci: Add sleep quirk for Kingston drives
  nvme: fix deadlock between reset and scan
  nvme: prevent potential spectre v1 gadget
  nvme: improve NVME_HOST_AUTH and NVME_TARGET_AUTH config descriptions
  nvme-ioctl: move capable() admin check to the end
  nvme: ensure reset state check ordering
  nvme: introduce helper function to get ctrl state
2 parents 7d2affc + 107b4e0 commit 22b9a89

File tree

10 files changed

+119
-64
lines changed

10 files changed

+119
-64
lines changed

drivers/nvme/host/Kconfig

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,12 @@ config NVME_TCP_TLS
107107
If unsure, say N.
108108

109109
config NVME_HOST_AUTH
110-
bool "NVM Express over Fabrics In-Band Authentication"
110+
bool "NVMe over Fabrics In-Band Authentication in host side"
111111
depends on NVME_CORE
112112
select NVME_AUTH
113113
help
114-
This provides support for NVMe over Fabrics In-Band Authentication.
114+
This provides support for NVMe over Fabrics In-Band Authentication in
115+
host side.
115116

116117
If unsure, say N.
117118

drivers/nvme/host/core.c

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
131131
/*
132132
* Only new queue scan work when admin and IO queues are both alive
133133
*/
134-
if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
134+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
135135
queue_work(nvme_wq, &ctrl->scan_work);
136136
}
137137

@@ -143,7 +143,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
143143
*/
144144
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
145145
{
146-
if (ctrl->state != NVME_CTRL_RESETTING)
146+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
147147
return -EBUSY;
148148
if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
149149
return -EBUSY;
@@ -156,7 +156,7 @@ static void nvme_failfast_work(struct work_struct *work)
156156
struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
157157
struct nvme_ctrl, failfast_work);
158158

159-
if (ctrl->state != NVME_CTRL_CONNECTING)
159+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
160160
return;
161161

162162
set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@@ -200,7 +200,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
200200
ret = nvme_reset_ctrl(ctrl);
201201
if (!ret) {
202202
flush_work(&ctrl->reset_work);
203-
if (ctrl->state != NVME_CTRL_LIVE)
203+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
204204
ret = -ENETRESET;
205205
}
206206

@@ -499,7 +499,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
499499

500500
spin_lock_irqsave(&ctrl->lock, flags);
501501

502-
old_state = ctrl->state;
502+
old_state = nvme_ctrl_state(ctrl);
503503
switch (new_state) {
504504
case NVME_CTRL_LIVE:
505505
switch (old_state) {
@@ -567,19 +567,19 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
567567
}
568568

569569
if (changed) {
570-
ctrl->state = new_state;
570+
WRITE_ONCE(ctrl->state, new_state);
571571
wake_up_all(&ctrl->state_wq);
572572
}
573573

574574
spin_unlock_irqrestore(&ctrl->lock, flags);
575575
if (!changed)
576576
return false;
577577

578-
if (ctrl->state == NVME_CTRL_LIVE) {
578+
if (new_state == NVME_CTRL_LIVE) {
579579
if (old_state == NVME_CTRL_CONNECTING)
580580
nvme_stop_failfast_work(ctrl);
581581
nvme_kick_requeue_lists(ctrl);
582-
} else if (ctrl->state == NVME_CTRL_CONNECTING &&
582+
} else if (new_state == NVME_CTRL_CONNECTING &&
583583
old_state == NVME_CTRL_RESETTING) {
584584
nvme_start_failfast_work(ctrl);
585585
}
@@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
592592
*/
593593
static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
594594
{
595-
switch (ctrl->state) {
595+
switch (nvme_ctrl_state(ctrl)) {
596596
case NVME_CTRL_NEW:
597597
case NVME_CTRL_LIVE:
598598
case NVME_CTRL_RESETTING:
@@ -617,7 +617,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
617617
wait_event(ctrl->state_wq,
618618
nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
619619
nvme_state_terminal(ctrl));
620-
return ctrl->state == NVME_CTRL_RESETTING;
620+
return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
621621
}
622622
EXPORT_SYMBOL_GPL(nvme_wait_reset);
623623

@@ -704,9 +704,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
704704
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
705705
struct request *rq)
706706
{
707-
if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
708-
ctrl->state != NVME_CTRL_DELETING &&
709-
ctrl->state != NVME_CTRL_DEAD &&
707+
enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
708+
709+
if (state != NVME_CTRL_DELETING_NOIO &&
710+
state != NVME_CTRL_DELETING &&
711+
state != NVME_CTRL_DEAD &&
710712
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
711713
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
712714
return BLK_STS_RESOURCE;
@@ -736,7 +738,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
736738
* command, which is require to set the queue live in the
737739
* appropinquate states.
738740
*/
739-
switch (ctrl->state) {
741+
switch (nvme_ctrl_state(ctrl)) {
740742
case NVME_CTRL_CONNECTING:
741743
if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
742744
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -2550,7 +2552,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
25502552

25512553
if (ctrl->ps_max_latency_us != latency) {
25522554
ctrl->ps_max_latency_us = latency;
2553-
if (ctrl->state == NVME_CTRL_LIVE)
2555+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
25542556
nvme_configure_apst(ctrl);
25552557
}
25562558
}
@@ -3238,7 +3240,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
32383240
struct nvme_ctrl *ctrl =
32393241
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
32403242

3241-
switch (ctrl->state) {
3243+
switch (nvme_ctrl_state(ctrl)) {
32423244
case NVME_CTRL_LIVE:
32433245
break;
32443246
default:
@@ -3660,6 +3662,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
36603662
goto out_unlink_ns;
36613663

36623664
down_write(&ctrl->namespaces_rwsem);
3665+
/*
3666+
* Ensure that no namespaces are added to the ctrl list after the queues
3667+
* are frozen, thereby avoiding a deadlock between scan and reset.
3668+
*/
3669+
if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
3670+
up_write(&ctrl->namespaces_rwsem);
3671+
goto out_unlink_ns;
3672+
}
36633673
nvme_ns_add_to_ctrl_list(ns);
36643674
up_write(&ctrl->namespaces_rwsem);
36653675
nvme_get_ctrl(ctrl);
@@ -3924,7 +3934,7 @@ static void nvme_scan_work(struct work_struct *work)
39243934
int ret;
39253935

39263936
/* No tagset on a live ctrl means IO queues could not created */
3927-
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
3937+
if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
39283938
return;
39293939

39303940
/*
@@ -3994,7 +4004,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
39944004
* removing the namespaces' disks; fail all the queues now to avoid
39954005
* potentially having to clean up the failed sync later.
39964006
*/
3997-
if (ctrl->state == NVME_CTRL_DEAD)
4007+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
39984008
nvme_mark_namespaces_dead(ctrl);
39994009

40004010
/* this is a no-op when called from the controller reset handler */
@@ -4076,7 +4086,7 @@ static void nvme_async_event_work(struct work_struct *work)
40764086
* flushing ctrl async_event_work after changing the controller state
40774087
* from LIVE and before freeing the admin queue.
40784088
*/
4079-
if (ctrl->state == NVME_CTRL_LIVE)
4089+
if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
40804090
ctrl->ops->submit_async_event(ctrl);
40814091
}
40824092

@@ -4471,7 +4481,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
44714481
{
44724482
int ret;
44734483

4474-
ctrl->state = NVME_CTRL_NEW;
4484+
WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
44754485
clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
44764486
spin_lock_init(&ctrl->lock);
44774487
mutex_init(&ctrl->scan_lock);
@@ -4581,6 +4591,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
45814591
list_for_each_entry(ns, &ctrl->namespaces, list)
45824592
blk_mq_unfreeze_queue(ns->queue);
45834593
up_read(&ctrl->namespaces_rwsem);
4594+
clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
45844595
}
45854596
EXPORT_SYMBOL_GPL(nvme_unfreeze);
45864597

@@ -4614,6 +4625,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
46144625
{
46154626
struct nvme_ns *ns;
46164627

4628+
set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
46174629
down_read(&ctrl->namespaces_rwsem);
46184630
list_for_each_entry(ns, &ctrl->namespaces, list)
46194631
blk_freeze_queue_start(ns->queue);

drivers/nvme/host/fc.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport)
557557
static void
558558
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
559559
{
560-
switch (ctrl->ctrl.state) {
560+
switch (nvme_ctrl_state(&ctrl->ctrl)) {
561561
case NVME_CTRL_NEW:
562562
case NVME_CTRL_CONNECTING:
563563
/*
@@ -793,7 +793,7 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
793793
"NVME-FC{%d}: controller connectivity lost. Awaiting "
794794
"Reconnect", ctrl->cnum);
795795

796-
switch (ctrl->ctrl.state) {
796+
switch (nvme_ctrl_state(&ctrl->ctrl)) {
797797
case NVME_CTRL_NEW:
798798
case NVME_CTRL_LIVE:
799799
/*
@@ -3319,7 +3319,7 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
33193319
unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
33203320
bool recon = true;
33213321

3322-
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
3322+
if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_CONNECTING)
33233323
return;
33243324

33253325
if (portptr->port_state == FC_OBJSTATE_ONLINE) {

drivers/nvme/host/ioctl.c

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,23 +18,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
1818
{
1919
u32 effects;
2020

21-
if (capable(CAP_SYS_ADMIN))
22-
return true;
23-
2421
/*
2522
* Do not allow unprivileged passthrough on partitions, as that allows an
2623
* escape from the containment of the partition.
2724
*/
2825
if (flags & NVME_IOCTL_PARTITION)
29-
return false;
26+
goto admin;
3027

3128
/*
3229
* Do not allow unprivileged processes to send vendor specific or fabrics
3330
* commands as we can't be sure about their effects.
3431
*/
3532
if (c->common.opcode >= nvme_cmd_vendor_start ||
3633
c->common.opcode == nvme_fabrics_command)
37-
return false;
34+
goto admin;
3835

3936
/*
4037
* Do not allow unprivileged passthrough of admin commands except
@@ -53,7 +50,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
5350
return true;
5451
}
5552
}
56-
return false;
53+
goto admin;
5754
}
5855

5956
/*
@@ -63,7 +60,7 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
6360
*/
6461
effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
6562
if (!(effects & NVME_CMD_EFFECTS_CSUPP))
66-
return false;
63+
goto admin;
6764

6865
/*
6966
* Don't allow passthrough for command that have intrusive (or unknown)
@@ -72,16 +69,20 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
7269
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
7370
NVME_CMD_EFFECTS_UUID_SEL |
7471
NVME_CMD_EFFECTS_SCOPE_MASK))
75-
return false;
72+
goto admin;
7673

7774
/*
7875
* Only allow I/O commands that transfer data to the controller or that
7976
* change the logical block contents if the file descriptor is open for
8077
* writing.
8178
*/
82-
if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
83-
return open_for_write;
79+
if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
80+
!open_for_write)
81+
goto admin;
82+
8483
return true;
84+
admin:
85+
return capable(CAP_SYS_ADMIN);
8586
}
8687

8788
/*

drivers/nvme/host/nvme.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,11 @@ enum nvme_quirks {
156156
* No temperature thresholds for channels other than 0 (Composite).
157157
*/
158158
NVME_QUIRK_NO_SECONDARY_TEMP_THRESH = (1 << 19),
159+
160+
/*
161+
* Disables simple suspend/resume path.
162+
*/
163+
NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20),
159164
};
160165

161166
/*
@@ -251,6 +256,7 @@ enum nvme_ctrl_flags {
251256
NVME_CTRL_STOPPED = 3,
252257
NVME_CTRL_SKIP_ID_CNS_CS = 4,
253258
NVME_CTRL_DIRTY_CAPABILITY = 5,
259+
NVME_CTRL_FROZEN = 6,
254260
};
255261

256262
struct nvme_ctrl {
@@ -387,6 +393,11 @@ struct nvme_ctrl {
387393
enum nvme_dctype dctype;
388394
};
389395

396+
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
397+
{
398+
return READ_ONCE(ctrl->state);
399+
}
400+
390401
enum nvme_iopolicy {
391402
NVME_IOPOLICY_NUMA,
392403
NVME_IOPOLICY_RR,

0 commit comments

Comments
 (0)