Skip to content

Commit 153c45d

Browse files
Kevin Barnett authored and martinkpetersen committed
scsi: smartpqi: Add abort handler
Implement aborts as resets.

Avoid I/O stalls across all devices attached to a controller when device I/O requests time out.

Reviewed-by: Mahesh Rajashekhara <mahesh.rajashekhara@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Mike McGowen <mike.mcgowen@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/20230824155812.789913-2-don.brace@microchip.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
1 parent 06c2afb commit 153c45d

File tree

2 files changed

+149
-36
lines changed

2 files changed

+149
-36
lines changed

drivers/scsi/smartpqi/smartpqi.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,16 @@ struct pqi_stream_data {
10851085
u32 last_accessed;
10861086
};
10871087

1088-
#define PQI_MAX_LUNS_PER_DEVICE 256
1088+
#define PQI_MAX_LUNS_PER_DEVICE 256
1089+
1090+
/*
 * Per-LUN deferred task-management (abort) request.
 *
 * struct pqi_scsi_dev embeds one of these for each of its
 * PQI_MAX_LUNS_PER_DEVICE LUNs (the tmf_work[] array).
 *
 * work_struct - work item; initialised with pqi_tmf_worker() by
 *               pqi_init_device_tmf_work() and queued with schedule_work()
 *               from pqi_eh_abort_handler().
 * scmd        - command that triggered the abort.  Doubles as the "slot in
 *               use" marker: pqi_eh_abort_handler() claims the slot with
 *               cmpxchg(NULL -> scmd) and pqi_tmf_worker() releases it with
 *               xchg(..., NULL).
 * ctrl_info, device, lun, scsi_opcode
 *             - arguments captured by pqi_eh_abort_handler() and passed by
 *               pqi_tmf_worker() to pqi_device_reset_handler().
 */
struct pqi_tmf_work {
1091+
struct work_struct work_struct;
1092+
struct scsi_cmnd *scmd;
1093+
struct pqi_ctrl_info *ctrl_info;
1094+
struct pqi_scsi_dev *device;
1095+
u8 lun;
1096+
u8 scsi_opcode;
1097+
};
10891098

10901099
struct pqi_scsi_dev {
10911100
int devtype; /* as reported by INQUIRY command */
@@ -1111,6 +1120,7 @@ struct pqi_scsi_dev {
11111120
u8 erase_in_progress : 1;
11121121
bool aio_enabled; /* only valid for physical disks */
11131122
bool in_remove;
1123+
bool in_reset[PQI_MAX_LUNS_PER_DEVICE];
11141124
bool device_offline;
11151125
u8 vendor[8]; /* bytes 8-15 of inquiry data */
11161126
u8 model[16]; /* bytes 16-31 of inquiry data */
@@ -1149,6 +1159,8 @@ struct pqi_scsi_dev {
11491159
struct pqi_stream_data stream_data[NUM_STREAMS_PER_LUN];
11501160
atomic_t scsi_cmds_outstanding[PQI_MAX_LUNS_PER_DEVICE];
11511161
unsigned int raid_bypass_cnt;
1162+
1163+
struct pqi_tmf_work tmf_work[PQI_MAX_LUNS_PER_DEVICE];
11521164
};
11531165

11541166
/* VPD inquiry pages */

drivers/scsi/smartpqi/smartpqi_init.c

Lines changed: 136 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
#define PQI_POST_RESET_DELAY_SECS 5
4949
#define PQI_POST_OFA_RESET_DELAY_UPON_TIMEOUT_SECS 10
5050

51+
#define PQI_NO_COMPLETION ((void *)-1)
52+
5153
MODULE_AUTHOR("Microchip");
5254
MODULE_DESCRIPTION("Driver for Microchip Smart Family Controller version "
5355
DRIVER_VERSION);
@@ -96,6 +98,7 @@ static int pqi_ofa_host_memory_update(struct pqi_ctrl_info *ctrl_info);
9698
static int pqi_device_wait_for_pending_io(struct pqi_ctrl_info *ctrl_info,
9799
struct pqi_scsi_dev *device, u8 lun, unsigned long timeout_msecs);
98100
static void pqi_fail_all_outstanding_requests(struct pqi_ctrl_info *ctrl_info);
101+
static void pqi_tmf_worker(struct work_struct *work);
99102

100103
/* for flags argument to pqi_submit_raid_request_synchronous() */
101104
#define PQI_SYNC_FLAGS_INTERRUPTABLE 0x1
@@ -455,6 +458,21 @@ static inline bool pqi_device_in_remove(struct pqi_scsi_dev *device)
455458
return device->in_remove;
456459
}
457460

461+
/*
 * Mark @lun of @device as having a reset in progress.
 *
 * While the flag is set, pqi_scsi_queue_command() rejects new commands for
 * this LUN with SCSI_MLQUEUE_HOST_BUSY.  @lun is a u8 and in_reset[] has
 * PQI_MAX_LUNS_PER_DEVICE (256) entries, so the index is always in range.
 */
static inline void pqi_device_reset_start(struct pqi_scsi_dev *device, u8 lun)
462+
{
463+
device->in_reset[lun] = true;
464+
}
465+
466+
/*
 * Clear the reset-in-progress flag for @lun of @device, set earlier by
 * pqi_device_reset_start().  pqi_device_reset() calls this once the LUN
 * reset attempt has finished, whether it succeeded or failed.
 */
static inline void pqi_device_reset_done(struct pqi_scsi_dev *device, u8 lun)
467+
{
468+
device->in_reset[lun] = false;
469+
}
470+
471+
/*
 * Return true if a reset is currently in progress for @lun of @device,
 * i.e. pqi_device_reset_start() has been called without a matching
 * pqi_device_reset_done().  Plain (non-atomic) read of the per-LUN flag.
 */
static inline bool pqi_device_in_reset(struct pqi_scsi_dev *device, u8 lun)
472+
{
473+
return device->in_reset[lun];
474+
}
475+
458476
static inline int pqi_event_type_to_event_index(unsigned int event_type)
459477
{
460478
int index;
@@ -2137,6 +2155,15 @@ static inline bool pqi_is_device_added(struct pqi_scsi_dev *device)
21372155
return device->sdev != NULL;
21382156
}
21392157

2158+
/*
 * Initialise every per-LUN task-management work item of @device so that it
 * runs pqi_tmf_worker() when scheduled.
 *
 * Walks all PQI_MAX_LUNS_PER_DEVICE entries of device->tmf_work[]; only the
 * embedded work_struct is touched, the other fields are filled in later by
 * pqi_eh_abort_handler().  Called from pqi_update_device_list() for devices
 * that are being added.
 */
static inline void pqi_init_device_tmf_work(struct pqi_scsi_dev *device)
2159+
{
2160+
unsigned int lun;
2161+
struct pqi_tmf_work *tmf_work;
2162+
2163+
for (lun = 0, tmf_work = device->tmf_work; lun < PQI_MAX_LUNS_PER_DEVICE; lun++, tmf_work++)
2164+
INIT_WORK(&tmf_work->work_struct, pqi_tmf_worker);
2165+
}
2166+
21402167
static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
21412168
struct pqi_scsi_dev *new_device_list[], unsigned int num_new_devices)
21422169
{
@@ -2217,6 +2244,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
22172244
list_add_tail(&device->add_list_entry, &add_list);
22182245
/* To prevent this device structure from being freed later. */
22192246
device->keep_device = true;
2247+
pqi_init_device_tmf_work(device);
22202248
}
22212249

22222250
spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
@@ -5850,6 +5878,7 @@ static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
58505878
void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
58515879
{
58525880
struct pqi_scsi_dev *device;
5881+
struct completion *wait;
58535882

58545883
if (!scmd->device) {
58555884
set_host_byte(scmd, DID_NO_CONNECT);
@@ -5863,6 +5892,10 @@ void pqi_prep_for_scsi_done(struct scsi_cmnd *scmd)
58635892
}
58645893

58655894
atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
5895+
5896+
wait = (struct completion *)xchg(&scmd->host_scribble, NULL);
5897+
if (wait != PQI_NO_COMPLETION)
5898+
complete(wait);
58665899
}
58675900

58685901
static bool pqi_is_parity_write_stream(struct pqi_ctrl_info *ctrl_info,
@@ -5948,6 +5981,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
59485981
u16 hw_queue;
59495982
struct pqi_queue_group *queue_group;
59505983
bool raid_bypassed;
5984+
u8 lun;
5985+
5986+
scmd->host_scribble = PQI_NO_COMPLETION;
59515987

59525988
device = scmd->device->hostdata;
59535989

@@ -5957,7 +5993,9 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
59575993
return 0;
59585994
}
59595995

5960-
atomic_inc(&device->scsi_cmds_outstanding[scmd->device->lun]);
5996+
lun = (u8)scmd->device->lun;
5997+
5998+
atomic_inc(&device->scsi_cmds_outstanding[lun]);
59615999

59626000
ctrl_info = shost_to_hba(shost);
59636001

@@ -5967,7 +6005,7 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
59676005
return 0;
59686006
}
59696007

5970-
if (pqi_ctrl_blocked(ctrl_info)) {
6008+
if (pqi_ctrl_blocked(ctrl_info) || pqi_device_in_reset(device, lun)) {
59716009
rc = SCSI_MLQUEUE_HOST_BUSY;
59726010
goto out;
59736011
}
@@ -6002,8 +6040,10 @@ static int pqi_scsi_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scm
60026040
}
60036041

60046042
out:
6005-
if (rc)
6006-
atomic_dec(&device->scsi_cmds_outstanding[scmd->device->lun]);
6043+
if (rc) {
6044+
scmd->host_scribble = NULL;
6045+
atomic_dec(&device->scsi_cmds_outstanding[lun]);
6046+
}
60076047

60086048
return rc;
60096049
}
@@ -6097,7 +6137,7 @@ static int pqi_wait_until_inbound_queues_empty(struct pqi_ctrl_info *ctrl_info)
60976137
}
60986138

60996139
static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
6100-
struct pqi_scsi_dev *device)
6140+
struct pqi_scsi_dev *device, u8 lun)
61016141
{
61026142
unsigned int i;
61036143
unsigned int path;
@@ -6127,6 +6167,9 @@ static void pqi_fail_io_queued_for_device(struct pqi_ctrl_info *ctrl_info,
61276167
if (scsi_device != device)
61286168
continue;
61296169

6170+
if ((u8)scmd->device->lun != lun)
6171+
continue;
6172+
61306173
list_del(&io_request->request_list_entry);
61316174
set_host_byte(scmd, DID_RESET);
61326175
pqi_free_io_request(io_request);
@@ -6224,15 +6267,13 @@ static int pqi_wait_for_lun_reset_completion(struct pqi_ctrl_info *ctrl_info,
62246267

62256268
#define PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS 30
62266269

6227-
static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
6270+
static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
62286271
{
62296272
int rc;
62306273
struct pqi_io_request *io_request;
62316274
DECLARE_COMPLETION_ONSTACK(wait);
62326275
struct pqi_task_management_request *request;
6233-
struct pqi_scsi_dev *device;
62346276

6235-
device = scmd->device->hostdata;
62366277
io_request = pqi_alloc_io_request(ctrl_info, NULL);
62376278
io_request->io_complete_callback = pqi_lun_reset_complete;
62386279
io_request->context = &wait;
@@ -6247,15 +6288,15 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
62476288
memcpy(request->lun_number, device->scsi3addr,
62486289
sizeof(request->lun_number));
62496290
if (!pqi_is_logical_device(device) && ctrl_info->multi_lun_device_supported)
6250-
request->ml_device_lun_number = (u8)scmd->device->lun;
6291+
request->ml_device_lun_number = lun;
62516292
request->task_management_function = SOP_TASK_MANAGEMENT_LUN_RESET;
62526293
if (ctrl_info->tmf_iu_timeout_supported)
62536294
put_unaligned_le16(PQI_LUN_RESET_FIRMWARE_TIMEOUT_SECS, &request->timeout);
62546295

62556296
pqi_start_io(ctrl_info, &ctrl_info->queue_groups[PQI_DEFAULT_QUEUE_GROUP], RAID_PATH,
62566297
io_request);
62576298

6258-
rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, (u8)scmd->device->lun, &wait);
6299+
rc = pqi_wait_for_lun_reset_completion(ctrl_info, device, lun, &wait);
62596300
if (rc == 0)
62606301
rc = io_request->status;
62616302

@@ -6269,86 +6310,145 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
62696310
#define PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS (10 * 60 * 1000)
62706311
#define PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS (2 * 60 * 1000)
62716312

6272-
static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
6313+
static int pqi_lun_reset_with_retries(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
62736314
{
62746315
int reset_rc;
62756316
int wait_rc;
62766317
unsigned int retries;
62776318
unsigned long timeout_msecs;
6278-
struct pqi_scsi_dev *device;
62796319

6280-
device = scmd->device->hostdata;
62816320
for (retries = 0;;) {
6282-
reset_rc = pqi_lun_reset(ctrl_info, scmd);
6283-
if (reset_rc == 0 || reset_rc == -ENODEV || ++retries > PQI_LUN_RESET_RETRIES)
6321+
reset_rc = pqi_lun_reset(ctrl_info, device, lun);
6322+
if (reset_rc == 0 || reset_rc == -ENODEV || reset_rc == -ENXIO || ++retries > PQI_LUN_RESET_RETRIES)
62846323
break;
62856324
msleep(PQI_LUN_RESET_RETRY_INTERVAL_MSECS);
62866325
}
62876326

62886327
timeout_msecs = reset_rc ? PQI_LUN_RESET_FAILED_PENDING_IO_TIMEOUT_MSECS :
62896328
PQI_LUN_RESET_PENDING_IO_TIMEOUT_MSECS;
62906329

6291-
wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, scmd->device->lun, timeout_msecs);
6330+
wait_rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun, timeout_msecs);
62926331
if (wait_rc && reset_rc == 0)
62936332
reset_rc = wait_rc;
62946333

62956334
return reset_rc == 0 ? SUCCESS : FAILED;
62966335
}
62976336

6298-
static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
6337+
static int pqi_device_reset(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun)
62996338
{
63006339
int rc;
6301-
struct pqi_scsi_dev *device;
63026340

6303-
device = scmd->device->hostdata;
63046341
pqi_ctrl_block_requests(ctrl_info);
63056342
pqi_ctrl_wait_until_quiesced(ctrl_info);
6306-
pqi_fail_io_queued_for_device(ctrl_info, device);
6343+
pqi_fail_io_queued_for_device(ctrl_info, device, lun);
63076344
rc = pqi_wait_until_inbound_queues_empty(ctrl_info);
6345+
pqi_device_reset_start(device, lun);
6346+
pqi_ctrl_unblock_requests(ctrl_info);
63086347
if (rc)
63096348
rc = FAILED;
63106349
else
6311-
rc = pqi_lun_reset_with_retries(ctrl_info, scmd);
6312-
pqi_ctrl_unblock_requests(ctrl_info);
6350+
rc = pqi_lun_reset_with_retries(ctrl_info, device, lun);
6351+
pqi_device_reset_done(device, lun);
63136352

63146353
return rc;
63156354
}
63166355

6317-
static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
6356+
static int pqi_device_reset_handler(struct pqi_ctrl_info *ctrl_info, struct pqi_scsi_dev *device, u8 lun, struct scsi_cmnd *scmd, u8 scsi_opcode)
63186357
{
63196358
int rc;
6320-
struct Scsi_Host *shost;
6321-
struct pqi_ctrl_info *ctrl_info;
6322-
struct pqi_scsi_dev *device;
6323-
6324-
shost = scmd->device->host;
6325-
ctrl_info = shost_to_hba(shost);
6326-
device = scmd->device->hostdata;
63276359

63286360
mutex_lock(&ctrl_info->lun_reset_mutex);
63296361

63306362
dev_err(&ctrl_info->pci_dev->dev,
63316363
"resetting scsi %d:%d:%d:%d due to cmd 0x%02x\n",
6332-
shost->host_no,
6333-
device->bus, device->target, (u32)scmd->device->lun,
6364+
ctrl_info->scsi_host->host_no,
6365+
device->bus, device->target, lun,
63346366
scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff);
63356367

63366368
pqi_check_ctrl_health(ctrl_info);
63376369
if (pqi_ctrl_offline(ctrl_info))
63386370
rc = FAILED;
63396371
else
6340-
rc = pqi_device_reset(ctrl_info, scmd);
6372+
rc = pqi_device_reset(ctrl_info, device, lun);
63416373

63426374
dev_err(&ctrl_info->pci_dev->dev,
6343-
"reset of scsi %d:%d:%d:%d: %s\n",
6344-
shost->host_no, device->bus, device->target, (u32)scmd->device->lun,
6375+
"reset of scsi %d:%d:%d:%u: %s\n",
6376+
ctrl_info->scsi_host->host_no, device->bus, device->target, lun,
63456377
rc == SUCCESS ? "SUCCESS" : "FAILED");
63466378

63476379
mutex_unlock(&ctrl_info->lun_reset_mutex);
63486380

63496381
return rc;
63506382
}
63516383

6384+
/*
 * SCSI error-handler entry point for device (LUN) reset; registered as
 * .eh_device_reset_handler in pqi_driver_template.
 *
 * Derives the controller, the driver's device structure, the LUN (truncated
 * to u8) and the first CDB byte (0xff when cmd_len is 0) from @scmd and
 * forwards them to pqi_device_reset_handler().
 *
 * Returns whatever pqi_device_reset_handler() returns (SUCCESS or FAILED).
 */
static int pqi_eh_device_reset_handler(struct scsi_cmnd *scmd)
6385+
{
6386+
struct Scsi_Host *shost;
6387+
struct pqi_ctrl_info *ctrl_info;
6388+
struct pqi_scsi_dev *device;
6389+
u8 scsi_opcode;
6390+
6391+
shost = scmd->device->host;
6392+
ctrl_info = shost_to_hba(shost);
6393+
device = scmd->device->hostdata;
6394+
scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
6395+
6396+
return pqi_device_reset_handler(ctrl_info, device, (u8)scmd->device->lun, scmd, scsi_opcode);
6397+
}
6398+
6399+
/*
 * Work handler that performs the LUN reset requested by
 * pqi_eh_abort_handler().
 *
 * Recovers the enclosing pqi_tmf_work from @work, atomically takes the
 * pending command pointer out of the slot (xchg to NULL, which also frees
 * the slot for the next abort on this LUN) and runs
 * pqi_device_reset_handler() with the arguments captured in the slot.  The
 * reset result is discarded here.
 *
 * NOTE(review): pqi_device_reset_handler() reads scmd->cmd_len/scmd->cmnd
 * for its log message instead of using the scsi_opcode argument passed
 * here - confirm @scmd is still safe to dereference at that point.
 */
static void pqi_tmf_worker(struct work_struct *work)
6400+
{
6401+
struct pqi_tmf_work *tmf_work;
6402+
struct scsi_cmnd *scmd;
6403+
6404+
tmf_work = container_of(work, struct pqi_tmf_work, work_struct);
6405+
scmd = (struct scsi_cmnd *)xchg(&tmf_work->scmd, NULL);
6406+
6407+
pqi_device_reset_handler(tmf_work->ctrl_info, tmf_work->device, tmf_work->lun, scmd, tmf_work->scsi_opcode);
6408+
}
6409+
6410+
/*
 * SCSI error-handler entry point for command abort; registered as
 * .eh_abort_handler in pqi_driver_template.  The abort is implemented as a
 * LUN reset carried out asynchronously by pqi_tmf_worker().
 *
 * Sequence:
 *  1. Try to replace scmd->host_scribble (PQI_NO_COMPLETION while the
 *     command is outstanding) with a pointer to an on-stack completion.  If
 *     the previous value was NULL the command has already been completed
 *     (pqi_prep_for_scsi_done() swaps the field to NULL), so set the result
 *     to DID_RESET and return.
 *  2. Try to claim the per-LUN tmf_work slot with cmpxchg(NULL -> scmd).
 *     If claimed, record the reset arguments and schedule the work item.
 *     If the slot is already taken, no additional work is scheduled and
 *     this call just waits.
 *  3. Block until pqi_prep_for_scsi_done() completes the on-stack
 *     completion for this command.
 *
 * Always returns SUCCESS.
 *
 * NOTE(review): wait_for_completion() has no timeout.
 * NOTE(review): tmf_work[] is indexed with the uncast scmd->device->lun
 * while tmf_work->lun stores the (u8) truncation - presumably LUNs are
 * always below PQI_MAX_LUNS_PER_DEVICE; confirm.
 * NOTE(review): the first cmpxchg() result is only compared against NULL;
 * if host_scribble held some other value the swap would not happen but the
 * code would still wait on the local completion - confirm that cannot occur.
 */
static int pqi_eh_abort_handler(struct scsi_cmnd *scmd)
6411+
{
6412+
struct Scsi_Host *shost;
6413+
struct pqi_ctrl_info *ctrl_info;
6414+
struct pqi_scsi_dev *device;
6415+
struct pqi_tmf_work *tmf_work;
6416+
DECLARE_COMPLETION_ONSTACK(wait);
6417+
6418+
shost = scmd->device->host;
6419+
ctrl_info = shost_to_hba(shost);
6420+
6421+
dev_err(&ctrl_info->pci_dev->dev,
6422+
"attempting TASK ABORT on SCSI cmd at %p\n", scmd);
6423+
6424+
if (cmpxchg(&scmd->host_scribble, PQI_NO_COMPLETION, (void *)&wait) == NULL) {
6425+
dev_err(&ctrl_info->pci_dev->dev,
6426+
"SCSI cmd at %p already completed\n", scmd);
6427+
scmd->result = DID_RESET << 16;
6428+
goto out;
6429+
}
6430+
6431+
device = scmd->device->hostdata;
6432+
tmf_work = &device->tmf_work[scmd->device->lun];
6433+
6434+
if (cmpxchg(&tmf_work->scmd, NULL, scmd) == NULL) {
6435+
tmf_work->ctrl_info = ctrl_info;
6436+
tmf_work->device = device;
6437+
tmf_work->lun = (u8)scmd->device->lun;
6438+
tmf_work->scsi_opcode = scmd->cmd_len > 0 ? scmd->cmnd[0] : 0xff;
6439+
schedule_work(&tmf_work->work_struct);
6440+
}
6441+
6442+
wait_for_completion(&wait);
6443+
6444+
dev_err(&ctrl_info->pci_dev->dev,
6445+
"TASK ABORT on SCSI cmd at %p: SUCCESS\n", scmd);
6446+
6447+
out:
6448+
6449+
return SUCCESS;
6450+
}
6451+
63526452
static int pqi_slave_alloc(struct scsi_device *sdev)
63536453
{
63546454
struct pqi_scsi_dev *device;
@@ -7362,6 +7462,7 @@ static const struct scsi_host_template pqi_driver_template = {
73627462
.scan_finished = pqi_scan_finished,
73637463
.this_id = -1,
73647464
.eh_device_reset_handler = pqi_eh_device_reset_handler,
7465+
.eh_abort_handler = pqi_eh_abort_handler,
73657466
.ioctl = pqi_ioctl,
73667467
.slave_alloc = pqi_slave_alloc,
73677468
.slave_configure = pqi_slave_configure,

0 commit comments

Comments
 (0)