Skip to content

Commit c423263

Browse files
Quinn Tran authored and martinkpetersen committed
scsi: qla2xxx: Fix abort in bsg timeout
The current abort of a bsg on timeout prematurely clears the outstanding_cmds[] entry. The abort does not allow the FW to return the IOCB/SRB. In addition, bsg_job_done() is not called to return the BSG (i.e. it is leaked).

Abort the outstanding bsg/SRB and wait for the completion. The completion IOCB will wake up the bsg_timeout thread. If the abort is not successful, then the driver will forcibly call bsg_job_done() and free the srb.

Err Inject:

- qaucli -z
- assign CT Passthru IOCB's NportHandle with another initiator nport handle to trigger timeout. Remote port will drop CT request.
- bsg_job_done is properly called as part of cleanup

kernel: qla2xxx [0000:21:00.1]-7012:7: qla2x00_process_ct : 286 : Error Inject.
kernel: qla2xxx [0000:21:00.1]-7016:7: bsg rqst type: FC_BSG_HST_CT else type: 101 - loop-id=1 portid=fffffa.
kernel: qla2xxx [0000:21:00.1]-70bb:7: qla24xx_bsg_timeout CMD timeout. bsg ptr ffff9971a42f0838 msgcode 80000004 vendor cmd fa01000
kernel: qla2xxx [0000:21:00.1]-507c:7: Abort command issued - hdl=4b, type=5
kernel: qla2xxx [0000:21:00.1]-5040:7: ELS-CT pass-through-ct pass-through error hdl=4b comp_status-status=0x5 error subcode 1=0x0 error subcode 2=0xaf882e80.
kernel: qla2xxx [0000:21:00.1]-7009:7: qla2x00_bsg_job_done: sp hdl 4b, result=70000 bsg ptr ffff9971a42f0838
kernel: qla2xxx [0000:21:00.1]-802c:7: Aborting bsg ffff9971a42f0838 sp=ffff99760b87ba80 handle=4b rval=0
kernel: qla2xxx [0000:21:00.1]-708a:7: bsg abort success. bsg ffff9971a42f0838 sp=ffff99760b87ba80 handle=0x4b

kernel: qla2xxx [0000:21:00.1]-7012:7: qla2x00_process_ct : 286 : Error Inject.
kernel: qla2xxx [0000:21:00.1]-7016:7: bsg rqst type: FC_BSG_HST_CT else type: 101 - loop-id=1 portid=fffffa.
kernel: qla2xxx [0000:21:00.1]-70bb:7: qla24xx_bsg_timeout CMD timeout. bsg ptr ffff9971a42f43b8 msgcode 80000004 vendor cmd fa01000
kernel: qla2xxx [0000:21:00.1]-7012:7: qla_bsg_found : 2206 : Error Inject 2.
kernel: qla2xxx [0000:21:00.1]-802c:7: Aborting bsg ffff9971a42f43b8 sp=ffff99762c304440 handle=5e rval=5
kernel: qla2xxx [0000:21:00.1]-704f:7: bsg abort fail. bsg=ffff9971a42f43b8 sp=ffff99762c304440 rval=5.
kernel: qla2xxx [0000:21:00.1]-7051:7: qla_bsg_found bsg_job_done : bsg ffff9971a42f43b8 result 0xfffffffa sp ffff99762c304440.

Cc: stable@vger.kernel.org
Fixes: c449b41 ("scsi: qla2xxx: Use QP lock to search for bsg")
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Link: https://lore.kernel.org/r/20241115130313.46826-2-njavali@marvell.com
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
1 parent 0deb37c commit c423263

File tree

1 file changed

+92
-22
lines changed

1 file changed

+92
-22
lines changed

drivers/scsi/qla2xxx/qla_bsg.c

Lines changed: 92 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
2424
{
2525
struct bsg_job *bsg_job = sp->u.bsg_job;
2626
struct fc_bsg_reply *bsg_reply = bsg_job->reply;
27+
struct completion *comp = sp->comp;
2728

2829
ql_dbg(ql_dbg_user, sp->vha, 0x7009,
2930
"%s: sp hdl %x, result=%x bsg ptr %p\n",
@@ -35,6 +36,9 @@ void qla2x00_bsg_job_done(srb_t *sp, int res)
3536
bsg_reply->result = res;
3637
bsg_job_done(bsg_job, bsg_reply->result,
3738
bsg_reply->reply_payload_rcv_len);
39+
40+
if (comp)
41+
complete(comp);
3842
}
3943

4044
void qla2x00_bsg_sp_free(srb_t *sp)
@@ -3061,14 +3065,19 @@ qla24xx_bsg_request(struct bsg_job *bsg_job)
30613065

30623066
static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job)
30633067
{
3064-
bool found = false;
3068+
bool found, do_bsg_done;
30653069
struct fc_bsg_reply *bsg_reply = bsg_job->reply;
30663070
scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
30673071
struct qla_hw_data *ha = vha->hw;
30683072
srb_t *sp = NULL;
30693073
int cnt;
30703074
unsigned long flags;
30713075
struct req_que *req;
3076+
int rval;
3077+
DECLARE_COMPLETION_ONSTACK(comp);
3078+
uint32_t ratov_j;
3079+
3080+
found = do_bsg_done = false;
30723081

30733082
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
30743083
req = qpair->req;
@@ -3080,42 +3089,104 @@ static bool qla_bsg_found(struct qla_qpair *qpair, struct bsg_job *bsg_job)
30803089
sp->type == SRB_ELS_CMD_HST ||
30813090
sp->type == SRB_ELS_CMD_HST_NOLOGIN) &&
30823091
sp->u.bsg_job == bsg_job) {
3083-
req->outstanding_cmds[cnt] = NULL;
3084-
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3085-
3086-
if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
3087-
ql_log(ql_log_warn, vha, 0x7089,
3088-
"mbx abort_command failed.\n");
3089-
bsg_reply->result = -EIO;
3090-
} else {
3091-
ql_dbg(ql_dbg_user, vha, 0x708a,
3092-
"mbx abort_command success.\n");
3093-
bsg_reply->result = 0;
3094-
}
3095-
/* ref: INIT */
3096-
kref_put(&sp->cmd_kref, qla2x00_sp_release);
30973092

30983093
found = true;
3099-
goto done;
3094+
sp->comp = &comp;
3095+
break;
31003096
}
31013097
}
31023098
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
31033099

3104-
done:
3105-
return found;
3100+
if (!found)
3101+
return false;
3102+
3103+
if (ha->flags.eeh_busy) {
3104+
/* skip over abort. EEH handling will return the bsg. Wait for it */
3105+
rval = QLA_SUCCESS;
3106+
ql_dbg(ql_dbg_user, vha, 0x802c,
3107+
"eeh encounter. bsg %p sp=%p handle=%x \n",
3108+
bsg_job, sp, sp->handle);
3109+
} else {
3110+
rval = ha->isp_ops->abort_command(sp);
3111+
ql_dbg(ql_dbg_user, vha, 0x802c,
3112+
"Aborting bsg %p sp=%p handle=%x rval=%x\n",
3113+
bsg_job, sp, sp->handle, rval);
3114+
}
3115+
3116+
switch (rval) {
3117+
case QLA_SUCCESS:
3118+
/* Wait for the command completion. */
3119+
ratov_j = ha->r_a_tov / 10 * 4 * 1000;
3120+
ratov_j = msecs_to_jiffies(ratov_j);
3121+
3122+
if (!wait_for_completion_timeout(&comp, ratov_j)) {
3123+
ql_log(ql_log_info, vha, 0x7089,
3124+
"bsg abort timeout. bsg=%p sp=%p handle %#x .\n",
3125+
bsg_job, sp, sp->handle);
3126+
3127+
do_bsg_done = true;
3128+
} else {
3129+
/* fw had returned the bsg */
3130+
ql_dbg(ql_dbg_user, vha, 0x708a,
3131+
"bsg abort success. bsg %p sp=%p handle=%#x\n",
3132+
bsg_job, sp, sp->handle);
3133+
do_bsg_done = false;
3134+
}
3135+
break;
3136+
default:
3137+
ql_log(ql_log_info, vha, 0x704f,
3138+
"bsg abort fail. bsg=%p sp=%p rval=%x.\n",
3139+
bsg_job, sp, rval);
3140+
3141+
do_bsg_done = true;
3142+
break;
3143+
}
3144+
3145+
if (!do_bsg_done)
3146+
return true;
3147+
3148+
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
3149+
/*
3150+
* recheck to make sure it's still the same bsg_job due to
3151+
* qp_lock_ptr was released earlier.
3152+
*/
3153+
if (req->outstanding_cmds[cnt] &&
3154+
req->outstanding_cmds[cnt]->u.bsg_job != bsg_job) {
3155+
/* fw had returned the bsg */
3156+
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3157+
return true;
3158+
}
3159+
req->outstanding_cmds[cnt] = NULL;
3160+
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
3161+
3162+
/* ref: INIT */
3163+
sp->comp = NULL;
3164+
kref_put(&sp->cmd_kref, qla2x00_sp_release);
3165+
bsg_reply->result = -ENXIO;
3166+
bsg_reply->reply_payload_rcv_len = 0;
3167+
3168+
ql_dbg(ql_dbg_user, vha, 0x7051,
3169+
"%s bsg_job_done : bsg %p result %#x sp %p.\n",
3170+
__func__, bsg_job, bsg_reply->result, sp);
3171+
3172+
bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
3173+
3174+
return true;
31063175
}
31073176

31083177
int
31093178
qla24xx_bsg_timeout(struct bsg_job *bsg_job)
31103179
{
3111-
struct fc_bsg_reply *bsg_reply = bsg_job->reply;
3180+
struct fc_bsg_request *bsg_request = bsg_job->request;
31123181
scsi_qla_host_t *vha = shost_priv(fc_bsg_to_shost(bsg_job));
31133182
struct qla_hw_data *ha = vha->hw;
31143183
int i;
31153184
struct qla_qpair *qpair;
31163185

3117-
ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
3118-
__func__, bsg_job);
3186+
ql_log(ql_log_info, vha, 0x708b,
3187+
"%s CMD timeout. bsg ptr %p msgcode %x vendor cmd %x\n",
3188+
__func__, bsg_job, bsg_request->msgcode,
3189+
bsg_request->rqst_data.h_vendor.vendor_cmd[0]);
31193190

31203191
if (qla2x00_isp_reg_stat(ha)) {
31213192
ql_log(ql_log_info, vha, 0x9007,
@@ -3136,7 +3207,6 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
31363207
}
31373208

31383209
ql_log(ql_log_info, vha, 0x708b, "SRB not found to abort.\n");
3139-
bsg_reply->result = -ENXIO;
31403210

31413211
done:
31423212
return 0;

0 commit comments

Comments
 (0)