Skip to content

Commit b43a76f

Browse files
BernardMetzler authored and jgunthorpe committed
RDMA/siw: Fix broken RDMA Read Fence/Resume logic.
Code unconditionally resumed fenced SQ processing after next RDMA Read completion, even if other RDMA Read responses are still outstanding, or ORQ is full. Also adds comments for better readability of fence processing, and removes orq_get_tail() helper, which is not needed anymore.

Fixes: 8b6a361 ("rdma/siw: receive path")
Fixes: a531975 ("rdma/siw: main include file")
Link: https://lore.kernel.org/r/20220130170815.1940-1-bmt@zurich.ibm.com
Reported-by: Jared Holzman <jared.holzman@excelero.com>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent 4028bcc commit b43a76f

File tree

2 files changed

+12
-15
lines changed

2 files changed

+12
-15
lines changed

drivers/infiniband/sw/siw/siw.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -644,14 +644,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp)
644644
return &qp->orq[qp->orq_get % qp->attrs.orq_size];
645645
}
646646

647-
static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp)
648-
{
649-
return &qp->orq[qp->orq_put % qp->attrs.orq_size];
650-
}
651-
652647
static inline struct siw_sqe *orq_get_free(struct siw_qp *qp)
653648
{
654-
struct siw_sqe *orq_e = orq_get_tail(qp);
649+
struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size];
655650

656651
if (READ_ONCE(orq_e->flags) == 0)
657652
return orq_e;

drivers/infiniband/sw/siw/siw_qp_rx.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1153,22 +1153,25 @@ static int siw_check_tx_fence(struct siw_qp *qp)
11531153

11541154
spin_lock_irqsave(&qp->orq_lock, flags);
11551155

1156-
rreq = orq_get_current(qp);
1157-
11581156
/* free current orq entry */
1157+
rreq = orq_get_current(qp);
11591158
WRITE_ONCE(rreq->flags, 0);
11601159

1160+
qp->orq_get++;
1161+
11611162
if (qp->tx_ctx.orq_fence) {
11621163
if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) {
11631164
pr_warn("siw: [QP %u]: fence resume: bad status %d\n",
11641165
qp_id(qp), tx_waiting->wr_status);
11651166
rv = -EPROTO;
11661167
goto out;
11671168
}
1168-
/* resume SQ processing */
1169+
/* resume SQ processing, if possible */
11691170
if (tx_waiting->sqe.opcode == SIW_OP_READ ||
11701171
tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
1171-
rreq = orq_get_tail(qp);
1172+
1173+
/* SQ processing was stopped because of a full ORQ */
1174+
rreq = orq_get_free(qp);
11721175
if (unlikely(!rreq)) {
11731176
pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp));
11741177
rv = -EPROTO;
@@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp)
11811184
resume_tx = 1;
11821185

11831186
} else if (siw_orq_empty(qp)) {
1187+
/*
1188+
* SQ processing was stopped by fenced work request.
1189+
* Resume since all previous Read's are now completed.
1190+
*/
11841191
qp->tx_ctx.orq_fence = 0;
11851192
resume_tx = 1;
1186-
} else {
1187-
pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n",
1188-
qp_id(qp), qp->orq_get, qp->orq_put);
1189-
rv = -EPROTO;
11901193
}
11911194
}
1192-
qp->orq_get++;
11931195
out:
11941196
spin_unlock_irqrestore(&qp->orq_lock, flags);
11951197

0 commit comments

Comments (0)