
Commit 59fbc40

isilence authored and axboe committed
io_uring: optimise extra io_get_cqe null check
If the cached cqe check passes in io_get_cqe*(), it already means that the
cqe we return is valid and non-zero; however, the compiler is unable to
optimise away null checks like the one in io_fill_cqe_req(). Do a bit of
trickery: return a success/fail boolean from io_get_cqe*() and store the cqe
in the cqe parameter. That makes it do the right thing, erasing the check
together with the introduced indirection.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/322ea4d3377d3d4efd8ae90ab8ed28a99f518210.1692916914.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 20d6b63 · commit 59fbc40
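The pattern is easier to see in isolation than in the diff below. The following is a minimal, self-contained C sketch of the same transformation; the names (struct entry, struct cache, get_entry_ptr, get_entry_bool, use_entry_*) are hypothetical stand-ins, not the io_uring code. With the pointer-returning shape the caller must re-test the result for NULL even though the helper already proved it non-NULL on the success path; with the bool-plus-out-parameter shape, the caller's success branch is the only check the compiler needs to keep.

/*
 * Standalone illustration only: hypothetical names, not kernel code.
 * get_entry_ptr() mirrors the old shape of io_get_cqe();
 * get_entry_bool() mirrors the new shape with a success/fail return
 * and an out parameter.
 */
#include <stdbool.h>
#include <stddef.h>

struct entry {
        int data;
};

struct cache {
        struct entry *cached;           /* next entry to hand out */
        struct entry *sentinel;         /* end of the cached range */
};

/* Old shape: caller must NULL-check the returned pointer. */
static struct entry *get_entry_ptr(struct cache *c)
{
        if (c->cached >= c->sentinel)
                return NULL;
        return c->cached++;
}

/*
 * New shape: success is the return value, the entry comes back through
 * *ret, so the caller's branch doubles as the validity check.
 */
static bool get_entry_bool(struct cache *c, struct entry **ret)
{
        if (c->cached >= c->sentinel)
                return false;
        *ret = c->cached++;
        return true;
}

int use_entry_old(struct cache *c)
{
        struct entry *e = get_entry_ptr(c);

        /* Extra NULL test that the new shape lets the compiler drop. */
        if (!e)
                return -1;
        e->data = 1;
        return 0;
}

int use_entry_new(struct cache *c)
{
        struct entry *e;

        if (!get_entry_bool(c, &e))
                return -1;
        /* No separate "if (!e)" here: success already implies e is valid. */
        e->data = 1;
        return 0;
}

This is the same idea the diff applies to io_get_cqe_overflow() and io_get_cqe(): the boolean return carries the "did we get a cqe?" information, so io_fill_cqe_req() and io_fill_cqe_aux() no longer re-derive it from the pointer.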

2 files changed: +12, -15 lines

io_uring/io_uring.c (3 additions, 4 deletions)

@@ -683,10 +683,10 @@ static void __io_cqring_overflow_flush(struct io_ring_ctx *ctx)
 
         io_cq_lock(ctx);
         while (!list_empty(&ctx->cq_overflow_list)) {
-                struct io_uring_cqe *cqe = io_get_cqe_overflow(ctx, true);
+                struct io_uring_cqe *cqe;
                 struct io_overflow_cqe *ocqe;
 
-                if (!cqe)
+                if (!io_get_cqe_overflow(ctx, &cqe, true))
                         break;
                 ocqe = list_first_entry(&ctx->cq_overflow_list,
                                         struct io_overflow_cqe, list);
@@ -862,8 +862,7 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
          * submission (by quite a lot). Increment the overflow count in
          * the ring.
          */
-        cqe = io_get_cqe(ctx);
-        if (likely(cqe)) {
+        if (likely(io_get_cqe(ctx, &cqe))) {
                 trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
 
                 WRITE_ONCE(cqe->user_data, user_data);

io_uring/io_uring.h (9 additions, 11 deletions)

@@ -109,28 +109,27 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
 #define io_for_each_link(pos, head) \
         for (pos = (head); pos; pos = pos->link)
 
-static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
-                                                       bool overflow)
+static inline bool io_get_cqe_overflow(struct io_ring_ctx *ctx,
+                                       struct io_uring_cqe **ret,
+                                       bool overflow)
 {
-        struct io_uring_cqe *cqe;
-
         io_lockdep_assert_cq_locked(ctx);
 
         if (unlikely(ctx->cqe_cached >= ctx->cqe_sentinel)) {
                 if (unlikely(!io_cqe_cache_refill(ctx, overflow)))
-                        return NULL;
+                        return false;
         }
-        cqe = ctx->cqe_cached;
+        *ret = ctx->cqe_cached;
         ctx->cached_cq_tail++;
         ctx->cqe_cached++;
         if (ctx->flags & IORING_SETUP_CQE32)
                 ctx->cqe_cached++;
-        return cqe;
+        return true;
 }
 
-static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
+static inline bool io_get_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **ret)
 {
-        return io_get_cqe_overflow(ctx, false);
+        return io_get_cqe_overflow(ctx, ret, false);
 }
 
 static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req)
@@ -142,8 +141,7 @@ static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req
          * submission (by quite a lot). Increment the overflow count in
          * the ring.
          */
-        cqe = io_get_cqe(ctx);
-        if (unlikely(!cqe))
+        if (unlikely(!io_get_cqe(ctx, &cqe)))
                 return false;
 
         if (trace_io_uring_complete_enabled())
