
Commit 2af89ab

isilence authored and axboe committed
io_uring: add option to remove SQ indirection
Not many are aware, but the io_uring submission queue has two levels. The first level usually appears as sq_array and stores indexes into the actual SQ. To my knowledge, no one has ever seriously used it, nor does liburing expose it to users. Add IORING_SETUP_NO_SQARRAY; when it is set, we don't bother creating and using the sq_array, and the SQ head/tail point directly into the SQ.

This improves the memory footprint, in terms of both allocations and cache usage, and should also make io_get_sqe() less branchy in the end.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0ffa3268a5ef61d326201ff43a233315c96312e0.1692916914.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent e5598d6 · commit 2af89ab
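
As a quick illustration of the new flag in use, here is a minimal userspace sketch (mine, not part of the commit): it creates a ring with IORING_SETUP_NO_SQARRAY via raw syscalls and submits a single NOP, indexing the SQE array directly with the tail. It assumes a kernel carrying this patch plus IORING_FEAT_SINGLE_MMAP (any modern kernel); error handling is trimmed for brevity.

/* Sketch only: raw-syscall NOP submission on an IORING_SETUP_NO_SQARRAY
 * ring; not taken from the commit. */
#include <linux/io_uring.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef IORING_SETUP_NO_SQARRAY
#define IORING_SETUP_NO_SQARRAY	(1U << 16)	/* added by this commit */
#endif

int main(void)
{
	struct io_uring_params p;
	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_NO_SQARRAY;

	int fd = syscall(__NR_io_uring_setup, 8, &p);
	if (fd < 0)
		return 1;	/* EINVAL here means the kernel lacks the flag */

	/* With IORING_FEAT_SINGLE_MMAP one mapping covers the SQ and CQ
	 * rings; size it from the CQ side, which lies at the end. No
	 * sq_array exists, so p.sq_off.array is never consulted. */
	size_t rings_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
	unsigned char *rings = mmap(NULL, rings_sz, PROT_READ | PROT_WRITE,
				    MAP_SHARED, fd, IORING_OFF_SQ_RING);
	struct io_uring_sqe *sqes = mmap(NULL, p.sq_entries * sizeof(*sqes),
					 PROT_READ | PROT_WRITE,
					 MAP_SHARED, fd, IORING_OFF_SQES);

	unsigned *sq_tail = (unsigned *)(rings + p.sq_off.tail);
	unsigned *sq_mask = (unsigned *)(rings + p.sq_off.ring_mask);

	/* No indirection: the tail indexes the SQE array directly. */
	struct io_uring_sqe *sqe = &sqes[*sq_tail & *sq_mask];
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_NOP;
	__atomic_store_n(sq_tail, *sq_tail + 1, __ATOMIC_RELEASE);

	return syscall(__NR_io_uring_enter, fd, 1, 1,
		       IORING_ENTER_GETEVENTS, NULL, 0) == 1 ? 0 : 1;
}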

2 files changed: 37 additions, 20 deletions

include/uapi/linux/io_uring.h (5 additions, 0 deletions)
@@ -185,6 +185,11 @@ enum {
  */
 #define IORING_SETUP_REGISTERED_FD_ONLY	(1U << 15)
 
+/*
+ * Removes indirection through the SQ index array.
+ */
+#define IORING_SETUP_NO_SQARRAY		(1U << 16)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,

io_uring/io_uring.c (32 additions, 20 deletions)
@@ -2339,8 +2339,21 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
  */
 static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 {
-	unsigned head, mask = ctx->sq_entries - 1;
-	unsigned sq_idx = ctx->cached_sq_head++ & mask;
+	unsigned mask = ctx->sq_entries - 1;
+	unsigned head = ctx->cached_sq_head++ & mask;
+
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY)) {
+		head = READ_ONCE(ctx->sq_array[head]);
+		if (unlikely(head >= ctx->sq_entries)) {
+			/* drop invalid entries */
+			spin_lock(&ctx->completion_lock);
+			ctx->cq_extra--;
+			spin_unlock(&ctx->completion_lock);
+			WRITE_ONCE(ctx->rings->sq_dropped,
+				   READ_ONCE(ctx->rings->sq_dropped) + 1);
+			return false;
+		}
+	}
 
 	/*
 	 * The cached sq head (or cq tail) serves two purposes:
@@ -2350,22 +2363,12 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 	 * 2) allows the kernel side to track the head on its own, even
 	 * though the application is the one updating it.
 	 */
-	head = READ_ONCE(ctx->sq_array[sq_idx]);
-	if (likely(head < ctx->sq_entries)) {
-		/* double index for 128-byte SQEs, twice as long */
-		if (ctx->flags & IORING_SETUP_SQE128)
-			head <<= 1;
-		*sqe = &ctx->sq_sqes[head];
-		return true;
-	}
 
-	/* drop invalid entries */
-	spin_lock(&ctx->completion_lock);
-	ctx->cq_extra--;
-	spin_unlock(&ctx->completion_lock);
-	WRITE_ONCE(ctx->rings->sq_dropped,
-		   READ_ONCE(ctx->rings->sq_dropped) + 1);
-	return false;
+	/* double index for 128-byte SQEs, twice as long */
+	if (ctx->flags & IORING_SETUP_SQE128)
+		head <<= 1;
+	*sqe = &ctx->sq_sqes[head];
+	return true;
 }
 
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
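
To make the removed level concrete, here is an illustrative pair of userspace publish routines (the struct and names are hypothetical, not from the commit or liburing): the classic scheme writes an SQE index into sq_array before bumping the tail, while on an IORING_SETUP_NO_SQARRAY ring there is nothing to fill in, so the SQE at tail & mask is published by the tail bump alone.

/* Illustrative contrast only; pointers mirror the application's mmap
 * of the SQ ring fields. */
struct app_sq {
	unsigned *ktail;	/* p.sq_off.tail */
	unsigned *kmask;	/* p.sq_off.ring_mask */
	unsigned *array;	/* p.sq_off.array; absent with NO_SQARRAY */
};

/* Two-level SQ: store the SQE index in the array, then bump the tail. */
static void publish_classic(struct app_sq *sq, unsigned sqe_idx)
{
	unsigned tail = *sq->ktail;

	sq->array[tail & *sq->kmask] = sqe_idx;	/* first level */
	__atomic_store_n(sq->ktail, tail + 1, __ATOMIC_RELEASE);
}

/* NO_SQARRAY: the SQE written at (tail & mask) becomes visible through
 * the tail bump alone; io_get_sqe() above indexes sq_sqes[] directly. */
static void publish_no_sqarray(struct app_sq *sq)
{
	__atomic_store_n(sq->ktail, *sq->ktail + 1, __ATOMIC_RELEASE);
}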
@@ -2734,6 +2737,12 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries
 		return SIZE_MAX;
 #endif
 
+	if (ctx->flags & IORING_SETUP_NO_SQARRAY) {
+		if (sq_offset)
+			*sq_offset = SIZE_MAX;
+		return off;
+	}
+
 	if (sq_offset)
 		*sq_offset = off;
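
The rings_size() change above is where the allocation saving comes from: without the flag, the rings mapping tacks one __u32 index per SQ entry onto the end. A back-of-the-envelope calculation (my arithmetic, not from the commit):

/* sq_array cost that IORING_SETUP_NO_SQARRAY removes from the rings
 * mapping: one __u32 per SQ entry. */
#include <stdio.h>

int main(void)
{
	for (unsigned entries = 128; entries <= 4096; entries *= 2)
		printf("%4u entries -> %5zu bytes saved\n",
		       entries, entries * sizeof(unsigned int));
	return 0;
}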

@@ -3710,7 +3719,8 @@ static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx,
 		return PTR_ERR(rings);
 
 	ctx->rings = rings;
-	ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
+		ctx->sq_array = (u32 *)((char *)rings + sq_array_offset);
 	rings->sq_ring_mask = p->sq_entries - 1;
 	rings->cq_ring_mask = p->cq_entries - 1;
 	rings->sq_ring_entries = p->sq_entries;
@@ -3921,7 +3931,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries);
 	p->sq_off.flags = offsetof(struct io_rings, sq_flags);
 	p->sq_off.dropped = offsetof(struct io_rings, sq_dropped);
-	p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
+	if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
+		p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings;
 	p->sq_off.resv1 = 0;
 	if (!(ctx->flags & IORING_SETUP_NO_MMAP))
 		p->sq_off.user_addr = 0;
@@ -4010,7 +4021,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG |
 			IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
 			IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
-			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY))
+			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
+			IORING_SETUP_NO_SQARRAY))
 		return -EINVAL;
 
 	return io_uring_create(entries, &p, params);
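
Since io_uring_setup() rejects unknown setup flags with -EINVAL, as the hunk above shows, userspace can probe for the feature and fall back on older kernels. A hypothetical helper (mine, not from the commit or liburing):

/* Probe for IORING_SETUP_NO_SQARRAY; retry without it on kernels that
 * predate this patch. */
#include <errno.h>
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef IORING_SETUP_NO_SQARRAY
#define IORING_SETUP_NO_SQARRAY	(1U << 16)
#endif

static int setup_ring(unsigned entries, struct io_uring_params *p)
{
	p->flags |= IORING_SETUP_NO_SQARRAY;
	int fd = syscall(__NR_io_uring_setup, entries, p);
	if (fd >= 0 || errno != EINVAL)
		return fd;

	/* Older kernel: keep the classic two-level SQ. */
	p->flags &= ~IORING_SETUP_NO_SQARRAY;
	return syscall(__NR_io_uring_setup, entries, p);
}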
