Skip to content

Commit 93238e6

Browse files
isilence authored and axboe committed
io_uring: add memory region registration
Regions will serve multiple purposes. First, with regions we can decouple ring/etc. object creation from registration / mapping of the memory they will be placed in. We already have hacks that allow putting both the SQ and the CQ into the same huge page; in the future we should be able to do:

region = create_region(io_uring);
create_pbuf_ring(io_uring, region, offset=0);
create_pbuf_ring(io_uring, region, offset=N);

The second use case is efficiently passing parameters. The following patch re-enables IORING_ENTER_EXT_ARG_REG on top of regions, which optimises wait arguments. It will also be useful for request arguments, replacing iovec, msghdr, etc. pointers. Eventually it would be handy for BPF as well, if that comes to fruition.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/0798cf3a14fad19cfc96fc9feca5f3e11481691d.1731689588.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent dfbbfbf commit 93238e6

File tree

4 files changed

+49
-0
lines changed

4 files changed

+49
-0
lines changed

include/linux/io_uring_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,9 @@ struct io_ring_ctx {
429429
unsigned short n_sqe_pages;
430430
struct page **ring_pages;
431431
struct page **sqe_pages;
432+
433+
/* used for optimised request parameter and wait argument passing */
434+
struct io_mapped_region param_region;
432435
};
433436

434437
struct io_tw_state {

include/uapi/linux/io_uring.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,8 @@ enum io_uring_register_op {
627627
/* resize CQ ring */
628628
IORING_REGISTER_RESIZE_RINGS = 33,
629629

630+
IORING_REGISTER_MEM_REGION = 34,
631+
630632
/* this goes last */
631633
IORING_REGISTER_LAST,
632634

@@ -661,6 +663,12 @@ struct io_uring_region_desc {
661663
__u64 __resv[4];
662664
};
663665

666+
/*
 * Argument passed to io_uring_register() with IORING_REGISTER_MEM_REGION.
 * It points at the region descriptor rather than embedding it; the kernel
 * reads the descriptor through region_uptr and writes it back on success.
 */
struct io_uring_mem_region_reg {
667+
__u64 region_uptr; /* user pointer to a struct io_uring_region_desc */
668+
__u64 flags; /* no flags are defined yet; registration rejects non-zero */
669+
__u64 __resv[2]; /* reserved; registration rejects any non-zero byte */
670+
};
671+
664672
/*
665673
* Register a fully sparse file space, rather than pass in an array of all
666674
* -1 file descriptors.

io_uring/io_uring.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2709,6 +2709,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
27092709
io_alloc_cache_free(&ctx->msg_cache, io_msg_cache_free);
27102710
io_futex_cache_free(ctx);
27112711
io_destroy_buffers(ctx);
2712+
io_free_region(ctx, &ctx->param_region);
27122713
mutex_unlock(&ctx->uring_lock);
27132714
if (ctx->sq_creds)
27142715
put_cred(ctx->sq_creds);

io_uring/register.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,37 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
570570
return ret;
571571
}
572572

573+
/*
 * Handle IORING_REGISTER_MEM_REGION: set up ctx->param_region from a
 * userspace-provided struct io_uring_mem_region_reg.
 *
 * @ctx:  ring context that will own the region
 * @uarg: user pointer to a struct io_uring_mem_region_reg
 *
 * Returns 0 on success, -EBUSY if a parameter region is already set,
 * -EFAULT if any user copy fails, -EINVAL if flags or reserved fields are
 * non-zero, or the error returned by io_create_region().
 */
static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
{
	struct io_uring_mem_region_reg __user *ureg = uarg;
	struct io_uring_region_desc __user *udesc;
	struct io_uring_mem_region_reg kreg;
	struct io_uring_region_desc kdesc;
	int err;

	/* Only a single parameter region may be registered per ring. */
	if (io_region_is_set(&ctx->param_region))
		return -EBUSY;

	/* Pull in the registration struct, then the descriptor it points to. */
	if (copy_from_user(&kreg, ureg, sizeof(kreg)))
		return -EFAULT;
	udesc = u64_to_user_ptr(kreg.region_uptr);
	if (copy_from_user(&kdesc, udesc, sizeof(kdesc)))
		return -EFAULT;

	/* No flags are supported here and the reserved words must be zero. */
	if (kreg.flags || memchr_inv(&kreg.__resv, 0, sizeof(kreg.__resv)))
		return -EINVAL;

	err = io_create_region(ctx, &ctx->param_region, &kdesc);
	if (err)
		return err;

	/* Hand the descriptor back to userspace after region creation. */
	if (!copy_to_user(udesc, &kdesc, sizeof(kdesc)))
		return 0;

	/* Userspace never saw the result, so undo the registration. */
	io_free_region(ctx, &ctx->param_region);
	return -EFAULT;
}
603+
573604
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
574605
void __user *arg, unsigned nr_args)
575606
__releases(ctx->uring_lock)
@@ -764,6 +795,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
764795
break;
765796
ret = io_register_resize_rings(ctx, arg);
766797
break;
798+
case IORING_REGISTER_MEM_REGION:
799+
ret = -EINVAL;
800+
if (!arg || nr_args != 1)
801+
break;
802+
ret = io_register_mem_region(ctx, arg);
803+
break;
767804
default:
768805
ret = -EINVAL;
769806
break;

0 commit comments

Comments
 (0)