Skip to content

Commit dfbbfbf

Browse files
isilence authored and axboe committed
io_uring: introduce concept of memory regions
We've got a good number of mappings we share with userspace; that includes the main rings, provided buffer rings, upcoming rings for zerocopy rx and more. All of them duplicate user argument parsing and some internal details as well (page pinning, huge page optimisations, mmap'ing, etc.). Introduce a notion of regions. For userspace, for now, it's just a new structure called struct io_uring_region_desc which is supposed to parameterise all such mapping / queue creations. A region either represents a user provided chunk of memory, in which case the user_addr field should point to it, or a request for the kernel to allocate the memory, in which case the user would need to mmap it after using the offset returned in the mmap_offset field. With a uniform userspace API we can avoid additional boilerplate code and apply future optimisations to all of them at once. Internally, there is a new structure struct io_mapped_region holding all relevant runtime information and some helpers to work with it. This patch limits it to user provided regions. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Link: https://lore.kernel.org/r/0e6fe25818dfbaebd1bd90b870a6cac503fe1a24.1731689588.git.asml.silence@gmail.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 83e0415 commit dfbbfbf

File tree

4 files changed

+101
-0
lines changed

4 files changed

+101
-0
lines changed

include/linux/io_uring_types.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ struct io_hash_table {
7575
unsigned hash_bits;
7676
};
7777

78+
/*
 * Runtime state of a memory region set up by io_create_region() and torn
 * down by io_free_region(). An all-zero structure means "no region".
 */
struct io_mapped_region {
	/* page array for the region; unpinned and kvfree()d on teardown */
	struct page		**pages;
	/* kernel address obtained from vmap() over @pages */
	void			*vmap_ptr;
	/* number of entries in @pages; non-zero once the region is set */
	size_t			nr_pages;
};
83+
7884
/*
7985
* Arbitrary limit, can be raised if need be
8086
*/

include/uapi/linux/io_uring.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -647,6 +647,20 @@ struct io_uring_files_update {
647647
__aligned_u64 /* __s32 * */ fds;
648648
};
649649

650+
/* Values accepted in struct io_uring_region_desc::flags */
enum {
	/* initialise with user provided memory pointed by user_addr */
	IORING_MEM_REGION_TYPE_USER		= 1,
};
654+
655+
struct io_uring_region_desc {
656+
__u64 user_addr;
657+
__u64 size;
658+
__u32 flags;
659+
__u32 id;
660+
__u64 mmap_offset;
661+
__u64 __resv[4];
662+
};
663+
650664
/*
651665
* Register a fully sparse file space, rather than pass in an array of all
652666
* -1 file descriptors.

io_uring/memmap.c

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "memmap.h"
1414
#include "kbuf.h"
15+
#include "rsrc.h"
1516

1617
static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
1718
size_t size, gfp_t gfp)
@@ -194,6 +195,72 @@ void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
194195
return ERR_PTR(-ENOMEM);
195196
}
196197

198+
void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
199+
{
200+
if (mr->pages) {
201+
unpin_user_pages(mr->pages, mr->nr_pages);
202+
kvfree(mr->pages);
203+
}
204+
if (mr->vmap_ptr)
205+
vunmap(mr->vmap_ptr);
206+
if (mr->nr_pages && ctx->user)
207+
__io_unaccount_mem(ctx->user, mr->nr_pages);
208+
209+
memset(mr, 0, sizeof(*mr));
210+
}
211+
212+
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
213+
struct io_uring_region_desc *reg)
214+
{
215+
int pages_accounted = 0;
216+
struct page **pages;
217+
int nr_pages, ret;
218+
void *vptr;
219+
u64 end;
220+
221+
if (WARN_ON_ONCE(mr->pages || mr->vmap_ptr || mr->nr_pages))
222+
return -EFAULT;
223+
if (memchr_inv(&reg->__resv, 0, sizeof(reg->__resv)))
224+
return -EINVAL;
225+
if (reg->flags != IORING_MEM_REGION_TYPE_USER)
226+
return -EINVAL;
227+
if (!reg->user_addr)
228+
return -EFAULT;
229+
if (!reg->size || reg->mmap_offset || reg->id)
230+
return -EINVAL;
231+
if ((reg->size >> PAGE_SHIFT) > INT_MAX)
232+
return E2BIG;
233+
if ((reg->user_addr | reg->size) & ~PAGE_MASK)
234+
return -EINVAL;
235+
if (check_add_overflow(reg->user_addr, reg->size, &end))
236+
return -EOVERFLOW;
237+
238+
pages = io_pin_pages(reg->user_addr, reg->size, &nr_pages);
239+
if (IS_ERR(pages))
240+
return PTR_ERR(pages);
241+
242+
if (ctx->user) {
243+
ret = __io_account_mem(ctx->user, nr_pages);
244+
if (ret)
245+
goto out_free;
246+
pages_accounted = nr_pages;
247+
}
248+
249+
vptr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
250+
if (!vptr)
251+
goto out_free;
252+
253+
mr->pages = pages;
254+
mr->vmap_ptr = vptr;
255+
mr->nr_pages = nr_pages;
256+
return 0;
257+
out_free:
258+
if (pages_accounted)
259+
__io_unaccount_mem(ctx->user, pages_accounted);
260+
io_pages_free(&pages, nr_pages);
261+
return ret;
262+
}
263+
197264
static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff,
198265
size_t sz)
199266
{

io_uring/memmap.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,18 @@ unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
2222
unsigned long flags);
2323
int io_uring_mmap(struct file *file, struct vm_area_struct *vma);
2424

25+
void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr);
26+
int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
27+
struct io_uring_region_desc *reg);
28+
29+
static inline void *io_region_get_ptr(struct io_mapped_region *mr)
30+
{
31+
return mr->vmap_ptr;
32+
}
33+
34+
static inline bool io_region_is_set(struct io_mapped_region *mr)
35+
{
36+
return !!mr->nr_pages;
37+
}
38+
2539
#endif

0 commit comments

Comments
 (0)