Skip to content

Commit c9a925b

Browse files
committed
Merge tag 'io_uring-6.7-2023-11-30' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe:

 - Fix an issue with discontig page checking for IORING_SETUP_NO_MMAP
 - Fix an issue where setting IORING_SETUP_NO_MMAP also disallowed mmap'ed buffer rings
 - Fix an issue with deferred release of memory mapped pages
 - Fix a lockdep issue with IORING_SETUP_NO_MMAP
 - Use fget/fput consistently, even from our sync system calls. No real issue here, but if we were ever to allow closing io_uring descriptors it would be required. Let's play it safe and just use the full ref counted versions upfront. Most uses of io_uring are threaded anyway, and hence already doing the full version underneath.

* tag 'io_uring-6.7-2023-11-30' of git://git.kernel.dk/linux:
  io_uring: use fget/fput consistently
  io_uring: free io_buffer_list entries via RCU
  io_uring/kbuf: prune deferred locked cache when tearing down
  io_uring/kbuf: recycle freed mapped buffer ring entries
  io_uring/kbuf: defer release of mapped buffer rings
  io_uring: enable io_mem_alloc/free to be used in other parts
  io_uring: don't guard IORING_OFF_PBUF_RING with SETUP_NO_MMAP
  io_uring: don't allow discontig pages for IORING_SETUP_NO_MMAP
2 parents ee0c8a9 + 73363c2 commit c9a925b

File tree

6 files changed

+224
-70
lines changed

6 files changed

+224
-70
lines changed

include/linux/io_uring_types.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,9 @@ struct io_ring_ctx {
340340

341341
struct list_head io_buffers_cache;
342342

343+
/* deferred free list, protected by ->uring_lock */
344+
struct hlist_head io_buf_list;
345+
343346
/* Keep this last, we don't need it for the fast path */
344347
struct wait_queue_head poll_wq;
345348
struct io_restriction restrictions;

io_uring/cancel.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
273273
};
274274
ktime_t timeout = KTIME_MAX;
275275
struct io_uring_sync_cancel_reg sc;
276-
struct fd f = { };
276+
struct file *file = NULL;
277277
DEFINE_WAIT(wait);
278278
int ret, i;
279279

@@ -295,10 +295,10 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
295295
/* we can grab a normal file descriptor upfront */
296296
if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
297297
!(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
298-
f = fdget(sc.fd);
299-
if (!f.file)
298+
file = fget(sc.fd);
299+
if (!file)
300300
return -EBADF;
301-
cd.file = f.file;
301+
cd.file = file;
302302
}
303303

304304
ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);
@@ -348,6 +348,7 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
348348
if (ret == -ENOENT || ret > 0)
349349
ret = 0;
350350
out:
351-
fdput(f);
351+
if (file)
352+
fput(file);
352353
return ret;
353354
}

io_uring/io_uring.c

Lines changed: 51 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
325325
INIT_LIST_HEAD(&ctx->sqd_list);
326326
INIT_LIST_HEAD(&ctx->cq_overflow_list);
327327
INIT_LIST_HEAD(&ctx->io_buffers_cache);
328+
INIT_HLIST_HEAD(&ctx->io_buf_list);
328329
io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
329330
sizeof(struct io_rsrc_node));
330331
io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
@@ -2666,7 +2667,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
26662667
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
26672668
}
26682669

2669-
static void io_mem_free(void *ptr)
2670+
void io_mem_free(void *ptr)
26702671
{
26712672
if (!ptr)
26722673
return;
@@ -2697,6 +2698,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
26972698
{
26982699
struct page **page_array;
26992700
unsigned int nr_pages;
2701+
void *page_addr;
27002702
int ret, i;
27012703

27022704
*npages = 0;
@@ -2718,27 +2720,29 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
27182720
io_pages_free(&page_array, ret > 0 ? ret : 0);
27192721
return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT);
27202722
}
2721-
/*
2722-
* Should be a single page. If the ring is small enough that we can
2723-
* use a normal page, that is fine. If we need multiple pages, then
2724-
* userspace should use a huge page. That's the only way to guarantee
2725-
* that we get contigious memory, outside of just being lucky or
2726-
* (currently) having low memory fragmentation.
2727-
*/
2728-
if (page_array[0] != page_array[ret - 1])
2729-
goto err;
27302723

2731-
/*
2732-
* Can't support mapping user allocated ring memory on 32-bit archs
2733-
* where it could potentially reside in highmem. Just fail those with
2734-
* -EINVAL, just like we did on kernels that didn't support this
2735-
* feature.
2736-
*/
2724+
page_addr = page_address(page_array[0]);
27372725
for (i = 0; i < nr_pages; i++) {
2738-
if (PageHighMem(page_array[i])) {
2739-
ret = -EINVAL;
2726+
ret = -EINVAL;
2727+
2728+
/*
2729+
* Can't support mapping user allocated ring memory on 32-bit
2730+
* archs where it could potentially reside in highmem. Just
2731+
* fail those with -EINVAL, just like we did on kernels that
2732+
* didn't support this feature.
2733+
*/
2734+
if (PageHighMem(page_array[i]))
27402735
goto err;
2741-
}
2736+
2737+
/*
2738+
* No support for discontig pages for now, should either be a
2739+
* single normal page, or a huge page. Later on we can add
2740+
* support for remapping discontig pages, for now we will
2741+
* just fail them with EINVAL.
2742+
*/
2743+
if (page_address(page_array[i]) != page_addr)
2744+
goto err;
2745+
page_addr += PAGE_SIZE;
27422746
}
27432747

27442748
*pages = page_array;
@@ -2775,7 +2779,7 @@ static void io_rings_free(struct io_ring_ctx *ctx)
27752779
}
27762780
}
27772781

2778-
static void *io_mem_alloc(size_t size)
2782+
void *io_mem_alloc(size_t size)
27792783
{
27802784
gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
27812785
void *ret;
@@ -2947,6 +2951,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
29472951
ctx->mm_account = NULL;
29482952
}
29492953
io_rings_free(ctx);
2954+
io_kbuf_mmap_list_free(ctx);
29502955

29512956
percpu_ref_exit(&ctx->refs);
29522957
free_uid(ctx->user);
@@ -3475,25 +3480,27 @@ static void *io_uring_validate_mmap_request(struct file *file,
34753480
struct page *page;
34763481
void *ptr;
34773482

3478-
/* Don't allow mmap if the ring was setup without it */
3479-
if (ctx->flags & IORING_SETUP_NO_MMAP)
3480-
return ERR_PTR(-EINVAL);
3481-
34823483
switch (offset & IORING_OFF_MMAP_MASK) {
34833484
case IORING_OFF_SQ_RING:
34843485
case IORING_OFF_CQ_RING:
3486+
/* Don't allow mmap if the ring was setup without it */
3487+
if (ctx->flags & IORING_SETUP_NO_MMAP)
3488+
return ERR_PTR(-EINVAL);
34853489
ptr = ctx->rings;
34863490
break;
34873491
case IORING_OFF_SQES:
3492+
/* Don't allow mmap if the ring was setup without it */
3493+
if (ctx->flags & IORING_SETUP_NO_MMAP)
3494+
return ERR_PTR(-EINVAL);
34883495
ptr = ctx->sq_sqes;
34893496
break;
34903497
case IORING_OFF_PBUF_RING: {
34913498
unsigned int bgid;
34923499

34933500
bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
3494-
mutex_lock(&ctx->uring_lock);
3501+
rcu_read_lock();
34953502
ptr = io_pbuf_get_address(ctx, bgid);
3496-
mutex_unlock(&ctx->uring_lock);
3503+
rcu_read_unlock();
34973504
if (!ptr)
34983505
return ERR_PTR(-EINVAL);
34993506
break;
@@ -3645,7 +3652,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
36453652
size_t, argsz)
36463653
{
36473654
struct io_ring_ctx *ctx;
3648-
struct fd f;
3655+
struct file *file;
36493656
long ret;
36503657

36513658
if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
@@ -3663,20 +3670,19 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
36633670
if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
36643671
return -EINVAL;
36653672
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
3666-
f.file = tctx->registered_rings[fd];
3667-
f.flags = 0;
3668-
if (unlikely(!f.file))
3673+
file = tctx->registered_rings[fd];
3674+
if (unlikely(!file))
36693675
return -EBADF;
36703676
} else {
3671-
f = fdget(fd);
3672-
if (unlikely(!f.file))
3677+
file = fget(fd);
3678+
if (unlikely(!file))
36733679
return -EBADF;
36743680
ret = -EOPNOTSUPP;
3675-
if (unlikely(!io_is_uring_fops(f.file)))
3681+
if (unlikely(!io_is_uring_fops(file)))
36763682
goto out;
36773683
}
36783684

3679-
ctx = f.file->private_data;
3685+
ctx = file->private_data;
36803686
ret = -EBADFD;
36813687
if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED))
36823688
goto out;
@@ -3770,7 +3776,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
37703776
}
37713777
}
37723778
out:
3773-
fdput(f);
3779+
if (!(flags & IORING_ENTER_REGISTERED_RING))
3780+
fput(file);
37743781
return ret;
37753782
}
37763783

@@ -4611,7 +4618,7 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
46114618
{
46124619
struct io_ring_ctx *ctx;
46134620
long ret = -EBADF;
4614-
struct fd f;
4621+
struct file *file;
46154622
bool use_registered_ring;
46164623

46174624
use_registered_ring = !!(opcode & IORING_REGISTER_USE_REGISTERED_RING);
@@ -4630,27 +4637,27 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
46304637
if (unlikely(!tctx || fd >= IO_RINGFD_REG_MAX))
46314638
return -EINVAL;
46324639
fd = array_index_nospec(fd, IO_RINGFD_REG_MAX);
4633-
f.file = tctx->registered_rings[fd];
4634-
f.flags = 0;
4635-
if (unlikely(!f.file))
4640+
file = tctx->registered_rings[fd];
4641+
if (unlikely(!file))
46364642
return -EBADF;
46374643
} else {
4638-
f = fdget(fd);
4639-
if (unlikely(!f.file))
4644+
file = fget(fd);
4645+
if (unlikely(!file))
46404646
return -EBADF;
46414647
ret = -EOPNOTSUPP;
4642-
if (!io_is_uring_fops(f.file))
4648+
if (!io_is_uring_fops(file))
46434649
goto out_fput;
46444650
}
46454651

4646-
ctx = f.file->private_data;
4652+
ctx = file->private_data;
46474653

46484654
mutex_lock(&ctx->uring_lock);
46494655
ret = __io_uring_register(ctx, opcode, arg, nr_args);
46504656
mutex_unlock(&ctx->uring_lock);
46514657
trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret);
46524658
out_fput:
4653-
fdput(f);
4659+
if (!use_registered_ring)
4660+
fput(file);
46544661
return ret;
46554662
}
46564663

io_uring/io_uring.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,9 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
8686
bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
8787
bool cancel_all);
8888

89+
void *io_mem_alloc(size_t size);
90+
void io_mem_free(void *ptr);
91+
8992
#if defined(CONFIG_PROVE_LOCKING)
9093
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
9194
{

0 commit comments

Comments
 (0)