Skip to content

Commit 19dba09

Browse files
committed
Merge tag 'io_uring-6.9-20240322' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:

 "One patch just missed the initial pull, the rest are either fixes or
  small cleanups that make our life easier for the next kernel:

   - Fix a potential leak in error handling of pinned pages, and clean
     it up (Gabriel, Pavel)

   - Fix an issue with how read multishot returns retry (me)

   - Fix a problem with waitid/futex removals, if we hit the case of
     needing to remove all of them at exit time (me)

   - Fix for a regression introduced in this merge window, where we
     don't always have sr->done_io initialized if the ->prep_async()
     path is used (me)

   - Fix for SQPOLL setup error handling (me)

   - Fix for a poll removal request being delayed (Pavel)

   - Rename of a struct member which had a confusing name (Pavel)"

* tag 'io_uring-6.9-20240322' of git://git.kernel.dk/linux:
  io_uring/sqpoll: early exit thread if task_context wasn't allocated
  io_uring: clear opcode specific data for an early failure
  io_uring/net: ensure async prep handlers always initialize ->done_io
  io_uring/waitid: always remove waitid entry for cancel all
  io_uring/futex: always remove futex entry for cancel all
  io_uring: fix poll_remove stalled req completion
  io_uring: Fix release of pinned pages when __io_uaddr_map fails
  io_uring/kbuf: rename is_mapped
  io_uring: simplify io_pages_free
  io_uring: clean rings on NO_MMAP alloc fail
  io_uring/rw: return IOU_ISSUE_SKIP_COMPLETE for multishot retry
  io_uring: don't save/restore iowait state
2 parents 64f799f + 1251d20 commit 19dba09

File tree

9 files changed

+65
-49
lines changed

9 files changed

+65
-49
lines changed

io_uring/futex.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
159159
hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
160160
if (!io_match_task_safe(req, task, cancel_all))
161161
continue;
162+
hlist_del_init(&req->hash_node);
162163
__io_futex_cancel(ctx, req);
163164
found = true;
164165
}

io_uring/io_uring.c

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2181,6 +2181,13 @@ static void io_init_req_drain(struct io_kiocb *req)
21812181
}
21822182
}
21832183

2184+
static __cold int io_init_fail_req(struct io_kiocb *req, int err)
2185+
{
2186+
/* ensure per-opcode data is cleared if we fail before prep */
2187+
memset(&req->cmd.data, 0, sizeof(req->cmd.data));
2188+
return err;
2189+
}
2190+
21842191
static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
21852192
const struct io_uring_sqe *sqe)
21862193
__must_hold(&ctx->uring_lock)
@@ -2202,29 +2209,29 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
22022209

22032210
if (unlikely(opcode >= IORING_OP_LAST)) {
22042211
req->opcode = 0;
2205-
return -EINVAL;
2212+
return io_init_fail_req(req, -EINVAL);
22062213
}
22072214
def = &io_issue_defs[opcode];
22082215
if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) {
22092216
/* enforce forwards compatibility on users */
22102217
if (sqe_flags & ~SQE_VALID_FLAGS)
2211-
return -EINVAL;
2218+
return io_init_fail_req(req, -EINVAL);
22122219
if (sqe_flags & IOSQE_BUFFER_SELECT) {
22132220
if (!def->buffer_select)
2214-
return -EOPNOTSUPP;
2221+
return io_init_fail_req(req, -EOPNOTSUPP);
22152222
req->buf_index = READ_ONCE(sqe->buf_group);
22162223
}
22172224
if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
22182225
ctx->drain_disabled = true;
22192226
if (sqe_flags & IOSQE_IO_DRAIN) {
22202227
if (ctx->drain_disabled)
2221-
return -EOPNOTSUPP;
2228+
return io_init_fail_req(req, -EOPNOTSUPP);
22222229
io_init_req_drain(req);
22232230
}
22242231
}
22252232
if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) {
22262233
if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags))
2227-
return -EACCES;
2234+
return io_init_fail_req(req, -EACCES);
22282235
/* knock it to the slow queue path, will be drained there */
22292236
if (ctx->drain_active)
22302237
req->flags |= REQ_F_FORCE_ASYNC;
@@ -2237,9 +2244,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
22372244
}
22382245

22392246
if (!def->ioprio && sqe->ioprio)
2240-
return -EINVAL;
2247+
return io_init_fail_req(req, -EINVAL);
22412248
if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL))
2242-
return -EINVAL;
2249+
return io_init_fail_req(req, -EINVAL);
22432250

22442251
if (def->needs_file) {
22452252
struct io_submit_state *state = &ctx->submit_state;
@@ -2263,12 +2270,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
22632270

22642271
req->creds = xa_load(&ctx->personalities, personality);
22652272
if (!req->creds)
2266-
return -EINVAL;
2273+
return io_init_fail_req(req, -EINVAL);
22672274
get_cred(req->creds);
22682275
ret = security_uring_override_creds(req->creds);
22692276
if (ret) {
22702277
put_cred(req->creds);
2271-
return ret;
2278+
return io_init_fail_req(req, ret);
22722279
}
22732280
req->flags |= REQ_F_CREDS;
22742281
}
@@ -2539,7 +2546,7 @@ static bool current_pending_io(void)
25392546
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
25402547
struct io_wait_queue *iowq)
25412548
{
2542-
int io_wait, ret;
2549+
int ret;
25432550

25442551
if (unlikely(READ_ONCE(ctx->check_cq)))
25452552
return 1;
@@ -2557,15 +2564,14 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
25572564
* can take into account that the task is waiting for IO - turns out
25582565
* to be important for low QD IO.
25592566
*/
2560-
io_wait = current->in_iowait;
25612567
if (current_pending_io())
25622568
current->in_iowait = 1;
25632569
ret = 0;
25642570
if (iowq->timeout == KTIME_MAX)
25652571
schedule();
25662572
else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
25672573
ret = -ETIME;
2568-
current->in_iowait = io_wait;
2574+
current->in_iowait = 0;
25692575
return ret;
25702576
}
25712577

@@ -2697,13 +2703,9 @@ void io_mem_free(void *ptr)
26972703

26982704
static void io_pages_free(struct page ***pages, int npages)
26992705
{
2700-
struct page **page_array;
2706+
struct page **page_array = *pages;
27012707
int i;
27022708

2703-
if (!pages)
2704-
return;
2705-
2706-
page_array = *pages;
27072709
if (!page_array)
27082710
return;
27092711

@@ -2719,7 +2721,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
27192721
struct page **page_array;
27202722
unsigned int nr_pages;
27212723
void *page_addr;
2722-
int ret, i;
2724+
int ret, i, pinned;
27232725

27242726
*npages = 0;
27252727

@@ -2733,12 +2735,12 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
27332735
if (!page_array)
27342736
return ERR_PTR(-ENOMEM);
27352737

2736-
ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
2737-
page_array);
2738-
if (ret != nr_pages) {
2739-
err:
2740-
io_pages_free(&page_array, ret > 0 ? ret : 0);
2741-
return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT);
2738+
2739+
pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
2740+
page_array);
2741+
if (pinned != nr_pages) {
2742+
ret = (pinned < 0) ? pinned : -EFAULT;
2743+
goto free_pages;
27422744
}
27432745

27442746
page_addr = page_address(page_array[0]);
@@ -2752,7 +2754,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
27522754
* didn't support this feature.
27532755
*/
27542756
if (PageHighMem(page_array[i]))
2755-
goto err;
2757+
goto free_pages;
27562758

27572759
/*
27582760
* No support for discontig pages for now, should either be a
@@ -2761,13 +2763,17 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
27612763
* just fail them with EINVAL.
27622764
*/
27632765
if (page_address(page_array[i]) != page_addr)
2764-
goto err;
2766+
goto free_pages;
27652767
page_addr += PAGE_SIZE;
27662768
}
27672769

27682770
*pages = page_array;
27692771
*npages = nr_pages;
27702772
return page_to_virt(page_array[0]);
2773+
2774+
free_pages:
2775+
io_pages_free(&page_array, pinned > 0 ? pinned : 0);
2776+
return ERR_PTR(ret);
27712777
}
27722778

27732779
static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr,
@@ -2789,14 +2795,15 @@ static void io_rings_free(struct io_ring_ctx *ctx)
27892795
if (!(ctx->flags & IORING_SETUP_NO_MMAP)) {
27902796
io_mem_free(ctx->rings);
27912797
io_mem_free(ctx->sq_sqes);
2792-
ctx->rings = NULL;
2793-
ctx->sq_sqes = NULL;
27942798
} else {
27952799
io_pages_free(&ctx->ring_pages, ctx->n_ring_pages);
27962800
ctx->n_ring_pages = 0;
27972801
io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages);
27982802
ctx->n_sqe_pages = 0;
27992803
}
2804+
2805+
ctx->rings = NULL;
2806+
ctx->sq_sqes = NULL;
28002807
}
28012808

28022809
void *io_mem_alloc(size_t size)

io_uring/kbuf.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
199199

200200
bl = io_buffer_get_list(ctx, req->buf_index);
201201
if (likely(bl)) {
202-
if (bl->is_mapped)
202+
if (bl->is_buf_ring)
203203
ret = io_ring_buffer_select(req, len, bl, issue_flags);
204204
else
205205
ret = io_provided_buffer_select(req, len, bl);
@@ -253,7 +253,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
253253
if (!nbufs)
254254
return 0;
255255

256-
if (bl->is_mapped) {
256+
if (bl->is_buf_ring) {
257257
i = bl->buf_ring->tail - bl->head;
258258
if (bl->is_mmap) {
259259
/*
@@ -274,7 +274,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
274274
}
275275
/* make sure it's seen as empty */
276276
INIT_LIST_HEAD(&bl->buf_list);
277-
bl->is_mapped = 0;
277+
bl->is_buf_ring = 0;
278278
return i;
279279
}
280280

@@ -361,7 +361,7 @@ int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
361361
if (bl) {
362362
ret = -EINVAL;
363363
/* can't use provide/remove buffers command on mapped buffers */
364-
if (!bl->is_mapped)
364+
if (!bl->is_buf_ring)
365365
ret = __io_remove_buffers(ctx, bl, p->nbufs);
366366
}
367367
io_ring_submit_unlock(ctx, issue_flags);
@@ -519,7 +519,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
519519
}
520520
}
521521
/* can't add buffers via this command for a mapped buffer ring */
522-
if (bl->is_mapped) {
522+
if (bl->is_buf_ring) {
523523
ret = -EINVAL;
524524
goto err;
525525
}
@@ -575,7 +575,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
575575
bl->buf_pages = pages;
576576
bl->buf_nr_pages = nr_pages;
577577
bl->buf_ring = br;
578-
bl->is_mapped = 1;
578+
bl->is_buf_ring = 1;
579579
bl->is_mmap = 0;
580580
return 0;
581581
error_unpin:
@@ -642,7 +642,7 @@ static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
642642
}
643643
ibf->inuse = 1;
644644
bl->buf_ring = ibf->mem;
645-
bl->is_mapped = 1;
645+
bl->is_buf_ring = 1;
646646
bl->is_mmap = 1;
647647
return 0;
648648
}
@@ -688,7 +688,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
688688
bl = io_buffer_get_list(ctx, reg.bgid);
689689
if (bl) {
690690
/* if mapped buffer ring OR classic exists, don't allow */
691-
if (bl->is_mapped || !list_empty(&bl->buf_list))
691+
if (bl->is_buf_ring || !list_empty(&bl->buf_list))
692692
return -EEXIST;
693693
} else {
694694
free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
@@ -730,7 +730,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
730730
bl = io_buffer_get_list(ctx, reg.bgid);
731731
if (!bl)
732732
return -ENOENT;
733-
if (!bl->is_mapped)
733+
if (!bl->is_buf_ring)
734734
return -EINVAL;
735735

736736
__io_remove_buffers(ctx, bl, -1U);
@@ -757,7 +757,7 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
757757
bl = io_buffer_get_list(ctx, buf_status.buf_group);
758758
if (!bl)
759759
return -ENOENT;
760-
if (!bl->is_mapped)
760+
if (!bl->is_buf_ring)
761761
return -EINVAL;
762762

763763
buf_status.head = bl->head;

io_uring/kbuf.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ struct io_buffer_list {
2626
__u16 mask;
2727

2828
/* ring mapped provided buffers */
29-
__u8 is_mapped;
29+
__u8 is_buf_ring;
3030
/* ring mapped provided buffers, but mmap'ed by application */
3131
__u8 is_mmap;
3232
/* bl is visible from an RCU point of view for lookup */

io_uring/net.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,10 @@ int io_send_prep_async(struct io_kiocb *req)
326326
struct io_async_msghdr *io;
327327
int ret;
328328

329-
if (!zc->addr || req_has_async_data(req))
329+
if (req_has_async_data(req))
330+
return 0;
331+
zc->done_io = 0;
332+
if (!zc->addr)
330333
return 0;
331334
io = io_msg_alloc_async_prep(req);
332335
if (!io)
@@ -353,8 +356,10 @@ static int io_setup_async_addr(struct io_kiocb *req,
353356

354357
int io_sendmsg_prep_async(struct io_kiocb *req)
355358
{
359+
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
356360
int ret;
357361

362+
sr->done_io = 0;
358363
if (!io_msg_alloc_async_prep(req))
359364
return -ENOMEM;
360365
ret = io_sendmsg_copy_hdr(req, req->async_data);
@@ -608,9 +613,11 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
608613

609614
int io_recvmsg_prep_async(struct io_kiocb *req)
610615
{
616+
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
611617
struct io_async_msghdr *iomsg;
612618
int ret;
613619

620+
sr->done_io = 0;
614621
if (!io_msg_alloc_async_prep(req))
615622
return -ENOMEM;
616623
iomsg = req->async_data;

io_uring/poll.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -996,7 +996,6 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
996996
struct io_hash_bucket *bucket;
997997
struct io_kiocb *preq;
998998
int ret2, ret = 0;
999-
struct io_tw_state ts = { .locked = true };
1000999

10011000
io_ring_submit_lock(ctx, issue_flags);
10021001
preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
@@ -1045,7 +1044,8 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
10451044

10461045
req_set_fail(preq);
10471046
io_req_set_res(preq, -ECANCELED, 0);
1048-
io_req_task_complete(preq, &ts);
1047+
preq->io_task_work.func = io_req_task_complete;
1048+
io_req_task_work_add(preq);
10491049
out:
10501050
io_ring_submit_unlock(ctx, issue_flags);
10511051
if (ret < 0) {

io_uring/rw.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,8 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
947947
*/
948948
if (io_kbuf_recycle(req, issue_flags))
949949
rw->len = 0;
950+
if (issue_flags & IO_URING_F_MULTISHOT)
951+
return IOU_ISSUE_SKIP_COMPLETE;
950952
return -EAGAIN;
951953
}
952954

io_uring/sqpoll.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ static int io_sq_thread(void *data)
274274
char buf[TASK_COMM_LEN];
275275
DEFINE_WAIT(wait);
276276

277+
/* offload context creation failed, just exit */
278+
if (!current->io_uring)
279+
goto err_out;
280+
277281
snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
278282
set_task_comm(current, buf);
279283

@@ -371,7 +375,7 @@ static int io_sq_thread(void *data)
371375
atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
372376
io_run_task_work();
373377
mutex_unlock(&sqd->lock);
374-
378+
err_out:
375379
complete(&sqd->exited);
376380
do_exit(0);
377381
}

0 commit comments

Comments (0)