
Commit 364eb61

Merge tag 'io_uring-6.1-2022-11-25' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe:

 - A few poll related fixes. One fixing a race condition between poll
   cancelation and trigger, and one making the overflow handling a bit
   more robust (Lin, Pavel)

 - Fix an fput() for error handling in the direct file table (Lin)

 - Fix for a regression introduced in this cycle, where we don't always
   get TIF_NOTIFY_SIGNAL cleared appropriately (me)

* tag 'io_uring-6.1-2022-11-25' of git://git.kernel.dk/linux:
  io_uring: clear TIF_NOTIFY_SIGNAL if set and task_work not available
  io_uring/poll: fix poll_refs race with cancelation
  io_uring/filetable: fix file reference underflow
  io_uring: make poll refs more robust
  io_uring: cmpxchg for poll arm refs release
2 parents 3e0d88f + 7cfe7a0 commit 364eb61
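For readers unfamiliar with the paths these fixes touch, here is a minimal userspace sketch, assuming liburing is installed (it is not part of this commit): it arms a poll request on a pipe, triggers it, and reaps the completion. It only exercises the normal poll-arm and completion path and does not reproduce the races being fixed.

/* Illustrative only: normal poll-arm/complete flow via liburing. */
#include <liburing.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[2];

	if (pipe(fds) < 0 || io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_poll_add(sqe, fds[0], POLLIN);	/* arm the poll */
	io_uring_submit(&ring);

	(void)write(fds[1], "x", 1);			/* wake the poll */

	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		printf("poll res: 0x%x\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}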

File tree

3 files changed: +47 -11 lines changed


io_uring/filetable.c

Lines changed: 0 additions & 2 deletions
@@ -101,8 +101,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 err:
 	if (needs_switch)
 		io_rsrc_node_switch(ctx, ctx->file_data);
-	if (ret)
-		fput(file);
 	return ret;
 }
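Illustrative userspace sketch (not kernel code; the names are hypothetical) of the bug class behind the two deleted lines: if both the callee and its caller drop the file reference on failure, the count underflows, so the error path must leave the put to exactly one side.

/* Toy refcount showing why only one side may put() on the error path. */
#include <assert.h>

struct object { int refs; };

static void object_put(struct object *o)
{
	assert(o->refs > 0);	/* a double put would trip this underflow check */
	o->refs--;
}

/* Post-fix convention: on failure the callee leaves the reference alone. */
static int install(struct object *o, int slot_free)
{
	if (!slot_free)
		return -1;
	/* ... on success, ownership of the reference moves to the table ... */
	return 0;
}

static void caller(struct object *o)
{
	if (install(o, 0) < 0)
		object_put(o);	/* exactly one put on the error path */
}

int main(void)
{
	struct object o = { .refs = 1 };

	caller(&o);		/* with the fix, refs ends at 0, not -1 */
	return 0;
}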

io_uring/io_uring.h

Lines changed: 7 additions & 2 deletions
@@ -238,9 +238,14 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
 
 static inline int io_run_task_work(void)
 {
+	/*
+	 * Always check-and-clear the task_work notification signal. With how
+	 * signaling works for task_work, we can find it set with nothing to
+	 * run. We need to clear it for that case, like get_signal() does.
+	 */
+	if (test_thread_flag(TIF_NOTIFY_SIGNAL))
+		clear_notify_signal();
 	if (task_work_pending(current)) {
-		if (test_thread_flag(TIF_NOTIFY_SIGNAL))
-			clear_notify_signal();
 		__set_current_state(TASK_RUNNING);
 		task_work_run();
 		return 1;
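A rough userspace analogue of the reordering above, using C11 atomics and made-up names (notify_flag, work_pending, run_work; none of these are kernel APIs): the notification flag is cleared whenever the helper runs, even if no work is queued, mirroring the "set with nothing to run" case the comment describes.

/* Illustrative only: clear the wakeup flag even when the queue is empty. */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool notify_flag;		/* stands in for TIF_NOTIFY_SIGNAL */
static atomic_int work_pending;		/* stands in for task_work_pending() */

static void run_work(void) { /* drain queued work here */ }

static int run_task_work_like(void)
{
	/* Always check-and-clear the flag; it may be set with nothing queued. */
	atomic_exchange(&notify_flag, false);

	if (atomic_exchange(&work_pending, 0)) {
		run_work();
		return 1;
	}
	return 0;
}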

io_uring/poll.c

Lines changed: 40 additions & 7 deletions
@@ -40,7 +40,14 @@ struct io_poll_table {
 };
 
 #define IO_POLL_CANCEL_FLAG	BIT(31)
-#define IO_POLL_REF_MASK	GENMASK(30, 0)
+#define IO_POLL_RETRY_FLAG	BIT(30)
+#define IO_POLL_REF_MASK	GENMASK(29, 0)
+
+/*
+ * We usually have 1-2 refs taken, 128 is more than enough and we want to
+ * maximise the margin between this amount and the moment when it overflows.
+ */
+#define IO_POLL_REF_BIAS	128
 
 #define IO_WQE_F_DOUBLE		1
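The same layout restated in plain C for reference, with BIT()/GENMASK() expanded by hand since those are kernel macros: bit 31 is the cancel flag, bit 30 the new retry flag, bits 29..0 carry the reference count, and the bias of 128 sits far below the point where the count could reach the flag bits.

/* Hand-expanded layout check; names mirror the kernel macros above. */
#define POLL_CANCEL_FLAG	(1u << 31)		/* BIT(31) */
#define POLL_RETRY_FLAG		(1u << 30)		/* BIT(30) */
#define POLL_REF_MASK		((1u << 30) - 1)	/* GENMASK(29, 0) */
#define POLL_REF_BIAS		128u

_Static_assert((POLL_CANCEL_FLAG & POLL_REF_MASK) == 0, "cancel flag outside refs");
_Static_assert((POLL_RETRY_FLAG & POLL_REF_MASK) == 0, "retry flag outside refs");
_Static_assert(POLL_REF_BIAS < (POLL_REF_MASK >> 8), "wide margin before overflow");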

@@ -58,6 +65,21 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe)
 	return priv & IO_WQE_F_DOUBLE;
 }
 
+static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
+{
+	int v;
+
+	/*
+	 * poll_refs are already elevated and we don't have much hope for
+	 * grabbing the ownership. Instead of incrementing set a retry flag
+	 * to notify the loop that there might have been some change.
+	 */
+	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
+	if (v & IO_POLL_REF_MASK)
+		return false;
+	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
+}
+
 /*
  * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
  * bump it and acquire ownership. It's disallowed to modify requests while not
@@ -66,6 +88,8 @@ static inline bool wqe_is_double(struct wait_queue_entry *wqe)
  */
 static inline bool io_poll_get_ownership(struct io_kiocb *req)
 {
+	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
+		return io_poll_get_ownership_slowpath(req);
 	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
 }
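A userspace sketch of the two paths above, using C11 atomics and the hand-expanded constants from the earlier sketch (illustrative only, not the kernel implementation): the fast path claims ownership by bumping the count from zero, while the slow path only ORs in the retry flag so an already-elevated count cannot creep into the flag bits.

/* Illustrative fast/slow ownership split over a biased reference count. */
#include <stdatomic.h>
#include <stdbool.h>

#define REF_RETRY_FLAG	(1u << 30)
#define REF_MASK	((1u << 30) - 1)
#define REF_BIAS	128u

static bool get_ownership_slowpath(atomic_uint *refs)
{
	/* Don't bump further; flag the owner so it rechecks for new events. */
	unsigned int v = atomic_fetch_or(refs, REF_RETRY_FLAG);

	if (v & REF_MASK)
		return false;
	return !(atomic_fetch_add(refs, 1) & REF_MASK);
}

static bool get_ownership(atomic_uint *refs)
{
	if (atomic_load(refs) >= REF_BIAS)
		return get_ownership_slowpath(refs);
	return !(atomic_fetch_add(refs, 1) & REF_MASK);
}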

@@ -235,6 +259,16 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 		 */
 		if ((v & IO_POLL_REF_MASK) != 1)
 			req->cqe.res = 0;
+		if (v & IO_POLL_RETRY_FLAG) {
+			req->cqe.res = 0;
+			/*
+			 * We won't find new events that came in between
+			 * vfs_poll and the ref put unless we clear the flag
+			 * in advance.
+			 */
+			atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
+			v &= ~IO_POLL_RETRY_FLAG;
+		}
 
 		/* the mask was stashed in __io_poll_execute */
 		if (!req->cqe.res) {
@@ -274,7 +308,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 	 * Release all references, retry if someone tried to restart
 	 * task_work while we were executing it.
 	 */
-	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
+	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
+					IO_POLL_REF_MASK);
 
 	return IOU_POLL_NO_ACTION;
 }
@@ -518,7 +553,6 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 				 unsigned issue_flags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	int v;
 
 	INIT_HLIST_NODE(&req->hash_node);
 	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
@@ -586,11 +620,10 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 
 	if (ipt->owning) {
 		/*
-		 * Release ownership. If someone tried to queue a tw while it was
-		 * locked, kick it off for them.
+		 * Try to release ownership. If we see a change of state, e.g.
+		 * poll was waken up, queue up a tw, it'll deal with it.
 		 */
-		v = atomic_dec_return(&req->poll_refs);
-		if (unlikely(v & IO_POLL_REF_MASK))
+		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
 			__io_poll_execute(req, 0);
 	}
 	return 0;
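And a matching userspace sketch of the release step (illustrative names; queue_retry() stands in for __io_poll_execute()): instead of decrementing and inspecting the result, ownership is dropped only if the count is still exactly 1; anything else means the state changed underneath and the task-work path has to handle it.

/* Illustrative cmpxchg-style release of poll ownership. */
#include <stdatomic.h>

static void queue_retry(void) { /* stand-in for __io_poll_execute() */ }

static void release_ownership(atomic_uint *refs)
{
	unsigned int expected = 1;

	/* Only release if nothing changed; otherwise hand off to task_work. */
	if (!atomic_compare_exchange_strong(refs, &expected, 0))
		queue_retry();
}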
