Commit 7ccc3eb

Merge tag 'io_uring-6.6-2023-09-08' of git://git.kernel.dk/linux
Pull io_uring fixes from Jens Axboe:
 "A few fixes that should go into the 6.6-rc merge window:

   - Fix for a regression this merge window caused by the SQPOLL
     affinity patch, where we can race with SQPOLL thread shutdown and
     cause an oops when trying to set affinity (Gabriel)

   - Fix for a regression this merge window where fdinfo reading for a
     ring setup with IORING_SETUP_NO_SQARRAY will attempt to dereference
     the non-existing SQ ring array (me)

   - Add the patch that allows more fine-grained control over who can
     use io_uring (Matteo)

   - Locking fix for a regression added this merge window for IOPOLL
     overflow (Pavel)

   - IOPOLL fix for stable, breaking our loop if helper threads are
     exiting (Pavel)

  Also had a fix for unreaped iopoll requests from io-wq from Ming, but
  we found an issue with that and hence it got reverted. Will get this
  sorted for a future rc"

* tag 'io_uring-6.6-2023-09-08' of git://git.kernel.dk/linux:
  Revert "io_uring: fix IO hang in io_wq_put_and_exit from do_exit()"
  io_uring: fix unprotected iopoll overflow
  io_uring: break out of iowq iopoll on teardown
  io_uring: add a sysctl to disable io_uring system-wide
  io_uring/fdinfo: only print ->sq_array[] if it's there
  io_uring: fix IO hang in io_wq_put_and_exit from do_exit()
  io_uring: Don't set affinity on a dying sqpoll thread
2 parents 32bf43e + 023464f commit 7ccc3eb

6 files changed: 99 additions, 3 deletions

Documentation/admin-guide/sysctl/kernel.rst

Lines changed: 29 additions & 0 deletions

@@ -450,6 +450,35 @@ this allows system administrators to override the
 ``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded.
 
 
+io_uring_disabled
+=================
+
+Prevents all processes from creating new io_uring instances. Enabling this
+shrinks the kernel's attack surface.
+
+= ======================================================================
+0 All processes can create io_uring instances as normal. This is the
+  default setting.
+1 io_uring creation is disabled (io_uring_setup() will fail with
+  -EPERM) for unprivileged processes not in the io_uring_group group.
+  Existing io_uring instances can still be used. See the
+  documentation for io_uring_group for more information.
+2 io_uring creation is disabled for all processes. io_uring_setup()
+  always fails with -EPERM. Existing io_uring instances can still be
+  used.
+= ======================================================================
+
+
+io_uring_group
+==============
+
+When io_uring_disabled is set to 1, a process must either be
+privileged (CAP_SYS_ADMIN) or be in the io_uring_group group in order
+to create an io_uring instance. If io_uring_group is set to -1 (the
+default), only processes with the CAP_SYS_ADMIN capability may create
+io_uring instances.
+
+
 kexec_load_disabled
 ===================
 
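For context, here is a minimal userspace probe (not part of this commit; the program and its 8-entry ring size are purely illustrative) showing how the policy above is observed from userspace: io_uring_setup() either succeeds or fails with -EPERM, depending on io_uring_disabled, CAP_SYS_ADMIN, and io_uring_group membership.

/*
 * Hypothetical probe, not from this commit: try to create a small
 * io_uring instance via the raw io_uring_setup(2) syscall and report
 * whether the io_uring_disabled policy denies it with EPERM.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

int main(void)
{
        struct io_uring_params p;
        int fd;

        memset(&p, 0, sizeof(p));
        fd = syscall(__NR_io_uring_setup, 8, &p);       /* 8 entries: arbitrary */
        if (fd < 0) {
                if (errno == EPERM)
                        printf("io_uring creation denied (io_uring_disabled policy)\n");
                else
                        printf("io_uring_setup: %s\n", strerror(errno));
                return 1;
        }
        printf("io_uring creation allowed (ring fd %d)\n", fd);
        close(fd);
        return 0;
}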

io_uring/fdinfo.c

Lines changed: 2 additions & 0 deletions

@@ -93,6 +93,8 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
                 struct io_uring_sqe *sqe;
                 unsigned int sq_idx;
 
+                if (ctx->flags & IORING_SETUP_NO_SQARRAY)
+                        break;
                 sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
                 if (sq_idx > sq_mask)
                         continue;
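As an aside, a hedged reproducer sketch for this fix (not part of the commit; it requires 6.6 uapi headers that define IORING_SETUP_NO_SQARRAY) would create such a ring and read its fdinfo, which is exactly the path the new check short-circuits:

/*
 * Illustrative only: set up a ring with IORING_SETUP_NO_SQARRAY and
 * dump /proc/self/fdinfo/<fd>, the path guarded by the check above.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

int main(void)
{
        struct io_uring_params p;
        char path[64], buf[4096];
        size_t n;
        FILE *f;
        int fd;

        memset(&p, 0, sizeof(p));
        p.flags = IORING_SETUP_NO_SQARRAY;
        fd = syscall(__NR_io_uring_setup, 8, &p);
        if (fd < 0) {
                perror("io_uring_setup");
                return 1;
        }

        snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
        f = fopen(path, "r");
        if (!f) {
                perror("fopen");
                return 1;
        }
        n = fread(buf, 1, sizeof(buf) - 1, f);
        buf[n] = '\0';
        fputs(buf, stdout);
        fclose(f);
        close(fd);
        return 0;
}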

io_uring/io-wq.c

Lines changed: 10 additions & 0 deletions

@@ -174,6 +174,16 @@ static void io_worker_ref_put(struct io_wq *wq)
                 complete(&wq->worker_done);
 }
 
+bool io_wq_worker_stopped(void)
+{
+        struct io_worker *worker = current->worker_private;
+
+        if (WARN_ON_ONCE(!io_wq_current_is_worker()))
+                return true;
+
+        return test_bit(IO_WQ_BIT_EXIT, &worker->wq->state);
+}
+
 static void io_worker_cancel_cb(struct io_worker *worker)
 {
         struct io_wq_acct *acct = io_wq_get_acct(worker);

io_uring/io-wq.h

Lines changed: 1 addition & 0 deletions

@@ -52,6 +52,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val);
 
 int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
 int io_wq_max_workers(struct io_wq *wq, int *new_count);
+bool io_wq_worker_stopped(void);
 
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {

io_uring/io_uring.c

Lines changed: 54 additions & 2 deletions

@@ -150,6 +150,31 @@ static void io_queue_sqe(struct io_kiocb *req);
 
 struct kmem_cache *req_cachep;
 
+static int __read_mostly sysctl_io_uring_disabled;
+static int __read_mostly sysctl_io_uring_group = -1;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kernel_io_uring_disabled_table[] = {
+        {
+                .procname = "io_uring_disabled",
+                .data = &sysctl_io_uring_disabled,
+                .maxlen = sizeof(sysctl_io_uring_disabled),
+                .mode = 0644,
+                .proc_handler = proc_dointvec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_TWO,
+        },
+        {
+                .procname = "io_uring_group",
+                .data = &sysctl_io_uring_group,
+                .maxlen = sizeof(gid_t),
+                .mode = 0644,
+                .proc_handler = proc_dointvec,
+        },
+        {},
+};
+#endif
+
 struct sock *io_uring_get_socket(struct file *file)
 {
 #if defined(CONFIG_UNIX)

@@ -883,7 +908,7 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
                 struct io_uring_cqe *cqe = &ctx->completion_cqes[i];
 
                 if (!io_fill_cqe_aux(ctx, cqe->user_data, cqe->res, cqe->flags)) {
-                        if (ctx->task_complete) {
+                        if (ctx->lockless_cq) {
                                 spin_lock(&ctx->completion_lock);
                                 io_cqring_event_overflow(ctx, cqe->user_data,
                                                          cqe->res, cqe->flags, 0, 0);

@@ -1541,7 +1566,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 
                 if (!(req->flags & REQ_F_CQE_SKIP) &&
                     unlikely(!io_fill_cqe_req(ctx, req))) {
-                        if (ctx->task_complete) {
+                        if (ctx->lockless_cq) {
                                 spin_lock(&ctx->completion_lock);
                                 io_req_cqe_overflow(req);
                                 spin_unlock(&ctx->completion_lock);

@@ -1950,6 +1975,8 @@ void io_wq_submit_work(struct io_wq_work *work)
                 if (!needs_poll) {
                         if (!(req->ctx->flags & IORING_SETUP_IOPOLL))
                                 break;
+                        if (io_wq_worker_stopped())
+                                break;
                         cond_resched();
                         continue;
                 }

@@ -4038,9 +4065,30 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
         return io_uring_create(entries, &p, params);
 }
 
+static inline bool io_uring_allowed(void)
+{
+        int disabled = READ_ONCE(sysctl_io_uring_disabled);
+        kgid_t io_uring_group;
+
+        if (disabled == 2)
+                return false;
+
+        if (disabled == 0 || capable(CAP_SYS_ADMIN))
+                return true;
+
+        io_uring_group = make_kgid(&init_user_ns, sysctl_io_uring_group);
+        if (!gid_valid(io_uring_group))
+                return false;
+
+        return in_group_p(io_uring_group);
+}
+
 SYSCALL_DEFINE2(io_uring_setup, u32, entries,
                 struct io_uring_params __user *, params)
 {
+        if (!io_uring_allowed())
+                return -EPERM;
+
         return io_uring_setup(entries, params);
 }
 

@@ -4634,6 +4682,10 @@ static int __init io_uring_init(void)
                                 offsetof(struct io_kiocb, cmd.data),
                                 sizeof_field(struct io_kiocb, cmd.data), NULL);
 
+#ifdef CONFIG_SYSCTL
+        register_sysctl_init("kernel", kernel_io_uring_disabled_table);
+#endif
+
         return 0;
 };
 __initcall(io_uring_init);

io_uring/sqpoll.c

Lines changed: 3 additions & 1 deletion

@@ -430,7 +430,9 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
 
         if (sqd) {
                 io_sq_thread_park(sqd);
-                ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
+                /* Don't set affinity for a dying thread */
+                if (sqd->thread)
+                        ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
                 io_sq_thread_unpark(sqd);
         }
 
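For orientation, a hedged sketch (not part of this commit): the path guarded above is reached from userspace by registering an io-wq CPU affinity mask on a ring created with IORING_SETUP_SQPOLL; the mask contents below are arbitrary.

/*
 * Illustrative only: create an SQPOLL ring and register a CPU affinity
 * mask for it via IORING_REGISTER_IOWQ_AFF, the operation that, for
 * SQPOLL rings, ends up in io_sqpoll_wq_cpu_affinity().
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

int main(void)
{
        struct io_uring_params p;
        cpu_set_t mask;
        int fd, ret;

        memset(&p, 0, sizeof(p));
        p.flags = IORING_SETUP_SQPOLL;
        fd = syscall(__NR_io_uring_setup, 8, &p);
        if (fd < 0) {
                perror("io_uring_setup");
                return 1;
        }

        CPU_ZERO(&mask);
        CPU_SET(0, &mask);      /* pin the workers to CPU 0 (arbitrary) */
        ret = syscall(__NR_io_uring_register, fd, IORING_REGISTER_IOWQ_AFF,
                      &mask, sizeof(mask));
        if (ret < 0)
                perror("IORING_REGISTER_IOWQ_AFF");

        close(fd);
        return 0;
}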
