Commit 0bc7eb0

Merge tag 'io_uring-5.15-2021-09-17' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:
 "Mostly fixes for regressions in this cycle, but also a few fixes that
  predate this release.

  The odd one out is a tweak to the direct files added in this release,
  where attempting to reuse a slot is allowed instead of needing an
  explicit removal of that slot first. It's a considerable improvement
  in usability to that API, hence I'm sending it for -rc2.

   - io-wq race fix and cleanup (Hao)

   - loop_rw_iter() type fix

   - SQPOLL max worker race fix

   - Allow poll arm for O_NONBLOCK files, fixing a case where it's
     impossible to properly use io_uring if you cannot modify the file
     flags

   - Allow direct open to simply reuse a slot, instead of needing it
     explicitly removed first (Pavel)

   - Fix a case where we missed signal mask restoring in cqring_wait,
     if we hit -EFAULT (Xiaoguang)"

* tag 'io_uring-5.15-2021-09-17' of git://git.kernel.dk/linux-block:
  io_uring: allow retry for O_NONBLOCK if async is supported
  io_uring: auto-removal for direct open/accept
  io_uring: fix missing sigmask restore in io_cqring_wait()
  io_uring: pin SQPOLL data before unlocking ring lock
  io-wq: provide IO_WQ_* constants for IORING_REGISTER_IOWQ_MAX_WORKERS arg items
  io-wq: fix potential race of acct->nr_workers
  io-wq: code clean of io_wqe_create_worker()
  io_uring: ensure symmetry in handling iter types in loop_rw_iter()
2 parents 36d6753 + 5d329e1 commit 0bc7eb0
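
One of the user-visible changes above is the O_NONBLOCK poll-arm fix. Below is a minimal userspace sketch (not part of this commit, and assuming liburing is available) of the behaviour it addresses: a read queued on a pipe end opened with O_NONBLOCK used to complete straight away with -EAGAIN when no data was ready, whereas with this fix io_uring may poll-arm the request and complete it once data arrives.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	char buf[64];
	int pfd[2];

	/* read end is O_NONBLOCK; pretend we cannot change its flags */
	if (pipe2(pfd, O_NONBLOCK) || io_uring_queue_init(8, &ring, 0))
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, pfd[0], buf, sizeof(buf), 0);
	io_uring_submit(&ring);		/* no data yet, read cannot finish inline */

	write(pfd[1], "hi", 2);		/* data only shows up after submission */

	io_uring_wait_cqe(&ring, &cqe);
	/* pre-fix kernels typically report -EAGAIN here; with the fix the
	 * request can be retried via poll and res becomes 2 */
	printf("read result: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}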

3 files changed: +88, -52 lines

fs/io-wq.c

Lines changed: 13 additions & 14 deletions
@@ -14,6 +14,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/cpu.h>
 #include <linux/tracehook.h>
+#include <uapi/linux/io_uring.h>
 
 #include "io-wq.h"
 
@@ -176,7 +177,6 @@ static void io_worker_ref_put(struct io_wq *wq)
 static void io_worker_exit(struct io_worker *worker)
 {
 	struct io_wqe *wqe = worker->wqe;
-	struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 
 	if (refcount_dec_and_test(&worker->ref))
 		complete(&worker->ref_done);
@@ -186,7 +186,6 @@ static void io_worker_exit(struct io_worker *worker)
 	if (worker->flags & IO_WORKER_F_FREE)
 		hlist_nulls_del_rcu(&worker->nulls_node);
 	list_del_rcu(&worker->all_list);
-	acct->nr_workers--;
 	preempt_disable();
 	io_wqe_dec_running(worker);
 	worker->flags = 0;
@@ -246,8 +245,6 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe,
  */
 static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 {
-	bool do_create = false;
-
 	/*
 	 * Most likely an attempt to queue unbounded work on an io_wq that
 	 * wasn't setup with any unbounded workers.
@@ -256,18 +253,15 @@ static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
 		pr_warn_once("io-wq is not configured for unbound workers");
 
 	raw_spin_lock(&wqe->lock);
-	if (acct->nr_workers < acct->max_workers) {
-		acct->nr_workers++;
-		do_create = true;
+	if (acct->nr_workers == acct->max_workers) {
+		raw_spin_unlock(&wqe->lock);
+		return true;
 	}
+	acct->nr_workers++;
 	raw_spin_unlock(&wqe->lock);
-	if (do_create) {
-		atomic_inc(&acct->nr_running);
-		atomic_inc(&wqe->wq->worker_refs);
-		return create_io_worker(wqe->wq, wqe, acct->index);
-	}
-
-	return true;
+	atomic_inc(&acct->nr_running);
+	atomic_inc(&wqe->wq->worker_refs);
+	return create_io_worker(wqe->wq, wqe, acct->index);
 }
 
 static void io_wqe_inc_running(struct io_worker *worker)
@@ -574,6 +568,7 @@ static int io_wqe_worker(void *data)
 		}
 		/* timed out, exit unless we're the last worker */
 		if (last_timeout && acct->nr_workers > 1) {
+			acct->nr_workers--;
 			raw_spin_unlock(&wqe->lock);
 			__set_current_state(TASK_RUNNING);
 			break;
@@ -1287,6 +1282,10 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
 {
 	int i, node, prev = 0;
 
+	BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND);
+	BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
+	BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2);
+
 	for (i = 0; i < 2; i++) {
 		if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
 			new_count[i] = task_rlimit(current, RLIMIT_NPROC);

fs/io_uring.c

Lines changed: 68 additions & 37 deletions
@@ -2843,7 +2843,8 @@ static bool io_file_supports_nowait(struct io_kiocb *req, int rw)
 	return __io_file_supports_nowait(req->file, rw);
 }
 
-static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+		      int rw)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw.kiocb;
@@ -2865,8 +2866,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (unlikely(ret))
 		return ret;
 
-	/* don't allow async punt for O_NONBLOCK or RWF_NOWAIT */
-	if ((kiocb->ki_flags & IOCB_NOWAIT) || (file->f_flags & O_NONBLOCK))
+	/*
+	 * If the file is marked O_NONBLOCK, still allow retry for it if it
+	 * supports async. Otherwise it's impossible to use O_NONBLOCK files
+	 * reliably. If not, or it IOCB_NOWAIT is set, don't retry.
+	 */
+	if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+	    ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw)))
 		req->flags |= REQ_F_NOWAIT;
 
 	ioprio = READ_ONCE(sqe->ioprio);
@@ -3263,12 +3269,15 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
 				ret = nr;
 			break;
 		}
+		if (!iov_iter_is_bvec(iter)) {
+			iov_iter_advance(iter, nr);
+		} else {
+			req->rw.len -= nr;
+			req->rw.addr += nr;
+		}
 		ret += nr;
 		if (nr != iovec.iov_len)
 			break;
-		req->rw.len -= nr;
-		req->rw.addr += nr;
-		iov_iter_advance(iter, nr);
 	}
 
 	return ret;
@@ -3346,7 +3355,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	if (unlikely(!(req->file->f_mode & FMODE_READ)))
 		return -EBADF;
-	return io_prep_rw(req, sqe);
+	return io_prep_rw(req, sqe, READ);
 }
 
 /*
@@ -3539,7 +3548,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
 		return -EBADF;
-	return io_prep_rw(req, sqe);
+	return io_prep_rw(req, sqe, WRITE);
 }
 
 static int io_write(struct io_kiocb *req, unsigned int issue_flags)
@@ -7515,6 +7524,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			break;
 	} while (1);
 
+	if (uts) {
+		struct timespec64 ts;
+
+		if (get_timespec64(&ts, uts))
+			return -EFAULT;
+		timeout = timespec64_to_jiffies(&ts);
+	}
+
 	if (sig) {
 #ifdef CONFIG_COMPAT
 		if (in_compat_syscall())
@@ -7528,14 +7545,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	if (uts) {
-		struct timespec64 ts;
-
-		if (get_timespec64(&ts, uts))
-			return -EFAULT;
-		timeout = timespec64_to_jiffies(&ts);
-	}
-
 	init_waitqueue_func_entry(&iowq.wq, io_wake_function);
 	iowq.wq.private = current;
 	INIT_LIST_HEAD(&iowq.wq.entry);
@@ -8284,11 +8293,27 @@ static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
 #endif
 }
 
+static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
+				 struct io_rsrc_node *node, void *rsrc)
+{
+	struct io_rsrc_put *prsrc;
+
+	prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
+	if (!prsrc)
+		return -ENOMEM;
+
+	prsrc->tag = *io_get_tag_slot(data, idx);
+	prsrc->rsrc = rsrc;
+	list_add(&prsrc->list, &node->rsrc_list);
+	return 0;
+}
+
 static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
 				 unsigned int issue_flags, u32 slot_index)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+	bool needs_switch = false;
 	struct io_fixed_file *file_slot;
 	int ret = -EBADF;
 
@@ -8304,9 +8329,22 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
 
 	slot_index = array_index_nospec(slot_index, ctx->nr_user_files);
 	file_slot = io_fixed_file_slot(&ctx->file_table, slot_index);
-	ret = -EBADF;
-	if (file_slot->file_ptr)
-		goto err;
+
+	if (file_slot->file_ptr) {
+		struct file *old_file;
+
+		ret = io_rsrc_node_switch_start(ctx);
+		if (ret)
+			goto err;
+
+		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
+		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
+					    ctx->rsrc_node, old_file);
+		if (ret)
+			goto err;
+		file_slot->file_ptr = 0;
+		needs_switch = true;
+	}
 
 	*io_get_tag_slot(ctx->file_data, slot_index) = 0;
 	io_fixed_file_set(file_slot, file);
@@ -8318,27 +8356,14 @@ static int io_install_fixed_file(struct io_kiocb *req, struct file *file,
 
 	ret = 0;
 err:
+	if (needs_switch)
+		io_rsrc_node_switch(ctx, ctx->file_data);
 	io_ring_submit_unlock(ctx, !force_nonblock);
 	if (ret)
 		fput(file);
 	return ret;
 }
 
-static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
-				 struct io_rsrc_node *node, void *rsrc)
-{
-	struct io_rsrc_put *prsrc;
-
-	prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL);
-	if (!prsrc)
-		return -ENOMEM;
-
-	prsrc->tag = *io_get_tag_slot(data, idx);
-	prsrc->rsrc = rsrc;
-	list_add(&prsrc->list, &node->rsrc_list);
-	return 0;
-}
-
 static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 struct io_uring_rsrc_update2 *up,
 				 unsigned nr_args)
@@ -10560,10 +10585,12 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 			 * ordering. Fine to drop uring_lock here, we hold
 			 * a ref to the ctx.
 			 */
+			refcount_inc(&sqd->refs);
 			mutex_unlock(&ctx->uring_lock);
 			mutex_lock(&sqd->lock);
 			mutex_lock(&ctx->uring_lock);
-			tctx = sqd->thread->io_uring;
+			if (sqd->thread)
+				tctx = sqd->thread->io_uring;
 		}
 	} else {
 		tctx = current->io_uring;
@@ -10577,16 +10604,20 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	if (ret)
 		goto err;
 
-	if (sqd)
+	if (sqd) {
 		mutex_unlock(&sqd->lock);
+		io_put_sq_data(sqd);
+	}
 
 	if (copy_to_user(arg, new_count, sizeof(new_count)))
 		return -EFAULT;
 
 	return 0;
 err:
-	if (sqd)
+	if (sqd) {
 		mutex_unlock(&sqd->lock);
+		io_put_sq_data(sqd);
+	}
 	return ret;
 }
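
The io_install_fixed_file() change above is the direct-open tweak mentioned in the merge message: opening into an already-occupied fixed-file slot now queues removal of the old file instead of failing. A hedged userspace sketch of the difference, assuming a liburing recent enough to provide io_uring_prep_openat_direct() and using made-up file names:

#include <fcntl.h>
#include <stdio.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int files[4] = { -1, -1, -1, -1 };	/* sparse fixed-file table */

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;
	if (io_uring_register_files(&ring, files, 4) < 0)
		return 1;

	/* open "a.txt" directly into fixed slot 0 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_openat_direct(sqe, AT_FDCWD, "a.txt", O_RDONLY, 0, 0);
	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	io_uring_cqe_seen(&ring, cqe);

	/* open "b.txt" into the same slot: previously this completed with
	 * -EBADF unless the slot was unregistered first; with this change
	 * the old file is dropped and replaced automatically (res == 0) */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_openat_direct(sqe, AT_FDCWD, "b.txt", O_RDONLY, 0, 0);
	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	printf("second open into slot 0: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	return 0;
}

The design choice in the diff mirrors a fixed-file update: the old entry is queued for removal through io_queue_rsrc_removal() rather than the request being rejected.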

include/uapi/linux/io_uring.h

Lines changed: 7 additions & 1 deletion
@@ -317,13 +317,19 @@ enum {
 	IORING_REGISTER_IOWQ_AFF = 17,
 	IORING_UNREGISTER_IOWQ_AFF = 18,
 
-	/* set/get max number of workers */
+	/* set/get max number of io-wq workers */
 	IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
 
+/* io-wq worker categories */
+enum {
+	IO_WQ_BOUND,
+	IO_WQ_UNBOUND,
+};
+
 /* deprecated, see struct io_uring_rsrc_update */
 struct io_uring_files_update {
 	__u32 offset;
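
With IO_WQ_BOUND and IO_WQ_UNBOUND exported, the two-element array passed to IORING_REGISTER_IOWQ_MAX_WORKERS can be indexed by name. A small sketch using the raw io_uring_register(2) syscall follows; the ring fd is assumed to come from an earlier io_uring_setup() call, and the limits shown are arbitrary.

#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

/* Cap the number of io-wq workers per category. A zero entry leaves that
 * category's limit untouched, and the kernel writes the previous limits
 * back into the array. */
static long set_iowq_limits(int ring_fd)
{
	__u32 counts[2];

	counts[IO_WQ_BOUND] = 8;	/* bounded work, e.g. regular file I/O */
	counts[IO_WQ_UNBOUND] = 64;	/* unbounded work, e.g. sockets */

	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_IOWQ_MAX_WORKERS, counts, 2);
}

Recent liburing versions also wrap this call as io_uring_register_iowq_max_workers(), if that is preferable to the raw syscall.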
