Skip to content

Commit f46b9cd

Browse files
spikeh authored and axboe committed
io_uring: limit local tw done
Instead of eagerly running all available local tw, limit the amount of local tw done to the max of IO_LOCAL_TW_DEFAULT_MAX (20) or wait_nr. The value of 20 is chosen as a reasonable heuristic to allow enough work batching but also keep latency down.

Add a retry_llist that maintains a list of local tw that couldn't be done in time. No synchronisation is needed since it is only modified within the task context.

Signed-off-by: David Wei <dw@davidwei.uk>
Link: https://lore.kernel.org/r/20241120221452.3762588-3-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 40cfe55 commit f46b9cd

File tree

3 files changed

+34
-12
lines changed

3 files changed

+34
-12
lines changed

include/linux/io_uring_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,7 @@ struct io_ring_ctx {
335335
*/
336336
struct {
337337
struct llist_head work_llist;
338+
struct llist_head retry_llist;
338339
unsigned long check_cq;
339340
atomic_t cq_wait_nr;
340341
atomic_t cq_timeouts;

io_uring/io_uring.c

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122

123123
#define IO_COMPL_BATCH 32
124124
#define IO_REQ_ALLOC_BATCH 8
125+
#define IO_LOCAL_TW_DEFAULT_MAX 20
125126

126127
struct io_defer_entry {
127128
struct list_head list;
@@ -1256,6 +1257,8 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
12561257
struct llist_node *node = llist_del_all(&ctx->work_llist);
12571258

12581259
__io_fallback_tw(node, false);
1260+
node = llist_del_all(&ctx->retry_llist);
1261+
__io_fallback_tw(node, false);
12591262
}
12601263

12611264
static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
@@ -1270,37 +1273,55 @@ static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
12701273
return false;
12711274
}
12721275

1276+
/*
 * Run queued task-work items from the list starting at *node, spending at
 * most @events of budget.
 *
 * @node:   in/out cursor into a singly linked list of io_task_work nodes.
 *          It is advanced past every item that is run, so on return it
 *          points at the first item that was NOT run, or is NULL if the
 *          list was drained.
 * @ts:     task-work state passed through unchanged to each callback.
 * @events: budget; decremented once per item run.
 *
 * Returns the budget left over (@events minus the number of items run),
 * NOT the number of items run. A caller that wants a count has to subtract
 * the return value from the budget it passed in.
 *
 * Note that the budget is only tested after an item has run, so if *node is
 * non-NULL at least one item is executed even when @events <= 0 on entry.
 */
static int __io_run_local_work_loop(struct llist_node **node,
1277+
struct io_tw_state *ts,
1278+
int events)
1279+
{
1280+
while (*node) {
1281+
struct llist_node *next = (*node)->next; /* read before the callback; presumably it may release or requeue req - confirm */
1282+
struct io_kiocb *req = container_of(*node, struct io_kiocb,
1283+
io_task_work.node);
1284+
INDIRECT_CALL_2(req->io_task_work.func,
1285+
io_poll_task_func, io_req_rw_complete,
1286+
req, ts);
1287+
*node = next; /* publish progress so the caller sees what is left */
1288+
if (--events <= 0) /* budget spent: stop, leaving the rest at *node */
1289+
break;
1290+
}
1291+
1292+
return events; /* remaining budget, not items processed */
1293+
}
1294+
12731295
static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
12741296
int min_events)
12751297
{
12761298
struct llist_node *node;
12771299
unsigned int loops = 0;
1278-
int ret = 0;
1300+
int ret, limit;
12791301

12801302
if (WARN_ON_ONCE(ctx->submitter_task != current))
12811303
return -EEXIST;
12821304
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
12831305
atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
1306+
limit = max(IO_LOCAL_TW_DEFAULT_MAX, min_events);
12841307
again:
1308+
ret = __io_run_local_work_loop(&ctx->retry_llist.first, ts, limit);
1309+
if (ctx->retry_llist.first)
1310+
goto retry_done;
1311+
12851312
/*
12861313
* llists are in reverse order, flip it back the right way before
12871314
* running the pending items.
12881315
*/
12891316
node = llist_reverse_order(llist_del_all(&ctx->work_llist));
1290-
while (node) {
1291-
struct llist_node *next = node->next;
1292-
struct io_kiocb *req = container_of(node, struct io_kiocb,
1293-
io_task_work.node);
1294-
INDIRECT_CALL_2(req->io_task_work.func,
1295-
io_poll_task_func, io_req_rw_complete,
1296-
req, ts);
1297-
ret++;
1298-
node = next;
1299-
}
1317+
ret = __io_run_local_work_loop(&node, ts, ret);
1318+
ctx->retry_llist.first = node;
13001319
loops++;
13011320

1321+
ret = limit - ret;
13021322
if (io_run_local_work_continue(ctx, ret, min_events))
13031323
goto again;
1324+
retry_done:
13041325
io_submit_flush_completions(ctx);
13051326
if (io_run_local_work_continue(ctx, ret, min_events))
13061327
goto again;

io_uring/io_uring.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ static inline int io_run_task_work(void)
349349

350350
/*
 * Report whether @ctx still has local task work queued. The updated return
 * below checks both work_llist and retry_llist, so work parked on the retry
 * list also counts as pending.
 */
static inline bool io_local_work_pending(struct io_ring_ctx *ctx)
351351
{
352-
return !llist_empty(&ctx->work_llist);
352+
return !llist_empty(&ctx->work_llist) || !llist_empty(&ctx->retry_llist);
353353
}
354354

355355
static inline bool io_task_work_pending(struct io_ring_ctx *ctx)

0 commit comments

Comments
 (0)