@@ -2,6 +2,7 @@
 #define IO_URING_TYPES_H
 
 #include <linux/blkdev.h>
+#include <linux/hashtable.h>
 #include <linux/task_work.h>
 #include <linux/bitmap.h>
 #include <linux/llist.h>
@@ -240,12 +241,14 @@ struct io_ring_ctx {
 	unsigned int		poll_activated: 1;
 	unsigned int		drain_disabled: 1;
 	unsigned int		compat: 1;
+	unsigned int		iowq_limits_set : 1;
 
 	struct task_struct	*submitter_task;
 	struct io_rings		*rings;
 	struct percpu_ref	refs;
 
 	enum task_work_notify_mode	notify_method;
+	unsigned			sq_thread_idle;
 } ____cacheline_aligned_in_smp;
 
 /* submission data */
@@ -274,10 +277,20 @@ struct io_ring_ctx {
 	 */
 	struct io_rsrc_node		*rsrc_node;
 	atomic_t			cancel_seq;
+
+	/*
+	 * ->iopoll_list is protected by the ctx->uring_lock for
+	 * io_uring instances that don't use IORING_SETUP_SQPOLL.
+	 * For SQPOLL, only the single threaded io_sq_thread() will
+	 * manipulate the list, hence no extra locking is needed there.
+	 */
+	bool				poll_multi_queue;
+	struct io_wq_work_list		iopoll_list;
+
 	struct io_file_table		file_table;
+	struct io_mapped_ubuf		**user_bufs;
 	unsigned			nr_user_files;
 	unsigned			nr_user_bufs;
-	struct io_mapped_ubuf		**user_bufs;
 
 	struct io_submit_state		submit_state;
 
@@ -288,15 +301,6 @@ struct io_ring_ctx {
 	struct io_alloc_cache		apoll_cache;
 	struct io_alloc_cache		netmsg_cache;
 
-	/*
-	 * ->iopoll_list is protected by the ctx->uring_lock for
-	 * io_uring instances that don't use IORING_SETUP_SQPOLL.
-	 * For SQPOLL, only the single threaded io_sq_thread() will
-	 * manipulate the list, hence no extra locking is needed there.
-	 */
-	struct io_wq_work_list		iopoll_list;
-	bool				poll_multi_queue;
-
 	/*
 	 * Any cancelable uring_cmd is added to this list in
 	 * ->uring_cmd() by io_uring_cmd_insert_cancelable()
@@ -343,8 +347,8 @@ struct io_ring_ctx {
 	spinlock_t			completion_lock;
 
 	/* IRQ completion list, under ->completion_lock */
-	struct io_wq_work_list		locked_free_list;
 	unsigned int			locked_free_nr;
+	struct io_wq_work_list		locked_free_list;
 
 	struct list_head		io_buffers_comp;
 	struct list_head		cq_overflow_list;
@@ -366,9 +370,6 @@ struct io_ring_ctx {
 	unsigned int			file_alloc_start;
 	unsigned int			file_alloc_end;
 
-	struct xarray			personalities;
-	u32				pers_next;
-
 	struct list_head		io_buffers_cache;
 
 	/* deferred free list, protected by ->uring_lock */
@@ -389,6 +390,9 @@ struct io_ring_ctx {
 	struct wait_queue_head		rsrc_quiesce_wq;
 	unsigned			rsrc_quiesce;
 
+	u32				pers_next;
+	struct xarray			personalities;
+
 	/* hashed buffered write serialization */
 	struct io_wq_hash		*hash_map;
 
@@ -405,11 +409,22 @@ struct io_ring_ctx {
 
 	/* io-wq management, e.g. thread count */
 	u32				iowq_limits[2];
-	bool				iowq_limits_set;
 
 	struct callback_head		poll_wq_task_work;
 	struct list_head		defer_list;
-	unsigned			sq_thread_idle;
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	struct list_head	napi_list;	/* track busy poll napi_id */
+	spinlock_t		napi_lock;	/* napi_list lock */
+
+	/* napi busy poll default timeout */
+	unsigned int		napi_busy_poll_to;
+	bool			napi_prefer_busy_poll;
+	bool			napi_enabled;
+
+	DECLARE_HASHTABLE(napi_ht, 4);
+#endif
+
 	/* protected by ->completion_lock */
 	unsigned			evfd_last_cq_tail;
 
@@ -455,83 +470,95 @@ enum {
 	REQ_F_SKIP_LINK_CQES_BIT,
 	REQ_F_SINGLE_POLL_BIT,
 	REQ_F_DOUBLE_POLL_BIT,
-	REQ_F_PARTIAL_IO_BIT,
 	REQ_F_APOLL_MULTISHOT_BIT,
 	REQ_F_CLEAR_POLLIN_BIT,
 	REQ_F_HASH_LOCKED_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
 	REQ_F_POLL_NO_LAZY_BIT,
+	REQ_F_CANCEL_SEQ_BIT,
+	REQ_F_CAN_POLL_BIT,
+	REQ_F_BL_EMPTY_BIT,
+	REQ_F_BL_NO_RECYCLE_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
 };
 
+typedef u64 __bitwise io_req_flags_t;
+#define IO_REQ_FLAG(bitno)	((__force io_req_flags_t) BIT_ULL((bitno)))
+
 enum {
 	/* ctx owns file */
-	REQ_F_FIXED_FILE	= BIT(REQ_F_FIXED_FILE_BIT),
+	REQ_F_FIXED_FILE	= IO_REQ_FLAG(REQ_F_FIXED_FILE_BIT),
 	/* drain existing IO first */
-	REQ_F_IO_DRAIN		= BIT(REQ_F_IO_DRAIN_BIT),
+	REQ_F_IO_DRAIN		= IO_REQ_FLAG(REQ_F_IO_DRAIN_BIT),
 	/* linked sqes */
-	REQ_F_LINK		= BIT(REQ_F_LINK_BIT),
+	REQ_F_LINK		= IO_REQ_FLAG(REQ_F_LINK_BIT),
 	/* doesn't sever on completion < 0 */
-	REQ_F_HARDLINK		= BIT(REQ_F_HARDLINK_BIT),
+	REQ_F_HARDLINK		= IO_REQ_FLAG(REQ_F_HARDLINK_BIT),
 	/* IOSQE_ASYNC */
-	REQ_F_FORCE_ASYNC	= BIT(REQ_F_FORCE_ASYNC_BIT),
+	REQ_F_FORCE_ASYNC	= IO_REQ_FLAG(REQ_F_FORCE_ASYNC_BIT),
 	/* IOSQE_BUFFER_SELECT */
-	REQ_F_BUFFER_SELECT	= BIT(REQ_F_BUFFER_SELECT_BIT),
+	REQ_F_BUFFER_SELECT	= IO_REQ_FLAG(REQ_F_BUFFER_SELECT_BIT),
 	/* IOSQE_CQE_SKIP_SUCCESS */
-	REQ_F_CQE_SKIP		= BIT(REQ_F_CQE_SKIP_BIT),
+	REQ_F_CQE_SKIP		= IO_REQ_FLAG(REQ_F_CQE_SKIP_BIT),
 
 	/* fail rest of links */
-	REQ_F_FAIL		= BIT(REQ_F_FAIL_BIT),
+	REQ_F_FAIL		= IO_REQ_FLAG(REQ_F_FAIL_BIT),
 	/* on inflight list, should be cancelled and waited on exit reliably */
-	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
+	REQ_F_INFLIGHT		= IO_REQ_FLAG(REQ_F_INFLIGHT_BIT),
 	/* read/write uses file position */
-	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
+	REQ_F_CUR_POS		= IO_REQ_FLAG(REQ_F_CUR_POS_BIT),
 	/* must not punt to workers */
-	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
+	REQ_F_NOWAIT		= IO_REQ_FLAG(REQ_F_NOWAIT_BIT),
 	/* has or had linked timeout */
-	REQ_F_LINK_TIMEOUT	= BIT(REQ_F_LINK_TIMEOUT_BIT),
+	REQ_F_LINK_TIMEOUT	= IO_REQ_FLAG(REQ_F_LINK_TIMEOUT_BIT),
 	/* needs cleanup */
-	REQ_F_NEED_CLEANUP	= BIT(REQ_F_NEED_CLEANUP_BIT),
+	REQ_F_NEED_CLEANUP	= IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
 	/* already went through poll handler */
-	REQ_F_POLLED		= BIT(REQ_F_POLLED_BIT),
+	REQ_F_POLLED		= IO_REQ_FLAG(REQ_F_POLLED_BIT),
 	/* buffer already selected */
-	REQ_F_BUFFER_SELECTED	= BIT(REQ_F_BUFFER_SELECTED_BIT),
+	REQ_F_BUFFER_SELECTED	= IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
 	/* buffer selected from ring, needs commit */
-	REQ_F_BUFFER_RING	= BIT(REQ_F_BUFFER_RING_BIT),
+	REQ_F_BUFFER_RING	= IO_REQ_FLAG(REQ_F_BUFFER_RING_BIT),
 	/* caller should reissue async */
-	REQ_F_REISSUE		= BIT(REQ_F_REISSUE_BIT),
+	REQ_F_REISSUE		= IO_REQ_FLAG(REQ_F_REISSUE_BIT),
 	/* supports async reads/writes */
-	REQ_F_SUPPORT_NOWAIT	= BIT(REQ_F_SUPPORT_NOWAIT_BIT),
+	REQ_F_SUPPORT_NOWAIT	= IO_REQ_FLAG(REQ_F_SUPPORT_NOWAIT_BIT),
 	/* regular file */
-	REQ_F_ISREG		= BIT(REQ_F_ISREG_BIT),
+	REQ_F_ISREG		= IO_REQ_FLAG(REQ_F_ISREG_BIT),
 	/* has creds assigned */
-	REQ_F_CREDS		= BIT(REQ_F_CREDS_BIT),
+	REQ_F_CREDS		= IO_REQ_FLAG(REQ_F_CREDS_BIT),
 	/* skip refcounting if not set */
-	REQ_F_REFCOUNT		= BIT(REQ_F_REFCOUNT_BIT),
+	REQ_F_REFCOUNT		= IO_REQ_FLAG(REQ_F_REFCOUNT_BIT),
 	/* there is a linked timeout that has to be armed */
-	REQ_F_ARM_LTIMEOUT	= BIT(REQ_F_ARM_LTIMEOUT_BIT),
+	REQ_F_ARM_LTIMEOUT	= IO_REQ_FLAG(REQ_F_ARM_LTIMEOUT_BIT),
 	/* ->async_data allocated */
-	REQ_F_ASYNC_DATA	= BIT(REQ_F_ASYNC_DATA_BIT),
+	REQ_F_ASYNC_DATA	= IO_REQ_FLAG(REQ_F_ASYNC_DATA_BIT),
 	/* don't post CQEs while failing linked requests */
-	REQ_F_SKIP_LINK_CQES	= BIT(REQ_F_SKIP_LINK_CQES_BIT),
+	REQ_F_SKIP_LINK_CQES	= IO_REQ_FLAG(REQ_F_SKIP_LINK_CQES_BIT),
 	/* single poll may be active */
-	REQ_F_SINGLE_POLL	= BIT(REQ_F_SINGLE_POLL_BIT),
+	REQ_F_SINGLE_POLL	= IO_REQ_FLAG(REQ_F_SINGLE_POLL_BIT),
 	/* double poll may active */
-	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
-	/* request has already done partial IO */
-	REQ_F_PARTIAL_IO	= BIT(REQ_F_PARTIAL_IO_BIT),
+	REQ_F_DOUBLE_POLL	= IO_REQ_FLAG(REQ_F_DOUBLE_POLL_BIT),
 	/* fast poll multishot mode */
-	REQ_F_APOLL_MULTISHOT	= BIT(REQ_F_APOLL_MULTISHOT_BIT),
+	REQ_F_APOLL_MULTISHOT	= IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
 	/* recvmsg special flag, clear EPOLLIN */
-	REQ_F_CLEAR_POLLIN	= BIT(REQ_F_CLEAR_POLLIN_BIT),
+	REQ_F_CLEAR_POLLIN	= IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
 	/* hashed into ->cancel_hash_locked, protected by ->uring_lock */
-	REQ_F_HASH_LOCKED	= BIT(REQ_F_HASH_LOCKED_BIT),
+	REQ_F_HASH_LOCKED	= IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
 	/* don't use lazy poll wake for this request */
-	REQ_F_POLL_NO_LAZY	= BIT(REQ_F_POLL_NO_LAZY_BIT),
+	REQ_F_POLL_NO_LAZY	= IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
+	/* cancel sequence is set and valid */
+	REQ_F_CANCEL_SEQ	= IO_REQ_FLAG(REQ_F_CANCEL_SEQ_BIT),
+	/* file is pollable */
+	REQ_F_CAN_POLL		= IO_REQ_FLAG(REQ_F_CAN_POLL_BIT),
+	/* buffer list was empty after selection of buffer */
+	REQ_F_BL_EMPTY		= IO_REQ_FLAG(REQ_F_BL_EMPTY_BIT),
+	/* don't recycle provided buffers for this request */
+	REQ_F_BL_NO_RECYCLE	= IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -592,15 +619,17 @@ struct io_kiocb {
 	 * and after selection it points to the buffer ID itself.
 	 */
 	u16				buf_index;
-	unsigned int			flags;
+
+	unsigned			nr_tw;
+
+	/* REQ_F_* flags */
+	io_req_flags_t			flags;
 
 	struct io_cqe			cqe;
 
 	struct io_ring_ctx		*ctx;
 	struct task_struct		*task;
 
-	struct io_rsrc_node		*rsrc_node;
-
 	union {
 		/* store used ubuf, so we can prevent reloading */
 		struct io_mapped_ubuf	*imu;
@@ -621,10 +650,12 @@ struct io_kiocb {
 		/* cache ->apoll->events */
 		__poll_t apoll_events;
 	};
+
+	struct io_rsrc_node		*rsrc_node;
+
 	atomic_t			refs;
 	atomic_t			poll_refs;
 	struct io_task_work		io_task_work;
-	unsigned			nr_tw;
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
 	struct hlist_node		hash_node;
 	/* internal polling, see IORING_FEAT_FAST_POLL */