Skip to content

Commit 361aee4

Browse files
committed
io-wq: add intermediate work step between pending list and active work
We have a gap where a worker removes an item from the work list and to when it gets added as the workers active work. In this state, the work item cannot be found by cancelations. This is a small window, but it does exist. Add a temporary pointer to a work item that isn't on the pending work list anymore, but also not the active work. This is needed as we need to drop the wqe lock in between grabbing the work item and marking it as active, to ensure that signal based cancelations are properly ordered. Reported-by: Florian Fischer <florian.fl.fischer@fau.de> Link: https://lore.kernel.org/io-uring/20220118151337.fac6cthvbnu7icoc@pasture/ Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent efdf518 commit 361aee4

File tree

1 file changed

+29
-5
lines changed

1 file changed

+29
-5
lines changed

fs/io-wq.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ struct io_worker {
4848
struct io_wqe *wqe;
4949

5050
struct io_wq_work *cur_work;
51+
struct io_wq_work *next_work;
5152
raw_spinlock_t lock;
5253

5354
struct completion ref_done;
@@ -530,6 +531,7 @@ static void io_assign_current_work(struct io_worker *worker,
530531

531532
raw_spin_lock(&worker->lock);
532533
worker->cur_work = work;
534+
worker->next_work = NULL;
533535
raw_spin_unlock(&worker->lock);
534536
}
535537

@@ -554,9 +556,20 @@ static void io_worker_handle_work(struct io_worker *worker)
554556
* clear the stalled flag.
555557
*/
556558
work = io_get_next_work(acct, worker);
557-
if (work)
559+
if (work) {
558560
__io_worker_busy(wqe, worker);
559561

562+
/*
563+
* Make sure cancelation can find this, even before
564+
* it becomes the active work. That avoids a window
565+
* where the work has been removed from our general
566+
* work list, but isn't yet discoverable as the
567+
* current work item for this worker.
568+
*/
569+
raw_spin_lock(&worker->lock);
570+
worker->next_work = work;
571+
raw_spin_unlock(&worker->lock);
572+
}
560573
raw_spin_unlock(&wqe->lock);
561574
if (!work)
562575
break;
@@ -972,6 +985,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
972985
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
973986
}
974987

988+
static bool __io_wq_worker_cancel(struct io_worker *worker,
989+
struct io_cb_cancel_data *match,
990+
struct io_wq_work *work)
991+
{
992+
if (work && match->fn(work, match->data)) {
993+
work->flags |= IO_WQ_WORK_CANCEL;
994+
set_notify_signal(worker->task);
995+
return true;
996+
}
997+
998+
return false;
999+
}
1000+
9751001
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
9761002
{
9771003
struct io_cb_cancel_data *match = data;
@@ -981,11 +1007,9 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
9811007
* may dereference the passed in work.
9821008
*/
9831009
raw_spin_lock(&worker->lock);
984-
if (worker->cur_work &&
985-
match->fn(worker->cur_work, match->data)) {
986-
set_notify_signal(worker->task);
1010+
if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
1011+
__io_wq_worker_cancel(worker, match, worker->next_work))
9871012
match->nr_running++;
988-
}
9891013
raw_spin_unlock(&worker->lock);
9901014

9911015
return match->nr_running && !match->cancel_all;

0 commit comments

Comments
 (0)