Skip to content

Commit c4f135d

Browse files
Tetsuo Handa authored and htejun committed
workqueue: Wrap flush_workqueue() using a macro
Since a flush operation synchronously waits for completion, flushing system-wide WQs (e.g. system_wq) might introduce the possibility of deadlock due to an unexpected locking dependency. Tejun Heo commented at [1] that it makes no sense at all to call flush_workqueue() on the shared WQs as the caller has no idea what it's gonna end up waiting for.

Although there is flush_scheduled_work(), which flushes the system_wq WQ and carries the warning message "Think twice before calling this function! It's very easy to get into trouble if you don't take great care.", syzbot found a circular locking dependency caused by flushing the system_wq WQ [2].

Therefore, let's change direction so that developers use their own local WQs if flush_scheduled_work()/flush_workqueue(system_*_wq) is inevitable.

Steps for converting system-wide WQs into local WQs are explained at [3], and a conversion to stop flushing system-wide WQs is in progress. Now we want some mechanism for preventing developers who are not aware of this conversion from starting to flush system-wide WQs again.

Since I found that WARN_ON() is a complete but awkward approach for teaching developers about this problem, let's use __compiletime_warning() as an incomplete but handy approach. For completeness, we will also insert WARN_ON() into __flush_workqueue() after all in-tree users have stopped calling flush_scheduled_work().

Link: https://lore.kernel.org/all/YgnQGZWT%2Fn3VAITX@slm.duckdns.org/ [1]
Link: https://syzkaller.appspot.com/bug?extid=bde0f89deacca7c765b8 [2]
Link: https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp [3]
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent e71e60c commit c4f135d

File tree

2 files changed

+68
-12
lines changed

2 files changed

+68
-12
lines changed

include/linux/workqueue.h

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
445445
struct delayed_work *dwork, unsigned long delay);
446446
extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);
447447

448-
extern void flush_workqueue(struct workqueue_struct *wq);
448+
extern void __flush_workqueue(struct workqueue_struct *wq);
449449
extern void drain_workqueue(struct workqueue_struct *wq);
450450

451451
extern int schedule_on_each_cpu(work_func_t func);
@@ -563,15 +563,23 @@ static inline bool schedule_work(struct work_struct *work)
563563
return queue_work(system_wq, work);
564564
}
565565

566+
/*
567+
* Detect attempt to flush system-wide workqueues at compile time when possible.
568+
*
569+
* See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
570+
* for reasons and steps for converting system-wide workqueues into local workqueues.
571+
*/
572+
extern void __warn_flushing_systemwide_wq(void)
573+
__compiletime_warning("Please avoid flushing system-wide workqueues.");
574+
566575
/**
567576
* flush_scheduled_work - ensure that any scheduled work has run to completion.
568577
*
569578
* Forces execution of the kernel-global workqueue and blocks until its
570579
* completion.
571580
*
572-
* Think twice before calling this function! It's very easy to get into
573-
* trouble if you don't take great care. Either of the following situations
574-
* will lead to deadlock:
581+
* It's very easy to get into trouble if you don't take great care.
582+
* Either of the following situations will lead to deadlock:
575583
*
576584
* One of the work items currently on the workqueue needs to acquire
577585
* a lock held by your code or its caller.
@@ -586,11 +594,51 @@ static inline bool schedule_work(struct work_struct *work)
586594
* need to know that a particular work item isn't queued and isn't running.
587595
* In such cases you should use cancel_delayed_work_sync() or
588596
* cancel_work_sync() instead.
597+
*
598+
* Please stop calling this function! A conversion to stop flushing system-wide
599+
* workqueues is in progress. This function will be removed after all in-tree
600+
* users have stopped calling this function.
589601
*/
590-
static inline void flush_scheduled_work(void)
591-
{
592-
flush_workqueue(system_wq);
593-
}
602+
/*
603+
* The background of commit 771c035372a036f8 ("deprecate the
604+
* '__deprecated' attribute warnings entirely and for good") is that,
605+
* since Linus builds all modules between every single pull he does,
606+
* the standard kernel build needs to be _clean_ in order to be able to
607+
* notice when new problems happen. Therefore, don't emit warning while
608+
* there are in-tree users.
609+
*/
610+
#define flush_scheduled_work() \
611+
({ \
612+
if (0) \
613+
__warn_flushing_systemwide_wq(); \
614+
__flush_workqueue(system_wq); \
615+
})
616+
617+
/*
618+
* Although there are no longer any in-tree callers, for now just emit a warning
619+
* in order to give out-of-tree callers time to update.
620+
*/
621+
#define flush_workqueue(wq) \
622+
({ \
623+
struct workqueue_struct *_wq = (wq); \
624+
\
625+
if ((__builtin_constant_p(_wq == system_wq) && \
626+
_wq == system_wq) || \
627+
(__builtin_constant_p(_wq == system_highpri_wq) && \
628+
_wq == system_highpri_wq) || \
629+
(__builtin_constant_p(_wq == system_long_wq) && \
630+
_wq == system_long_wq) || \
631+
(__builtin_constant_p(_wq == system_unbound_wq) && \
632+
_wq == system_unbound_wq) || \
633+
(__builtin_constant_p(_wq == system_freezable_wq) && \
634+
_wq == system_freezable_wq) || \
635+
(__builtin_constant_p(_wq == system_power_efficient_wq) && \
636+
_wq == system_power_efficient_wq) || \
637+
(__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \
638+
_wq == system_freezable_power_efficient_wq)) \
639+
__warn_flushing_systemwide_wq(); \
640+
__flush_workqueue(_wq); \
641+
})
594642

595643
/**
596644
* schedule_delayed_work_on - queue work in global workqueue on CPU after delay

kernel/workqueue.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2788,13 +2788,13 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
27882788
}
27892789

27902790
/**
2791-
* flush_workqueue - ensure that any scheduled work has run to completion.
2791+
* __flush_workqueue - ensure that any scheduled work has run to completion.
27922792
* @wq: workqueue to flush
27932793
*
27942794
* This function sleeps until all work items which were queued on entry
27952795
* have finished execution, but it is not livelocked by new incoming ones.
27962796
*/
2797-
void flush_workqueue(struct workqueue_struct *wq)
2797+
void __flush_workqueue(struct workqueue_struct *wq)
27982798
{
27992799
struct wq_flusher this_flusher = {
28002800
.list = LIST_HEAD_INIT(this_flusher.list),
@@ -2943,7 +2943,7 @@ void flush_workqueue(struct workqueue_struct *wq)
29432943
out_unlock:
29442944
mutex_unlock(&wq->mutex);
29452945
}
2946-
EXPORT_SYMBOL(flush_workqueue);
2946+
EXPORT_SYMBOL(__flush_workqueue);
29472947

29482948
/**
29492949
* drain_workqueue - drain a workqueue
@@ -2971,7 +2971,7 @@ void drain_workqueue(struct workqueue_struct *wq)
29712971
wq->flags |= __WQ_DRAINING;
29722972
mutex_unlock(&wq->mutex);
29732973
reflush:
2974-
flush_workqueue(wq);
2974+
__flush_workqueue(wq);
29752975

29762976
mutex_lock(&wq->mutex);
29772977

@@ -6111,3 +6111,11 @@ void __init workqueue_init(void)
61116111
wq_online = true;
61126112
wq_watchdog_init();
61136113
}
6114+
6115+
/*
6116+
* Despite the naming, this is a no-op function which is here only for avoiding
6117+
* link error. Since the compile-time warning may miss some callers, we will need to
6118+
* emit run-time warning from __flush_workqueue().
6119+
*/
6120+
void __warn_flushing_systemwide_wq(void) { }
6121+
EXPORT_SYMBOL(__warn_flushing_systemwide_wq);

0 commit comments

Comments
 (0)