
Commit 4c065db

Waiman-Long authored and htejun committed
workqueue: Enable unbound cpumask update on ordered workqueues
Ordered workqueues do not currently follow changes made to the global
unbound cpumask, because per-pool workqueue changes may break the ordering
guarantee. IOW, a work function in an ordered workqueue may end up running
on an isolated CPU.

This patch enables ordered workqueues to follow changes made to the global
unbound cpumask by temporarily plugging (suspending) the newly allocated
pool_workqueue so that it does not execute newly queued work items until
the old pwq has been properly drained. For ordered workqueues, there should
only be one unplugged pwq at any time; the rest should be plugged.

This enables ordered workqueues to follow unbound cpumask changes like
other unbound workqueues, at the expense of some delay in the execution of
work functions during the transition period.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
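To make the user-visible effect concrete, here is a minimal sketch of a kernel module that uses an ordered workqueue. The module and symbol names are hypothetical; alloc_ordered_workqueue(), DECLARE_WORK() and queue_work() are the standard workqueue APIs. With this patch, updating the global unbound cpumask (e.g. by writing to /sys/devices/virtual/workqueue/cpumask) now migrates such a workqueue as well, instead of skipping it:

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/smp.h>

/* Hypothetical demo module; only the workqueue APIs are real. */
static struct workqueue_struct *demo_wq;

static void demo_work_fn(struct work_struct *work)
{
	/*
	 * After this patch, once the global unbound cpumask is changed
	 * (e.g. echo <mask> > /sys/devices/virtual/workqueue/cpumask),
	 * this runs within the new mask as soon as the old
	 * pool_workqueue has drained.
	 */
	pr_info("demo work on CPU %d\n", raw_smp_processor_id());
}

static DECLARE_WORK(demo_work, demo_work_fn);

static int __init demo_init(void)
{
	/* Ordered: at most one work item executes at a time, in order. */
	demo_wq = alloc_ordered_workqueue("demo_ordered", 0);
	if (!demo_wq)
		return -ENOMEM;
	queue_work(demo_wq, &demo_work);
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_DESCRIPTION("Ordered workqueue cpumask demo (illustrative)");
MODULE_LICENSE("GPL");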
1 parent 26fb7e3 commit 4c065db

File tree: 1 file changed (+59, -10 lines)


kernel/workqueue.c

Lines changed: 59 additions & 10 deletions
@@ -255,6 +255,7 @@ struct pool_workqueue {
 	int			refcnt;		/* L: reference count */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
+	bool			plugged;	/* L: execution suspended */
 
 	/*
 	 * nr_active management and WORK_STRUCT_INACTIVE:
@@ -1708,6 +1709,9 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
 		goto out;
 	}
 
+	if (unlikely(pwq->plugged))
+		return false;
+
 	/*
 	 * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is
 	 * already waiting on $nna, pwq_dec_nr_active() will maintain the
@@ -1782,6 +1786,43 @@ static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill)
 	}
 }
 
+/**
+ * unplug_oldest_pwq - restart the oldest plugged pool_workqueue
+ * @wq: workqueue_struct to be restarted
+ *
+ * pwq's are linked into wq->pwqs with the oldest first. For ordered
+ * workqueues, only the oldest pwq is unplugged, the others are plugged to
+ * suspend execution until the oldest one is drained. When this happens, the
+ * next oldest one (first plugged pwq in iteration) will be unplugged to
+ * restart work item execution to ensure proper work item ordering.
+ *
+ *    dfl_pwq --------------+     [P] - plugged
+ *                          |
+ *                          v
+ *    pwqs -> A -> B [P] -> C [P] (newest)
+ *            |    |        |
+ *            1    3        5
+ *            |    |        |
+ *            2    4        6
+ */
+static void unplug_oldest_pwq(struct workqueue_struct *wq)
+{
+	struct pool_workqueue *pwq;
+
+	lockdep_assert_held(&wq->mutex);
+
+	/* Caller should make sure that pwqs isn't empty before calling */
+	pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue,
+				       pwqs_node);
+	raw_spin_lock_irq(&pwq->pool->lock);
+	if (pwq->plugged) {
+		pwq->plugged = false;
+		if (pwq_activate_first_inactive(pwq, true))
+			kick_pool(pwq->pool);
+	}
+	raw_spin_unlock_irq(&pwq->pool->lock);
+}
+
 /**
  * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
  * @nna: wq_node_nr_active to activate a pending pwq for
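To illustrate the handoff described above, here is a small user-space model (invented names, deliberately simplified, not kernel code): pwqs form an oldest-first list, only the head runs, and retiring a drained head unplugs its successor, mirroring unplug_oldest_pwq() and the pwq_release_workfn() change below:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy user-space model of plugged pool_workqueues; not kernel code. */
struct toy_pwq {
	bool plugged;		/* execution suspended */
	int pending;		/* queued work items */
	struct toy_pwq *next;	/* oldest -> newest */
};

/* Like unplug_oldest_pwq(): only the oldest (head) pwq may execute. */
static void toy_unplug_oldest(struct toy_pwq *head)
{
	if (head && head->plugged) {
		head->plugged = false;
		printf("unplugged pwq with %d pending item(s)\n", head->pending);
	}
}

/* Like the pwq_release_workfn() change: retire a drained head and
 * restart the next-oldest pwq (the "!is_last && __WQ_ORDERED" case). */
static struct toy_pwq *toy_retire_head(struct toy_pwq *head)
{
	struct toy_pwq *next = head->next;

	free(head);
	toy_unplug_oldest(next);
	return next;
}

int main(void)
{
	/* A cpumask update allocates a new pwq, which starts plugged. */
	struct toy_pwq *b = calloc(1, sizeof(*b));
	struct toy_pwq *a = calloc(1, sizeof(*a));

	if (!a || !b)
		return 1;
	b->plugged = true;
	b->pending = 2;
	a->pending = 1;		/* old pwq: unplugged, still draining */
	a->next = b;

	/* Once the old pwq drains, the new one is unplugged and runs. */
	while (a)
		a = toy_retire_head(a);
	return 0;
}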
@@ -4740,6 +4781,13 @@ static void pwq_release_workfn(struct kthread_work *work)
 		mutex_lock(&wq->mutex);
 		list_del_rcu(&pwq->pwqs_node);
 		is_last = list_empty(&wq->pwqs);
+
+		/*
+		 * For an ordered workqueue with a plugged dfl_pwq, restart it now.
+		 */
+		if (!is_last && (wq->flags & __WQ_ORDERED))
+			unplug_oldest_pwq(wq);
+
 		mutex_unlock(&wq->mutex);
 	}

@@ -4966,6 +5014,15 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
 	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
 	ctx->attrs = new_attrs;
 
+	/*
+	 * For initialized ordered workqueues, there should only be one pwq
+	 * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution
+	 * of newly queued work items until execution of older work items in
+	 * the old pwqs has completed.
+	 */
+	if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))
+		ctx->dfl_pwq->plugged = true;
+
 	ctx->wq = wq;
 	return ctx;

@@ -5006,10 +5063,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
 		return -EINVAL;
 
-	/* creating multiple pwqs breaks ordering guarantee */
-	if (!list_empty(&wq->pwqs) && WARN_ON(wq->flags & __WQ_ORDERED))
-		return -EINVAL;
-
 	ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
@@ -6489,9 +6542,6 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
 	list_for_each_entry(wq, &workqueues, list) {
 		if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING))
 			continue;
-		/* creating multiple pwqs breaks ordering guarantee */
-		if (wq->flags & __WQ_ORDERED)
-			continue;
 
 		ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
 		if (IS_ERR(ctx)) {
@@ -7006,9 +7056,8 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 	int ret;
 
 	/*
-	 * Adjusting max_active or creating new pwqs by applying
-	 * attributes breaks ordering guarantee. Disallow exposing ordered
-	 * workqueues.
+	 * Adjusting max_active breaks ordering guarantee. Disallow exposing
+	 * ordered workqueues.
 	 */
 	if (WARN_ON(wq->flags & __WQ_ORDERED))
 		return -EINVAL;
