Skip to content

Commit a22b3d5

Browse files
Waiman-Long authored and htejun committed
cgroup/cpuset: Fix race between newly created partition and dying one
There is a possible race between removing a cgroup directory that is a partition root and the creation of a new partition. The partition to be removed can be dying but still online; it does not currently participate in checking for exclusive CPU conflicts, but its exclusive CPUs are still there in subpartitions_cpus and isolated_cpus. These two cpumasks are global states that affect the operation of cpuset partitions. The exclusive CPUs in dying cpusets will only be removed when the cpuset_css_offline() function is called after an RCU delay. As a result, it is possible that a new partition can be created with exclusive CPUs that overlap with those of a dying one. When that dying partition is finally offlined, it removes those overlapping exclusive CPUs from subpartitions_cpus and possibly isolated_cpus, resulting in an incorrect CPU configuration. This bug was found when a warning was triggered in remote_partition_disable() during testing because the subpartitions_cpus mask was empty. One possible way to fix this is to iterate over the dying cpusets as well and avoid using the exclusive CPUs in those dying cpusets. However, this can still cause random partition creation failures or other anomalies due to racing. A better way to fix this race is to reset the partition state at the moment when a cpuset is being killed. Introduce a new css_killed() CSS function pointer and call it, if defined, before setting the CSS_DYING flag in kill_css(). Also update the css_is_dying() helper to use the CSS_DYING flag introduced by commit 33c35aa ("cgroup: Prevent kill_css() from being called more than once") for proper synchronization. Add a new cpuset_css_killed() function to reset the partition state of a valid partition root if it is being killed. Fixes: ee8dde0 ("cpuset: Add new v2 cpuset.sched.partition flag") Signed-off-by: Waiman Long <longman@redhat.com> Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 7d6c63c commit a22b3d5

File tree

4 files changed

+25
-4
lines changed

4 files changed

+25
-4
lines changed

include/linux/cgroup-defs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,7 @@ struct cgroup_subsys {
710710
void (*css_released)(struct cgroup_subsys_state *css);
711711
void (*css_free)(struct cgroup_subsys_state *css);
712712
void (*css_reset)(struct cgroup_subsys_state *css);
713+
void (*css_killed)(struct cgroup_subsys_state *css);
713714
void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
714715
int (*css_extra_stat_show)(struct seq_file *seq,
715716
struct cgroup_subsys_state *css);

include/linux/cgroup.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ static inline u64 cgroup_id(const struct cgroup *cgrp)
344344
*/
345345
static inline bool css_is_dying(struct cgroup_subsys_state *css)
346346
{
347-
return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt);
347+
return css->flags & CSS_DYING;
348348
}
349349

350350
static inline void cgroup_get(struct cgroup *cgrp)

kernel/cgroup/cgroup.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5909,6 +5909,12 @@ static void kill_css(struct cgroup_subsys_state *css)
59095909
if (css->flags & CSS_DYING)
59105910
return;
59115911

5912+
/*
5913+
* Call css_killed(), if defined, before setting the CSS_DYING flag
5914+
*/
5915+
if (css->ss->css_killed)
5916+
css->ss->css_killed(css);
5917+
59125918
css->flags |= CSS_DYING;
59135919

59145920
/*

kernel/cgroup/cpuset.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3536,9 +3536,6 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
35363536
cpus_read_lock();
35373537
mutex_lock(&cpuset_mutex);
35383538

3539-
if (is_partition_valid(cs))
3540-
update_prstate(cs, 0);
3541-
35423539
if (!cpuset_v2() && is_sched_load_balance(cs))
35433540
cpuset_update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
35443541

@@ -3549,6 +3546,22 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
35493546
cpus_read_unlock();
35503547
}
35513548

3549+
static void cpuset_css_killed(struct cgroup_subsys_state *css)
3550+
{
3551+
struct cpuset *cs = css_cs(css);
3552+
3553+
cpus_read_lock();
3554+
mutex_lock(&cpuset_mutex);
3555+
3556+
/* Reset valid partition back to member */
3557+
if (is_partition_valid(cs))
3558+
update_prstate(cs, PRS_MEMBER);
3559+
3560+
mutex_unlock(&cpuset_mutex);
3561+
cpus_read_unlock();
3562+
3563+
}
3564+
35523565
static void cpuset_css_free(struct cgroup_subsys_state *css)
35533566
{
35543567
struct cpuset *cs = css_cs(css);
@@ -3670,6 +3683,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
36703683
.css_alloc = cpuset_css_alloc,
36713684
.css_online = cpuset_css_online,
36723685
.css_offline = cpuset_css_offline,
3686+
.css_killed = cpuset_css_killed,
36733687
.css_free = cpuset_css_free,
36743688
.can_attach = cpuset_can_attach,
36753689
.cancel_attach = cpuset_cancel_attach,

0 commit comments

Comments
 (0)