Skip to content

Commit 58624e4

Browse files
committed
Merge tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
 "Cpuset fixes:

   - Fix isolated CPUs leaking into sched domains

   - Remove now unnecessary kernfs active break which can trigger a
     warning

   - Comment updates"

* tag 'cgroup-for-6.13-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup/cpuset: remove kernfs active break
  cgroup/cpuset: Prevent leakage of isolated CPUs into sched domains
  cgroup/cpuset: Remove stale text
2 parents 257a8be + 3cb97a9 commit 58624e4

File tree

2 files changed

+30
-47
lines changed

2 files changed

+30
-47
lines changed

kernel/cgroup/cpuset.c

Lines changed: 11 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,8 @@ static struct cpuset top_cpuset = {
197197

198198
/*
199199
* There are two global locks guarding cpuset structures - cpuset_mutex and
200-
* callback_lock. We also require taking task_lock() when dereferencing a
201-
* task's cpuset pointer. See "The task_lock() exception", at the end of this
202-
* comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems
203-
* can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
200+
* callback_lock. The cpuset code uses only cpuset_mutex. Other kernel
201+
* subsystems can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
204202
* structures. Note that cpuset_mutex needs to be a mutex as it is used in
205203
* paths that rely on priority inheritance (e.g. scheduler - on RT) for
206204
* correctness.
@@ -229,9 +227,6 @@ static struct cpuset top_cpuset = {
229227
* The cpuset_common_seq_show() handlers only hold callback_lock across
230228
* small pieces of code, such as when reading out possibly multi-word
231229
* cpumasks and nodemasks.
232-
*
233-
* Accessing a task's cpuset should be done in accordance with the
234-
* guidelines for accessing subsystem state in kernel/cgroup.c
235230
*/
236231

237232
static DEFINE_MUTEX(cpuset_mutex);
@@ -890,7 +885,15 @@ static int generate_sched_domains(cpumask_var_t **domains,
890885
*/
891886
if (cgrpv2) {
892887
for (i = 0; i < ndoms; i++) {
893-
cpumask_copy(doms[i], csa[i]->effective_cpus);
888+
/*
889+
* The top cpuset may contain some boot time isolated
890+
* CPUs that need to be excluded from the sched domain.
891+
*/
892+
if (csa[i] == &top_cpuset)
893+
cpumask_and(doms[i], csa[i]->effective_cpus,
894+
housekeeping_cpumask(HK_TYPE_DOMAIN));
895+
else
896+
cpumask_copy(doms[i], csa[i]->effective_cpus);
894897
if (dattr)
895898
dattr[i] = SD_ATTR_INIT;
896899
}
@@ -3121,29 +3124,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
31213124
int retval = -ENODEV;
31223125

31233126
buf = strstrip(buf);
3124-
3125-
/*
3126-
* CPU or memory hotunplug may leave @cs w/o any execution
3127-
* resources, in which case the hotplug code asynchronously updates
3128-
* configuration and transfers all tasks to the nearest ancestor
3129-
* which can execute.
3130-
*
3131-
* As writes to "cpus" or "mems" may restore @cs's execution
3132-
* resources, wait for the previously scheduled operations before
3133-
* proceeding, so that we don't end up keep removing tasks added
3134-
* after execution capability is restored.
3135-
*
3136-
* cpuset_handle_hotplug may call back into cgroup core asynchronously
3137-
* via cgroup_transfer_tasks() and waiting for it from a cgroupfs
3138-
* operation like this one can lead to a deadlock through kernfs
3139-
* active_ref protection. Let's break the protection. Losing the
3140-
* protection is okay as we check whether @cs is online after
3141-
* grabbing cpuset_mutex anyway. This only happens on the legacy
3142-
* hierarchies.
3143-
*/
3144-
css_get(&cs->css);
3145-
kernfs_break_active_protection(of->kn);
3146-
31473127
cpus_read_lock();
31483128
mutex_lock(&cpuset_mutex);
31493129
if (!is_cpuset_online(cs))
@@ -3176,8 +3156,6 @@ ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
31763156
out_unlock:
31773157
mutex_unlock(&cpuset_mutex);
31783158
cpus_read_unlock();
3179-
kernfs_unbreak_active_protection(of->kn);
3180-
css_put(&cs->css);
31813159
flush_workqueue(cpuset_migrate_mm_wq);
31823160
return retval ?: nbytes;
31833161
}

tools/testing/selftests/cgroup/test_cpuset_prs.sh

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus
8686

8787
#
8888
# If isolated CPUs have been reserved at boot time (as shown in
89-
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7
89+
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
9090
# that will be used by this script for testing purpose. If not, some of
91-
# the tests may fail incorrectly. These isolated CPUs will also be removed
92-
# before being compared with the expected results.
91+
# the tests may fail incorrectly. These pre-isolated CPUs should stay in
92+
# an isolated state throughout the testing process for now.
9393
#
9494
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
9595
if [[ -n "$BOOT_ISOLCPUS" ]]
9696
then
97-
[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] &&
97+
[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
9898
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
9999
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
100100
fi
@@ -683,15 +683,19 @@ check_isolcpus()
683683
EXPECT_VAL2=$EXPECT_VAL
684684
fi
685685
686+
#
687+
# Appending pre-isolated CPUs
688+
# Even though CPU #8 isn't used for testing, it can't be pre-isolated
689+
# to make appending those CPUs easier.
690+
#
691+
[[ -n "$BOOT_ISOLCPUS" ]] && {
692+
EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
693+
EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
694+
}
695+
686696
#
687697
# Check cpuset.cpus.isolated cpumask
688698
#
689-
if [[ -z "$BOOT_ISOLCPUS" ]]
690-
then
691-
ISOLCPUS=$(cat $ISCPUS)
692-
else
693-
ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
694-
fi
695699
[[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
696700
# Take a 50ms pause and try again
697701
pause 0.05
@@ -731,8 +735,6 @@ check_isolcpus()
731735
fi
732736
done
733737
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
734-
[[ -n "BOOT_ISOLCPUS" ]] &&
735-
ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
736738

737739
[[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
738740
}
@@ -836,8 +838,11 @@ run_state_test()
836838
# if available
837839
[[ -n "$ICPUS" ]] && {
838840
check_isolcpus $ICPUS
839-
[[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
840-
"Expect $ICPUS, get $ISOLCPUS instead"
841+
[[ $? -ne 0 ]] && {
842+
[[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
843+
test_fail $I "isolated CPU" \
844+
"Expect $ICPUS, get $ISOLCPUS instead"
845+
}
841846
}
842847
reset_cgroup_states
843848
#

0 commit comments

Comments
 (0)