
Commit 3c474b3

Peter Zijlstra authored and committed
sched: Fix Core-wide rq->lock for uninitialized CPUs
Eugene tripped over the case where rq_lock(), as called in a for_each_possible_cpu() loop, came apart because rq->core hadn't been set up yet.

This is a somewhat unusual, but valid case.

Rework things such that rq->core is initialized to point at itself. IOW, initialize each CPU as a single-threaded Core. CPU online will then join the new CPU (thread) to an existing Core where needed.

For completeness' sake, have CPU offline fully undo the state so as to not presume the topology will match the next time it comes online.

Fixes: 9edeaea ("sched: Core-wide rq->lock")
Reported-by: Eugene Syromiatnikov <esyr@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Josh Don <joshdon@google.com>
Tested-by: Eugene Syromiatnikov <esyr@redhat.com>
Link: https://lkml.kernel.org/r/YR473ZGeKqMs6kw+@hirez.programming.kicks-ass.net
1 parent 7c60610 commit 3c474b3
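
For context on the failure mode described in the message above: with CONFIG_SCHED_CORE, the rq locking helpers resolve which raw spinlock to take through rq->core (see the rq_lockp()/__rq_lockp() helpers in kernel/sched/sched.h). A simplified sketch of that resolution, illustrative only and not the exact kernel code, shows why a NULL rq->core on a possible-but-never-onlined CPU falls apart, and why having every rq point at itself is a safe default:

    /*
     * Illustrative sketch, modeled on the rq_lockp() helpers: core
     * scheduling redirects rq locking to the SMT core's leader rq.
     */
    static inline raw_spinlock_t *rq_lockp_sketch(struct rq *rq)
    {
            if (sched_core_enabled(rq))
                    return &rq->core->__lock;   /* rq->core == NULL would oops here */

            return &rq->__lock;
    }

With this patch every rq starts out with rq->core == rq, so even a CPU that was never onlined resolves to its own __lock; sched_core_cpu_starting() later re-points rq->core at the core's existing leader once SMT siblings come online, and sched_core_cpu_deactivate()/sched_core_cpu_dying() undo that on the way down.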

2 files changed: +118, -27 lines


kernel/sched/core.c

Lines changed: 117 additions & 26 deletions
@@ -237,9 +237,30 @@ static DEFINE_MUTEX(sched_core_mutex);
 static atomic_t sched_core_count;
 static struct cpumask sched_core_mask;
 
+static void sched_core_lock(int cpu, unsigned long *flags)
+{
+        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+        int t, i = 0;
+
+        local_irq_save(*flags);
+        for_each_cpu(t, smt_mask)
+                raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+}
+
+static void sched_core_unlock(int cpu, unsigned long *flags)
+{
+        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+        int t;
+
+        for_each_cpu(t, smt_mask)
+                raw_spin_unlock(&cpu_rq(t)->__lock);
+        local_irq_restore(*flags);
+}
+
 static void __sched_core_flip(bool enabled)
 {
-        int cpu, t, i;
+        unsigned long flags;
+        int cpu, t;
 
         cpus_read_lock();
 
@@ -250,19 +271,12 @@ static void __sched_core_flip(bool enabled)
         for_each_cpu(cpu, &sched_core_mask) {
                 const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 
-                i = 0;
-                local_irq_disable();
-                for_each_cpu(t, smt_mask) {
-                        /* supports up to SMT8 */
-                        raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
-                }
+                sched_core_lock(cpu, &flags);
 
                 for_each_cpu(t, smt_mask)
                         cpu_rq(t)->core_enabled = enabled;
 
-                for_each_cpu(t, smt_mask)
-                        raw_spin_unlock(&cpu_rq(t)->__lock);
-                local_irq_enable();
+                sched_core_unlock(cpu, &flags);
 
                 cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
         }
@@ -5736,35 +5750,109 @@ void queue_core_balance(struct rq *rq)
         queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }
 
-static inline void sched_core_cpu_starting(unsigned int cpu)
+static void sched_core_cpu_starting(unsigned int cpu)
 {
         const struct cpumask *smt_mask = cpu_smt_mask(cpu);
-        struct rq *rq, *core_rq = NULL;
-        int i;
+        struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+        unsigned long flags;
+        int t;
 
-        core_rq = cpu_rq(cpu)->core;
+        sched_core_lock(cpu, &flags);
 
-        if (!core_rq) {
-                for_each_cpu(i, smt_mask) {
-                        rq = cpu_rq(i);
-                        if (rq->core && rq->core == rq)
-                                core_rq = rq;
+        WARN_ON_ONCE(rq->core != rq);
+
+        /* if we're the first, we'll be our own leader */
+        if (cpumask_weight(smt_mask) == 1)
+                goto unlock;
+
+        /* find the leader */
+        for_each_cpu(t, smt_mask) {
+                if (t == cpu)
+                        continue;
+                rq = cpu_rq(t);
+                if (rq->core == rq) {
+                        core_rq = rq;
+                        break;
                 }
+        }
 
-                if (!core_rq)
-                        core_rq = cpu_rq(cpu);
+        if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
+                goto unlock;
 
-                for_each_cpu(i, smt_mask) {
-                        rq = cpu_rq(i);
+        /* install and validate core_rq */
+        for_each_cpu(t, smt_mask) {
+                rq = cpu_rq(t);
 
-                        WARN_ON_ONCE(rq->core && rq->core != core_rq);
+                if (t == cpu)
                         rq->core = core_rq;
-                }
+
+                WARN_ON_ONCE(rq->core != core_rq);
         }
+
+unlock:
+        sched_core_unlock(cpu, &flags);
 }
+
+static void sched_core_cpu_deactivate(unsigned int cpu)
+{
+        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+        struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+        unsigned long flags;
+        int t;
+
+        sched_core_lock(cpu, &flags);
+
+        /* if we're the last man standing, nothing to do */
+        if (cpumask_weight(smt_mask) == 1) {
+                WARN_ON_ONCE(rq->core != rq);
+                goto unlock;
+        }
+
+        /* if we're not the leader, nothing to do */
+        if (rq->core != rq)
+                goto unlock;
+
+        /* find a new leader */
+        for_each_cpu(t, smt_mask) {
+                if (t == cpu)
+                        continue;
+                core_rq = cpu_rq(t);
+                break;
+        }
+
+        if (WARN_ON_ONCE(!core_rq)) /* impossible */
+                goto unlock;
+
+        /* copy the shared state to the new leader */
+        core_rq->core_task_seq      = rq->core_task_seq;
+        core_rq->core_pick_seq      = rq->core_pick_seq;
+        core_rq->core_cookie        = rq->core_cookie;
+        core_rq->core_forceidle     = rq->core_forceidle;
+        core_rq->core_forceidle_seq = rq->core_forceidle_seq;
+
+        /* install new leader */
+        for_each_cpu(t, smt_mask) {
+                rq = cpu_rq(t);
+                rq->core = core_rq;
+        }
+
+unlock:
+        sched_core_unlock(cpu, &flags);
+}
+
+static inline void sched_core_cpu_dying(unsigned int cpu)
+{
+        struct rq *rq = cpu_rq(cpu);
+
+        if (rq->core != rq)
+                rq->core = rq;
+}
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline void sched_core_cpu_starting(unsigned int cpu) {}
+static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
+static inline void sched_core_cpu_dying(unsigned int cpu) {}
 
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
@@ -8707,6 +8795,8 @@ int sched_cpu_deactivate(unsigned int cpu)
          */
         if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
                 static_branch_dec_cpuslocked(&sched_smt_present);
+
+        sched_core_cpu_deactivate(cpu);
 #endif
 
         if (!sched_smp_initialized)
@@ -8811,6 +8901,7 @@ int sched_cpu_dying(unsigned int cpu)
         calc_load_migrate(rq);
         update_max_interval();
         hrtick_clear(rq);
+        sched_core_cpu_dying(cpu);
         return 0;
 }
 #endif
@@ -9022,7 +9113,7 @@ void __init sched_init(void)
                 atomic_set(&rq->nr_iowait, 0);
 
 #ifdef CONFIG_SCHED_CORE
-                rq->core = NULL;
+                rq->core = rq;
                 rq->core_pick = NULL;
                 rq->core_enabled = 0;
                 rq->core_tree = RB_ROOT;
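
The new sched_core_lock()/sched_core_unlock() helpers factor out the interrupts-off, lockdep-nested acquisition of every sibling's rq->__lock in the SMT mask. Both __sched_core_flip() and the hotplug paths above follow the same pattern; roughly (illustrative only, not a new code path added by the patch):

    unsigned long flags;

    sched_core_lock(cpu, &flags);    /* IRQs off, __lock held for each CPU in cpu_smt_mask(cpu) */
    /* ... update rq->core / rq->core_enabled for every sibling ... */
    sched_core_unlock(cpu, &flags);  /* drop all sibling locks, restore IRQs */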

kernel/sched/sched.h

Lines changed: 1 addition & 1 deletion
@@ -1093,7 +1093,7 @@ struct rq {
         unsigned int            core_sched_seq;
         struct rb_root          core_tree;
 
-        /* shared state */
+        /* shared state -- careful with sched_core_cpu_deactivate() */
         unsigned int            core_task_seq;
         unsigned int            core_pick_seq;
         unsigned long           core_cookie;
