Skip to content

Commit fb70081

Browse files
James Morse authored and bp3tk0v committed
x86/resctrl: Separate arch and fs resctrl locks
resctrl has one mutex that is taken by the architecture-specific code, and the filesystem parts. The two interact via cpuhp, where the architecture code updates the domain list. Filesystem handlers that walk the domains list should not run concurrently with the cpuhp callback modifying the list. Exposing a lock from the filesystem code means the interface is not cleanly defined, and creates the possibility of cross-architecture lock ordering headaches. The interaction only exists so that certain filesystem paths are serialised against CPU hotplug. The CPU hotplug code already has a mechanism to do this using cpus_read_lock(). MPAM's monitors have an overflow interrupt, so it needs to be possible to walk the domains list in irq context. RCU is ideal for this, but some paths need to be able to sleep to allocate memory. Because resctrl_{on,off}line_cpu() take the rdtgroup_mutex as part of a cpuhp callback, cpus_read_lock() must always be taken first. rdtgroup_schemata_write() already does this. Most of the filesystem code's domain list walkers are currently protected by the rdtgroup_mutex taken in rdtgroup_kn_lock_live(). The exceptions are rdt_bit_usage_show() and the mon_config helpers which take the lock directly. Make the domain list protected by RCU. An architecture-specific lock prevents concurrent writers. rdt_bit_usage_show() could walk the domain list using RCU, but to keep all the filesystem operations the same, this is changed to call cpus_read_lock(). The mon_config helpers send multiple IPIs, take the cpus_read_lock() in these cases. The other filesystem list walkers need to be able to sleep. Add cpus_read_lock() to rdtgroup_kn_lock_live() so that the cpuhp callbacks can't be invoked when file system operations are occurring. Add lockdep_assert_cpus_held() in the cases where the rdtgroup_kn_lock_live() call isn't obvious. Resctrl's domain online/offline calls now need to take the rdtgroup_mutex themselves. 
[ bp: Fold in a build fix: https://lore.kernel.org/r/87zfvwieli.ffs@tglx ] Signed-off-by: James Morse <james.morse@arm.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Reviewed-by: Babu Moger <babu.moger@amd.com> Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com> Tested-by: Peter Newman <peternewman@google.com> Tested-by: Babu Moger <babu.moger@amd.com> Tested-by: Carl Worth <carl@os.amperecomputing.com> # arm64 Link: https://lore.kernel.org/r/20240213184438.16675-25-james.morse@arm.com Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
1 parent eeff1d4 commit fb70081

File tree

6 files changed

+112
-28
lines changed

6 files changed

+112
-28
lines changed

arch/x86/kernel/cpu/resctrl/core.c

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#define pr_fmt(fmt) "resctrl: " fmt
1818

19+
#include <linux/cpu.h>
1920
#include <linux/slab.h>
2021
#include <linux/err.h>
2122
#include <linux/cacheinfo.h>
@@ -25,8 +26,15 @@
2526
#include <asm/resctrl.h>
2627
#include "internal.h"
2728

28-
/* Mutex to protect rdtgroup access. */
29-
DEFINE_MUTEX(rdtgroup_mutex);
29+
/*
30+
* rdt_domain structures are kfree()d when their last CPU goes offline,
31+
* and allocated when the first CPU in a new domain comes online.
32+
* The rdt_resource's domain list is updated when this happens. Readers of
33+
* the domain list must either take cpus_read_lock(), or rely on an RCU
34+
* read-side critical section, to avoid observing concurrent modification.
35+
* All writers take this mutex:
36+
*/
37+
static DEFINE_MUTEX(domain_list_lock);
3038

3139
/*
3240
* The cached resctrl_pqr_state is strictly per CPU and can never be
@@ -354,6 +362,15 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r)
354362
{
355363
struct rdt_domain *d;
356364

365+
/*
366+
* Walking r->domains, ensure it can't race with cpuhp.
367+
* Because this is called via IPI by rdt_ctrl_update(), assertions
368+
* about locks this thread holds will lead to false positives. Check
369+
* someone is holding the CPUs lock.
370+
*/
371+
if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && IS_ENABLED(CONFIG_LOCKDEP))
372+
WARN_ON_ONCE(!lockdep_is_cpus_held());
373+
357374
list_for_each_entry(d, &r->domains, list) {
358375
/* Find the domain that contains this CPU */
359376
if (cpumask_test_cpu(cpu, &d->cpu_mask))
@@ -510,6 +527,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
510527
struct rdt_domain *d;
511528
int err;
512529

530+
lockdep_assert_held(&domain_list_lock);
531+
513532
d = rdt_find_domain(r, id, &add_pos);
514533
if (IS_ERR(d)) {
515534
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -543,11 +562,12 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
543562
return;
544563
}
545564

546-
list_add_tail(&d->list, add_pos);
565+
list_add_tail_rcu(&d->list, add_pos);
547566

548567
err = resctrl_online_domain(r, d);
549568
if (err) {
550-
list_del(&d->list);
569+
list_del_rcu(&d->list);
570+
synchronize_rcu();
551571
domain_free(hw_dom);
552572
}
553573
}
@@ -558,6 +578,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
558578
struct rdt_hw_domain *hw_dom;
559579
struct rdt_domain *d;
560580

581+
lockdep_assert_held(&domain_list_lock);
582+
561583
d = rdt_find_domain(r, id, NULL);
562584
if (IS_ERR_OR_NULL(d)) {
563585
pr_warn("Couldn't find cache id for CPU %d\n", cpu);
@@ -568,7 +590,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
568590
cpumask_clear_cpu(cpu, &d->cpu_mask);
569591
if (cpumask_empty(&d->cpu_mask)) {
570592
resctrl_offline_domain(r, d);
571-
list_del(&d->list);
593+
list_del_rcu(&d->list);
594+
synchronize_rcu();
572595

573596
/*
574597
* rdt_domain "d" is going to be freed below, so clear
@@ -598,13 +621,13 @@ static int resctrl_arch_online_cpu(unsigned int cpu)
598621
{
599622
struct rdt_resource *r;
600623

601-
mutex_lock(&rdtgroup_mutex);
624+
mutex_lock(&domain_list_lock);
602625
for_each_capable_rdt_resource(r)
603626
domain_add_cpu(cpu, r);
604-
clear_closid_rmid(cpu);
627+
mutex_unlock(&domain_list_lock);
605628

629+
clear_closid_rmid(cpu);
606630
resctrl_online_cpu(cpu);
607-
mutex_unlock(&rdtgroup_mutex);
608631

609632
return 0;
610633
}
@@ -613,13 +636,14 @@ static int resctrl_arch_offline_cpu(unsigned int cpu)
613636
{
614637
struct rdt_resource *r;
615638

616-
mutex_lock(&rdtgroup_mutex);
617639
resctrl_offline_cpu(cpu);
618640

641+
mutex_lock(&domain_list_lock);
619642
for_each_capable_rdt_resource(r)
620643
domain_remove_cpu(cpu, r);
644+
mutex_unlock(&domain_list_lock);
645+
621646
clear_closid_rmid(cpu);
622-
mutex_unlock(&rdtgroup_mutex);
623647

624648
return 0;
625649
}

arch/x86/kernel/cpu/resctrl/ctrlmondata.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ static int parse_line(char *line, struct resctrl_schema *s,
212212
struct rdt_domain *d;
213213
unsigned long dom_id;
214214

215+
/* Walking r->domains, ensure it can't race with cpuhp */
216+
lockdep_assert_cpus_held();
217+
215218
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
216219
(r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)) {
217220
rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
@@ -316,6 +319,9 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid)
316319
struct rdt_domain *d;
317320
u32 idx;
318321

322+
/* Walking r->domains, ensure it can't race with cpuhp */
323+
lockdep_assert_cpus_held();
324+
319325
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
320326
return -ENOMEM;
321327

@@ -381,11 +387,9 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
381387
return -EINVAL;
382388
buf[nbytes - 1] = '\0';
383389

384-
cpus_read_lock();
385390
rdtgrp = rdtgroup_kn_lock_live(of->kn);
386391
if (!rdtgrp) {
387392
rdtgroup_kn_unlock(of->kn);
388-
cpus_read_unlock();
389393
return -ENOENT;
390394
}
391395
rdt_last_cmd_clear();
@@ -447,7 +451,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
447451
out:
448452
rdt_staged_configs_clear();
449453
rdtgroup_kn_unlock(of->kn);
450-
cpus_read_unlock();
451454
return ret ?: nbytes;
452455
}
453456

@@ -467,6 +470,9 @@ static void show_doms(struct seq_file *s, struct resctrl_schema *schema, int clo
467470
bool sep = false;
468471
u32 ctrl_val;
469472

473+
/* Walking r->domains, ensure it can't race with cpuhp */
474+
lockdep_assert_cpus_held();
475+
470476
seq_printf(s, "%*s:", max_name_width, schema->name);
471477
list_for_each_entry(dom, &r->domains, list) {
472478
if (sep)
@@ -537,6 +543,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
537543
{
538544
int cpu;
539545

546+
/* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
547+
lockdep_assert_cpus_held();
548+
540549
/*
541550
* Setup the parameters to pass to mon_event_count() to read the data.
542551
*/

arch/x86/kernel/cpu/resctrl/monitor.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* Software Developer Manual June 2016, volume 3, section 17.17.
1616
*/
1717

18+
#include <linux/cpu.h>
1819
#include <linux/module.h>
1920
#include <linux/sizes.h>
2021
#include <linux/slab.h>
@@ -472,6 +473,9 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
472473

473474
lockdep_assert_held(&rdtgroup_mutex);
474475

476+
/* Walking r->domains, ensure it can't race with cpuhp */
477+
lockdep_assert_cpus_held();
478+
475479
idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
476480

477481
entry->busy = 0;
@@ -778,6 +782,7 @@ void cqm_handle_limbo(struct work_struct *work)
778782
unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
779783
struct rdt_domain *d;
780784

785+
cpus_read_lock();
781786
mutex_lock(&rdtgroup_mutex);
782787

783788
d = container_of(work, struct rdt_domain, cqm_limbo.work);
@@ -792,6 +797,7 @@ void cqm_handle_limbo(struct work_struct *work)
792797
}
793798

794799
mutex_unlock(&rdtgroup_mutex);
800+
cpus_read_unlock();
795801
}
796802

797803
/**
@@ -823,6 +829,7 @@ void mbm_handle_overflow(struct work_struct *work)
823829
struct rdt_resource *r;
824830
struct rdt_domain *d;
825831

832+
cpus_read_lock();
826833
mutex_lock(&rdtgroup_mutex);
827834

828835
/*
@@ -856,6 +863,7 @@ void mbm_handle_overflow(struct work_struct *work)
856863

857864
out_unlock:
858865
mutex_unlock(&rdtgroup_mutex);
866+
cpus_read_unlock();
859867
}
860868

861869
/**

arch/x86/kernel/cpu/resctrl/pseudo_lock.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,9 @@ bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d)
844844
struct rdt_domain *d_i;
845845
bool ret = false;
846846

847+
/* Walking r->domains, ensure it can't race with cpuhp */
848+
lockdep_assert_cpus_held();
849+
847850
if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
848851
return true;
849852

0 commit comments

Comments
 (0)