Commit add7695

Merge tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:

 - Thoroughly rewrite the data structures that implement perf task
   context handling, with the goal of fixing various quirks and
   unfeatures both in already merged, and in upcoming proposed code.

   The old data structure is the per task and per cpu perf_event_contexts:

         task_struct::perf_events_ctxp[] <-> perf_event_context <-> perf_cpu_context
              ^                                 |    ^     |           ^
              `---------------------------------'    |     `--> pmu ---'
                                                      v           ^
                                                 perf_event ------'

   In this new design this is replaced with a single task context and a
   single CPU context, plus intermediate data-structures:

         task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context
              ^                           |   ^ ^
              `---------------------------'   | |
                                              | |    perf_cpu_pmu_context <--.
                                              | `----.    ^                  |
                                              |      |    |                  |
                                              |      v    v                  |
                                              | ,--> perf_event_pmu_context  |
                                              | |                            |
                                              | |                            |
                                              v v                            |
                                         perf_event ---> pmu ----------------'

   [ See commit bd27568 for more details. ]

   This rewrite was developed by Peter Zijlstra and Ravi Bangoria.

 - Optimize perf_tp_event()

 - Update the Intel uncore PMU driver, extending it with UPI topology
   discovery on various hardware models.

 - Misc fixes & cleanups

* tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (25 commits)
  perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box()
  perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map()
  perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox()
  perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology()
  perf/x86/intel/uncore: Make set_mapping() procedure void
  perf/x86/intel/uncore: Update sysfs-devices-mapping file
  perf/x86/intel/uncore: Enable UPI topology discovery for Sapphire Rapids
  perf/x86/intel/uncore: Enable UPI topology discovery for Icelake Server
  perf/x86/intel/uncore: Get UPI NodeID and GroupID
  perf/x86/intel/uncore: Enable UPI topology discovery for Skylake Server
  perf/x86/intel/uncore: Generalize get_topology() for SKX PMUs
  perf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D
  perf/x86/intel/uncore: Clear attr_update properly
  perf/x86/intel/uncore: Introduce UPI topology type
  perf/x86/intel/uncore: Generalize IIO topology support
  perf/core: Don't allow grouping events from different hw pmus
  perf/amd/ibs: Make IBS a core pmu
  perf: Fix function pointer case
  perf/x86/amd: Remove the repeated declaration
  perf: Fix possible memleak in pmu_dev_alloc()
  ...
2 parents 617fe4f + 17b8d84
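As a reading aid (not part of the merge itself), below is a simplified C sketch of how the rewritten objects reference one another. The field names are illustrative only and do not match the kernel's actual definitions; see the diagrams in the merge description and commit bd27568 for the real layout.

/*
 * Illustrative sketch only -- simplified fields, not the kernel's actual
 * definitions.  One perf_event_context per task (plus one embedded in the
 * single per-CPU perf_cpu_context); per-PMU state hangs off intermediate
 * perf_event_pmu_context / perf_cpu_pmu_context objects.
 */
struct pmu;
struct perf_event_context;

struct perf_event_pmu_context {			/* per (context, PMU) pair */
	struct pmu			*pmu;
	struct perf_event_context	*ctx;	/* owning task or CPU context */
};

struct perf_cpu_pmu_context {			/* per (CPU, PMU) hardware state */
	struct perf_event_pmu_context	epc;	/* simplified embedding */
};

struct perf_event_context {			/* single context per task */
	struct perf_event_pmu_context	*pmu_ctxs;	/* per-PMU sub-contexts */
	int				nr_user;
};

struct perf_cpu_context {			/* single context per CPU */
	struct perf_event_context	ctx;
	struct perf_event_context	*task_ctx;	/* currently scheduled task */
};

struct perf_event {				/* events link to both layers */
	struct perf_event_context	*ctx;
	struct perf_event_pmu_context	*pmu_ctx;
	struct pmu			*pmu;
};

In short: each event points at its task/CPU context, at the per-(context, PMU) perf_event_pmu_context, and at its pmu, while per-(CPU, PMU) hardware state lives in perf_cpu_pmu_context instead of a separate per-PMU copy of the whole CPU context.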

21 files changed, +1765 -1230 lines changed

Documentation/ABI/testing/sysfs-devices-mapping

Lines changed: 29 additions & 1 deletion
@@ -1,6 +1,6 @@
 What:		/sys/devices/uncore_iio_x/dieX
 Date:		February 2020
-Contact:	Roman Sudarikov <roman.sudarikov@linux.intel.com>
+Contact:	Alexander Antonov <alexander.antonov@linux.intel.com>
 Description:
 		Each IIO stack (PCIe root port) has its own IIO PMON block, so
 		each dieX file (where X is die number) holds "Segment:Root Bus"
@@ -32,3 +32,31 @@ Description:
 		IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
 		IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
 		IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000
+
+What:		/sys/devices/uncore_upi_x/dieX
+Date:		March 2022
+Contact:	Alexander Antonov <alexander.antonov@linux.intel.com>
+Description:
+		Each /sys/devices/uncore_upi_X/dieY file holds "upi_Z,die_W"
+		value that means UPI link number X on die Y is connected to UPI
+		link Z on die W and this link between sockets can be monitored
+		by UPI PMON block.
+		For example, 4-die Sapphire Rapids platform has the following
+		UPI 0 topology::
+
+		    # tail /sys/devices/uncore_upi_0/die*
+		    ==> /sys/devices/uncore_upi_0/die0 <==
+		    upi_1,die_1
+		    ==> /sys/devices/uncore_upi_0/die1 <==
+		    upi_0,die_3
+		    ==> /sys/devices/uncore_upi_0/die2 <==
+		    upi_1,die_3
+		    ==> /sys/devices/uncore_upi_0/die3 <==
+		    upi_0,die_1
+
+		Which means::
+
+		    UPI link 0 on die 0 is connected to UPI link 1 on die 1
+		    UPI link 0 on die 1 is connected to UPI link 0 on die 3
+		    UPI link 0 on die 2 is connected to UPI link 1 on die 3
+		    UPI link 0 on die 3 is connected to UPI link 0 on die 1
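
As a usage illustration only (not part of this patch set), the "upi_Z,die_W" strings documented above can be parsed from userspace with a few lines of C. The sketch below assumes a platform that actually exposes /sys/devices/uncore_upi_0/die0 as described in the new ABI entry.

/*
 * Minimal userspace sketch: read one UPI topology file described in the
 * ABI text above and print the peer link.  Assumes the platform exposes
 * /sys/devices/uncore_upi_0/die0; error handling kept to a minimum.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/devices/uncore_upi_0/die0";
	unsigned int peer_upi, peer_die;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* Expected format per the ABI entry: "upi_Z,die_W" */
	if (fscanf(f, "upi_%u,die_%u", &peer_upi, &peer_die) == 2)
		printf("UPI link 0 on die 0 is connected to UPI link %u on die %u\n",
		       peer_upi, peer_die);
	fclose(f);
	return 0;
}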

arch/arm64/kernel/perf_event.c

Lines changed: 11 additions & 7 deletions
@@ -806,10 +806,14 @@ static void armv8pmu_disable_event(struct perf_event *event)
 
 static void armv8pmu_start(struct arm_pmu *cpu_pmu)
 {
-	struct perf_event_context *task_ctx =
-		this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx;
+	struct perf_event_context *ctx;
+	int nr_user = 0;
 
-	if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user)
+	ctx = perf_cpu_task_ctx();
+	if (ctx)
+		nr_user = ctx->nr_user;
+
+	if (sysctl_perf_user_access && nr_user)
 		armv8pmu_enable_user_access(cpu_pmu);
 	else
 		armv8pmu_disable_user_access();
@@ -1019,10 +1023,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 	return 0;
 }
 
-static int armv8pmu_filter_match(struct perf_event *event)
+static bool armv8pmu_filter(struct pmu *pmu, int cpu)
 {
-	unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
-	return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
+	struct arm_pmu *armpmu = to_arm_pmu(pmu);
+	return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
 }
 
 static void armv8pmu_reset(void *info)
@@ -1254,7 +1258,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
 	cpu_pmu->stop = armv8pmu_stop;
 	cpu_pmu->reset = armv8pmu_reset;
 	cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
-	cpu_pmu->filter_match = armv8pmu_filter_match;
+	cpu_pmu->filter = armv8pmu_filter;
 
 	cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;
 

arch/powerpc/perf/core-book3s.c

Lines changed: 4 additions & 4 deletions
@@ -132,7 +132,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
 static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
 static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
 static void pmao_restore_workaround(bool ebb) { }
 #endif /* CONFIG_PPC32 */
@@ -424,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
 		cpuhw->bhrb_context = event->ctx;
 	}
 	cpuhw->bhrb_users++;
-	perf_sched_cb_inc(event->ctx->pmu);
+	perf_sched_cb_inc(event->pmu);
 }
 
 static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -436,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 
 	WARN_ON_ONCE(!cpuhw->bhrb_users);
 	cpuhw->bhrb_users--;
-	perf_sched_cb_dec(event->ctx->pmu);
+	perf_sched_cb_dec(event->pmu);
 
 	if (!cpuhw->disabled && !cpuhw->bhrb_users) {
 		/* BHRB cannot be turned off when other
@@ -451,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
 /* Called from ctxsw to prevent one process's branch entries to
  * mingle with the other process's entries during context switch.
  */
-static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 	if (!ppmu->bhrb_nr)
 		return;

arch/s390/kernel/perf_pai_crypto.c

Lines changed: 1 addition & 1 deletion
@@ -377,7 +377,7 @@ static int paicrypt_push_sample(void)
 /* Called on schedule-in and schedule-out. No access to event structure,
  * but for sampling only event CRYPTO_ALL is allowed.
  */
-static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 	/* We started with a clean page on event installation. So read out
 	 * results on schedule_out and if page was dirty, clear values.

arch/s390/kernel/perf_pai_ext.c

Lines changed: 1 addition & 1 deletion
@@ -466,7 +466,7 @@ static int paiext_push_sample(void)
 /* Called on schedule-in and schedule-out. No access to event structure,
  * but for sampling only event NNPA_ALL is allowed.
  */
-static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 	/* We started with a clean page on event installation. So read out
 	 * results on schedule_out and if page was dirty, clear values.

arch/x86/events/amd/brs.c

Lines changed: 1 addition & 1 deletion
@@ -384,7 +384,7 @@ static void amd_brs_poison_buffer(void)
  * On ctxswin, sched_in = true, called after the PMU has started
  * On ctxswout, sched_in = false, called before the PMU is stopped
  */
-void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
+void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

arch/x86/events/amd/ibs.c

Lines changed: 2 additions & 2 deletions
@@ -631,7 +631,7 @@ static const struct attribute_group *op_attr_update[] = {
 
 static struct perf_ibs perf_ibs_fetch = {
 	.pmu = {
-		.task_ctx_nr = perf_invalid_context,
+		.task_ctx_nr = perf_hw_context,
 
 		.event_init = perf_ibs_init,
 		.add = perf_ibs_add,
@@ -655,7 +655,7 @@ static struct perf_ibs perf_ibs_fetch = {
 
 static struct perf_ibs perf_ibs_op = {
 	.pmu = {
-		.task_ctx_nr = perf_invalid_context,
+		.task_ctx_nr = perf_hw_context,
 
 		.event_init = perf_ibs_init,
 		.add = perf_ibs_add,

arch/x86/events/amd/lbr.c

Lines changed: 3 additions & 3 deletions
@@ -352,7 +352,7 @@ void amd_pmu_lbr_add(struct perf_event *event)
 		cpuc->br_sel = reg->reg;
 	}
 
-	perf_sched_cb_inc(event->ctx->pmu);
+	perf_sched_cb_inc(event->pmu);
 
 	if (!cpuc->lbr_users++ && !event->total_time_running)
 		amd_pmu_lbr_reset();
@@ -370,10 +370,10 @@ void amd_pmu_lbr_del(struct perf_event *event)
 
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
-	perf_sched_cb_dec(event->ctx->pmu);
+	perf_sched_cb_dec(event->pmu);
 }
 
-void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
+void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

arch/x86/events/core.c

Lines changed: 14 additions & 34 deletions
@@ -90,6 +90,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
 DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
 DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
+
 /*
  * This one is magic, it will get called even when PMU init fails (because
  * there is no PMU), in which case it should simply return NULL.
@@ -2031,6 +2033,7 @@ static void x86_pmu_static_call_update(void)
 	static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
 
 	static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
+	static_call_update(x86_pmu_filter, x86_pmu.filter);
 }
 
 static void _x86_pmu_read(struct perf_event *event)
@@ -2052,23 +2055,6 @@ void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
 	pr_info("... event mask: %016Lx\n", intel_ctrl);
 }
 
-/*
- * The generic code is not hybrid friendly. The hybrid_pmu->pmu
- * of the first registered PMU is unconditionally assigned to
- * each possible cpuctx->ctx.pmu.
- * Update the correct hybrid PMU to the cpuctx->ctx.pmu.
- */
-void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu)
-{
-	struct perf_cpu_context *cpuctx;
-
-	if (!pmu->pmu_cpu_context)
-		return;
-
-	cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
-	cpuctx->ctx.pmu = pmu;
-}
-
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
@@ -2175,13 +2161,9 @@ static int __init init_hw_perf_events(void)
 		if (err)
 			goto out2;
 	} else {
-		u8 cpu_type = get_this_hybrid_cpu_type();
 		struct x86_hybrid_pmu *hybrid_pmu;
 		int i, j;
 
-		if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
-			cpu_type = x86_pmu.get_hybrid_cpu_type();
-
 		for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
 			hybrid_pmu = &x86_pmu.hybrid_pmu[i];
 
@@ -2195,9 +2177,6 @@ static int __init init_hw_perf_events(void)
 					(hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
 			if (err)
 				break;
-
-			if (cpu_type == hybrid_pmu->cpu_type)
-				x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id());
 		}
 
 		if (i < x86_pmu.num_hybrid_pmus) {
@@ -2646,15 +2625,15 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 	NULL,
 };
 
-static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
 {
-	static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
+	static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
 }
 
-static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
-				  struct perf_event_context *next)
+static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+				  struct perf_event_pmu_context *next_epc)
 {
-	static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
+	static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
 }
 
 void perf_check_microcode(void)
@@ -2689,12 +2668,13 @@ static int x86_pmu_aux_output_match(struct perf_event *event)
 	return 0;
 }
 
-static int x86_pmu_filter_match(struct perf_event *event)
+static bool x86_pmu_filter(struct pmu *pmu, int cpu)
 {
-	if (x86_pmu.filter_match)
-		return x86_pmu.filter_match(event);
+	bool ret = false;
 
-	return 1;
+	static_call_cond(x86_pmu_filter)(pmu, cpu, &ret);
+
+	return ret;
 }
 
 static struct pmu pmu = {
@@ -2725,7 +2705,7 @@ static struct pmu pmu = {
 
 	.aux_output_match = x86_pmu_aux_output_match,
 
-	.filter_match = x86_pmu_filter_match,
+	.filter = x86_pmu_filter,
 };
 
 void arch_perf_update_userpage(struct perf_event *event,

arch/x86/events/intel/core.c

Lines changed: 10 additions & 13 deletions
@@ -4536,8 +4536,6 @@ static bool init_hybrid_pmu(int cpu)
 	cpumask_set_cpu(cpu, &pmu->supported_cpus);
 	cpuc->pmu = &pmu->pmu;
 
-	x86_pmu_update_cpu_context(&pmu->pmu, cpu);
-
 	return true;
 }
 
@@ -4671,17 +4669,17 @@ static void intel_pmu_cpu_dead(int cpu)
 		cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
 }
 
-static void intel_pmu_sched_task(struct perf_event_context *ctx,
+static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
 				 bool sched_in)
 {
-	intel_pmu_pebs_sched_task(ctx, sched_in);
-	intel_pmu_lbr_sched_task(ctx, sched_in);
+	intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
+	intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
 }
 
-static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
-				    struct perf_event_context *next)
+static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
+				    struct perf_event_pmu_context *next_epc)
 {
-	intel_pmu_lbr_swap_task_ctx(prev, next);
+	intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
 }
 
 static int intel_pmu_check_period(struct perf_event *event, u64 value)
@@ -4705,12 +4703,11 @@ static int intel_pmu_aux_output_match(struct perf_event *event)
 	return is_intel_pt_event(event);
 }
 
-static int intel_pmu_filter_match(struct perf_event *event)
+static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret)
 {
-	struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
-	unsigned int cpu = smp_processor_id();
+	struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu);
 
-	return cpumask_test_cpu(cpu, &pmu->supported_cpus);
+	*ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus);
 }
 
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -6413,7 +6410,7 @@ __init int intel_pmu_init(void)
 		static_call_update(intel_pmu_set_topdown_event_period,
 				   &adl_set_topdown_event_period);
 
-		x86_pmu.filter_match = intel_pmu_filter_match;
+		x86_pmu.filter = intel_pmu_filter;
 		x86_pmu.get_event_constraints = adl_get_event_constraints;
 		x86_pmu.hw_config = adl_hw_config;
 		x86_pmu.limit_period = spr_limit_period;
