
Commit aac4de4

Merge tag 'perf-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:

 - Add branch stack counters ABI extension to better capture the
   growing amount of information the PMU exposes via branch stack
   sampling. There's matching tooling support.

 - Fix race when creating the nr_addr_filters sysfs file

 - Add Intel Sierra Forest and Grand Ridge intel/cstate PMU support

 - Add Intel Granite Rapids, Sierra Forest and Grand Ridge uncore PMU
   support

 - Misc cleanups & fixes

* tag 'perf-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/uncore: Factor out topology_gidnid_map()
  perf/x86/intel/uncore: Fix NULL pointer dereference issue in upi_fill_topology()
  perf/x86/amd: Reject branch stack for IBS events
  perf/x86/intel/uncore: Support Sierra Forest and Grand Ridge
  perf/x86/intel/uncore: Support IIO free-running counters on GNR
  perf/x86/intel/uncore: Support Granite Rapids
  perf/x86/uncore: Use u64 to replace unsigned for the uncore offsets array
  perf/x86/intel/uncore: Generic uncore_get_uncores and MMIO format of SPR
  perf: Fix the nr_addr_filters fix
  perf/x86/intel/cstate: Add Grand Ridge support
  perf/x86/intel/cstate: Add Sierra Forest support
  x86/smp: Export symbol cpu_clustergroup_mask()
  perf/x86/intel/cstate: Cleanup duplicate attr_groups
  perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file
  perf/x86/intel: Support branch counters logging
  perf/x86/intel: Reorganize attrs and is_visible
  perf: Add branch_sample_call_stack
  perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag
  perf: Add branch stack counters
2 parents: 0bdf062 + fdd0410

23 files changed: +625, -128 lines changed

Documentation/ABI/testing/sysfs-bus-event_source-devices-caps

Lines changed: 6 additions & 0 deletions
@@ -16,3 +16,9 @@ Description:
 		Example output in powerpc:
 		grep . /sys/bus/event_source/devices/cpu/caps/*
 		/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
+
+		The "branch_counter_nr" in the supported platform exposes the
+		maximum number of counters which can be shown in the u64 counters
+		of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width"
+		exposes the width of each counter. Both of them can be used by
+		the perf tool to parse the logged counters in each branch.
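
Taken together, the two caps give a tool everything it needs to decode the per-branch u64. A minimal userspace sketch, assuming counter i occupies bits [i*width, (i+1)*width) of the packed value (the layout implied by the text above; read_cap() and the sample value are illustrative, not part of this merge):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Read one caps file, e.g. branch_counter_nr; returns 0 if absent. */
static uint64_t read_cap(const char *name)
{
        char path[256];
        uint64_t val = 0;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/bus/event_source/devices/cpu/caps/%s", name);
        f = fopen(path, "r");
        if (f) {
                if (fscanf(f, "%" SCNu64, &val) != 1)
                        val = 0;
                fclose(f);
        }
        return val;
}

int main(void)
{
        uint64_t nr = read_cap("branch_counter_nr");
        uint64_t width = read_cap("branch_counter_width");
        uint64_t mask = (1ULL << width) - 1;
        uint64_t packed = 0x9;  /* example per-branch value from a sample */

        /* Unpack counter i from bits [i*width, (i+1)*width). */
        for (uint64_t i = 0; i < nr; i++)
                printf("counter[%" PRIu64 "] = %" PRIu64 "\n",
                       i, (packed >> (i * width)) & mask);
        return 0;
}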

arch/powerpc/perf/core-book3s.c

Lines changed: 1 addition & 1 deletion
@@ -2312,7 +2312,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 		struct cpu_hw_events *cpuhw;
 		cpuhw = this_cpu_ptr(&cpu_hw_events);
 		power_pmu_bhrb_read(event, cpuhw);
-		perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
+		perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
 	}

 	if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
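
This is the first of several call sites in the merge that gain a fourth argument: perf_sample_save_brstack() now also accepts per-branch counter values, and NULL keeps the old behavior. A sketch of the widened helper, reconstructed from these call sites (the exact prototype in include/linux/perf_event.h may differ):

static inline void perf_sample_save_brstack(struct perf_sample_data *data,
                                            struct perf_event *event,
                                            struct perf_branch_stack *brs,
                                            u64 *brs_cntr)
{
        /*
         * Save the branch stack as before; when brs_cntr is non-NULL,
         * additionally copy one u64 of logged counters per branch
         * entry into the sample (PERF_SAMPLE_BRANCH_COUNTERS).
         */
}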

arch/x86/events/amd/core.c

Lines changed: 1 addition & 1 deletion
@@ -940,7 +940,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 			continue;

 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);

arch/x86/events/amd/ibs.c

Lines changed: 3 additions & 0 deletions
@@ -287,6 +287,9 @@ static int perf_ibs_init(struct perf_event *event)
 	if (config & ~perf_ibs->config_mask)
 		return -EINVAL;

+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
 	ret = validate_group(event);
 	if (ret)
 		return ret;
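
From userspace, the effect is that perf_event_open() on an IBS PMU now fails cleanly instead of accepting a branch stack request it cannot honor. A hypothetical sketch (the PMU type value must be read from /sys/bus/event_source/devices/ibs_op/type at runtime; open_ibs_with_brstack() is an illustrative name):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int open_ibs_with_brstack(int ibs_op_type)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = ibs_op_type;        /* dynamic IBS PMU type */
        attr.sample_period = 100000;
        attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;

        /* After this merge: returns -1 with errno == EOPNOTSUPP. */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}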

arch/x86/events/core.c

Lines changed: 2 additions & 2 deletions
@@ -601,7 +601,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 		}
 	}

-	if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK)
+	if (branch_sample_call_stack(event))
 		event->attach_state |= PERF_ATTACH_TASK_DATA;

 	/*
@@ -1702,7 +1702,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 		perf_sample_data_init(&data, 0, event->hw.last_period);

 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);

 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
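
branch_sample_call_stack() is introduced by the "perf: Add branch_sample_call_stack" commit in the shortlog. Since it replaces the open-coded test one-for-one, it is presumably the obvious wrapper; a sketch, not the verbatim kernel definition:

static inline bool branch_sample_call_stack(const struct perf_event *event)
{
        return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}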

arch/x86/events/intel/core.c

Lines changed: 122 additions & 23 deletions
@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
 		perf_report_aux_output_id(event, idx);
 }

+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+	return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
-	if (needs_branch_stack(event))
+	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_del(event);
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_del(event);
@@ -2787,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)

 static void intel_pmu_enable_event(struct perf_event *event)
 {
+	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

@@ -2795,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)

 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
+		if (branch_sample_counters(event))
+			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
-		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
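
branch_sample_counters(), which gates the new ARCH_PERFMON_EVENTSEL_BR_CNTR enable bit above, is not defined in this hunk. By analogy with branch_sample_call_stack() it presumably tests the new sample-type bit; a sketch under that assumption:

static inline bool branch_sample_counters(const struct perf_event *event)
{
        return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS;
}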
@@ -2820,7 +2828,7 @@ static void intel_pmu_add_event(struct perf_event *event)
 {
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_add(event);
-	if (needs_branch_stack(event))
+	if (intel_pmu_needs_branch_stack(event))
 		intel_pmu_lbr_add(event);
 }
@@ -3047,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		perf_sample_data_init(&data, 0, event->hw.last_period);

 		if (has_branch_stack(event))
-			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+			intel_pmu_lbr_save_brstack(&data, cpuc, event);

 		if (perf_event_overflow(event, &data, regs))
 			x86_pmu_stop(event, 0);
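
intel_pmu_lbr_save_brstack() replaces the generic save call in the Intel PMI handler. A plausible shape, inferred from the NULL-passing call sites elsewhere in this merge (the field name cpuc->lbr_counters and the exact guard are assumptions):

static void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
                                       struct cpu_hw_events *cpuc,
                                       struct perf_event *event)
{
        /* Hand over the logged counters only when the event asked. */
        if (branch_sample_counters(event)) {
                perf_sample_save_brstack(data, event, &cpuc->lbr_stack,
                                         cpuc->lbr_counters);
                return;
        }

        perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL);
}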
@@ -3612,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	if (cpuc->excl_cntrs)
 		return intel_get_excl_constraints(cpuc, event, idx, c2);

+	/* Not all counters support the branch counter feature. */
+	if (branch_sample_counters(event)) {
+		c2 = dyn_constraint(cpuc, c2, idx);
+		c2->idxmsk64 &= x86_pmu.lbr_counters;
+		c2->weight = hweight64(c2->idxmsk64);
+	}
+
 	return c2;
 }
@@ -3897,7 +3912,62 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		x86_pmu.pebs_aliases(event);
 	}

-	if (needs_branch_stack(event)) {
+	if (needs_branch_stack(event) && is_sampling_event(event))
+		event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+	if (branch_sample_counters(event)) {
+		struct perf_event *leader, *sibling;
+		int num = 0;
+
+		if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+		    (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+			return -EINVAL;
+
+		/*
+		 * The branch counter logging is not supported in the call stack
+		 * mode yet, since we cannot simply flush the LBR during e.g.,
+		 * multiplexing. Also, there is no obvious usage with the call
+		 * stack mode. Simply forbids it for now.
+		 *
+		 * If any events in the group enable the branch counter logging
+		 * feature, the group is treated as a branch counter logging
+		 * group, which requires the extra space to store the counters.
+		 */
+		leader = event->group_leader;
+		if (branch_sample_call_stack(leader))
+			return -EINVAL;
+		if (branch_sample_counters(leader))
+			num++;
+		leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+		for_each_sibling_event(sibling, leader) {
+			if (branch_sample_call_stack(sibling))
+				return -EINVAL;
+			if (branch_sample_counters(sibling))
+				num++;
+		}
+
+		if (num > fls(x86_pmu.lbr_counters))
+			return -EINVAL;
+		/*
+		 * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't
+		 * require any branch stack setup.
+		 * Clear the bit to avoid unnecessary branch stack setup.
+		 */
+		if (0 == (event->attr.branch_sample_type &
+			  ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+			    PERF_SAMPLE_BRANCH_COUNTERS)))
+			event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+		/*
+		 * Force the leader to be a LBR event. So LBRs can be reset
+		 * with the leader event. See intel_pmu_lbr_del() for details.
+		 */
+		if (!intel_pmu_needs_branch_stack(leader))
+			return -EINVAL;
+	}
+
+	if (intel_pmu_needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
 		if (ret)
 			return ret;
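
The validation above implies a specific userspace contract: the group leader must itself be an LBR sampling event, siblings may opt into counter logging, and call-stack mode is excluded. A hypothetical sketch of a conforming group (error handling and the ring-buffer read side omitted; PERF_SAMPLE_BRANCH_COUNTERS is the new uapi bit):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_event(struct perf_event_attr *attr, int group_fd)
{
        return syscall(__NR_perf_event_open, attr, 0, -1, group_fd, 0);
}

int open_branch_counter_group(void)
{
        struct perf_event_attr leader, sibling;
        int lfd;

        memset(&leader, 0, sizeof(leader));
        leader.size = sizeof(leader);
        leader.type = PERF_TYPE_HARDWARE;
        leader.config = PERF_COUNT_HW_CPU_CYCLES;
        leader.sample_period = 100000;
        leader.sample_type = PERF_SAMPLE_BRANCH_STACK;
        /* Leader samples branches and logs their counters. */
        leader.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                    PERF_SAMPLE_BRANCH_COUNTERS;

        memset(&sibling, 0, sizeof(sibling));
        sibling.size = sizeof(sibling);
        sibling.type = PERF_TYPE_HARDWARE;
        sibling.config = PERF_COUNT_HW_INSTRUCTIONS;
        /* Sibling only contributes a logged counter, no branch stack. */
        sibling.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;

        lfd = open_event(&leader, -1);
        if (lfd >= 0)
                open_event(&sibling, lfd);
        return lfd;
}

If more events request counters than fls(x86_pmu.lbr_counters) allows, or the leader is not an LBR sampling event, the open fails with EINVAL, matching the checks above.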
@@ -4380,8 +4450,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	 */
 	if (event->attr.precise_ip == 3) {
 		/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
-		if (constraint_match(&fixed0_constraint, event->hw.config))
-			return &fixed0_counter0_1_constraint;
+		if (constraint_match(&fixed0_constraint, event->hw.config)) {
+			/* The fixed counter 0 doesn't support LBR event logging. */
+			if (branch_sample_counters(event))
+				return &counter0_1_constraint;
+			else
+				return &fixed0_counter0_1_constraint;
+		}

 		switch (c->idxmsk64 & 0x3ull) {
 		case 0x1:
@@ -4560,7 +4635,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
 		goto err;
 	}

-	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+	if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
 		size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);

 		cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5532,11 +5607,41 @@ static ssize_t branches_show(struct device *cdev,

 static DEVICE_ATTR_RO(branches);

+static ssize_t branch_counter_nr_show(struct device *cdev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
 static struct attribute *lbr_attrs[] = {
 	&dev_attr_branches.attr,
+	&dev_attr_branch_counter_nr.attr,
+	&dev_attr_branch_counter_width.attr,
 	NULL
 };

+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	/* branches */
+	if (i == 0)
+		return x86_pmu.lbr_nr ? attr->mode : 0;
+
+	return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
+}
+
 static char pmu_name_str[30];

 static ssize_t pmu_name_show(struct device *cdev,
@@ -5563,6 +5668,15 @@ static struct attribute *intel_pmu_attrs[] = {
 	NULL,
 };

+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+	if (attr == &dev_attr_allow_tsx_force_abort.attr)
+		return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+	return attr->mode;
+}
+
 static umode_t
 tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
@@ -5584,27 +5698,12 @@ mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 	return pebs_is_visible(kobj, attr, i);
 }

-static umode_t
-lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
-	return x86_pmu.lbr_nr ? attr->mode : 0;
-}
-
 static umode_t
 exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
 	return x86_pmu.version >= 2 ? attr->mode : 0;
 }

-static umode_t
-default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
-	if (attr == &dev_attr_allow_tsx_force_abort.attr)
-		return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
-
-	return attr->mode;
-}
-
 static struct attribute_group group_events_td = {
 	.name = "events",
 };
