@@ -2527,9 +2527,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
                 perf_report_aux_output_id(event, idx);
 }
 
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+        return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
-        if (needs_branch_stack(event))
+        if (intel_pmu_needs_branch_stack(event))
                 intel_pmu_lbr_del(event);
         if (event->attr.precise_ip)
                 intel_pmu_pebs_del(event);
@@ -2787,6 +2792,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 
 static void intel_pmu_enable_event(struct perf_event *event)
 {
+        u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
         struct hw_perf_event *hwc = &event->hw;
         int idx = hwc->idx;
 
@@ -2795,8 +2801,10 @@ static void intel_pmu_enable_event(struct perf_event *event)
 
         switch (idx) {
         case 0 ... INTEL_PMC_IDX_FIXED - 1:
+                if (branch_sample_counters(event))
+                        enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
                 intel_set_masks(event, idx);
-                __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
+                __x86_pmu_enable_event(hwc, enable_mask);
                 break;
         case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
         case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
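Note on the enable path above: the enable mask ends up ORed into the event's config before the EVENTSEL MSR write, so threading the branch-counter bit through enable_mask is enough to turn on logging for general-purpose counters. A minimal standalone sketch of that composition (plain C, not kernel code; the branch-counter bit position is an assumption, check the real ARCH_PERFMON_EVENTSEL_BR_CNTR definition):

#include <stdbool.h>
#include <stdint.h>

/* Architectural EN bit of IA32_PERFEVTSELx. */
#define SKETCH_EVENTSEL_ENABLE   (1ULL << 22)
/* Assumed position of the branch-counter logging bit. */
#define SKETCH_EVENTSEL_BR_CNTR  (1ULL << 35)

/* Value that would be written to the counter's EVENTSEL MSR. */
static uint64_t sketch_eventsel(uint64_t config, bool log_in_lbr)
{
        uint64_t enable_mask = SKETCH_EVENTSEL_ENABLE;

        if (log_in_lbr)
                enable_mask |= SKETCH_EVENTSEL_BR_CNTR;

        return config | enable_mask;
}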
@@ -2820,7 +2828,7 @@ static void intel_pmu_add_event(struct perf_event *event)
 {
         if (event->attr.precise_ip)
                 intel_pmu_pebs_add(event);
-        if (needs_branch_stack(event))
+        if (intel_pmu_needs_branch_stack(event))
                 intel_pmu_lbr_add(event);
 }
 
@@ -3047,7 +3055,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
                 perf_sample_data_init(&data, 0, event->hw.last_period);
 
                 if (has_branch_stack(event))
-                        perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
+                        intel_pmu_lbr_save_brstack(&data, cpuc, event);
 
                 if (perf_event_overflow(event, &data, regs))
                         x86_pmu_stop(event, 0);
@@ -3612,6 +3620,13 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
         if (cpuc->excl_cntrs)
                 return intel_get_excl_constraints(cpuc, event, idx, c2);
 
+        /* Not all counters support the branch counter feature. */
+        if (branch_sample_counters(event)) {
+                c2 = dyn_constraint(cpuc, c2, idx);
+                c2->idxmsk64 &= x86_pmu.lbr_counters;
+                c2->weight = hweight64(c2->idxmsk64);
+        }
+
         return c2;
 }
 
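For reference, the constraint tweak above narrows the event's allowed-counter bitmap to the counters that can feed the LBR branch-counter logic and recomputes the scheduling weight. A standalone sketch of that operation (plain C, hypothetical names, __builtin_popcountll() standing in for hweight64()):

#include <stdint.h>

struct sketch_constraint {
        uint64_t idxmsk64;      /* bitmap of counters the event may use */
        int weight;             /* number of set bits, used for scheduling order */
};

/* Drop counters that cannot be logged into the LBR branch counters. */
static void sketch_narrow(struct sketch_constraint *c, uint64_t br_cntr_mask)
{
        c->idxmsk64 &= br_cntr_mask;
        c->weight = __builtin_popcountll(c->idxmsk64);
}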
@@ -3897,7 +3912,62 @@ static int intel_pmu_hw_config(struct perf_event *event)
                 x86_pmu.pebs_aliases(event);
         }
 
-        if (needs_branch_stack(event)) {
+        if (needs_branch_stack(event) && is_sampling_event(event))
+                event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+        if (branch_sample_counters(event)) {
+                struct perf_event *leader, *sibling;
+                int num = 0;
+
+                if (!(x86_pmu.flags & PMU_FL_BR_CNTR) ||
+                    (event->attr.config & ~INTEL_ARCH_EVENT_MASK))
+                        return -EINVAL;
+
+                /*
+                 * The branch counter logging is not supported in the call stack
+                 * mode yet, since we cannot simply flush the LBR during e.g.,
+                 * multiplexing. Also, there is no obvious usage with the call
+                 * stack mode. Simply forbid it for now.
+                 *
+                 * If any events in the group enable the branch counter logging
+                 * feature, the group is treated as a branch counter logging
+                 * group, which requires the extra space to store the counters.
+                 */
+                leader = event->group_leader;
+                if (branch_sample_call_stack(leader))
+                        return -EINVAL;
+                if (branch_sample_counters(leader))
+                        num++;
+                leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
+
+                for_each_sibling_event(sibling, leader) {
+                        if (branch_sample_call_stack(sibling))
+                                return -EINVAL;
+                        if (branch_sample_counters(sibling))
+                                num++;
+                }
+
+                if (num > fls(x86_pmu.lbr_counters))
+                        return -EINVAL;
+                /*
+                 * Applying only PERF_SAMPLE_BRANCH_COUNTERS doesn't
+                 * require any branch stack setup.
+                 * Clear the bit to avoid unnecessary branch stack setup.
+                 */
+                if (0 == (event->attr.branch_sample_type &
+                          ~(PERF_SAMPLE_BRANCH_PLM_ALL |
+                            PERF_SAMPLE_BRANCH_COUNTERS)))
+                        event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+                /*
+                 * Force the leader to be an LBR event, so LBRs can be reset
+                 * with the leader event. See intel_pmu_lbr_del() for details.
+                 */
+                if (!intel_pmu_needs_branch_stack(leader))
+                        return -EINVAL;
+        }
+
+        if (intel_pmu_needs_branch_stack(event)) {
                 ret = intel_pmu_setup_lbr_filter(event);
                 if (ret)
                         return ret;
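The group rules enforced above map onto userspace roughly as follows: the leader must be a sampling branch-stack event, siblings may be plain counting events that only request the branch-counter logging bit, and the number of logged events must fit the supported branch counters. The sketch below is an illustrative, untested perf_event_open() setup under those assumptions; it needs a uapi header new enough to define PERF_SAMPLE_BRANCH_COUNTERS, and the chosen events and sample period are arbitrary.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr leader, sibling;
        int lfd, sfd;

        /* Leader: a sampling branch-stack (LBR) event, as required above. */
        memset(&leader, 0, sizeof(leader));
        leader.size = sizeof(leader);
        leader.type = PERF_TYPE_HARDWARE;
        leader.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
        leader.sample_period = 100000;                  /* arbitrary */
        leader.sample_type = PERF_SAMPLE_BRANCH_STACK;
        leader.branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                    PERF_SAMPLE_BRANCH_COUNTERS;

        /* Sibling: a counting event whose occurrences get logged per branch. */
        memset(&sibling, 0, sizeof(sibling));
        sibling.size = sizeof(sibling);
        sibling.type = PERF_TYPE_HARDWARE;
        sibling.config = PERF_COUNT_HW_BRANCH_MISSES;
        sibling.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS;

        lfd = perf_event_open(&leader, 0, -1, -1, 0);
        if (lfd < 0) {
                perror("leader");
                return 1;
        }
        sfd = perf_event_open(&sibling, 0, -1, lfd, 0);
        if (sfd < 0) {
                perror("sibling");
                close(lfd);
                return 1;
        }

        /* ... mmap the leader's ring buffer and parse the samples here ... */

        close(sfd);
        close(lfd);
        return 0;
}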
@@ -4380,8 +4450,13 @@ cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
          */
         if (event->attr.precise_ip == 3) {
                 /* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
-                if (constraint_match(&fixed0_constraint, event->hw.config))
-                        return &fixed0_counter0_1_constraint;
+                if (constraint_match(&fixed0_constraint, event->hw.config)) {
+                        /* The fixed counter 0 doesn't support LBR event logging. */
+                        if (branch_sample_counters(event))
+                                return &counter0_1_constraint;
+                        else
+                                return &fixed0_counter0_1_constraint;
+                }
 
                 switch (c->idxmsk64 & 0x3ull) {
                 case 0x1:
@@ -4560,7 +4635,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
                         goto err;
         }
 
-        if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
+        if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
                 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
 
                 cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5532,11 +5607,41 @@ static ssize_t branches_show(struct device *cdev,
 
 static DEVICE_ATTR_RO(branches);
 
+static ssize_t branch_counter_nr_show(struct device *cdev,
+                                      struct device_attribute *attr,
+                                      char *buf)
+{
+        return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters));
+}
+
+static DEVICE_ATTR_RO(branch_counter_nr);
+
+static ssize_t branch_counter_width_show(struct device *cdev,
+                                         struct device_attribute *attr,
+                                         char *buf)
+{
+        return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS);
+}
+
+static DEVICE_ATTR_RO(branch_counter_width);
+
 static struct attribute *lbr_attrs[] = {
         &dev_attr_branches.attr,
+        &dev_attr_branch_counter_nr.attr,
+        &dev_attr_branch_counter_width.attr,
         NULL
 };
 
+static umode_t
+lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+        /* branches */
+        if (i == 0)
+                return x86_pmu.lbr_nr ? attr->mode : 0;
+
+        return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0;
+}
+
 static char pmu_name_str[30];
 
 static ssize_t pmu_name_show(struct device *cdev,
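The two new caps files advertise how many counters can be logged per branch and how wide each logged value is. The sketch below shows one way a consumer could read them and unpack a per-branch counter word; it assumes the packing is branch_counter_nr fields of branch_counter_width bits each, and that the attributes live under the non-hybrid "cpu" PMU's caps directory (hybrid parts expose per-type PMUs such as "cpu_core").

#include <stdio.h>
#include <stdint.h>

static int read_cap(const char *name)
{
        char path[128];
        int val = 0;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/bus/event_source/devices/cpu/caps/%s", name);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (fscanf(f, "%d", &val) != 1)
                val = -1;
        fclose(f);
        return val;
}

int main(void)
{
        int nr = read_cap("branch_counter_nr");
        int width = read_cap("branch_counter_width");
        uint64_t packed = 0;    /* per-branch counter word taken from a sample */

        if (nr < 0 || width < 0)
                return 1;

        for (int i = 0; i < nr; i++) {
                uint64_t mask = (1ULL << width) - 1;

                printf("counter %d: %llu\n", i,
                       (unsigned long long)((packed >> (i * width)) & mask));
        }
        return 0;
}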
@@ -5563,6 +5668,15 @@ static struct attribute *intel_pmu_attrs[] = {
         NULL,
 };
 
+static umode_t
+default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+{
+        if (attr == &dev_attr_allow_tsx_force_abort.attr)
+                return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
+
+        return attr->mode;
+}
+
 static umode_t
 tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
@@ -5584,27 +5698,12 @@ mem_is_visible(struct kobject *kobj, struct attribute *attr, int i)
         return pebs_is_visible(kobj, attr, i);
 }
 
-static umode_t
-lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
-        return x86_pmu.lbr_nr ? attr->mode : 0;
-}
-
 static umode_t
 exra_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
         return x86_pmu.version >= 2 ? attr->mode : 0;
 }
 
-static umode_t
-default_is_visible(struct kobject *kobj, struct attribute *attr, int i)
-{
-        if (attr == &dev_attr_allow_tsx_force_abort.attr)
-                return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0;
-
-        return attr->mode;
-}
-
 static struct attribute_group group_events_td = {
         .name = "events",
 };