
Commit 4459d80

Merge tag 'perf-urgent-2022-08-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 perf fixes from Ingo Molnar:
 "Misc fixes: an Arch-LBR fix, a PEBS enumeration fix, an Intel DS fix,
  PEBS constraints fix on Alder Lake CPUs and an Intel uncore PMU fix"

* tag 'perf-urgent-2022-08-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/uncore: Fix broken read_counter() for SNB IMC PMU
  perf/x86/intel: Fix pebs event constraints for ADL
  perf/x86/intel/ds: Fix precise store latency handling
  perf/x86/core: Set pebs_capable and PMU_FL_PEBS_ALL for the Baseline
  perf/x86/lbr: Enable the branch type for the Arch LBR by default
2 parents 611875d + 11745ec

4 files changed: +36, -7 lines changed


arch/x86/events/intel/core.c

Lines changed: 0 additions & 4 deletions
@@ -6291,10 +6291,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-		x86_pmu.flags |= PMU_FL_PEBS_ALL;
 		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
 		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;

@@ -6337,10 +6335,8 @@ __init int intel_pmu_init(void)
 		x86_pmu.pebs_aliases = NULL;
 		x86_pmu.pebs_prec_dist = true;
 		x86_pmu.pebs_block = true;
-		x86_pmu.pebs_capable = ~0ULL;
 		x86_pmu.flags |= PMU_FL_HAS_RSP_1;
 		x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
-		x86_pmu.flags |= PMU_FL_PEBS_ALL;
 		x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
 		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
 		x86_pmu.lbr_pt_coexist = true;

arch/x86/events/intel/ds.c

Lines changed: 11 additions & 2 deletions
@@ -291,6 +291,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status)
 static u64 store_latency_data(struct perf_event *event, u64 status)
 {
 	union intel_x86_pebs_dse dse;
+	union perf_mem_data_src src;
 	u64 val;

 	dse.val = status;
@@ -304,7 +305,14 @@ static u64 store_latency_data(struct perf_event *event, u64 status)

 	val |= P(BLK, NA);

-	return val;
+	/*
+	 * the pebs_data_source table is only for loads
+	 * so override the mem_op to say STORE instead
+	 */
+	src.val = val;
+	src.mem_op = P(OP,STORE);
+
+	return src.val;
 }

 struct pebs_record_core {
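
The override matters because this value reaches userspace verbatim in the PERF_SAMPLE_DATA_SRC sample field. Below is a minimal consumer-side sketch of how such a value decodes, assuming only the UAPI union perf_mem_data_src from <linux/perf_event.h>; decode_data_src() itself is hypothetical and not part of this commit.

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical decoder for a PERF_SAMPLE_DATA_SRC value. */
static void decode_data_src(uint64_t raw)
{
	union perf_mem_data_src src = { .val = raw };

	/* Before this fix, PEBS store samples carried PERF_MEM_OP_LOAD,
	 * because the shared pebs_data_source table is load-oriented. */
	if (src.mem_op & PERF_MEM_OP_STORE)
		printf("store: lvl=0x%x lock=%u\n",
		       (unsigned)src.mem_lvl, (unsigned)src.mem_lock);
	else if (src.mem_op & PERF_MEM_OP_LOAD)
		printf("load: lvl=0x%x\n", (unsigned)src.mem_lvl);
}

int main(void)
{
	/* Roughly what the fixed store_latency_data() would hand back. */
	union perf_mem_data_src s = { .val = 0 };

	s.mem_op = PERF_MEM_OP_STORE;
	s.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
	decode_data_src(s.val);
	return 0;
}
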
@@ -822,7 +830,7 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {

 struct event_constraint intel_grt_pebs_event_constraints[] = {
 	/* Allow all events as PEBS with no flags */
-	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
+	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
 	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
 	EVENT_CONSTRAINT_END
 };
@@ -2262,6 +2270,7 @@ void __init intel_ds_init(void)
 					  PERF_SAMPLE_BRANCH_STACK |
 					  PERF_SAMPLE_TIME;
 			x86_pmu.flags |= PMU_FL_PEBS_ALL;
+			x86_pmu.pebs_capable = ~0ULL;
 			pebs_qual = "-baseline";
 			x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
 		} else {
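
Together with the two core.c deletions above, this hunk centralizes the extended-PEBS setup: any CPU advertising the PEBS Baseline capability now gets pebs_capable = ~0ULL and PMU_FL_PEBS_ALL from intel_ds_init(), rather than from per-model init code. The user-visible effect is that precise sampling can be requested on any general-purpose event. A hedged userspace sketch of such a request follows; the event choice, period, and error handling are illustrative assumptions, not part of this commit.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
	attr.sample_period = 100000;
	attr.precise_ip = 1;	/* request PEBS; the event must be pebs_capable */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		printf("precise event accepted, fd=%ld\n", fd);
	return 0;
}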

arch/x86/events/intel/lbr.c

Lines changed: 8 additions & 0 deletions
@@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)

 	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
 		reg->config = mask;
+
+		/*
+		 * The Arch LBR HW can retrieve the common branch types
+		 * from the LBR_INFO. It doesn't require the high overhead
+		 * SW disassemble.
+		 * Enable the branch type by default for the Arch LBR.
+		 */
+		reg->reg |= X86_BR_TYPE_SAVE;
 		return 0;
 	}
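
With X86_BR_TYPE_SAVE now set by default on Arch LBR parts, each sampled branch record carries its type (PERF_BR_COND, PERF_BR_CALL, ...) in perf_branch_entry.type without software disassembly. Below is a sketch of the matching perf_event_attr setup, assuming the UAPI constants from <linux/perf_event.h>; the helper itself is illustrative, not from this commit.

#include <linux/perf_event.h>
#include <string.h>

/* Ask for a branch stack whose entries also carry the branch type. */
static void setup_branch_type_sampling(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->size = sizeof(*attr);
	attr->type = PERF_TYPE_HARDWARE;
	attr->config = PERF_COUNT_HW_CPU_CYCLES;
	attr->sample_period = 400009;
	attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
				   PERF_SAMPLE_BRANCH_TYPE_SAVE;
}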

arch/x86/events/intel/uncore_snb.c

Lines changed: 17 additions & 1 deletion
@@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid)
 	return 0;
 }

+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/*
+	 * SNB IMC counters are 32-bit and are laid out back to back
+	 * in MMIO space. Therefore we must use a 32-bit accessor function.
+	 * Using readq() from uncore_mmio_read_counter() causes problems
+	 * because it reads 64 bits at a time. That is okay for
+	 * uncore_perf_event_update(), which drops the upper 32 bits,
+	 * but not for plain uncore_read_counter() as invoked from
+	 * uncore_pmu_event_start().
+	 */
+	return (u64)readl(box->io_addr + hwc->event_base);
+}
+
 static struct pmu snb_uncore_imc_pmu = {
 	.task_ctx_nr	= perf_invalid_context,
 	.event_init	= snb_uncore_imc_event_init,
@@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
 	.disable_event	= snb_uncore_imc_disable_event,
 	.enable_event	= snb_uncore_imc_enable_event,
 	.hw_config	= snb_uncore_imc_hw_config,
-	.read_counter	= uncore_mmio_read_counter,
+	.read_counter	= snb_uncore_imc_read_counter,
 };

 static struct intel_uncore_type snb_uncore_imc = {
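
To see why the generic readq()-based uncore_mmio_read_counter() misbehaves here: with two 32-bit counters packed back to back, a 64-bit load at the first counter's offset also captures its neighbour in the upper half. Below is a self-contained little-endian illustration, with ordinary memory standing in for the IMC MMIO window; the counter names and values are made up.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	uint8_t mmio[8];		/* stand-in for the IMC MMIO window */
	uint32_t data_reads = 1000;	/* 32-bit counter at offset 0 */
	uint32_t data_writes = 7;	/* adjacent 32-bit counter at offset 4 */
	uint64_t wide;			/* what a readq()-style access returns */
	uint32_t narrow;		/* what readl() returns */

	memcpy(mmio, &data_reads, 4);
	memcpy(mmio + 4, &data_writes, 4);

	memcpy(&wide, mmio, 8);		/* upper 32 bits are the neighbour's */
	memcpy(&narrow, mmio, 4);

	printf("64-bit read: %llu (neighbour leaks into bits 32-63)\n",
	       (unsigned long long)wide);
	printf("32-bit read: %u (the actual counter value)\n", narrow);
	return 0;
}

Dropping the upper half, as uncore_perf_event_update() does, hides the problem; uncore_pmu_event_start() stores the raw value, which is why the dedicated readl() accessor is needed.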
