
Commit 39a4127

Kan Liang authored and Peter Zijlstra committed
perf/x86/intel: Fix PEBS memory access info encoding for ADL
The PEBS memory access latency encoding for the e-core is slightly different from the p-core: bit 4 is Lock, while bit 5 is TLB access.

Add a new flag to indicate a load/store latency event on a hybrid platform, and a new function pointer to retrieve the latency data for a hybrid platform. Only implement the new flag and function for the e-core on ADL; the p-core on ADL keeps using the existing PERF_X86_EVENT_PEBS_LDLAT/STLAT flags.

Factor out pebs_set_tlb_lock() to set the generic memory data source information of the TLB access and lock for both load and store latency.

Move intel_get_event_constraints() ahead of the :ppp check, otherwise the new flag never gets a chance to be set for :ppp events.

Fixes: f83d2f9 ("perf/x86/intel: Add Alder Lake Hybrid support")
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lkml.kernel.org/r/20220629150840.2235741-1-kan.liang@linux.intel.com
1 parent 119a784 commit 39a4127
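
Editor's illustration (not part of the commit): a minimal user-space sketch of the layout difference described above. The p-core meaning of bits 4 and 5 matches the comments removed from load_latency_data() below, the e-core meaning follows the changelog, and the sample status value is made up.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1ULL << (n))

/* Decode one PEBS data-source status value under a given bit layout. */
static void decode(const char *core, uint64_t status, int tlb_bit, int lock_bit)
{
	bool stlb_miss = status & BIT(tlb_bit);
	bool locked    = status & BIT(lock_bit);

	printf("%s: stlb_miss=%d locked=%d\n", core, stlb_miss, locked);
}

int main(void)
{
	uint64_t status = BIT(4);	/* made-up sample status */

	decode("p-core", status, 4, 5);	/* bit 4 = TLB access, bit 5 = lock */
	decode("e-core", status, 5, 4);	/* bit 4 = lock, bit 5 = TLB access */
	return 0;
}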

3 files changed (+60, -33 lines)


arch/x86/events/intel/core.c

Lines changed: 3 additions & 2 deletions
@@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 {
 	struct event_constraint *c;
 
+	c = intel_get_event_constraints(cpuc, idx, event);
+
 	/*
 	 * :ppp means to do reduced skid PEBS,
 	 * which is available on PMC0 and fixed counter 0.
@@ -4153,8 +4155,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 		return &counter0_constraint;
 	}
 
-	c = intel_get_event_constraints(cpuc, idx, event);
-
 	return c;
 }
 
@@ -6242,6 +6242,7 @@ __init int intel_pmu_init(void)
 		x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
 		x86_pmu.lbr_pt_coexist = true;
 		intel_pmu_pebs_data_source_skl(false);
+		x86_pmu.pebs_latency_data = adl_latency_data_small;
 		x86_pmu.num_topdown_events = 8;
 		x86_pmu.update_topdown_event = adl_update_topdown_event;
 		x86_pmu.set_topdown_event_period = adl_set_topdown_event_period;

arch/x86/events/intel/ds.c

Lines changed: 49 additions & 31 deletions
@@ -171,6 +171,49 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status)
 	return dse.val;
 }
 
+static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
+{
+	/*
+	 * TLB access
+	 * 0 = did not miss 2nd level TLB
+	 * 1 = missed 2nd level TLB
+	 */
+	if (tlb)
+		*val |= P(TLB, MISS) | P(TLB, L2);
+	else
+		*val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+	/* locked prefix */
+	if (lock)
+		*val |= P(LOCK, LOCKED);
+}
+
+/* Retrieve the latency data for e-core of ADL */
+u64 adl_latency_data_small(struct perf_event *event, u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val;
+
+	WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
+
+	dse.val = status;
+
+	val = pebs_data_source[dse.ld_dse];
+
+	/*
+	 * For the atom core on ADL,
+	 * bit 4: lock, bit 5: TLB access.
+	 */
+	pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
+
+	if (dse.ld_data_blk)
+		val |= P(BLK, DATA);
+	else
+		val |= P(BLK, NA);
+
+	return val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -190,21 +233,8 @@ static u64 load_latency_data(u64 status)
 		val |= P(TLB, NA) | P(LOCK, NA);
 		return val;
 	}
-	/*
-	 * bit 4: TLB access
-	 * 0 = did not miss 2nd level TLB
-	 * 1 = missed 2nd level TLB
-	 */
-	if (dse.ld_stlb_miss)
-		val |= P(TLB, MISS) | P(TLB, L2);
-	else
-		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
 
-	/*
-	 * bit 5: locked prefix
-	 */
-	if (dse.ld_locked)
-		val |= P(LOCK, LOCKED);
+	pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked);
 
 	/*
 	 * Ice Lake and earlier models do not support block infos.
@@ -245,21 +275,7 @@ static u64 store_latency_data(u64 status)
 	 */
 	val = pebs_data_source[dse.st_lat_dse];
 
-	/*
-	 * bit 4: TLB access
-	 * 0 = did not miss 2nd level TLB
-	 * 1 = missed 2nd level TLB
-	 */
-	if (dse.st_lat_stlb_miss)
-		val |= P(TLB, MISS) | P(TLB, L2);
-	else
-		val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
-
-	/*
-	 * bit 5: locked prefix
-	 */
-	if (dse.st_lat_locked)
-		val |= P(LOCK, LOCKED);
+	pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked);
 
 	val |= P(BLK, NA);
 
@@ -781,8 +797,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
 
 struct event_constraint intel_grt_pebs_event_constraints[] = {
 	/* Allow all events as PEBS with no flags */
-	INTEL_PLD_CONSTRAINT(0x5d0, 0xf),
-	INTEL_PSD_CONSTRAINT(0x6d0, 0xf),
+	INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
+	INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
 	EVENT_CONSTRAINT_END
 };
 
@@ -1446,6 +1462,8 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
 		val = load_latency_data(aux);
 	else if (fl & PERF_X86_EVENT_PEBS_STLAT)
 		val = store_latency_data(aux);
+	else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID)
+		val = x86_pmu.pebs_latency_data(event, aux);
 	else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
 		val = precise_datala_hsw(event, aux);
 	else if (fst)
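
Editor's sketch (standalone mock, not kernel code; the demo_* names are made up): the dispatch pattern that the get_data_src() hunk above introduces, where a per-event flag routes decoding through a per-PMU function pointer so each core type can supply its own PEBS latency decoder.

#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for the per-event flags checked in get_data_src(). */
#define DEMO_PEBS_LDLAT		0x1u
#define DEMO_PEBS_LAT_HYBRID	0x2u

/* Stand-in for x86_pmu: each PMU type installs its own decoder. */
struct demo_pmu {
	uint64_t (*pebs_latency_data)(uint64_t status);
};

static uint64_t demo_latency_data_small(uint64_t status)
{
	return status ^ 0xffu;		/* placeholder e-core decode */
}

static uint64_t demo_load_latency_data(uint64_t status)
{
	return status;			/* placeholder p-core decode */
}

static uint64_t demo_get_data_src(const struct demo_pmu *pmu,
				  unsigned int flags, uint64_t aux)
{
	if (flags & DEMO_PEBS_LDLAT)
		return demo_load_latency_data(aux);
	if (flags & DEMO_PEBS_LAT_HYBRID)
		return pmu->pebs_latency_data(aux);	/* hybrid path */
	return 0;
}

int main(void)
{
	struct demo_pmu atom = { .pebs_latency_data = demo_latency_data_small };

	printf("%#llx\n", (unsigned long long)
	       demo_get_data_src(&atom, DEMO_PEBS_LAT_HYBRID, 0x10));
	return 0;
}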

arch/x86/events/perf_event.h

Lines changed: 8 additions & 0 deletions
@@ -84,6 +84,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 #define PERF_X86_EVENT_TOPDOWN		0x04000 /* Count Topdown slots/metrics events */
 #define PERF_X86_EVENT_PEBS_STLAT	0x08000 /* st+stlat data address sampling */
 #define PERF_X86_EVENT_AMD_BRS		0x10000 /* AMD Branch Sampling */
+#define PERF_X86_EVENT_PEBS_LAT_HYBRID	0x20000 /* ld and st lat for hybrid */
 
 static inline bool is_topdown_count(struct perf_event *event)
 {
@@ -461,6 +462,10 @@ struct cpu_hw_events {
 	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
 			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
+#define INTEL_HYBRID_LAT_CONSTRAINT(c, n)	\
+	__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+			  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
+
 /* Event constraint, but match on all event flags too. */
 #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
 	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -826,6 +831,7 @@ struct x86_pmu {
 	void		(*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data);
 	struct event_constraint *pebs_constraints;
 	void		(*pebs_aliases)(struct perf_event *event);
+	u64		(*pebs_latency_data)(struct perf_event *event, u64 status);
 	unsigned long	large_pebs_flags;
 	u64		rtm_abort_event;
 
@@ -1393,6 +1399,8 @@ void intel_pmu_disable_bts(void);
 
 int intel_pmu_drain_bts_buffer(void);
 
+u64 adl_latency_data_small(struct perf_event *event, u64 status);
+
 extern struct event_constraint intel_core2_pebs_event_constraints[];
 
 extern struct event_constraint intel_atom_pebs_event_constraints[];