
Commit 3cec9fd

Kan Liang authored and Peter Zijlstra committed
perf/x86/lbr: Fix shorter LBRs call stacks for the system-wide mode
In system-wide mode, LBR call stacks come out shorter than in per-process mode, because the LBR MSRs are reset on every context switch in system-wide mode. For the LBR call stack, the LBRs should always be saved/restored across a context switch. Use the space in task_struct to save/restore the LBR call stack data.

For a system-wide event, it is unnecessary to update lbr_callstack_users for each thread. Add a variable in x86_pmu to indicate whether a system-wide event is active.

Fixes: 76cb2c6 ("perf/x86/intel: Save/restore LBR stack during context switch")
Reported-by: Andi Kleen <ak@linux.intel.com>
Reported-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Debugged-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20250314172700.438923-5-kan.liang@linux.intel.com
1 parent d57e94f commit 3cec9fd
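To make the fix concrete before the diff: the scheme adds a second, global refcount for system-wide events alongside the existing per-task one, and the context-switch path saves/restores the LBR stack whenever either is non-zero. Below is a minimal userspace C model of that accounting (an illustration only, not kernel code; the names mirror the kernel's but the types are simplified).

/* Model of the two-counter scheme (illustration, not kernel code):
 * per-task events refcount inside that task's saved LBR context,
 * while system-wide events bump one global counter, so per-thread
 * updates become unnecessary for them. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct task_lbr_ctx {
        uint64_t lbr_callstack_users;   /* per-task event refcount */
};

static uint64_t sys_wide_users;         /* models x86_pmu.lbr_callstack_users */

/* Mirrors the has_lbr_callstack_users() helper added by this commit. */
static bool needs_lbr_save_restore(const struct task_lbr_ctx *ctx)
{
        return ctx->lbr_callstack_users || sys_wide_users;
}

int main(void)
{
        struct task_lbr_ctx task = { 0 };

        sys_wide_users++;       /* a system-wide event becomes active */
        printf("save/restore on switch: %d\n", needs_lbr_save_restore(&task));
        return 0;
}

With only the per-task check (the pre-fix behavior), this model would print 0 and the LBR stack would be reset on the context switch instead of restored, which is exactly how the shorter call stacks arose.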

File tree

2 files changed: +40 -8 lines changed


arch/x86/events/intel/lbr.c

Lines changed: 39 additions & 8 deletions
@@ -422,11 +422,17 @@ static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
         return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
 }
 
+static inline bool has_lbr_callstack_users(void *ctx)
+{
+        return task_context_opt(ctx)->lbr_callstack_users ||
+               x86_pmu.lbr_callstack_users;
+}
+
 static void __intel_pmu_lbr_restore(void *ctx)
 {
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-        if (task_context_opt(ctx)->lbr_callstack_users == 0 ||
+        if (!has_lbr_callstack_users(ctx) ||
             task_context_opt(ctx)->lbr_stack_state == LBR_NONE) {
                 intel_pmu_lbr_reset();
                 return;
@@ -503,7 +509,7 @@ static void __intel_pmu_lbr_save(void *ctx)
 {
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-        if (task_context_opt(ctx)->lbr_callstack_users == 0) {
+        if (!has_lbr_callstack_users(ctx)) {
                 task_context_opt(ctx)->lbr_stack_state = LBR_NONE;
                 return;
         }
@@ -543,6 +549,7 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
                               struct task_struct *task, bool sched_in)
 {
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        struct perf_ctx_data *ctx_data;
         void *task_ctx;
 
         if (!cpuc->lbr_users)
@@ -553,14 +560,18 @@ void intel_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx,
          * the task was scheduled out, restore the stack. Otherwise flush
          * the LBR stack.
          */
-        task_ctx = pmu_ctx ? pmu_ctx->task_ctx_data : NULL;
+        rcu_read_lock();
+        ctx_data = rcu_dereference(task->perf_ctx_data);
+        task_ctx = ctx_data ? ctx_data->data : NULL;
         if (task_ctx) {
                 if (sched_in)
                         __intel_pmu_lbr_restore(task_ctx);
                 else
                         __intel_pmu_lbr_save(task_ctx);
+                rcu_read_unlock();
                 return;
         }
+        rcu_read_unlock();
 
         /*
          * Since a context switch can flip the address space and LBR entries
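Design note on the hunk above: the RCU read-side critical section covers every dereference of the data obtained from task->perf_ctx_data, so rcu_read_unlock() appears on both exits, once after the save/restore call on the early-return path and again on the fall-through path.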
@@ -589,9 +600,19 @@ void intel_pmu_lbr_add(struct perf_event *event)
 
         cpuc->br_sel = event->hw.branch_reg.reg;
 
-        if (branch_user_callstack(cpuc->br_sel) && event->pmu_ctx->task_ctx_data)
-                task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users++;
+        if (branch_user_callstack(cpuc->br_sel)) {
+                if (event->attach_state & PERF_ATTACH_TASK) {
+                        struct task_struct *task = event->hw.target;
+                        struct perf_ctx_data *ctx_data;
 
+                        rcu_read_lock();
+                        ctx_data = rcu_dereference(task->perf_ctx_data);
+                        if (ctx_data)
+                                task_context_opt(ctx_data->data)->lbr_callstack_users++;
+                        rcu_read_unlock();
+                } else
+                        x86_pmu.lbr_callstack_users++;
+        }
         /*
          * Request pmu::sched_task() callback, which will fire inside the
          * regular perf event scheduling, so that call will:
@@ -665,9 +686,19 @@ void intel_pmu_lbr_del(struct perf_event *event)
         if (!x86_pmu.lbr_nr)
                 return;
 
-        if (branch_user_callstack(cpuc->br_sel) &&
-            event->pmu_ctx->task_ctx_data)
-                task_context_opt(event->pmu_ctx->task_ctx_data)->lbr_callstack_users--;
+        if (branch_user_callstack(cpuc->br_sel)) {
+                if (event->attach_state & PERF_ATTACH_TASK) {
+                        struct task_struct *task = event->hw.target;
+                        struct perf_ctx_data *ctx_data;
+
+                        rcu_read_lock();
+                        ctx_data = rcu_dereference(task->perf_ctx_data);
+                        if (ctx_data)
+                                task_context_opt(ctx_data->data)->lbr_callstack_users--;
+                        rcu_read_unlock();
+                } else
+                        x86_pmu.lbr_callstack_users--;
+        }
 
         if (event->hw.flags & PERF_X86_EVENT_LBR_SELECT)
                 cpuc->lbr_select = 0;

arch/x86/events/perf_event.h

Lines changed: 1 addition & 0 deletions
@@ -920,6 +920,7 @@ struct x86_pmu {
                 const int *lbr_sel_map;            /* lbr_select mappings */
                 int *lbr_ctl_map;                  /* LBR_CTL mappings */
         };
+        u64     lbr_callstack_users;       /* lbr callstack system wide users */
         bool    lbr_double_abort;          /* duplicated lbr aborts */
         bool    lbr_pt_coexist;            /* (LBR|BTS) may coexist with PT */
 
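For context, a system-wide LBR call-stack event of the kind this commit fixes can be opened from userspace as sketched below. This is an illustration, not part of the commit; it uses only standard perf_event_open() attributes (PERF_SAMPLE_BRANCH_CALL_STACK selects LBR call-stack mode, and pid == -1 with a concrete CPU makes the event system-wide, which normally requires elevated privileges).

/* Sketch: open a system-wide hardware event sampling LBR call stacks
 * on CPU 0 -- the configuration whose context-switch save/restore this
 * commit fixes. Illustration only; the kernel may impose further
 * constraints on branch_sample_type combinations. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
        attr.branch_sample_type = PERF_SAMPLE_BRANCH_CALL_STACK |
                                  PERF_SAMPLE_BRANCH_USER;

        /* pid == -1, cpu == 0: system-wide on CPU 0. */
        int fd = syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        printf("system-wide LBR call-stack event: fd=%d\n", fd);
        close(fd);
        return 0;
}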
