Skip to content

Commit 2b84def

Browse files
Sebastian Andrzej Siewior authored and Peter Zijlstra committed
perf: Split __perf_pending_irq() out of perf_pending_irq()
perf_pending_irq() invokes perf_event_wakeup() and __perf_pending_irq(). The former is in charge of waking any tasks which wait to be woken up, while the latter disables perf-events. Although perf_pending_irq() is an irq_work, its callback is invoked in thread context on PREEMPT_RT. This is needed because all the waking functions (wake_up_all(), kill_fasync()) acquire sleeping locks which must not be used with disabled interrupts. Disabling events, as done by __perf_pending_irq(), expects a hardirq context and disabled interrupts. This requirement is not fulfilled on PREEMPT_RT. Split the functionality based on perf_event::pending_disable into a separate irq_work named `pending_disable_irq` and invoke it in hardirq context on PREEMPT_RT. Rename the split out callback to perf_pending_disable(). Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Marco Elver <elver@google.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Link: https://lore.kernel.org/r/20240704170424.1466941-8-bigeasy@linutronix.de
1 parent 16b9569 commit 2b84def

File tree

2 files changed

+23
-7
lines changed

2 files changed

+23
-7
lines changed

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -783,6 +783,7 @@ struct perf_event {
783783
unsigned int pending_disable;
784784
unsigned long pending_addr; /* SIGTRAP */
785785
struct irq_work pending_irq;
786+
struct irq_work pending_disable_irq;
786787
struct callback_head pending_task;
787788
unsigned int pending_work;
788789
struct rcuwait pending_work_wait;

kernel/events/core.c

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2451,7 +2451,7 @@ static void __perf_event_disable(struct perf_event *event,
24512451
* hold the top-level event's child_mutex, so any descendant that
24522452
* goes to exit will block in perf_event_exit_event().
24532453
*
2454-
* When called from perf_pending_irq it's OK because event->ctx
2454+
* When called from perf_pending_disable it's OK because event->ctx
24552455
* is the current context on this CPU and preemption is disabled,
24562456
* hence we can't get into perf_event_task_sched_out for this context.
24572457
*/
@@ -2491,7 +2491,7 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
24912491
void perf_event_disable_inatomic(struct perf_event *event)
24922492
{
24932493
event->pending_disable = 1;
2494-
irq_work_queue(&event->pending_irq);
2494+
irq_work_queue(&event->pending_disable_irq);
24952495
}
24962496

24972497
#define MAX_INTERRUPTS (~0ULL)
@@ -5218,6 +5218,7 @@ static void perf_pending_task_sync(struct perf_event *event)
52185218
static void _free_event(struct perf_event *event)
52195219
{
52205220
irq_work_sync(&event->pending_irq);
5221+
irq_work_sync(&event->pending_disable_irq);
52215222
perf_pending_task_sync(event);
52225223

52235224
unaccount_event(event);
@@ -6749,7 +6750,7 @@ static void perf_sigtrap(struct perf_event *event)
67496750
/*
67506751
* Deliver the pending work in-event-context or follow the context.
67516752
*/
6752-
static void __perf_pending_irq(struct perf_event *event)
6753+
static void __perf_pending_disable(struct perf_event *event)
67536754
{
67546755
int cpu = READ_ONCE(event->oncpu);
67556756

@@ -6787,11 +6788,26 @@ static void __perf_pending_irq(struct perf_event *event)
67876788
* irq_work_queue(); // FAILS
67886789
*
67896790
* irq_work_run()
6790-
* perf_pending_irq()
6791+
* perf_pending_disable()
67916792
*
67926793
* But the event runs on CPU-B and wants disabling there.
67936794
*/
6794-
irq_work_queue_on(&event->pending_irq, cpu);
6795+
irq_work_queue_on(&event->pending_disable_irq, cpu);
6796+
}
6797+
6798+
static void perf_pending_disable(struct irq_work *entry)
6799+
{
6800+
struct perf_event *event = container_of(entry, struct perf_event, pending_disable_irq);
6801+
int rctx;
6802+
6803+
/*
6804+
* If we 'fail' here, that's OK, it means recursion is already disabled
6805+
* and we won't recurse 'further'.
6806+
*/
6807+
rctx = perf_swevent_get_recursion_context();
6808+
__perf_pending_disable(event);
6809+
if (rctx >= 0)
6810+
perf_swevent_put_recursion_context(rctx);
67956811
}
67966812

67976813
static void perf_pending_irq(struct irq_work *entry)
@@ -6814,8 +6830,6 @@ static void perf_pending_irq(struct irq_work *entry)
68146830
perf_event_wakeup(event);
68156831
}
68166832

6817-
__perf_pending_irq(event);
6818-
68196833
if (rctx >= 0)
68206834
perf_swevent_put_recursion_context(rctx);
68216835
}
@@ -11956,6 +11970,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
1195611970

1195711971
init_waitqueue_head(&event->waitq);
1195811972
init_irq_work(&event->pending_irq, perf_pending_irq);
11973+
event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
1195911974
init_task_work(&event->pending_task, perf_pending_task);
1196011975
rcuwait_init(&event->pending_work_wait);
1196111976

0 commit comments

Comments
 (0)