
Commit 56799bc

Frederic Weisbecker authored and Peter Zijlstra committed
perf: Fix hang while freeing sigtrap event
Perf can hang while freeing a sigtrap event if a related deferred
signal hadn't managed to be sent before the file got closed:

    perf_event_overflow()
        task_work_add(perf_pending_task)

    fput()
        task_work_add(____fput())

    task_work_run()
        ____fput()
            perf_release()
                perf_event_release_kernel()
                    _free_event()
                        perf_pending_task_sync()
                            task_work_cancel() -> FAILED
                            rcuwait_wait_event()

Once task_work_run() is running, the list of pending callbacks is
removed from the task_struct and from this point on task_work_cancel()
can't remove any pending and not yet started work items, hence the
task_work_cancel() failure and the hang on rcuwait_wait_event().

Task work could be changed to remove one work at a time, so a work
running on the current task can always cancel a pending one, however
the wait / wake design is still subject to inverted dependencies when
remote targets are involved, as pictured by Oleg:

    T1                                              T2
    fd = perf_event_open(pid => T2->pid);           fd = perf_event_open(pid => T1->pid);
    close(fd)                                       close(fd)
    <IRQ>                                           <IRQ>
    perf_event_overflow()                           perf_event_overflow()
        task_work_add(perf_pending_task)                task_work_add(perf_pending_task)
    </IRQ>                                          </IRQ>
    fput()                                          fput()
        task_work_add(____fput())                       task_work_add(____fput())

    task_work_run()                                 task_work_run()
        ____fput()                                      ____fput()
            perf_release()                                  perf_release()
                perf_event_release_kernel()                     perf_event_release_kernel()
                    _free_event()                                   _free_event()
                        perf_pending_task_sync()                        perf_pending_task_sync()
                            rcuwait_wait_event()                            rcuwait_wait_event()

Therefore the only option left is to acquire the event reference count
upon queueing the perf task work and release it from the task work,
just like it was done before 3a54654 ("perf: Fix event leak upon exec
and file release") but without the leaks it fixed.

Some adjustments are necessary to make it work:

* A child event might dereference its parent upon freeing. Care must be
  taken to release the parent last.

* Some places assuming the event doesn't have any reference held and
  therefore can be freed right away must instead put the reference and
  let the reference counting do its job.

Reported-by: "Yi Lai" <yi1.lai@linux.intel.com>
Closes: https://lore.kernel.org/all/Zx9Losv4YcJowaP%2F@ly-workstation/
Reported-by: syzbot+3c4321e10eea460eb606@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/673adf75.050a0220.87769.0024.GAE@google.com/
Fixes: 3a54654 ("perf: Fix event leak upon exec and file release")
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250304135446.18905-1-frederic@kernel.org
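To see the lifetime rule in isolation, here is a minimal userspace sketch of the same refcounting pattern, not the kernel implementation: the side that queues the deferred work pins the object with an inc-not-zero style reference, the deferred work drops that reference when it finishes, and a parent is always released after its child. All names here (struct fake_event, get_fake_event, put_fake_event, pending_work) are hypothetical.

/*
 * Userspace sketch (hypothetical names, not kernel code): the queueing
 * side pins the event, the deferred work unpins it, and the parent is
 * always released after the child. Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_event {
        atomic_long refcount;
        struct fake_event *parent;      /* reference held by the child, if any */
};

/* inc_not_zero: pin the event only if it is not already being freed. */
static int get_fake_event(struct fake_event *event)
{
        long old = atomic_load(&event->refcount);

        while (old != 0) {
                if (atomic_compare_exchange_weak(&event->refcount, &old, old + 1))
                        return 1;
        }
        return 0;
}

static void put_fake_event(struct fake_event *event)
{
        struct fake_event *parent = event->parent;

        if (atomic_fetch_sub(&event->refcount, 1) != 1)
                return;

        printf("freeing event %p\n", (void *)event);
        free(event);
        /* Release the parent last: freeing the child may still use it. */
        if (parent)
                put_fake_event(parent);
}

/* Deferred work: do the job, then drop the reference taken at queue time. */
static void *pending_work(void *arg)
{
        struct fake_event *event = arg;

        printf("deferred signal for event %p\n", (void *)event);
        put_fake_event(event);
        return NULL;
}

int main(void)
{
        struct fake_event *event = calloc(1, sizeof(*event));
        pthread_t worker;
        int queued;

        atomic_init(&event->refcount, 1);

        /* Overflow path: queue the work only if the event could be pinned. */
        queued = get_fake_event(event);
        if (queued)
                pthread_create(&worker, NULL, pending_work, event);

        /*
         * close(fd) path: drop our reference; the worker's reference keeps
         * the event alive, so nothing has to wait for or cancel the work.
         */
        put_fake_event(event);

        if (queued)
                pthread_join(worker, NULL);
        return 0;
}

In the patch below, those roles are played by atomic_long_inc_not_zero() in __perf_event_overflow() and by put_event() at the end of perf_pending_task().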
1 parent 0cd575c commit 56799bc

File tree

2 files changed: +18 -47 lines changed


include/linux/perf_event.h

Lines changed: 0 additions & 1 deletion
@@ -823,7 +823,6 @@ struct perf_event {
         struct irq_work pending_disable_irq;
         struct callback_head pending_task;
         unsigned int pending_work;
-        struct rcuwait pending_work_wait;
 
         atomic_t event_limit;

kernel/events/core.c

Lines changed: 18 additions & 46 deletions
@@ -5518,30 +5518,6 @@ static bool exclusive_event_installable(struct perf_event *event,
 
 static void perf_free_addr_filters(struct perf_event *event);
 
-static void perf_pending_task_sync(struct perf_event *event)
-{
-        struct callback_head *head = &event->pending_task;
-
-        if (!event->pending_work)
-                return;
-        /*
-         * If the task is queued to the current task's queue, we
-         * obviously can't wait for it to complete. Simply cancel it.
-         */
-        if (task_work_cancel(current, head)) {
-                event->pending_work = 0;
-                local_dec(&event->ctx->nr_no_switch_fast);
-                return;
-        }
-
-        /*
-         * All accesses related to the event are within the same RCU section in
-         * perf_pending_task(). The RCU grace period before the event is freed
-         * will make sure all those accesses are complete by then.
-         */
-        rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
-}
-
 /* vs perf_event_alloc() error */
 static void __free_event(struct perf_event *event)
 {
@@ -5599,7 +5575,6 @@ static void _free_event(struct perf_event *event)
 {
         irq_work_sync(&event->pending_irq);
         irq_work_sync(&event->pending_disable_irq);
-        perf_pending_task_sync(event);
 
         unaccount_event(event);
 
@@ -5692,10 +5667,17 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
+        struct perf_event *parent;
+
         if (!atomic_long_dec_and_test(&event->refcount))
                 return;
 
+        parent = event->parent;
         _free_event(event);
+
+        /* Matches the refcount bump in inherit_event() */
+        if (parent)
+                put_event(parent);
 }
 
 /*
@@ -5779,11 +5761,6 @@ int perf_event_release_kernel(struct perf_event *event)
                 if (tmp == child) {
                         perf_remove_from_context(child, DETACH_GROUP);
                         list_move(&child->child_list, &free_list);
-                        /*
-                         * This matches the refcount bump in inherit_event();
-                         * this can't be the last reference.
-                         */
-                        put_event(event);
                 } else {
                         var = &ctx->refcount;
                 }
@@ -5809,7 +5786,8 @@ int perf_event_release_kernel(struct perf_event *event)
                 void *var = &child->ctx->refcount;
 
                 list_del(&child->child_list);
-                free_event(child);
+                /* Last reference unless ->pending_task work is pending */
+                put_event(child);
 
                 /*
                  * Wake any perf_event_free_task() waiting for this event to be
@@ -5820,7 +5798,11 @@ int perf_event_release_kernel(struct perf_event *event)
         }
 
 no_ctx:
-        put_event(event); /* Must be the 'last' reference */
+        /*
+         * Last reference unless ->pending_task work is pending on this event
+         * or any of its children.
+         */
+        put_event(event);
         return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -7235,12 +7217,6 @@ static void perf_pending_task(struct callback_head *head)
         struct perf_event *event = container_of(head, struct perf_event, pending_task);
         int rctx;
 
-        /*
-         * All accesses to the event must belong to the same implicit RCU read-side
-         * critical section as the ->pending_work reset. See comment in
-         * perf_pending_task_sync().
-         */
-        rcu_read_lock();
         /*
          * If we 'fail' here, that's OK, it means recursion is already disabled
          * and we won't recurse 'further'.
@@ -7251,9 +7227,8 @@ static void perf_pending_task(struct callback_head *head)
                 event->pending_work = 0;
                 perf_sigtrap(event);
                 local_dec(&event->ctx->nr_no_switch_fast);
-                rcuwait_wake_up(&event->pending_work_wait);
         }
-        rcu_read_unlock();
+        put_event(event);
 
         if (rctx >= 0)
                 perf_swevent_put_recursion_context(rctx);
@@ -10248,6 +10223,7 @@ static int __perf_event_overflow(struct perf_event *event,
             !task_work_add(current, &event->pending_task, notify_mode)) {
                 event->pending_work = pending_id;
                 local_inc(&event->ctx->nr_no_switch_fast);
+                WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 
                 event->pending_addr = 0;
                 if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
@@ -12610,7 +12586,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
         init_irq_work(&event->pending_irq, perf_pending_irq);
         event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
         init_task_work(&event->pending_task, perf_pending_task);
-        rcuwait_init(&event->pending_work_wait);
 
         mutex_init(&event->mmap_mutex);
         raw_spin_lock_init(&event->addr_filters.lock);
@@ -13747,8 +13722,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
                  * Kick perf_poll() for is_event_hup();
                  */
                 perf_event_wakeup(parent_event);
-                free_event(event);
-                put_event(parent_event);
+                put_event(event);
                 return;
         }
 
@@ -13872,13 +13846,11 @@ static void perf_free_event(struct perf_event *event,
         list_del_init(&event->child_list);
         mutex_unlock(&parent->child_mutex);
 
-        put_event(parent);
-
         raw_spin_lock_irq(&ctx->lock);
         perf_group_detach(event);
         list_del_event(event, ctx);
         raw_spin_unlock_irq(&ctx->lock);
-        free_event(event);
+        put_event(event);
 }
 
 /*
