Commit ac253a5

Merge tag 'perf-urgent-2025-04-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull misc perf events fixes from Ingo Molnar:

 - Fix __free_event() corner case splat

 - Fix false-positive uprobes related lockdep splat on
   CONFIG_PREEMPT_RT=y kernels

 - Fix a complicated perf sigtrap race that may result in hangs

* tag 'perf-urgent-2025-04-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix hang while freeing sigtrap event
  uprobes: Avoid false-positive lockdep splat on CONFIG_PREEMPT_RT=y in the ri_timer() uprobe timer callback, use raw_write_seqcount_*()
  perf/core: Fix WARN_ON(!ctx) in __free_event() for partial init
2 parents 54a012b + 56799bc commit ac253a5

3 files changed, 34 insertions(+), 52 deletions(-)


include/linux/perf_event.h

Lines changed: 0 additions & 1 deletion
@@ -823,7 +823,6 @@ struct perf_event {
 	struct irq_work			pending_disable_irq;
 	struct callback_head		pending_task;
 	unsigned int			pending_work;
-	struct rcuwait			pending_work_wait;
 
 	atomic_t			event_limit;
 
kernel/events/core.c

Lines changed: 21 additions & 49 deletions
@@ -5518,30 +5518,6 @@ static bool exclusive_event_installable(struct perf_event *event,
 
 static void perf_free_addr_filters(struct perf_event *event);
 
-static void perf_pending_task_sync(struct perf_event *event)
-{
-	struct callback_head *head = &event->pending_task;
-
-	if (!event->pending_work)
-		return;
-	/*
-	 * If the task is queued to the current task's queue, we
-	 * obviously can't wait for it to complete. Simply cancel it.
-	 */
-	if (task_work_cancel(current, head)) {
-		event->pending_work = 0;
-		local_dec(&event->ctx->nr_no_switch_fast);
-		return;
-	}
-
-	/*
-	 * All accesses related to the event are within the same RCU section in
-	 * perf_pending_task(). The RCU grace period before the event is freed
-	 * will make sure all those accesses are complete by then.
-	 */
-	rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
-}
-
 /* vs perf_event_alloc() error */
 static void __free_event(struct perf_event *event)
 {
@@ -5599,7 +5575,6 @@ static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending_irq);
 	irq_work_sync(&event->pending_disable_irq);
-	perf_pending_task_sync(event);
 
 	unaccount_event(event);
 
@@ -5692,10 +5667,17 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
+	struct perf_event *parent;
+
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	parent = event->parent;
 	_free_event(event);
+
+	/* Matches the refcount bump in inherit_event() */
+	if (parent)
+		put_event(parent);
 }
 
 /*
@@ -5779,11 +5761,6 @@ int perf_event_release_kernel(struct perf_event *event)
 		if (tmp == child) {
 			perf_remove_from_context(child, DETACH_GROUP);
 			list_move(&child->child_list, &free_list);
-			/*
-			 * This matches the refcount bump in inherit_event();
-			 * this can't be the last reference.
-			 */
-			put_event(event);
 		} else {
 			var = &ctx->refcount;
 		}
@@ -5809,7 +5786,8 @@ int perf_event_release_kernel(struct perf_event *event)
 		void *var = &child->ctx->refcount;
 
 		list_del(&child->child_list);
-		free_event(child);
+		/* Last reference unless ->pending_task work is pending */
+		put_event(child);
 
 		/*
 		 * Wake any perf_event_free_task() waiting for this event to be
@@ -5820,7 +5798,11 @@ int perf_event_release_kernel(struct perf_event *event)
 	}
 
 no_ctx:
-	put_event(event); /* Must be the 'last' reference */
+	/*
+	 * Last reference unless ->pending_task work is pending on this event
+	 * or any of its children.
+	 */
+	put_event(event);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -7235,12 +7217,6 @@ static void perf_pending_task(struct callback_head *head)
 	struct perf_event *event = container_of(head, struct perf_event, pending_task);
 	int rctx;
 
-	/*
-	 * All accesses to the event must belong to the same implicit RCU read-side
-	 * critical section as the ->pending_work reset. See comment in
-	 * perf_pending_task_sync().
-	 */
-	rcu_read_lock();
 	/*
 	 * If we 'fail' here, that's OK, it means recursion is already disabled
 	 * and we won't recurse 'further'.
@@ -7251,9 +7227,8 @@ static void perf_pending_task(struct callback_head *head)
 		event->pending_work = 0;
 		perf_sigtrap(event);
 		local_dec(&event->ctx->nr_no_switch_fast);
-		rcuwait_wake_up(&event->pending_work_wait);
 	}
-	rcu_read_unlock();
+	put_event(event);
 
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
@@ -10248,6 +10223,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		    !task_work_add(current, &event->pending_task, notify_mode)) {
 			event->pending_work = pending_id;
 			local_inc(&event->ctx->nr_no_switch_fast);
+			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 
 			event->pending_addr = 0;
 			if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
@@ -12610,7 +12586,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	init_irq_work(&event->pending_irq, perf_pending_irq);
 	event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
 	init_task_work(&event->pending_task, perf_pending_task);
-	rcuwait_init(&event->pending_work_wait);
 
 	mutex_init(&event->mmap_mutex);
 	raw_spin_lock_init(&event->addr_filters.lock);
@@ -13747,8 +13722,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
 		 * Kick perf_poll() for is_event_hup();
 		 */
 		perf_event_wakeup(parent_event);
-		free_event(event);
-		put_event(parent_event);
+		put_event(event);
 		return;
 	}
 
@@ -13872,13 +13846,11 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	put_event(parent);
-
 	raw_spin_lock_irq(&ctx->lock);
 	perf_group_detach(event);
 	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
-	free_event(event);
+	put_event(event);
 }
 
 /*
@@ -14016,6 +13988,9 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+	get_ctx(child_ctx);
+	child_event->ctx = child_ctx;
+
 	pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
 	if (IS_ERR(pmu_ctx)) {
 		free_event(child_event);
@@ -14037,8 +14012,6 @@ inherit_event(struct perf_event *parent_event,
 		return NULL;
 	}
 
-	get_ctx(child_ctx);
-
 	/*
 	 * Make the child state follow the state of the parent event,
 	 * not its attr.disabled bit. We hold the parent's mutex,
@@ -14059,7 +14032,6 @@ inherit_event(struct perf_event *parent_event,
 		local64_set(&hwc->period_left, sample_period);
 	}
 
-	child_event->ctx = child_ctx;
 	child_event->overflow_handler = parent_event->overflow_handler;
 	child_event->overflow_handler_context
 		= parent_event->overflow_handler_context;
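The core.c changes above replace a blocking wait (perf_pending_task_sync() plus rcuwait) with plain reference counting: __perf_event_overflow() takes an extra reference before queueing the task work, perf_pending_task() drops it once the work has run, and put_event() now also chains to the parent to match the bump taken in inherit_event(). The standalone C program below is only a rough analogy of that lifetime rule, with invented names (struct event, event_put, queue_pending_work) and ordinary C11 atomics rather than kernel primitives.

/*
 * Standalone analogy (not kernel code): an object with deferred work in
 * flight pins itself with an extra reference before that work is queued,
 * so the release path never has to block waiting for the work to finish.
 * All names here are invented for illustration. Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct event {
	atomic_long refcount;
	struct event *parent;		/* analogous to event->parent */
};

static void event_put(struct event *e)
{
	struct event *parent = e->parent;

	if (atomic_fetch_sub(&e->refcount, 1) != 1)
		return;
	printf("freeing %p\n", (void *)e);
	free(e);
	/* Matches the reference the child holds on its parent. */
	if (parent)
		event_put(parent);
}

/* Deferred work: does its job, then drops the reference it was handed. */
static void *pending_work(void *arg)
{
	struct event *e = arg;

	/* ... pretend to deliver the signal here ... */
	event_put(e);		/* pairs with the grab in queue_pending_work() */
	return NULL;
}

/* Take a reference *before* queueing, so the work keeps the object alive. */
static void queue_pending_work(struct event *e, pthread_t *t)
{
	atomic_fetch_add(&e->refcount, 1);
	pthread_create(t, NULL, pending_work, e);
}

int main(void)
{
	struct event *parent = calloc(1, sizeof(*parent));
	struct event *child = calloc(1, sizeof(*child));
	pthread_t t;

	atomic_init(&parent->refcount, 2);	/* owner + child's reference */
	atomic_init(&child->refcount, 1);	/* owner's reference */
	child->parent = parent;

	queue_pending_work(child, &t);

	event_put(child);	/* release path: just drop our reference */
	event_put(parent);	/* owner drops its own parent reference */

	pthread_join(t, NULL);	/* the worker drops its reference when done */
	return 0;
}

The release path never sleeps; whichever side drops the last reference does the freeing, which is the property the sigtrap hang fix needs.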

kernel/events/uprobes.c

Lines changed: 13 additions & 2 deletions
@@ -1956,6 +1956,9 @@ static void free_ret_instance(struct uprobe_task *utask,
 	 * to-be-reused return instances for future uretprobes. If ri_timer()
 	 * happens to be running right now, though, we fallback to safety and
 	 * just perform RCU-delated freeing of ri.
+	 * Admittedly, this is a rather simple use of seqcount, but it nicely
+	 * abstracts away all the necessary memory barriers, so we use
+	 * a well-supported kernel primitive here.
 	 */
 	if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
 		/* immediate reuse of ri without RCU GP is OK */
@@ -2016,12 +2019,20 @@ static void ri_timer(struct timer_list *timer)
 	/* RCU protects return_instance from freeing. */
 	guard(rcu)();
 
-	write_seqcount_begin(&utask->ri_seqcount);
+	/*
+	 * See free_ret_instance() for notes on seqcount use.
+	 * We also employ raw API variants to avoid lockdep false-positive
+	 * warning complaining about enabled preemption. The timer can only be
+	 * invoked once for a uprobe_task. Therefore there can only be one
+	 * writer. The reader does not require an even sequence count to make
+	 * progress, so it is OK to remain preemptible on PREEMPT_RT.
+	 */
+	raw_write_seqcount_begin(&utask->ri_seqcount);
 
 	for_each_ret_instance_rcu(ri, utask->return_instances)
 		hprobe_expire(&ri->hprobe, false);
 
-	write_seqcount_end(&utask->ri_seqcount);
+	raw_write_seqcount_end(&utask->ri_seqcount);
 }
 
 static struct uprobe_task *alloc_utask(void)
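The ri_timer() comment leans on two properties: there is only ever one writer (the timer), and the reader never waits for an even sequence count, it simply falls back to a slower safe path. The userspace sketch below is only a rough illustration of that shape, with invented names and everything left at the default seq_cst ordering for simplicity; the kernel's seqcount primitive exists precisely to supply the weaker, correct barriers.

/*
 * Rough userspace illustration (not the kernel API): a single writer bumps a
 * sequence counter around its critical section; the reader does a "try begin"
 * check and, if a writer is active or ran concurrently, falls back to a slow
 * safe path instead of spinning.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint seq;
static atomic_int shared_value;

/* Writer side: analogous to raw_write_seqcount_begin()/_end() in ri_timer(). */
static void writer_update(int v)
{
	atomic_fetch_add(&seq, 1);		/* odd: a write is in progress */
	atomic_store(&shared_value, v);
	atomic_fetch_add(&seq, 1);		/* even: the write is complete */
}

/*
 * Reader side: analogous to raw_seqcount_try_begin() plus the retry check in
 * free_ret_instance(). Returns false instead of spinning, so the caller can
 * fall back to a slower safe path (RCU-delayed freeing in the kernel code).
 */
static bool reader_try_fast(int *out)
{
	unsigned int start = atomic_load(&seq);

	if (start & 1)
		return false;			/* writer currently active */
	*out = atomic_load(&shared_value);
	return atomic_load(&seq) == start;	/* did a writer run meanwhile? */
}

int main(void)
{
	int v;

	writer_update(42);
	if (reader_try_fast(&v))
		printf("fast path ok: %d\n", v);
	else
		printf("writer active; taking the slow path\n");
	return 0;
}

reader_try_fast() mirrors what raw_seqcount_try_begin() is used for above: on failure the caller does not spin, it takes the slow path, which in the uprobes code is RCU-delayed freeing.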
