Skip to content

Commit aa89592

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-disable-preemption-in-perf_event_output-helpers-code'
Jiri Olsa says: ==================== bpf: Disable preemption in perf_event_output helpers code hi, we got report of kernel crash [1][3] within bpf_event_output helper. The reason is the nesting protection code in bpf_event_output that expects disabled preemption, which is not guaranteed for programs executed by bpf_prog_run_array_cg. I managed to reproduce on tracing side where we have the same problem in bpf_perf_event_output. The reproducer [2] just creates busy uprobe and call bpf_perf_event_output helper a lot. v3 changes: - added acks and fixed 'Fixes' tag style [Hou Tao] - added Closes tag to patch 2 v2 changes: - I changed 'Fixes' commits to where I saw we switched from preempt_disable to migrate_disable, but I'm not completely sure about the patch 2, because it was tricky to find, would be nice if somebody could check on that thanks, jirka [1] cilium/cilium#26756 [2] https://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git/commit/?h=bpf_output_fix_reproducer&id=8054dcc634121b884c7c331329d61d93351d03b5 [3] slack: [66194.378161] BUG: kernel NULL pointer dereference, address: 0000000000000001 [66194.378324] #PF: supervisor instruction fetch in kernel mode [66194.378447] #PF: error_code(0x0010) - not-present page ... [66194.378692] Oops: 0010 [#1] PREEMPT SMP NOPTI ... [66194.380666] <TASK> [66194.380775] ? perf_output_sample+0x12a/0x9a0 [66194.380902] ? finish_task_switch.isra.0+0x81/0x280 [66194.381024] ? perf_event_output+0x66/0xa0 [66194.381148] ? bpf_event_output+0x13a/0x190 [66194.381270] ? bpf_event_output_data+0x22/0x40 [66194.381391] ? bpf_prog_dfc84bbde731b257_cil_sock4_connect+0x40a/0xacb [66194.381519] ? xa_load+0x87/0xe0 [66194.381635] ? __cgroup_bpf_run_filter_sock_addr+0xc1/0x1a0 [66194.381759] ? release_sock+0x3e/0x90 [66194.381876] ? sk_setsockopt+0x1a1/0x12f0 [66194.381996] ? udp_pre_connect+0x36/0x50 [66194.382114] ? inet_dgram_connect+0x93/0xa0 [66194.382233] ? __sys_connect+0xb4/0xe0 [66194.382353] ? udp_setsockopt+0x27/0x40 [66194.382470] ? __pfx_udp_push_pending_frames+0x10/0x10 [66194.382593] ? __sys_setsockopt+0xdf/0x1a0 [66194.382713] ? __x64_sys_connect+0xf/0x20 [66194.382832] ? do_syscall_64+0x3a/0x90 [66194.382949] ? entry_SYSCALL_64_after_hwframe+0x72/0xdc [66194.383077] </TASK> --- ==================== Link: https://lore.kernel.org/r/20230725084206.580930-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2 parents 284779d + d62cc39 commit aa89592

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

kernel/trace/bpf_trace.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -661,16 +661,19 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level);
661661
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
662662
u64, flags, void *, data, u64, size)
663663
{
664-
struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
665-
int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
664+
struct bpf_trace_sample_data *sds;
666665
struct perf_raw_record raw = {
667666
.frag = {
668667
.size = size,
669668
.data = data,
670669
},
671670
};
672671
struct perf_sample_data *sd;
673-
int err;
672+
int nest_level, err;
673+
674+
preempt_disable();
675+
sds = this_cpu_ptr(&bpf_trace_sds);
676+
nest_level = this_cpu_inc_return(bpf_trace_nest_level);
674677

675678
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
676679
err = -EBUSY;
@@ -688,9 +691,9 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
688691
perf_sample_save_raw_data(sd, &raw);
689692

690693
err = __bpf_perf_event_output(regs, map, flags, sd);
691-
692694
out:
693695
this_cpu_dec(bpf_trace_nest_level);
696+
preempt_enable();
694697
return err;
695698
}
696699

@@ -715,7 +718,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
715718
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
716719
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
717720
{
718-
int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
719721
struct perf_raw_frag frag = {
720722
.copy = ctx_copy,
721723
.size = ctx_size,
@@ -732,8 +734,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
732734
};
733735
struct perf_sample_data *sd;
734736
struct pt_regs *regs;
737+
int nest_level;
735738
u64 ret;
736739

740+
preempt_disable();
741+
nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
742+
737743
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
738744
ret = -EBUSY;
739745
goto out;
@@ -748,6 +754,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
748754
ret = __bpf_perf_event_output(regs, map, flags, sd);
749755
out:
750756
this_cpu_dec(bpf_event_output_nest_level);
757+
preempt_enable();
751758
return ret;
752759
}
753760

0 commit comments

Comments
 (0)