Commit 766331f

Merge tag 'perf-urgent-2025-02-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf event fixes from Ingo Molnar:
 "Miscellaneous perf events fixes and a minor HW enablement change:

   - Fix missing RCU protection in perf_iterate_ctx()

   - Fix pmu_ctx_list ordering bug

   - Reject the zero page in uprobes

   - Fix a family of bugs related to low frequency sampling

   - Add Intel Arrow Lake U CPUs to the generic Arrow Lake RAPL support
     table

   - Fix a lockdep-assert false positive in uretprobes"

* tag 'perf-urgent-2025-02-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  uprobes: Remove too strict lockdep_assert() condition in hprobe_expire()
  perf/x86/rapl: Add support for Intel Arrow Lake U
  perf/x86/intel: Use better start period for frequency mode
  perf/core: Fix low freq setting via IOC_PERIOD
  perf/x86: Fix low freqency setting issue
  uprobes: Reject the shared zeropage in uprobe_write_opcode()
  perf/core: Order the PMU list to fix warning about unordered pmu_ctx_list
  perf/core: Add RCU read lock protection to perf_iterate_ctx()
2 parents ad69e02 + f8c8572 commit 766331f
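
Several of the fixes pulled in here concern frequency-mode sampling, which user space selects by setting attr.freq together with attr.sample_freq, and can later adjust through the PERF_EVENT_IOC_PERIOD ioctl (the "low freq setting via IOC_PERIOD" item is about that path). Below is a minimal user-space sketch of that setup, assuming a Linux toolchain with the perf UAPI headers; error handling is trimmed and the 4000 Hz / 100 Hz values are only illustrative, not anything mandated by these patches.

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned long long new_freq = 100;	/* a deliberately low frequency */
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.freq = 1;				/* frequency mode ...            */
	attr.sample_freq = 4000;		/* ... at 4000 samples per second */
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	/* Measure the current task on any CPU. */
	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	/*
	 * In frequency mode, the value passed to PERF_EVENT_IOC_PERIOD is
	 * treated as a new sample_freq; this is the ioctl path the
	 * "low freq setting via IOC_PERIOD" fix hardens.
	 */
	if (ioctl(fd, PERF_EVENT_IOC_PERIOD, &new_freq))
		perror("PERF_EVENT_IOC_PERIOD");

	close(fd);
	return 0;
}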

File tree: 5 files changed, 119 insertions(+), 15 deletions(-)

arch/x86/events/core.c

Lines changed: 1 addition & 1 deletion
@@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type == event->pmu->type)
 		event->hw.config |= x86_pmu_get_event_config(event);
 
-	if (event->attr.sample_period && x86_pmu.limit_period) {
+	if (!event->attr.freq && x86_pmu.limit_period) {
 		s64 left = event->attr.sample_period;
 		x86_pmu.limit_period(event, &left);
 		if (left > event->attr.sample_period)
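
For context on the one-line change above: in the perf UAPI, sample_period and sample_freq are members of the same union inside struct perf_event_attr, so in frequency mode attr.sample_period reads back as the requested frequency and is non-zero, which made the old condition run limit_period on a value that is not a period at all. A tiny user-space demonstration of that aliasing, assuming <linux/perf_event.h> is available:

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.freq = 1;
	attr.sample_freq = 4000;	/* frequency mode */

	/*
	 * sample_period and sample_freq share a union, so sample_period
	 * reads back as 4000 here even though no period was requested --
	 * which is why the old check also fired in frequency mode.
	 */
	printf("attr.sample_period = %llu\n",
	       (unsigned long long)attr.sample_period);
	return 0;
}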

arch/x86/events/intel/core.c

Lines changed: 85 additions & 0 deletions
@@ -3952,6 +3952,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
 	return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
 }
 
+static u64 intel_pmu_freq_start_period(struct perf_event *event)
+{
+	int type = event->attr.type;
+	u64 config, factor;
+	s64 start;
+
+	/*
+	 * The 127 is the lowest possible recommended SAV (sample after value)
+	 * for a 4000 freq (default freq), according to the event list JSON file.
+	 * Also, assume the workload is idle 50% time.
+	 */
+	factor = 64 * 4000;
+	if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
+		goto end;
+
+	/*
+	 * The estimation of the start period in the freq mode is
+	 * based on the below assumption.
+	 *
+	 * For a cycles or an instructions event, 1GHZ of the
+	 * underlying platform, 1 IPC. The workload is idle 50% time.
+	 * The start period = 1,000,000,000 * 1 / freq / 2.
+	 *                  = 500,000,000 / freq
+	 *
+	 * Usually, the branch-related events occur less than the
+	 * instructions event. According to the Intel event list JSON
+	 * file, the SAV (sample after value) of a branch-related event
+	 * is usually 1/4 of an instruction event.
+	 * The start period of branch-related events = 125,000,000 / freq.
+	 *
+	 * The cache-related events occurs even less. The SAV is usually
+	 * 1/20 of an instruction event.
+	 * The start period of cache-related events = 25,000,000 / freq.
+	 */
+	config = event->attr.config & PERF_HW_EVENT_MASK;
+	if (type == PERF_TYPE_HARDWARE) {
+		switch (config) {
+		case PERF_COUNT_HW_CPU_CYCLES:
+		case PERF_COUNT_HW_INSTRUCTIONS:
+		case PERF_COUNT_HW_BUS_CYCLES:
+		case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
+		case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
+		case PERF_COUNT_HW_REF_CPU_CYCLES:
+			factor = 500000000;
+			break;
+		case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+		case PERF_COUNT_HW_BRANCH_MISSES:
+			factor = 125000000;
+			break;
+		case PERF_COUNT_HW_CACHE_REFERENCES:
+		case PERF_COUNT_HW_CACHE_MISSES:
+			factor = 25000000;
+			break;
+		default:
+			goto end;
+		}
+	}
+
+	if (type == PERF_TYPE_HW_CACHE)
+		factor = 25000000;
+end:
+	/*
+	 * Usually, a prime or a number with less factors (close to prime)
+	 * is chosen as an SAV, which makes it less likely that the sampling
+	 * period synchronizes with some periodic event in the workload.
+	 * Minus 1 to make it at least avoiding values near power of twos
+	 * for the default freq.
+	 */
+	start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
+
+	if (start > x86_pmu.max_period)
+		start = x86_pmu.max_period;
+
+	if (x86_pmu.limit_period)
+		x86_pmu.limit_period(event, &start);
+
+	return start;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -3963,6 +4042,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
+	if (event->attr.freq && event->attr.sample_freq) {
+		event->hw.sample_period = intel_pmu_freq_start_period(event);
+		event->hw.last_period = event->hw.sample_period;
+		local64_set(&event->hw.period_left, event->hw.sample_period);
+	}
+
 	if (event->attr.precise_ip) {
 		if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
 			return -EINVAL;
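
To make the estimation above concrete: with the default 4000 Hz sampling frequency, the heuristic yields a start period of 124,999 for cycles/instruction-class events (500,000,000 / 4000 - 1), 31,249 for branch events, 6,249 for cache events, and 63 for everything else (64 * 4000 / 4000 - 1). The following user-space sketch re-implements only the factor selection and the final rounding step; the max_period clamp and the limit_period hook are deliberately left out, and the helper names are illustrative.

#include <stdint.h>
#include <stdio.h>

/* Same rounding as the kernel's DIV_ROUND_UP_ULL(). */
static uint64_t div_round_up(uint64_t n, uint64_t d)
{
	return (n + d - 1) / d;
}

static uint64_t start_period(uint64_t factor, uint64_t sample_freq)
{
	/* Mirrors: start = DIV_ROUND_UP_ULL(factor, sample_freq) - 1; */
	return div_round_up(factor, sample_freq) - 1;
}

int main(void)
{
	const uint64_t freq = 4000;	/* default perf sampling frequency */

	/* cycles/instructions-class events: factor = 500,000,000 */
	printf("cycles-like:  %llu\n",
	       (unsigned long long)start_period(500000000ULL, freq));	/* 124999 */
	/* branch-class events: factor = 125,000,000 */
	printf("branch-like:  %llu\n",
	       (unsigned long long)start_period(125000000ULL, freq));	/* 31249 */
	/* cache-class events: factor = 25,000,000 */
	printf("cache-like:   %llu\n",
	       (unsigned long long)start_period(25000000ULL, freq));	/* 6249 */
	/* anything else: factor = 64 * 4000 = 256,000 */
	printf("other events: %llu\n",
	       (unsigned long long)start_period(64ULL * 4000, freq));	/* 63 */
	return 0;
}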

arch/x86/events/rapl.c

Lines changed: 1 addition & 0 deletions
@@ -879,6 +879,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
 	X86_MATCH_VFM(INTEL_METEORLAKE_L,	&model_skl),
 	X86_MATCH_VFM(INTEL_ARROWLAKE_H,	&model_skl),
 	X86_MATCH_VFM(INTEL_ARROWLAKE,		&model_skl),
+	X86_MATCH_VFM(INTEL_ARROWLAKE_U,	&model_skl),
 	X86_MATCH_VFM(INTEL_LUNARLAKE_M,	&model_skl),
 	{},
 };

kernel/events/core.c

Lines changed: 20 additions & 11 deletions
@@ -4950,7 +4950,7 @@ static struct perf_event_pmu_context *
 find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
		      struct perf_event *event)
 {
-	struct perf_event_pmu_context *new = NULL, *epc;
+	struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc;
 	void *task_ctx_data = NULL;
 
 	if (!ctx->task) {
@@ -5007,12 +5007,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 			atomic_inc(&epc->refcount);
 			goto found_epc;
 		}
+		/* Make sure the pmu_ctx_list is sorted by PMU type: */
+		if (!pos && epc->pmu->type > pmu->type)
+			pos = epc;
 	}
 
 	epc = new;
 	new = NULL;
 
-	list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+	if (!pos)
+		list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
+	else
+		list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev);
+
 	epc->ctx = ctx;
 
 found_epc:
@@ -5962,14 +5969,15 @@ static int _perf_event_period(struct perf_event *event, u64 value)
 	if (!value)
 		return -EINVAL;
 
-	if (event->attr.freq && value > sysctl_perf_event_sample_rate)
-		return -EINVAL;
-
-	if (perf_event_check_period(event, value))
-		return -EINVAL;
-
-	if (!event->attr.freq && (value & (1ULL << 63)))
-		return -EINVAL;
+	if (event->attr.freq) {
+		if (value > sysctl_perf_event_sample_rate)
+			return -EINVAL;
+	} else {
+		if (perf_event_check_period(event, value))
+			return -EINVAL;
+		if (value & (1ULL << 63))
+			return -EINVAL;
+	}
 
 	event_function_call(event, __perf_event_period, &value);
 
@@ -8321,7 +8329,8 @@ void perf_event_exec(void)
 
 	perf_event_enable_on_exec(ctx);
 	perf_event_remove_on_exec(ctx);
-	perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true);
+	scoped_guard(rcu)
+		perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true);
 
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
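
The sorted insertion above leans on list.h semantics that are easy to misread: list_add() links the new node immediately after the node it is given, so passing pos->pmu_ctx_entry.prev places the new context right before pos, keeping pmu_ctx_list ordered by pmu->type, while list_add_tail() appends when no larger type was found. Below is a self-contained user-space sketch of that insertion step; the list helpers are simplified stand-ins modeled on include/linux/list.h, and the struct and function names are illustrative, not the kernel's.

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_add(struct list_head *new, struct list_head *head)
{
	/* Insert @new right after @head (kernel list_add() semantics). */
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	list_add(new, head->prev);	/* i.e. insert just before @head */
}

struct epc { int type; struct list_head entry; };

/*
 * Insert @new so the list stays sorted by ->type, mirroring the insertion
 * step in find_get_pmu_context(): remember the first entry with a larger
 * type and link the new one just before it, or append if none was found.
 */
static void insert_sorted(struct list_head *list, struct epc *new)
{
	struct list_head *it;
	struct epc *pos = NULL;

	for (it = list->next; it != list; it = it->next) {
		struct epc *e = (struct epc *)((char *)it - offsetof(struct epc, entry));

		if (!pos && e->type > new->type) {
			pos = e;
			break;
		}
	}
	if (!pos)
		list_add_tail(&new->entry, list);
	else
		list_add(&new->entry, pos->entry.prev);
}

int main(void)
{
	struct list_head list;
	struct epc a = { .type = 1 }, b = { .type = 4 }, c = { .type = 3 };
	struct list_head *it;

	list_init(&list);
	insert_sorted(&list, &a);
	insert_sorted(&list, &b);
	insert_sorted(&list, &c);	/* lands between 1 and 4 */

	for (it = list.next; it != &list; it = it->next)	/* prints: 1 3 4 */
		printf("%d ", ((struct epc *)((char *)it - offsetof(struct epc, entry)))->type);
	printf("\n");
	return 0;
}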

kernel/events/uprobes.c

Lines changed: 12 additions & 3 deletions
@@ -495,6 +495,11 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (ret <= 0)
 		goto put_old;
 
+	if (is_zero_page(old_page)) {
+		ret = -EINVAL;
+		goto put_old;
+	}
+
 	if (WARN(!is_register && PageCompound(old_page),
 		 "uprobe unregister should never work on compound page\n")) {
 		ret = -EINVAL;
@@ -762,10 +767,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get)
 	enum hprobe_state hstate;
 
 	/*
-	 * return_instance's hprobe is protected by RCU.
-	 * Underlying uprobe is itself protected from reuse by SRCU.
+	 * Caller should guarantee that return_instance is not going to be
+	 * freed from under us. This can be achieved either through holding
+	 * rcu_read_lock() or by owning return_instance in the first place.
+	 *
+	 * Underlying uprobe is itself protected from reuse by SRCU, so ensure
+	 * SRCU lock is held properly.
 	 */
-	lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu));
+	lockdep_assert(srcu_read_lock_held(&uretprobes_srcu));
 
 	hstate = READ_ONCE(hprobe->state);
 	switch (hstate) {
