
Commit 576a997

Merge tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:

 - Intel PT support enhancements & fixes
 - Fix leaked SIGTRAP events
 - Improve and fix the Intel uncore driver
 - Add support for Intel HBM and CXL uncore counters
 - Add Intel Lunar Lake and Arrow Lake support
 - AMD uncore driver fixes
 - Make SIGTRAP and __perf_pending_irq() work on RT
 - Micro-optimizations
 - Misc cleanups and fixes

* tag 'perf-core-2024-07-16' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  perf/x86/intel: Add a distinct name for Granite Rapids
  perf/x86/intel/ds: Fix non 0 retire latency on Raptorlake
  perf/x86/intel: Hide Topdown metrics events if the feature is not enumerated
  perf/x86/intel/uncore: Fix the bits of the CHA extended umask for SPR
  perf: Split __perf_pending_irq() out of perf_pending_irq()
  perf: Don't disable preemption in perf_pending_task().
  perf: Move swevent_htable::recursion into task_struct.
  perf: Shrink the size of the recursion counter.
  perf: Enqueue SIGTRAP always via task_work.
  task_work: Add TWA_NMI_CURRENT as an additional notify mode.
  perf: Move irq_work_queue() where the event is prepared.
  perf: Fix event leak upon exec and file release
  perf: Fix event leak upon exit
  task_work: Introduce task_work_cancel() again
  task_work: s/task_work_cancel()/task_work_cancel_func()/
  perf/x86/amd/uncore: Fix DF and UMC domain identification
  perf/x86/amd/uncore: Avoid PMU registration if counters are unavailable
  perf/x86/intel: Support Perfmon MSRs aliasing
  perf/x86/intel: Support PERFEVTSEL extension
  perf/x86: Add config_mask to represent EVENTSEL bitmask
  ...
2 parents: 4a996d9 + fa0c1c9

32 files changed, +1256 −507 lines changed

arch/x86/events/amd/core.c

Lines changed: 15 additions & 13 deletions
@@ -432,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
 	 * be removed on one CPU at a time AND PMU is disabled
 	 * when we come here
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
-		if (cmpxchg(nb->owners + i, event, NULL) == event)
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
+		struct perf_event *tmp = event;
+
+		if (try_cmpxchg(nb->owners + i, &tmp, NULL))
 			break;
 	}
 }
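
The cmpxchg() -> try_cmpxchg() conversion above is a recurring pattern in this series: try_cmpxchg() takes the expected value by reference, updates it on failure, and returns a boolean, which typically lets the compiler use the CPU flags directly instead of a second comparison. Below is a minimal userspace sketch of the release-if-still-owner idiom, using C11 atomics as a stand-in for the kernel primitives; release_slot() and the variable names are illustrative, not part of the patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

/* Release a slot only if it still holds 'owner'.
 *
 * cmpxchg style:     if (cmpxchg(&slot, owner, NULL) == owner) ...
 * try_cmpxchg style: the expected value is passed by reference and is
 *                    overwritten with the current value when the exchange fails.
 */
static bool release_slot(_Atomic(void *) *slot, void *owner)
{
	void *expected = owner;   /* mirrors: struct perf_event *tmp = event; */

	return atomic_compare_exchange_strong(slot, &expected, NULL);
}

int main(void)
{
	int event;                      /* placeholder for a struct perf_event */
	_Atomic(void *) slot = &event;  /* slot currently owned by 'event' */

	return release_slot(&slot, &event) ? 0 : 1;  /* succeeds; slot is now NULL */
}
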
@@ -499,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
 	 * because of successive calls to x86_schedule_events() from
 	 * hw_perf_group_sched_in() without hw_perf_enable()
 	 */
-	for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+	for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
 		if (new == -1 || hwc->idx == idx)
 			/* assign free slot, prefer hwc->idx */
 			old = cmpxchg(nb->owners + idx, NULL, event);
@@ -542,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
 	/*
 	 * initialize all possible NB constraints
 	 */
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		__set_bit(i, nb->event_constraints[i].idxmsk);
 		nb->event_constraints[i].weight = 1;
 	}
@@ -735,7 +737,7 @@ static void amd_pmu_check_overflow(void)
 	 * counters are always enabled when this function is called and
 	 * ARCH_PERFMON_EVENTSEL_INT is always set.
 	 */
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -755,7 +757,7 @@ static void amd_pmu_enable_all(int added)
 
 	amd_brs_enable_all();
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		/* only activate events which are marked as active */
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
@@ -978,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 	/* Clear any reserved bits set by buggy microcode */
 	status &= amd_pmu_global_cntr_mask;
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1313,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.addr_offset		= amd_pmu_addr_offset,
 	.event_map		= amd_pmu_event_map,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= AMD64_NUM_COUNTERS,
+	.cntr_mask64		= GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
 	.add			= amd_pmu_add_event,
 	.del			= amd_pmu_del_event,
 	.cntval_bits		= 48,
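
Replacing the scalar num_counters with the cntr_mask64 bitmask means generic code no longer has to assume the usable counters are 0..n-1: GENMASK_ULL(n - 1, 0) reproduces the dense case, and iteration becomes a walk over the set bits (the for_each_set_bit() loops above). The following is a simplified userspace sketch of the idea; GENMASK_ULL and the bit walk are re-implemented here for illustration only, the kernel versions live in linux/bits.h and linux/find.h.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the kernel's GENMASK_ULL(h, l): bits l..h set. */
#define GENMASK_ULL(h, l)  (((~0ULL) >> (63 - (h))) & ((~0ULL) << (l)))

int main(void)
{
	/* Dense mask, as in .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0). */
	uint64_t cntr_mask = GENMASK_ULL(5, 0);   /* counters 0..5 */

	/* A sparse mask (e.g. counter 2 unavailable) works the same way. */
	cntr_mask &= ~(1ULL << 2);

	/* Userspace equivalent of for_each_set_bit(idx, mask, X86_PMC_IDX_MAX). */
	for (int idx = 0; idx < 64; idx++) {
		if (cntr_mask & (1ULL << idx))
			printf("programming counter %d\n", idx);
	}
	return 0;
}
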
@@ -1412,7 +1414,7 @@ static int __init amd_core_pmu_init(void)
 	 */
 	x86_pmu.eventsel	= MSR_F15H_PERF_CTL;
 	x86_pmu.perfctr		= MSR_F15H_PERF_CTR;
-	x86_pmu.num_counters	= AMD64_NUM_COUNTERS_CORE;
+	x86_pmu.cntr_mask64	= GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
 
 	/* Check for Performance Monitoring v2 support */
 	if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1422,9 +1424,9 @@ static int __init amd_core_pmu_init(void)
 		x86_pmu.version = 2;
 
 		/* Find the number of available Core PMCs */
-		x86_pmu.num_counters = ebx.split.num_core_pmc;
+		x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
 
-		amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
+		amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
 
 		/* Update PMC handling functions */
 		x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1452,12 +1454,12 @@ static int __init amd_core_pmu_init(void)
 	 * even numbered counter that has a consecutive adjacent odd
 	 * numbered counter following it.
 	 */
-	for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
+	for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
 		even_ctr_mask |= BIT_ULL(i);
 
 	pair_constraint = (struct event_constraint)
 			  __EVENT_CONSTRAINT(0, even_ctr_mask, 0,
-					     x86_pmu.num_counters / 2, 0,
+					     x86_pmu_max_num_counters(NULL) / 2, 0,
 					     PERF_X86_EVENT_PAIR);
 
 	x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;
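
x86_pmu_max_num_counters(NULL), used above in place of x86_pmu.num_counters, is a helper added by this series. With a bitmask representation, the natural way to recover such a count is the position of the highest set bit plus one (i.e. fls64() of the mask), which equals the old num_counters whenever the mask is dense. The sketch below only illustrates that relationship in userspace; it is not the helper's actual kernel implementation.

#include <assert.h>
#include <stdint.h>

/* Illustrative fls64(): index of the highest set bit, counted from 1. */
static int fls64_demo(uint64_t x)
{
	int n = 0;

	while (x) {
		x >>= 1;
		n++;
	}
	return n;
}

int main(void)
{
	uint64_t cntr_mask64 = (1ULL << 6) - 1;	/* six counters, bits 0..5 */

	/* For a dense mask, the derived maximum equals the old num_counters. */
	assert(fls64_demo(cntr_mask64) == 6);
	return 0;
}
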

arch/x86/events/amd/uncore.c

Lines changed: 23 additions & 13 deletions
@@ -162,7 +162,9 @@ static int amd_uncore_add(struct perf_event *event, int flags)
 	/* if not, take the first available counter */
 	hwc->idx = -1;
 	for (i = 0; i < pmu->num_counters; i++) {
-		if (cmpxchg(&ctx->events[i], NULL, event) == NULL) {
+		struct perf_event *tmp = NULL;
+
+		if (try_cmpxchg(&ctx->events[i], &tmp, event)) {
 			hwc->idx = i;
 			break;
 		}
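
amd_uncore_add() above claims the first free per-context counter slot, and amd_uncore_del() below releases it, both with the same try_cmpxchg() pattern. Here is a small userspace sketch of the claim side, again with C11 atomics standing in for the kernel primitive; claim_slot(), NUM_COUNTERS and the event placeholder are illustrative.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

#define NUM_COUNTERS 4		/* illustrative; the driver reads this per PMU */

/*
 * Claim the first free slot: succeed only if events[i] is still NULL,
 * mirroring try_cmpxchg(&ctx->events[i], &tmp, event) in amd_uncore_add().
 */
static int claim_slot(_Atomic(void *) events[], void *event)
{
	for (int i = 0; i < NUM_COUNTERS; i++) {
		void *tmp = NULL;

		if (atomic_compare_exchange_strong(&events[i], &tmp, event))
			return i;	/* this slot is now ours */
	}
	return -1;			/* all counters busy */
}

int main(void)
{
	_Atomic(void *) events[NUM_COUNTERS] = { NULL };
	int dummy_event;		/* placeholder for a struct perf_event */

	printf("claimed slot %d\n", claim_slot(events, &dummy_event));
	return 0;
}
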
@@ -196,7 +198,9 @@ static void amd_uncore_del(struct perf_event *event, int flags)
 	event->pmu->stop(event, PERF_EF_UPDATE);
 
 	for (i = 0; i < pmu->num_counters; i++) {
-		if (cmpxchg(&ctx->events[i], event, NULL) == event)
+		struct perf_event *tmp = event;
+
+		if (try_cmpxchg(&ctx->events[i], &tmp, NULL))
 			break;
 	}
 
@@ -639,7 +643,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
 	info.split.aux_data = 0;
 	info.split.num_pmcs = NUM_COUNTERS_NB;
 	info.split.gid = 0;
-	info.split.cid = topology_die_id(cpu);
+	info.split.cid = topology_logical_package_id(cpu);
 
 	if (pmu_version >= 2) {
 		ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
@@ -654,17 +658,20 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 {
 	struct attribute **df_attr = amd_uncore_df_format_attr;
 	struct amd_uncore_pmu *pmu;
+	int num_counters;
 
 	/* Run just once */
 	if (uncore->init_done)
 		return amd_uncore_ctx_init(uncore, cpu);
 
+	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+	if (!num_counters)
+		goto done;
+
 	/* No grouping, single instance for a system */
 	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
-	if (!uncore->pmus) {
-		uncore->num_pmus = 0;
+	if (!uncore->pmus)
 		goto done;
-	}
 
 	/*
 	 * For Family 17h and above, the Northbridge counters are repurposed
@@ -674,7 +681,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 	pmu = &uncore->pmus[0];
 	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb",
 		sizeof(pmu->name));
-	pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+	pmu->num_counters = num_counters;
 	pmu->msr_base = MSR_F15H_NB_PERF_CTL;
 	pmu->rdpmc_base = RDPMC_BASE_NB;
 	pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -785,17 +792,20 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 {
 	struct attribute **l3_attr = amd_uncore_l3_format_attr;
 	struct amd_uncore_pmu *pmu;
+	int num_counters;
 
 	/* Run just once */
 	if (uncore->init_done)
 		return amd_uncore_ctx_init(uncore, cpu);
 
+	num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+	if (!num_counters)
+		goto done;
+
 	/* No grouping, single instance for a system */
 	uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL);
-	if (!uncore->pmus) {
-		uncore->num_pmus = 0;
+	if (!uncore->pmus)
 		goto done;
-	}
 
 	/*
 	 * For Family 17h and above, L3 cache counters are available instead
@@ -805,7 +815,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 	pmu = &uncore->pmus[0];
 	strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2",
 		sizeof(pmu->name));
-	pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu);
+	pmu->num_counters = num_counters;
 	pmu->msr_base = MSR_F16H_L2I_PERF_CTL;
 	pmu->rdpmc_base = RDPMC_BASE_LLC;
 	pmu->group = amd_uncore_ctx_gid(uncore, cpu);
@@ -893,8 +903,8 @@ void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
 	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
 	info.split.aux_data = ecx;	/* stash active mask */
 	info.split.num_pmcs = ebx.split.num_umc_pmc;
-	info.split.gid = topology_die_id(cpu);
-	info.split.cid = topology_die_id(cpu);
+	info.split.gid = topology_logical_package_id(cpu);
+	info.split.cid = topology_logical_package_id(cpu);
 	*per_cpu_ptr(uncore->info, cpu) = info;
 }
 
