Skip to content

Commit b555d19

Browse files
committed
Merge tag 'perf-tools-fixes-for-v6.8-1-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools fixes from Arnaldo Carvalho de Melo: "Vendor events: - Intel Alderlake/Sapphire Rapids metric fixes, the CPU type ("cpu_atom", "cpu_core") needs to be used as a prefix to be considered on a metric formula, detected via one of the 'perf test' entries. 'perf test' fixes: - Fix the creation of event selector lists on 'perf test' entries, by initializing the sample ID flag, which is done by 'perf record', so this fix affects only the tests, the common case isn't affected - Make 'perf list' respect debug settings (-v) to fix its 'perf test' entry - Fix 'perf script' test when python support isn't enabled - Special case 'perf script' tests on s390, where only DWARF call graphs are supported and only on software events - Make 'perf daemon' signal test less racy Compiler warnings/errors: - Remove needless malloc(0) call in 'perf top' that triggers -Walloc-size - Fix calloc() argument order to address error introduced in gcc-14 Build: - Make minimal shellcheck version to v0.6.0, avoiding the build to fail with older versions Sync kernel header copies: - stat.h to pick STATX_MNT_ID_UNIQUE - msr-index.h to pick IA32_MKTME_KEYID_PARTITIONING - drm.h to pick DRM_IOCTL_MODE_CLOSEFB - unistd.h to pick {list,stat}mount, lsm_{[gs]et_self_attr,list_modules} syscall numbers - x86 cpufeatures to pick TDX, Zen, APIC MSR fence changes - x86's mem{cpy,set}_64.S used in 'perf bench' - Also, without tooling effects: asm-generic/unaligned.h, mount.h, fcntl.h, kvm headers" * tag 'perf-tools-fixes-for-v6.8-1-2024-02-01' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (21 commits) perf tools headers: update the asm-generic/unaligned.h copy with the kernel sources tools include UAPI: Sync linux/mount.h copy with the kernel sources perf evlist: Fix evlist__new_default() for > 1 core PMU tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench' tools headers x86 cpufeatures: Sync with the kernel sources to pick TDX, Zen, APIC MSR fence 
changes tools headers UAPI: Sync unistd.h to pick {list,stat}mount, lsm_{[gs]et_self_attr,list_modules} syscall numbers perf vendor events intel: Alderlake/sapphirerapids metric fixes tools headers UAPI: Sync kvm headers with the kernel sources perf tools: Fix calloc() arguments to address error introduced in gcc-14 perf top: Remove needless malloc(0) call that triggers -Walloc-size perf build: Make minimal shellcheck version to v0.6.0 tools headers UAPI: Update tools's copy of drm.h headers to pick DRM_IOCTL_MODE_CLOSEFB perf test shell daemon: Make signal test less racy perf test shell script: Fix test for python being disabled perf test: Workaround debug output in list test perf list: Add output file option perf list: Switch error message to pr_err() to respect debug settings (-v) perf test: Fix 'perf script' tests on s390 tools headers UAPI: Sync linux/fcntl.h with the kernel sources tools arch x86: Sync the msr-index.h copy with the kernel sources to pick IA32_MKTME_KEYID_PARTITIONING ...
2 parents 56897d5 + fdd0ae7 commit b555d19

File tree

31 files changed

+588
-383
lines changed

31 files changed

+588
-383
lines changed

tools/arch/x86/include/asm/cpufeatures.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@
198198
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
199199
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
200200
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
201+
#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
201202
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
202203
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
203204
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
@@ -308,10 +309,14 @@
308309
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
309310
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
310311
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
311-
312312
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
313313
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
314314
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
315+
#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
316+
#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
317+
#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
318+
#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
319+
#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
315320

316321
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
317322
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
@@ -495,6 +500,7 @@
495500
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
496501
#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
497502
#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
503+
#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
498504

499505
/* BUG word 2 */
500506
#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */

tools/arch/x86/include/asm/msr-index.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,11 @@
237237
#define LBR_INFO_CYCLES 0xffff
238238
#define LBR_INFO_BR_TYPE_OFFSET 56
239239
#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
240+
#define LBR_INFO_BR_CNTR_OFFSET 32
241+
#define LBR_INFO_BR_CNTR_NUM 4
242+
#define LBR_INFO_BR_CNTR_BITS 2
243+
#define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0)
244+
#define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0)
240245

241246
#define MSR_ARCH_LBR_CTL 0x000014ce
242247
#define ARCH_LBR_CTL_LBREN BIT(0)
@@ -536,6 +541,9 @@
536541
#define MSR_RELOAD_PMC0 0x000014c1
537542
#define MSR_RELOAD_FIXED_CTR0 0x00001309
538543

544+
/* KeyID partitioning between MKTME and TDX */
545+
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
546+
539547
/*
540548
* AMD64 MSRs. Not complete. See the architecture manual for a more
541549
* complete list.

tools/arch/x86/include/uapi/asm/kvm.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,4 +562,7 @@ struct kvm_pmu_event_filter {
562562
/* x86-specific KVM_EXIT_HYPERCALL flags. */
563563
#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
564564

565+
#define KVM_X86_DEFAULT_VM 0
566+
#define KVM_X86_SW_PROTECTED_VM 1
567+
565568
#endif /* _ASM_X86_KVM_H */

tools/arch/x86/lib/memcpy_64.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
/* SPDX-License-Identifier: GPL-2.0-only */
22
/* Copyright 2002 Andi Kleen */
33

4+
#include <linux/export.h>
45
#include <linux/linkage.h>
56
#include <asm/errno.h>
67
#include <asm/cpufeatures.h>
78
#include <asm/alternative.h>
8-
#include <asm/export.h>
99

1010
.section .noinstr.text, "ax"
1111

@@ -39,7 +39,7 @@ SYM_TYPED_FUNC_START(__memcpy)
3939
SYM_FUNC_END(__memcpy)
4040
EXPORT_SYMBOL(__memcpy)
4141

42-
SYM_FUNC_ALIAS(memcpy, __memcpy)
42+
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
4343
EXPORT_SYMBOL(memcpy)
4444

4545
SYM_FUNC_START_LOCAL(memcpy_orig)

tools/arch/x86/lib/memset_64.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
/* SPDX-License-Identifier: GPL-2.0 */
22
/* Copyright 2002 Andi Kleen, SuSE Labs */
33

4+
#include <linux/export.h>
45
#include <linux/linkage.h>
56
#include <asm/cpufeatures.h>
67
#include <asm/alternative.h>
7-
#include <asm/export.h>
88

99
.section .noinstr.text, "ax"
1010

@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
4040
SYM_FUNC_END(__memset)
4141
EXPORT_SYMBOL(__memset)
4242

43-
SYM_FUNC_ALIAS(memset, __memset)
43+
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
4444
EXPORT_SYMBOL(memset)
4545

4646
SYM_FUNC_START_LOCAL(memset_orig)

tools/include/asm-generic/unaligned.h

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,9 @@ static inline u32 get_unaligned_le24(const void *p)
105105

106106
static inline void __put_unaligned_be24(const u32 val, u8 *p)
107107
{
108-
*p++ = val >> 16;
109-
*p++ = val >> 8;
110-
*p++ = val;
108+
*p++ = (val >> 16) & 0xff;
109+
*p++ = (val >> 8) & 0xff;
110+
*p++ = val & 0xff;
111111
}
112112

113113
static inline void put_unaligned_be24(const u32 val, void *p)
@@ -117,9 +117,9 @@ static inline void put_unaligned_be24(const u32 val, void *p)
117117

118118
static inline void __put_unaligned_le24(const u32 val, u8 *p)
119119
{
120-
*p++ = val;
121-
*p++ = val >> 8;
122-
*p++ = val >> 16;
120+
*p++ = val & 0xff;
121+
*p++ = (val >> 8) & 0xff;
122+
*p++ = (val >> 16) & 0xff;
123123
}
124124

125125
static inline void put_unaligned_le24(const u32 val, void *p)
@@ -129,12 +129,12 @@ static inline void put_unaligned_le24(const u32 val, void *p)
129129

130130
static inline void __put_unaligned_be48(const u64 val, u8 *p)
131131
{
132-
*p++ = val >> 40;
133-
*p++ = val >> 32;
134-
*p++ = val >> 24;
135-
*p++ = val >> 16;
136-
*p++ = val >> 8;
137-
*p++ = val;
132+
*p++ = (val >> 40) & 0xff;
133+
*p++ = (val >> 32) & 0xff;
134+
*p++ = (val >> 24) & 0xff;
135+
*p++ = (val >> 16) & 0xff;
136+
*p++ = (val >> 8) & 0xff;
137+
*p++ = val & 0xff;
138138
}
139139

140140
static inline void put_unaligned_be48(const u64 val, void *p)

tools/include/uapi/asm-generic/unistd.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,8 +829,21 @@ __SYSCALL(__NR_futex_wait, sys_futex_wait)
829829
#define __NR_futex_requeue 456
830830
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)
831831

832+
#define __NR_statmount 457
833+
__SYSCALL(__NR_statmount, sys_statmount)
834+
835+
#define __NR_listmount 458
836+
__SYSCALL(__NR_listmount, sys_listmount)
837+
838+
#define __NR_lsm_get_self_attr 459
839+
__SYSCALL(__NR_lsm_get_self_attr, sys_lsm_get_self_attr)
840+
#define __NR_lsm_set_self_attr 460
841+
__SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
842+
#define __NR_lsm_list_modules 461
843+
__SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
844+
832845
#undef __NR_syscalls
833-
#define __NR_syscalls 457
846+
#define __NR_syscalls 462
834847

835848
/*
836849
* 32 bit systems traditionally used different

tools/include/uapi/drm/drm.h

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,8 @@ struct drm_gem_open {
713713
/**
714714
* DRM_CAP_ASYNC_PAGE_FLIP
715715
*
716-
* If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
716+
* If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy
717+
* page-flips.
717718
*/
718719
#define DRM_CAP_ASYNC_PAGE_FLIP 0x7
719720
/**
@@ -773,6 +774,13 @@ struct drm_gem_open {
773774
* :ref:`drm_sync_objects`.
774775
*/
775776
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
777+
/**
778+
* DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP
779+
*
780+
* If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic
781+
* commits.
782+
*/
783+
#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15
776784

777785
/* DRM_IOCTL_GET_CAP ioctl argument type */
778786
struct drm_get_cap {
@@ -842,6 +850,31 @@ struct drm_get_cap {
842850
*/
843851
#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
844852

853+
/**
854+
* DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT
855+
*
856+
* Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and
857+
* virtualbox) have additional restrictions for cursor planes (thus
858+
* making cursor planes on those drivers not truly universal), e.g.
859+
* they need cursor planes to act like one would expect from a mouse
860+
* cursor and have correctly set hotspot properties.
861+
* If this client cap is not set the DRM core will hide the cursor plane on
862+
* those virtualized drivers because not setting it implies that the
863+
* client is not capable of dealing with those extra restrictions.
864+
* Clients which do set cursor hotspot and treat the cursor plane
865+
* like a mouse cursor should set this property.
866+
* The client must enable &DRM_CLIENT_CAP_ATOMIC first.
867+
*
868+
* Setting this property on drivers which do not special case
869+
* cursor planes (i.e. non-virtualized drivers) will return
870+
* EOPNOTSUPP, which can be used by userspace to gauge
871+
* requirements of the hardware/drivers they're running on.
872+
*
873+
* This capability is always supported for atomic-capable virtualized
874+
* drivers starting from kernel version 6.6.
875+
*/
876+
#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6
877+
845878
/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
846879
struct drm_set_client_cap {
847880
__u64 capability;
@@ -893,6 +926,7 @@ struct drm_syncobj_transfer {
893926
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
894927
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
895928
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */
929+
#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */
896930
struct drm_syncobj_wait {
897931
__u64 handles;
898932
/* absolute timeout */
@@ -901,6 +935,14 @@ struct drm_syncobj_wait {
901935
__u32 flags;
902936
__u32 first_signaled; /* only valid when not waiting all */
903937
__u32 pad;
938+
/**
939+
* @deadline_nsec - fence deadline hint
940+
*
941+
* Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
942+
* fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
943+
* set.
944+
*/
945+
__u64 deadline_nsec;
904946
};
905947

906948
struct drm_syncobj_timeline_wait {
@@ -913,6 +955,14 @@ struct drm_syncobj_timeline_wait {
913955
__u32 flags;
914956
__u32 first_signaled; /* only valid when not waiting all */
915957
__u32 pad;
958+
/**
959+
* @deadline_nsec - fence deadline hint
960+
*
961+
* Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing
962+
* fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is
963+
* set.
964+
*/
965+
__u64 deadline_nsec;
916966
};
917967

918968
/**
@@ -1218,6 +1268,26 @@ extern "C" {
12181268

12191269
#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd)
12201270

1271+
/**
1272+
* DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer.
1273+
*
1274+
* This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL
1275+
* argument is a framebuffer object ID.
1276+
*
1277+
* This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable
1278+
* planes and CRTCs. As long as the framebuffer is used by a plane, it's kept
1279+
* alive. When the plane no longer uses the framebuffer (because the
1280+
* framebuffer is replaced with another one, or the plane is disabled), the
1281+
* framebuffer is cleaned up.
1282+
*
1283+
* This is useful to implement flicker-free transitions between two processes.
1284+
*
1285+
* Depending on the threat model, user-space may want to ensure that the
1286+
* framebuffer doesn't expose any sensitive user information: closed
1287+
* framebuffers attached to a plane can be read back by the next DRM master.
1288+
*/
1289+
#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb)
1290+
12211291
/*
12221292
* Device specific ioctls should only be in their respective headers
12231293
* The device specific ioctl range is from 0x40 to 0x9f.

tools/include/uapi/drm/i915_drm.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,7 @@ typedef struct drm_i915_irq_wait {
693693
#define I915_PARAM_HAS_EXEC_FENCE 44
694694

695695
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
696-
* user specified bufffers for post-mortem debugging of GPU hangs. See
696+
* user-specified buffers for post-mortem debugging of GPU hangs. See
697697
* EXEC_OBJECT_CAPTURE.
698698
*/
699699
#define I915_PARAM_HAS_EXEC_CAPTURE 45
@@ -1606,7 +1606,7 @@ struct drm_i915_gem_busy {
16061606
* is accurate.
16071607
*
16081608
* The returned dword is split into two fields to indicate both
1609-
* the engine classess on which the object is being read, and the
1609+
* the engine classes on which the object is being read, and the
16101610
* engine class on which it is currently being written (if any).
16111611
*
16121612
* The low word (bits 0:15) indicate if the object is being written
@@ -1815,7 +1815,7 @@ struct drm_i915_gem_madvise {
18151815
__u32 handle;
18161816

18171817
/* Advice: either the buffer will be needed again in the near future,
1818-
* or wont be and could be discarded under memory pressure.
1818+
* or won't be and could be discarded under memory pressure.
18191819
*/
18201820
__u32 madv;
18211821

@@ -3246,7 +3246,7 @@ struct drm_i915_query_topology_info {
32463246
* // enough to hold our array of engines. The kernel will fill out the
32473247
* // item.length for us, which is the number of bytes we need.
32483248
* //
3249-
* // Alternatively a large buffer can be allocated straight away enabling
3249+
* // Alternatively a large buffer can be allocated straightaway enabling
32503250
* // querying in one pass, in which case item.length should contain the
32513251
* // length of the provided buffer.
32523252
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
@@ -3256,7 +3256,7 @@ struct drm_i915_query_topology_info {
32563256
* // Now that we allocated the required number of bytes, we call the ioctl
32573257
* // again, this time with the data_ptr pointing to our newly allocated
32583258
* // blob, which the kernel can then populate with info on all engines.
3259-
* item.data_ptr = (uintptr_t)&info,
3259+
* item.data_ptr = (uintptr_t)&info;
32603260
*
32613261
* err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
32623262
* if (err) ...
@@ -3286,7 +3286,7 @@ struct drm_i915_query_topology_info {
32863286
/**
32873287
* struct drm_i915_engine_info
32883288
*
3289-
* Describes one engine and it's capabilities as known to the driver.
3289+
* Describes one engine and its capabilities as known to the driver.
32903290
*/
32913291
struct drm_i915_engine_info {
32923292
/** @engine: Engine class and instance. */

tools/include/uapi/linux/fcntl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,5 +116,8 @@
116116
#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
117117
compare object identity and may not
118118
be usable to open_by_handle_at(2) */
119+
#if defined(__KERNEL__)
120+
#define AT_GETATTR_NOSEC 0x80000000
121+
#endif
119122

120123
#endif /* _UAPI_LINUX_FCNTL_H */

0 commit comments

Comments
 (0)