Rust-for-Linux
diff --git a/MAINTAINERS
Lines changed: 1 addition & 0 deletions b/MAINTAINERS
Lines changed: 1 addition & 0 deletions
diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h
Lines changed: 16 additions & 7 deletions b/tools/arch/x86/include/uapi/asm/unistd_32.h
Lines changed: 16 additions & 7 deletions
diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h
Lines changed: 16 additions & 7 deletions b/tools/arch/x86/include/uapi/asm/unistd_64.h
Lines changed: 16 additions & 7 deletions
diff --git a/tools/build/Makefile.build
Lines changed: 1 addition & 0 deletions b/tools/build/Makefile.build
Lines changed: 1 addition & 0 deletions
diff --git a/tools/perf/.gitignore
Lines changed: 1 addition & 0 deletions b/tools/perf/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/tools/perf/Documentation/itrace.txt
Lines changed: 2 additions & 1 deletion b/tools/perf/Documentation/itrace.txt
Lines changed: 2 additions & 1 deletion
diff --git a/tools/perf/Documentation/perf-bench.txt
Lines changed: 1 addition & 1 deletion b/tools/perf/Documentation/perf-bench.txt
Lines changed: 1 addition & 1 deletion
diff --git a/tools/perf/Documentation/perf-c2c.txt
Lines changed: 13 additions & 3 deletions b/tools/perf/Documentation/perf-c2c.txt
Lines changed: 13 additions & 3 deletions
diff --git a/tools/perf/Documentation/perf-intel-pt.txt
Lines changed: 54 additions & 12 deletions b/tools/perf/Documentation/perf-intel-pt.txt
Lines changed: 54 additions & 12 deletions
diff --git a/tools/perf/Documentation/perf-list.txt
Lines changed: 1 addition & 1 deletion b/tools/perf/Documentation/perf-list.txt
Lines changed: 1 addition & 1 deletion
@@ -16323,6 +16323,7 @@ R:	Mark Rutland <mark.rutland@arm.com>
 R:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 R:	Jiri Olsa <jolsa@kernel.org>
 R:	Namhyung Kim <namhyung@kernel.org>
+R:	Ian Rogers <irogers@google.com>
 L:	linux-perf-users@vger.kernel.org
 L:	linux-kernel@vger.kernel.org
 S:	Supported
 
@@ -1,16 +1,25 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NR_perf_event_open
-# define __NR_perf_event_open 336
+#ifndef __NR_execve
+#define __NR_execve 11
 #endif
-#ifndef __NR_futex
-# define __NR_futex 240
+#ifndef __NR_getppid
+#define __NR_getppid 64
+#endif
+#ifndef __NR_getpgid
+#define __NR_getpgid 132
 #endif
 #ifndef __NR_gettid
-# define __NR_gettid 224
+#define __NR_gettid 224
+#endif
+#ifndef __NR_futex
+#define __NR_futex 240
 #endif
 #ifndef __NR_getcpu
-# define __NR_getcpu 318
+#define __NR_getcpu 318
+#endif
+#ifndef __NR_perf_event_open
+#define __NR_perf_event_open 336
 #endif
 #ifndef __NR_setns
-# define __NR_setns 346
+#define __NR_setns 346
 #endif
@@ -1,16 +1,25 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NR_perf_event_open
-# define __NR_perf_event_open 298
+#ifndef __NR_execve
+#define __NR_execve 59
 #endif
-#ifndef __NR_futex
-# define __NR_futex 202
+#ifndef __NR_getppid
+#define __NR_getppid 110
+#endif
+#ifndef __NR_getpgid
+#define __NR_getpgid 121
 #endif
 #ifndef __NR_gettid
-# define __NR_gettid 186
+#define __NR_gettid 186
 #endif
-#ifndef __NR_getcpu
-# define __NR_getcpu 309
+#ifndef __NR_futex
+#define __NR_futex 202
+#endif
+#ifndef __NR_perf_event_open
+#define __NR_perf_event_open 298
 #endif
 #ifndef __NR_setns
 #define __NR_setns 308
 #endif
+#ifndef __NR_getcpu
+#define __NR_getcpu 309
+#endif
@@ -53,6 +53,7 @@ build-file := $(dir)/Build
 
 quiet_cmd_flex  = FLEX    $@
 quiet_cmd_bison = BISON   $@
+quiet_cmd_test  = TEST    $@
 
 # Create directory unless it exists
 quiet_cmd_mkdir = MKDIR   $(dir $@)
 
@@ -38,6 +38,7 @@ arch/*/include/generated/
 trace/beauty/generated/
 pmu-events/pmu-events.c
 pmu-events/jevents
+pmu-events/metric_test.log
 feature/
 libapi/
 libbpf/
 
@@ -1,4 +1,5 @@
 		i	synthesize instructions events
+		y	synthesize cycles events
 		b	synthesize branches events (branch misses for Arm SPE)
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
@@ -25,7 +26,7 @@
 		A	approximate IPC
 		Z	prefer to ignore timestamps (so-called "timeless" decoding)
 
-	The default is all events i.e. the same as --itrace=ibxwpe,
+	The default is all events i.e. the same as --itrace=iybxwpe,
 	except for perf script where it is --itrace=ce
 
 	In addition, the period (default 100000, except for perf script where it is 1)
 
@@ -18,7 +18,7 @@ COMMON OPTIONS
 --------------
 -r::
 --repeat=::
-Specify amount of times to repeat the run (default 10).
+Specify number of times to repeat the run (default 10).
 
 -f::
 --format=::
 
@@ -22,7 +22,11 @@ you to track down the cacheline contentions.
 On Intel, the tool is based on load latency and precise store facility events
 provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
 with thresholding feature. On AMD, the tool uses IBS op pmu (due to hardware
-limitations, perf c2c is not supported on Zen3 cpus).
+limitations, perf c2c is not supported on Zen3 cpus). On Arm64 it uses SPE to
+sample load and store operations, therefore hardware and kernel support is
+required. See linkperf:perf-arm-spe[1] for a setup guide. Due to the
+statistical nature of Arm SPE sampling, not every memory operation will be
+sampled.
 
 These events provide:
   - memory address of the access
@@ -121,11 +125,17 @@ REPORT OPTIONS
 	perf c2c record --call-graph lbr.
 	Disabled by default. In common cases with call stack overflows,
 	it can recreate better call stacks than the default lbr call stack
-	output. But this approach is not full proof. There can be cases
+	output. But this approach is not foolproof. There can be cases
 	where it creates incorrect call stacks from incorrect matches.
 	The known limitations include exception handling such as
 	setjmp/longjmp, where calls and returns will not match.
 
+--double-cl::
+	Group the detection of shared cacheline events into double cacheline
+	granularity. Some architectures have an Adjacent Cacheline Prefetch
+	feature, which causes cacheline sharing to behave as if the cacheline
+	size were doubled.
+
 C2C RECORD
 ----------
 The perf c2c record command setup options related to HITM cacheline analysis
@@ -333,4 +343,4 @@ Check Joe's blog on c2c tool for detailed use case explanation:
 
 SEE ALSO
 --------
-linkperf:perf-record[1], linkperf:perf-mem[1]
+linkperf:perf-record[1], linkperf:perf-mem[1], linkperf:perf-arm-spe[1]
@@ -101,12 +101,12 @@ data is available you can use the 'perf script' tool with all itrace sampling
 options, which will list all the samples.
 
 	perf record -e intel_pt//u ls
-	perf script --itrace=ibxwpe
+	perf script --itrace=iybxwpe
 
 An interesting field that is not printed by default is 'flags' which can be
 displayed as follows:
 
-	perf script --itrace=ibxwpe -F+flags
+	perf script --itrace=iybxwpe -F+flags
 
 The flags are "bcrosyiABExghDt" which stand for branch, call, return, conditional,
 system, asynchronous, interrupt, transaction abort, trace begin, trace end,
@@ -147,16 +147,17 @@ displayed as follows:
 There are two ways that instructions-per-cycle (IPC) can be calculated depending
 on the recording.
 
-If the 'cyc' config term (see config terms section below) was used, then IPC is
-calculated using the cycle count from CYC packets, otherwise MTC packets are
-used - refer to the 'mtc' config term.  When MTC is used, however, the values
-are less accurate because the timing is less accurate.
+If the 'cyc' config term (see config terms section below) was used, then IPC
+and cycle events are calculated using the cycle count from CYC packets, otherwise
+MTC packets are used - refer to the 'mtc' config term.  When MTC is used, however,
+the values are less accurate because the timing is less accurate.
 
 Because Intel PT does not update the cycle count on every branch or instruction,
 the values will often be zero.  When there are values, they will be the number
 of instructions and number of cycles since the last update, and thus represent
-the average IPC since the last IPC for that event type.  Note IPC for "branches"
-events is calculated separately from IPC for "instructions" events.
+the average IPC and the cycle count since the last IPC for that event type.
+Note IPC for "branches" events is calculated separately from IPC for "instructions"
+events.
 
 Even with the 'cyc' config term, it is possible to produce IPC information for
 every change of timestamp, but at the expense of accuracy.  That is selected by
@@ -900,11 +901,12 @@ Having no option is the same as
 
 which, in turn, is the same as
 
-	--itrace=cepwx
+	--itrace=cepwxy
 
 The letters are:
 
 	i	synthesize "instructions" events
+	y	synthesize "cycles" events
 	b	synthesize "branches" events
 	x	synthesize "transactions" events
 	w	synthesize "ptwrite" events
@@ -927,16 +929,26 @@ The letters are:
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
 
+"Cycles" events look like they were recorded by "perf record -e cycles"
+(i.e., the default). Note that even with CYC packets enabled and no sampling,
+these are not fully accurate, since CYC packets are not emitted for each
+instruction, only when some other event (like an indirect branch, or a
+TNT packet representing multiple branches) causes a packet to be
+emitted. Thus, it is more effective for attributing cycles to functions
+(and possibly basic blocks) than to individual instructions, although it
+is not perfect even for functions (it becomes better if the noretcomp
+option is active).
+
 "Branches" events look like they were recorded by "perf record -e branches". "c"
 and "r" can be combined to get calls and returns.
 
 "Transactions" events correspond to the start or end of transactions. The
 'flags' field can be used in perf script to determine whether the event is a
 transaction start, commit or abort.
 
-Note that "instructions", "branches" and "transactions" events depend on code
-flow packets which can be disabled by using the config term "branch=0".  Refer
-to the config terms section above.
+Note that "instructions", "cycles", "branches" and "transactions" events
+depend on code flow packets which can be disabled by using the config term
+"branch=0".  Refer to the config terms section above.
 
 "ptwrite" events record the payload of the ptwrite instruction and whether
 "fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
@@ -1821,6 +1833,36 @@ Can be compiled and traced:
  $
 
 
+Pipe mode
+---------
+Pipe mode is a problem for Intel PT and possibly other auxtrace users.
+It's not recommended to use a pipe as data output with Intel PT, for
+the following reason.
+
+Essentially the auxtrace buffers do not behave like the regular perf
+event buffers.  That is because the head and tail are updated by
+software, but in the auxtrace case the data is written by hardware.
+So the head and tail do not get updated as data is written.
+
+In the Intel PT case, the head and tail are updated only when the trace
+is disabled by software, for example:
+    - full-trace, system wide : when buffer passes watermark
+    - full-trace, not system-wide : when buffer passes watermark or
+                                    context switches
+    - snapshot mode : as above but also when a snapshot is made
+    - sample mode : as above but also when a sample is made
+
+That means finished-round ordering doesn't work.  An auxtrace buffer
+can turn up that has data that extends back in time, possibly to the
+very beginning of tracing.
+
+For a perf.data file, that problem is solved by going through the trace
+and queuing up the auxtrace buffers in advance.
+
+For pipe mode, the order of events and timestamps can presumably
+be messed up.
+
+
 EXAMPLE
 -------
 
 
@@ -232,7 +232,7 @@ This can be overridden by setting the kernel.perf_event_paranoid
 sysctl to -1, which allows non root to use these events.
 
 For accessing trace point events perf needs to have read access to
-/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+/sys/kernel/tracing, even when perf_event_paranoid is in a relaxed
 setting.
 
 TRACING