
Commit 611875d

Merge tag 'perf-tools-fixes-for-v6.0-2022-08-27' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - Fixup setup of weak groups when using 'perf stat --repeat', add a
   'perf test' for it.

 - Fix memory leaks in 'perf sched record' detected with
   -fsanitize=address.

 - Fix build when PYTHON_CONFIG is user supplied.

 - Capitalize topdown metrics' names in 'perf stat', so that the
   output, sometimes parsed, matches the Intel SDM docs.

 - Make sure the documentation for the save_type filter about Intel
   systems with Arch LBR support (12th-Gen+ client or 4th-Gen Xeon+
   server) reflects recent related kernel changes.

 - Fix 'perf record' man page formatting of description of support to
   hybrid systems.

 - Update arm64's KVM header from the kernel sources.

* tag 'perf-tools-fixes-for-v6.0-2022-08-27' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf stat: Capitalize topdown metrics' names
  perf docs: Update the documentation for the save_type filter
  perf sched: Fix memory leaks in __cmd_record detected with -fsanitize=address
  perf record: Fix manpage formatting of description of support to hybrid systems
  perf test: Stat test for repeat with a weak group
  perf stat: Clear evsel->reset_group for each stat run
  tools kvm headers arm64: Update KVM header from the kernel sources
  perf python: Fix build when PYTHON_CONFIG is user supplied
2 parents 10d4879 + 4864854 commit 611875d

File tree

8 files changed: +61 -32 lines changed

tools/arch/arm64/include/uapi/asm/kvm.h

Lines changed: 4 additions & 2 deletions
@@ -75,9 +75,11 @@ struct kvm_regs {
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
-#define KVM_ARM_DEVICE_TYPE_MASK	(0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_TYPE_MASK	GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
+						KVM_ARM_DEVICE_TYPE_SHIFT)
 #define KVM_ARM_DEVICE_ID_SHIFT		16
-#define KVM_ARM_DEVICE_ID_MASK		(0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+#define KVM_ARM_DEVICE_ID_MASK		GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
+						KVM_ARM_DEVICE_ID_SHIFT)
 
 /* Supported device IDs */
 #define KVM_ARM_DEVICE_VGIC_V2	0
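The update above replaces the open-coded (0xffff << shift) masks with GENMASK(). As a rough cross-check only — the sketch below uses a simplified stand-in for the kernel's GENMASK macro and assumes a 64-bit unsigned long, so it is not the header's actual definition — the new expressions select the same bit ranges as the old literals:

/* Simplified, hypothetical GENMASK stand-in; NOT the kernel macro. */
#include <assert.h>
#include <stdio.h>

#define GENMASK(h, l) \
	(((~0UL) >> (63 - (h))) & ((~0UL) << (l)))	/* assumes 64-bit unsigned long */

#define KVM_ARM_DEVICE_TYPE_SHIFT	0
#define KVM_ARM_DEVICE_TYPE_MASK	GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
						KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_ID_SHIFT		16
#define KVM_ARM_DEVICE_ID_MASK		GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
						KVM_ARM_DEVICE_ID_SHIFT)

int main(void)
{
	/* Same bit ranges as the old (0xffff << SHIFT) expressions. */
	assert(KVM_ARM_DEVICE_TYPE_MASK == 0xffffUL);
	assert(KVM_ARM_DEVICE_ID_MASK == 0xffff0000UL);
	printf("type mask %#lx, id mask %#lx\n",
	       KVM_ARM_DEVICE_TYPE_MASK, KVM_ARM_DEVICE_ID_MASK);
	return 0;
}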

tools/perf/Documentation/intel-hybrid.txt

Lines changed: 0 additions & 10 deletions
@@ -21,11 +21,6 @@ cat /sys/devices/cpu_atom/cpus
 
 It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus.
 
-Quickstart
-
-List hybrid event
------------------
-
 As before, use perf-list to list the symbolic event.
 
 perf list
@@ -40,7 +35,6 @@ the event is belong to. Same event name but with different pmu can
 be supported.
 
 Enable hybrid event with a specific pmu
----------------------------------------
 
 To enable a core only event or atom only event, following syntax is supported:
 
@@ -53,7 +47,6 @@ For example, count the 'cycles' event on core cpus.
   perf stat -e cpu_core/cycles/
 
 Create two events for one hardware event automatically
-------------------------------------------------------
 
 When creating one event and the event is available on both atom and core,
 two events are created automatically. One is for atom, the other is for
@@ -132,7 +125,6 @@ For perf-stat result, it displays two events:
 The first 'cycles' is core event, the second 'cycles' is atom event.
 
 Thread mode example:
---------------------
 
 perf-stat reports the scaled counts for hybrid event and with a percentage
 displayed. The percentage is the event's running time/enabling time.
@@ -176,14 +168,12 @@ perf_event_attr:
      604,097,080      cpu_atom/cycles/                     (99.57%)
 
 perf-record:
-------------
 
 If there is no '-e' specified in perf record, on hybrid platform,
 it creates two default 'cycles' and adds them to event list. One
 is for core, the other is for atom.
 
 perf-stat:
-----------
 
 If there is no '-e' specified in perf stat, on hybrid platform,
 besides of software events, following events are created and

tools/perf/Documentation/perf-record.txt

Lines changed: 5 additions & 2 deletions
@@ -397,6 +397,9 @@ following filters are defined:
 	- abort_tx: only when the target is a hardware transaction abort
 	- cond: conditional branches
 	- save_type: save branch type during sampling in case binary is not available later
+		     For the platforms with Intel Arch LBR support (12th-Gen+ client or
+		     4th-Gen Xeon+ server), the save branch type is unconditionally enabled
+		     when the taken branch stack sampling is enabled.
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
@@ -757,8 +760,6 @@ events in data directory files. Option specified with no or empty value
 defaults to CPU layout. Masks defined or provided by the option value are
 filtered through the mask provided by -C option.
 
-include::intel-hybrid.txt[]
-
 --debuginfod[=URLs]::
 	Specify debuginfod URL to be used when cacheing perf.data binaries,
 	it follows the same syntax as the DEBUGINFOD_URLS variable, like:
@@ -778,6 +779,8 @@ include::intel-hybrid.txt[]
 only, as of now. So the applications built without the frame
 pointer might see bogus addresses.
 
+include::intel-hybrid.txt[]
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]

tools/perf/Makefile.config

Lines changed: 1 addition & 1 deletion
@@ -265,7 +265,7 @@ endif
 # defined. get-executable-or-default fails with an error if the first argument is supplied but
 # doesn't exist.
 override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO))
-override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO)))
+override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG)))
 
 grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))

tools/perf/builtin-sched.c

Lines changed: 19 additions & 5 deletions
@@ -3355,7 +3355,8 @@ static bool schedstat_events_exposed(void)
 static int __cmd_record(int argc, const char **argv)
 {
 	unsigned int rec_argc, i, j;
-	const char **rec_argv;
+	char **rec_argv;
+	const char **rec_argv_copy;
 	const char * const record_args[] = {
 		"record",
 		"-a",
@@ -3384,21 +3385,26 @@ static int __cmd_record(int argc, const char **argv)
 		ARRAY_SIZE(schedstat_args) : 0;
 
 	struct tep_event *waking_event;
+	int ret;
 
 	/*
 	 * +2 for either "-e", "sched:sched_wakeup" or
 	 * "-e", "sched:sched_waking"
 	 */
 	rec_argc = ARRAY_SIZE(record_args) + 2 + schedstat_argc + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
-
 	if (rec_argv == NULL)
 		return -ENOMEM;
+	rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
+	if (rec_argv_copy == NULL) {
+		free(rec_argv);
+		return -ENOMEM;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
 		rec_argv[i] = strdup(record_args[i]);
 
-	rec_argv[i++] = "-e";
+	rec_argv[i++] = strdup("-e");
 	waking_event = trace_event__tp_format("sched", "sched_waking");
 	if (!IS_ERR(waking_event))
 		rec_argv[i++] = strdup("sched:sched_waking");
@@ -3409,11 +3415,19 @@ static int __cmd_record(int argc, const char **argv)
 			rec_argv[i++] = strdup(schedstat_args[j]);
 
 	for (j = 1; j < (unsigned int)argc; j++, i++)
-		rec_argv[i] = argv[j];
+		rec_argv[i] = strdup(argv[j]);
 
 	BUG_ON(i != rec_argc);
 
-	return cmd_record(i, rec_argv);
+	memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
+	ret = cmd_record(rec_argc, rec_argv_copy);
+
+	for (i = 0; i < rec_argc; i++)
+		free(rec_argv[i]);
+	free(rec_argv);
+	free(rec_argv_copy);
+
+	return ret;
 }
 
 int cmd_sched(int argc, const char **argv)
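The leak fix keeps ownership and consumption separate: cmd_record() can permute the argv it is handed, so the strdup()'d pointers stay in rec_argv for freeing while a disposable copy of the pointer array goes to the callee. A minimal sketch of that pattern, with run_tool() as a hypothetical stand-in for cmd_record() (not perf's real API):

#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for cmd_record(); like option-parsing callees,
 * it may reorder the argv array it is given. */
static int run_tool(int argc, const char **argv)
{
	(void)argc;
	(void)argv;
	return 0;
}

static int record_wrapper(unsigned int rec_argc, const char **args)
{
	char **rec_argv;		/* owns the strdup()'d strings */
	const char **rec_argv_copy;	/* disposable view handed to the callee */
	unsigned int i;
	int ret;

	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -1;
	rec_argv_copy = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv_copy == NULL) {
		free(rec_argv);
		return -1;
	}

	for (i = 0; i < rec_argc; i++)
		rec_argv[i] = strdup(args[i]);

	/* The callee may shuffle rec_argv_copy; rec_argv keeps the original
	 * pointers so every allocation can still be freed afterwards. */
	memcpy(rec_argv_copy, rec_argv, sizeof(char *) * rec_argc);
	ret = run_tool((int)rec_argc, rec_argv_copy);

	for (i = 0; i < rec_argc; i++)
		free(rec_argv[i]);
	free(rec_argv);
	free(rec_argv_copy);

	return ret;
}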

tools/perf/builtin-stat.c

Lines changed: 1 addition & 0 deletions
@@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 	}
 
 	evlist__for_each_entry(evsel_list, counter) {
+		counter->reset_group = false;
 		if (bpf_counter__load(counter, &target))
 			return -1;
 		if (!evsel__is_bpf(counter))
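The one-liner above clears per-run state: as described in the pull message, reset_group gets set when a weak group has to be broken up, and with 'perf stat --repeat' a value left over from run 1 would otherwise carry into run 2..N. A minimal sketch of the idea, using hypothetical types rather than perf's real evsel/evlist API:

#include <stdbool.h>
#include <stddef.h>

struct counter {
	bool reset_group;	/* set when a weak group had to be broken up */
};

static int run_stat_once(struct counter *counters, size_t n)
{
	for (size_t i = 0; i < n; i++) {
		counters[i].reset_group = false;	/* start each run clean */
		/* ... open the event; a weak-group fallback may set
		 * reset_group = true, but only for this run ... */
	}
	return 0;
}

static int run_stat_repeat(struct counter *counters, size_t n, int repeats)
{
	for (int run = 0; run < repeats; run++) {
		int ret = run_stat_once(counters, n);
		if (ret)
			return ret;
	}
	return 0;
}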

tools/perf/tests/shell/stat.sh

Lines changed: 19 additions & 0 deletions
@@ -28,6 +28,24 @@ test_stat_record_report() {
   echo "stat record and report test [Success]"
 }
 
+test_stat_repeat_weak_groups() {
+  echo "stat repeat weak groups test"
+  if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \
+     true 2>&1 | grep -q 'seconds time elapsed'
+  then
+    echo "stat repeat weak groups test [Skipped event parsing failed]"
+    return
+  fi
+  if ! perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W' \
+     true > /dev/null 2>&1
+  then
+    echo "stat repeat weak groups test [Failed]"
+    err=1
+    return
+  fi
+  echo "stat repeat weak groups test [Success]"
+}
+
 test_topdown_groups() {
   # Topdown events must be grouped with the slots event first. Test that
   # parse-events reorders this.
@@ -75,6 +93,7 @@ test_topdown_weak_groups() {
 
 test_default_stat
 test_stat_record_report
+test_stat_repeat_weak_groups
 test_topdown_groups
 test_topdown_weak_groups
 exit $err

tools/perf/util/stat-shadow.c

Lines changed: 12 additions & 12 deletions
@@ -1193,7 +1193,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 						  &rsd);
 		if (retiring > 0.7)
 			color = PERF_COLOR_GREEN;
-		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
+		print_metric(config, ctxp, color, "%8.1f%%", "Retiring",
 				retiring * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
 		   full_td(cpu_map_idx, st, &rsd)) {
@@ -1202,7 +1202,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 						  &rsd);
 		if (fe_bound > 0.2)
 			color = PERF_COLOR_RED;
-		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
+		print_metric(config, ctxp, color, "%8.1f%%", "Frontend Bound",
 				fe_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
 		   full_td(cpu_map_idx, st, &rsd)) {
@@ -1211,7 +1211,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 						  &rsd);
 		if (be_bound > 0.2)
 			color = PERF_COLOR_RED;
-		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
+		print_metric(config, ctxp, color, "%8.1f%%", "Backend Bound",
 				be_bound * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
 		   full_td(cpu_map_idx, st, &rsd)) {
@@ -1220,7 +1220,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 						  &rsd);
 		if (bad_spec > 0.1)
 			color = PERF_COLOR_RED;
-		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
+		print_metric(config, ctxp, color, "%8.1f%%", "Bad Speculation",
 				bad_spec * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_HEAVY_OPS) &&
 		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
@@ -1234,13 +1234,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 
 		if (retiring > 0.7 && heavy_ops > 0.1)
 			color = PERF_COLOR_GREEN;
-		print_metric(config, ctxp, color, "%8.1f%%", "heavy operations",
+		print_metric(config, ctxp, color, "%8.1f%%", "Heavy Operations",
 				heavy_ops * 100.);
 		if (retiring > 0.7 && light_ops > 0.6)
 			color = PERF_COLOR_GREEN;
 		else
 			color = NULL;
-		print_metric(config, ctxp, color, "%8.1f%%", "light operations",
+		print_metric(config, ctxp, color, "%8.1f%%", "Light Operations",
 				light_ops * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BR_MISPREDICT) &&
 		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
@@ -1254,13 +1254,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 
 		if (bad_spec > 0.1 && br_mis > 0.05)
 			color = PERF_COLOR_RED;
-		print_metric(config, ctxp, color, "%8.1f%%", "branch mispredict",
+		print_metric(config, ctxp, color, "%8.1f%%", "Branch Mispredict",
 				br_mis * 100.);
 		if (bad_spec > 0.1 && m_clears > 0.05)
 			color = PERF_COLOR_RED;
 		else
 			color = NULL;
-		print_metric(config, ctxp, color, "%8.1f%%", "machine clears",
+		print_metric(config, ctxp, color, "%8.1f%%", "Machine Clears",
 				m_clears * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_LAT) &&
 		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
@@ -1274,13 +1274,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 
 		if (fe_bound > 0.2 && fetch_lat > 0.15)
 			color = PERF_COLOR_RED;
-		print_metric(config, ctxp, color, "%8.1f%%", "fetch latency",
+		print_metric(config, ctxp, color, "%8.1f%%", "Fetch Latency",
 				fetch_lat * 100.);
 		if (fe_bound > 0.2 && fetch_bw > 0.1)
 			color = PERF_COLOR_RED;
 		else
 			color = NULL;
-		print_metric(config, ctxp, color, "%8.1f%%", "fetch bandwidth",
+		print_metric(config, ctxp, color, "%8.1f%%", "Fetch Bandwidth",
 				fetch_bw * 100.);
 	} else if (perf_stat_evsel__is(evsel, TOPDOWN_MEM_BOUND) &&
 		   full_td(cpu_map_idx, st, &rsd) && (config->topdown_level > 1)) {
@@ -1294,13 +1294,13 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 
 		if (be_bound > 0.2 && mem_bound > 0.2)
 			color = PERF_COLOR_RED;
-		print_metric(config, ctxp, color, "%8.1f%%", "memory bound",
+		print_metric(config, ctxp, color, "%8.1f%%", "Memory Bound",
 				mem_bound * 100.);
 		if (be_bound > 0.2 && core_bound > 0.1)
 			color = PERF_COLOR_RED;
 		else
 			color = NULL;
-		print_metric(config, ctxp, color, "%8.1f%%", "Core bound",
+		print_metric(config, ctxp, color, "%8.1f%%", "Core Bound",
 				core_bound * 100.);
 	} else if (evsel->metric_expr) {
 		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
