Skip to content

Commit 1862a69

Browse files
committed
Merge tag 'perf-tools-fixes-for-v5.18-2022-04-09' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo: - Fix the clang command line option probing and remove some options to filter out, fixing the build with the latest clang versions - Fix 'perf bench' futex and epoll benchmarks to deal with machines with more than 1K CPUs - Fix 'perf test tsc' error message when not supported - Remap perf ring buffer if there is no space for event, fixing perf usage in 32-bit ChromeOS - Drop objdump stderr to avoid getting stuck waiting for stdout output in 'perf annotate' - Fix up garbled output by not showing unwind error messages when augmenting frame in best effort mode - Fix perf's libperf_print callback, use the va_args eprintf() variant - Sync vhost and arm64 cputype headers with the kernel sources - Fix 'perf report --mem-mode' with ARM SPE - Add missing external commands ('iiostat', etc) to 'perf --list-cmds' * tag 'perf-tools-fixes-for-v5.18-2022-04-09' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: perf annotate: Drop objdump stderr to avoid getting stuck waiting for stdout output perf tools: Add external commands to list-cmds perf docs: Add perf-iostat link to manpages perf session: Remap buf if there is no space for event perf bench: Fix epoll bench to correct usage of affinity for machines with #CPUs > 1K perf bench: Fix futex bench to correct usage of affinity for machines with #CPUs > 1K perf tools: Fix perf's libperf_print callback perf: arm-spe: Fix perf report --mem-mode perf unwind: Don't show unwind error messages when augmenting frame pointer stack tools headers arm64: Sync arm64's cputype.h with the kernel sources perf test tsc: Fix error message when not supported perf build: Don't use -ffat-lto-objects in the python feature test when building with clang-13 perf python: Fix probing for some clang command line options tools build: Filter out options and warnings not supported by clang tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts tools include UAPI: Sync 
linux/vhost.h with the kernel sources
2 parents 94a4c2b + 940a445 commit 1862a69

26 files changed

+234
-71
lines changed

tools/arch/arm64/include/asm/cputype.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
#define ARM_CPU_PART_CORTEX_A77 0xD0D
7676
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
7777
#define ARM_CPU_PART_CORTEX_A78 0xD41
78+
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
7879
#define ARM_CPU_PART_CORTEX_X1 0xD44
7980
#define ARM_CPU_PART_CORTEX_A510 0xD46
8081
#define ARM_CPU_PART_CORTEX_A710 0xD47
@@ -130,6 +131,7 @@
130131
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
131132
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
132133
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
134+
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
133135
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
134136
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
135137
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)

tools/build/feature/Makefile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,9 +217,16 @@ strip-libs = $(filter-out -l%,$(1))
217217
PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
218218
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
219219
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
220-
PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
220+
PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
221221
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
222222

223+
ifeq ($(CC_NO_CLANG), 0)
224+
PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
225+
PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
226+
PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
227+
FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
228+
endif
229+
223230
$(OUTPUT)test-libperl.bin:
224231
$(BUILD) $(FLAGS_PERL_EMBED)
225232

tools/include/uapi/linux/vhost.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,4 +150,11 @@
150150
/* Get the valid iova range */
151151
#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
152152
struct vhost_vdpa_iova_range)
153+
154+
/* Get the config size */
155+
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
156+
157+
/* Get the count of all virtqueues */
158+
#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
159+
153160
#endif

tools/perf/Documentation/perf.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ linkperf:perf-buildid-list[1], linkperf:perf-c2c[1],
8383
linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1],
8484
linkperf:perf-evlist[1], linkperf:perf-ftrace[1],
8585
linkperf:perf-help[1], linkperf:perf-inject[1],
86-
linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1],
86+
linkperf:perf-intel-pt[1], linkperf:perf-iostat[1], linkperf:perf-kallsyms[1],
8787
linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1],
8888
linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1],
8989
linkperf:perf-script[1], linkperf:perf-test[1],

tools/perf/Makefile.config

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,9 @@ ifdef PYTHON_CONFIG
272272
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
273273
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
274274
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
275+
ifeq ($(CC_NO_CLANG), 0)
276+
PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS))
277+
endif
275278
endif
276279

277280
FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@@ -790,6 +793,9 @@ else
790793
LDFLAGS += $(PERL_EMBED_LDFLAGS)
791794
EXTLIBS += $(PERL_EMBED_LIBADD)
792795
CFLAGS += -DHAVE_LIBPERL_SUPPORT
796+
ifeq ($(CC_NO_CLANG), 0)
797+
CFLAGS += -Wno-compound-token-split-by-macro
798+
endif
793799
$(call detected,CONFIG_LIBPERL)
794800
endif
795801
endif

tools/perf/arch/arm64/util/arm-spe.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
239239
arm_spe_set_timestamp(itr, arm_spe_evsel);
240240
}
241241

242+
/*
243+
* Set this only so that perf report knows that SPE generates memory info. It has no effect
244+
* on the opening of the event or the SPE data produced.
245+
*/
246+
evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
247+
242248
/* Add dummy event to keep tracking */
243249
err = parse_events(evlist, "dummy:u", NULL);
244250
if (err)

tools/perf/bench/epoll-ctl.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -222,13 +222,20 @@ static void init_fdmaps(struct worker *w, int pct)
222222
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
223223
{
224224
pthread_attr_t thread_attr, *attrp = NULL;
225-
cpu_set_t cpuset;
225+
cpu_set_t *cpuset;
226226
unsigned int i, j;
227227
int ret = 0;
228+
int nrcpus;
229+
size_t size;
228230

229231
if (!noaffinity)
230232
pthread_attr_init(&thread_attr);
231233

234+
nrcpus = perf_cpu_map__nr(cpu);
235+
cpuset = CPU_ALLOC(nrcpus);
236+
BUG_ON(!cpuset);
237+
size = CPU_ALLOC_SIZE(nrcpus);
238+
232239
for (i = 0; i < nthreads; i++) {
233240
struct worker *w = &worker[i];
234241

@@ -252,22 +259,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
252259
init_fdmaps(w, 50);
253260

254261
if (!noaffinity) {
255-
CPU_ZERO(&cpuset);
256-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
262+
CPU_ZERO_S(size, cpuset);
263+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
264+
size, cpuset);
257265

258-
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
259-
if (ret)
266+
ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
267+
if (ret) {
268+
CPU_FREE(cpuset);
260269
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
270+
}
261271

262272
attrp = &thread_attr;
263273
}
264274

265275
ret = pthread_create(&w->thread, attrp, workerfn,
266276
(void *)(struct worker *) w);
267-
if (ret)
277+
if (ret) {
278+
CPU_FREE(cpuset);
268279
err(EXIT_FAILURE, "pthread_create");
280+
}
269281
}
270282

283+
CPU_FREE(cpuset);
271284
if (!noaffinity)
272285
pthread_attr_destroy(&thread_attr);
273286

tools/perf/bench/epoll-wait.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -291,9 +291,11 @@ static void print_summary(void)
291291
static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
292292
{
293293
pthread_attr_t thread_attr, *attrp = NULL;
294-
cpu_set_t cpuset;
294+
cpu_set_t *cpuset;
295295
unsigned int i, j;
296296
int ret = 0, events = EPOLLIN;
297+
int nrcpus;
298+
size_t size;
297299

298300
if (oneshot)
299301
events |= EPOLLONESHOT;
@@ -306,6 +308,11 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
306308
if (!noaffinity)
307309
pthread_attr_init(&thread_attr);
308310

311+
nrcpus = perf_cpu_map__nr(cpu);
312+
cpuset = CPU_ALLOC(nrcpus);
313+
BUG_ON(!cpuset);
314+
size = CPU_ALLOC_SIZE(nrcpus);
315+
309316
for (i = 0; i < nthreads; i++) {
310317
struct worker *w = &worker[i];
311318

@@ -341,22 +348,28 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
341348
}
342349

343350
if (!noaffinity) {
344-
CPU_ZERO(&cpuset);
345-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
351+
CPU_ZERO_S(size, cpuset);
352+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu,
353+
size, cpuset);
346354

347-
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
348-
if (ret)
355+
ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
356+
if (ret) {
357+
CPU_FREE(cpuset);
349358
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
359+
}
350360

351361
attrp = &thread_attr;
352362
}
353363

354364
ret = pthread_create(&w->thread, attrp, workerfn,
355365
(void *)(struct worker *) w);
356-
if (ret)
366+
if (ret) {
367+
CPU_FREE(cpuset);
357368
err(EXIT_FAILURE, "pthread_create");
369+
}
358370
}
359371

372+
CPU_FREE(cpuset);
360373
if (!noaffinity)
361374
pthread_attr_destroy(&thread_attr);
362375

tools/perf/bench/futex-hash.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,14 @@ static void print_summary(void)
122122
int bench_futex_hash(int argc, const char **argv)
123123
{
124124
int ret = 0;
125-
cpu_set_t cpuset;
125+
cpu_set_t *cpuset;
126126
struct sigaction act;
127127
unsigned int i;
128128
pthread_attr_t thread_attr;
129129
struct worker *worker = NULL;
130130
struct perf_cpu_map *cpu;
131+
int nrcpus;
132+
size_t size;
131133

132134
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
133135
if (argc) {
@@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv)
170172
threads_starting = params.nthreads;
171173
pthread_attr_init(&thread_attr);
172174
gettimeofday(&bench__start, NULL);
175+
176+
nrcpus = perf_cpu_map__nr(cpu);
177+
cpuset = CPU_ALLOC(nrcpus);
178+
BUG_ON(!cpuset);
179+
size = CPU_ALLOC_SIZE(nrcpus);
180+
173181
for (i = 0; i < params.nthreads; i++) {
174182
worker[i].tid = i;
175183
worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
176184
if (!worker[i].futex)
177185
goto errmem;
178186

179-
CPU_ZERO(&cpuset);
180-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
187+
CPU_ZERO_S(size, cpuset);
181188

182-
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
183-
if (ret)
189+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
190+
ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
191+
if (ret) {
192+
CPU_FREE(cpuset);
184193
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
185-
194+
}
186195
ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
187196
(void *)(struct worker *) &worker[i]);
188-
if (ret)
197+
if (ret) {
198+
CPU_FREE(cpuset);
189199
err(EXIT_FAILURE, "pthread_create");
200+
}
190201

191202
}
203+
CPU_FREE(cpuset);
192204
pthread_attr_destroy(&thread_attr);
193205

194206
pthread_mutex_lock(&thread_lock);

tools/perf/bench/futex-lock-pi.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,17 @@ static void *workerfn(void *arg)
120120
static void create_threads(struct worker *w, pthread_attr_t thread_attr,
121121
struct perf_cpu_map *cpu)
122122
{
123-
cpu_set_t cpuset;
123+
cpu_set_t *cpuset;
124124
unsigned int i;
125+
int nrcpus = perf_cpu_map__nr(cpu);
126+
size_t size;
125127

126128
threads_starting = params.nthreads;
127129

130+
cpuset = CPU_ALLOC(nrcpus);
131+
BUG_ON(!cpuset);
132+
size = CPU_ALLOC_SIZE(nrcpus);
133+
128134
for (i = 0; i < params.nthreads; i++) {
129135
worker[i].tid = i;
130136

@@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
135141
} else
136142
worker[i].futex = &global_futex;
137143

138-
CPU_ZERO(&cpuset);
139-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
144+
CPU_ZERO_S(size, cpuset);
145+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
140146

141-
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
147+
if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
148+
CPU_FREE(cpuset);
142149
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
150+
}
143151

144-
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
152+
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
153+
CPU_FREE(cpuset);
145154
err(EXIT_FAILURE, "pthread_create");
155+
}
146156
}
157+
CPU_FREE(cpuset);
147158
}
148159

149160
int bench_futex_lock_pi(int argc, const char **argv)

0 commit comments

Comments
 (0)