Skip to content

Commit c9c2a42

Browse files
athira-rajeev and acmel
authored and committed
perf bench: Fix futex bench to correct usage of affinity for machines with #CPUs > 1K
The 'perf bench futex' testcase fails on systems with more than 1K CPUs. Testcase: perf bench futex all Failure snippet: <<>>Running futex/hash benchmark... perf: pthread_create: No such file or directory <<>> In all the futex benchmarks (i.e. hash, lock-pi, requeue, wake, wake-parallel), pthread_create is invoked from the respective bench_futex_* function. Although the logs show a direct failure from pthread_create, strace logs showed that the actual failure is "sched_setaffinity" returning EINVAL (invalid argument). This happens because the default mask size in glibc is 1024. To overcome this 1024-CPU mask size limitation of cpu_set_t, change the mask size using the CPU_*_S macros. This patch addresses the problem by fixing all the futex benchmarks to use CPU_ALLOC to allocate the cpumask, CPU_ALLOC_SIZE to compute its size, and CPU_SET_S to set bits in the mask. Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com> Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Signed-off-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com> Tested-by: Disha Goel <disgoel@linux.vnet.ibm.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Nageswara R Sastry <rnsastry@linux.ibm.com> Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20220406175113.87881-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent aeee9dc commit c9c2a42

File tree

5 files changed

+83
-28
lines changed

5 files changed

+83
-28
lines changed

tools/perf/bench/futex-hash.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,14 @@ static void print_summary(void)
122122
int bench_futex_hash(int argc, const char **argv)
123123
{
124124
int ret = 0;
125-
cpu_set_t cpuset;
125+
cpu_set_t *cpuset;
126126
struct sigaction act;
127127
unsigned int i;
128128
pthread_attr_t thread_attr;
129129
struct worker *worker = NULL;
130130
struct perf_cpu_map *cpu;
131+
int nrcpus;
132+
size_t size;
131133

132134
argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
133135
if (argc) {
@@ -170,25 +172,35 @@ int bench_futex_hash(int argc, const char **argv)
170172
threads_starting = params.nthreads;
171173
pthread_attr_init(&thread_attr);
172174
gettimeofday(&bench__start, NULL);
175+
176+
nrcpus = perf_cpu_map__nr(cpu);
177+
cpuset = CPU_ALLOC(nrcpus);
178+
BUG_ON(!cpuset);
179+
size = CPU_ALLOC_SIZE(nrcpus);
180+
173181
for (i = 0; i < params.nthreads; i++) {
174182
worker[i].tid = i;
175183
worker[i].futex = calloc(params.nfutexes, sizeof(*worker[i].futex));
176184
if (!worker[i].futex)
177185
goto errmem;
178186

179-
CPU_ZERO(&cpuset);
180-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
187+
CPU_ZERO_S(size, cpuset);
181188

182-
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
183-
if (ret)
189+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
190+
ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset);
191+
if (ret) {
192+
CPU_FREE(cpuset);
184193
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
185-
194+
}
186195
ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
187196
(void *)(struct worker *) &worker[i]);
188-
if (ret)
197+
if (ret) {
198+
CPU_FREE(cpuset);
189199
err(EXIT_FAILURE, "pthread_create");
200+
}
190201

191202
}
203+
CPU_FREE(cpuset);
192204
pthread_attr_destroy(&thread_attr);
193205

194206
pthread_mutex_lock(&thread_lock);

tools/perf/bench/futex-lock-pi.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,17 @@ static void *workerfn(void *arg)
120120
static void create_threads(struct worker *w, pthread_attr_t thread_attr,
121121
struct perf_cpu_map *cpu)
122122
{
123-
cpu_set_t cpuset;
123+
cpu_set_t *cpuset;
124124
unsigned int i;
125+
int nrcpus = perf_cpu_map__nr(cpu);
126+
size_t size;
125127

126128
threads_starting = params.nthreads;
127129

130+
cpuset = CPU_ALLOC(nrcpus);
131+
BUG_ON(!cpuset);
132+
size = CPU_ALLOC_SIZE(nrcpus);
133+
128134
for (i = 0; i < params.nthreads; i++) {
129135
worker[i].tid = i;
130136

@@ -135,15 +141,20 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
135141
} else
136142
worker[i].futex = &global_futex;
137143

138-
CPU_ZERO(&cpuset);
139-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
144+
CPU_ZERO_S(size, cpuset);
145+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
140146

141-
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
147+
if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
148+
CPU_FREE(cpuset);
142149
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
150+
}
143151

144-
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
152+
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) {
153+
CPU_FREE(cpuset);
145154
err(EXIT_FAILURE, "pthread_create");
155+
}
146156
}
157+
CPU_FREE(cpuset);
147158
}
148159

149160
int bench_futex_lock_pi(int argc, const char **argv)

tools/perf/bench/futex-requeue.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,22 +123,33 @@ static void *workerfn(void *arg __maybe_unused)
123123
static void block_threads(pthread_t *w,
124124
pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
125125
{
126-
cpu_set_t cpuset;
126+
cpu_set_t *cpuset;
127127
unsigned int i;
128+
int nrcpus = perf_cpu_map__nr(cpu);
129+
size_t size;
128130

129131
threads_starting = params.nthreads;
130132

133+
cpuset = CPU_ALLOC(nrcpus);
134+
BUG_ON(!cpuset);
135+
size = CPU_ALLOC_SIZE(nrcpus);
136+
131137
/* create and block all threads */
132138
for (i = 0; i < params.nthreads; i++) {
133-
CPU_ZERO(&cpuset);
134-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
139+
CPU_ZERO_S(size, cpuset);
140+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
135141

136-
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
142+
if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
143+
CPU_FREE(cpuset);
137144
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
145+
}
138146

139-
if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
147+
if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
148+
CPU_FREE(cpuset);
140149
err(EXIT_FAILURE, "pthread_create");
150+
}
141151
}
152+
CPU_FREE(cpuset);
142153
}
143154

144155
static void toggle_done(int sig __maybe_unused,

tools/perf/bench/futex-wake-parallel.c

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,22 +144,33 @@ static void *blocked_workerfn(void *arg __maybe_unused)
144144
static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
145145
struct perf_cpu_map *cpu)
146146
{
147-
cpu_set_t cpuset;
147+
cpu_set_t *cpuset;
148148
unsigned int i;
149+
int nrcpus = perf_cpu_map__nr(cpu);
150+
size_t size;
149151

150152
threads_starting = params.nthreads;
151153

154+
cpuset = CPU_ALLOC(nrcpus);
155+
BUG_ON(!cpuset);
156+
size = CPU_ALLOC_SIZE(nrcpus);
157+
152158
/* create and block all threads */
153159
for (i = 0; i < params.nthreads; i++) {
154-
CPU_ZERO(&cpuset);
155-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
160+
CPU_ZERO_S(size, cpuset);
161+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
156162

157-
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
163+
if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
164+
CPU_FREE(cpuset);
158165
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
166+
}
159167

160-
if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
168+
if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) {
169+
CPU_FREE(cpuset);
161170
err(EXIT_FAILURE, "pthread_create");
171+
}
162172
}
173+
CPU_FREE(cpuset);
163174
}
164175

165176
static void print_run(struct thread_data *waking_worker, unsigned int run_num)

tools/perf/bench/futex-wake.c

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,22 +97,32 @@ static void print_summary(void)
9797
static void block_threads(pthread_t *w,
9898
pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
9999
{
100-
cpu_set_t cpuset;
100+
cpu_set_t *cpuset;
101101
unsigned int i;
102-
102+
size_t size;
103+
int nrcpus = perf_cpu_map__nr(cpu);
103104
threads_starting = params.nthreads;
104105

106+
cpuset = CPU_ALLOC(nrcpus);
107+
BUG_ON(!cpuset);
108+
size = CPU_ALLOC_SIZE(nrcpus);
109+
105110
/* create and block all threads */
106111
for (i = 0; i < params.nthreads; i++) {
107-
CPU_ZERO(&cpuset);
108-
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
112+
CPU_ZERO_S(size, cpuset);
113+
CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset);
109114

110-
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
115+
if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) {
116+
CPU_FREE(cpuset);
111117
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
118+
}
112119

113-
if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
120+
if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) {
121+
CPU_FREE(cpuset);
114122
err(EXIT_FAILURE, "pthread_create");
123+
}
115124
}
125+
CPU_FREE(cpuset);
116126
}
117127

118128
static void toggle_done(int sig __maybe_unused,

0 commit comments

Comments
 (0)