Skip to content

Commit 394e430

Browse files
athira-rajeev and acmel
authored and committed
perf bench numa: Fix the condition checks for max number of NUMA nodes
In systems having higher node numbers available like node 255, perf numa bench will fail with SIGABRT. <<>> perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || g->p.nr_nodes < 0)' failed. Aborted (core dumped) <<>> Snippet from 'numactl -H' below on a powerpc system where the highest node number available is 255: available: 6 nodes (0,8,252-255) node 0 cpus: <cpu-list> node 0 size: 519587 MB node 0 free: 516659 MB node 8 cpus: <cpu-list> node 8 size: 523607 MB node 8 free: 486757 MB node 252 cpus: node 252 size: 0 MB node 252 free: 0 MB node 253 cpus: node 253 size: 0 MB node 253 free: 0 MB node 254 cpus: node 254 size: 0 MB node 254 free: 0 MB node 255 cpus: node 255 size: 0 MB node 255 free: 0 MB node distances: node 0 8 252 253 254 255 Note: <cpu-list> expands to the actual cpu list in the original output. Nodes 252-255 represent the memory on GPUs and are valid nodes. The perf numa bench init code has a condition check to see if the number of NUMA nodes (nr_nodes) exceeds MAX_NR_NODES. The value of MAX_NR_NODES defined in the perf code is 64, and 'nr_nodes' is derived from numa_max_node(), which returns the highest node number available in the system. On systems that have a NUMA node numbered 255, this condition check fails and results in SIGABRT. The numa benchmark uses the static value of MAX_NR_NODES in the code as the size of two NUMA node arrays and of the node bitmask used for setting the memory policy. This patch fixes that by dynamically allocating the two arrays and the bitmask based on the node numbers available in the system. With the fix, the perf numa benchmark works with the node configuration of any system, and the static MAX_NR_NODES value is removed.
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com> Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Madhavan Srinivasan <maddy@linux.ibm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com> Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/1614271802-1503-1-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent ffc52b7 commit 394e430

File tree

1 file changed

+29
-13
lines changed

1 file changed

+29
-13
lines changed

tools/perf/bench/numa.c

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -344,18 +344,22 @@ static void mempol_restore(void)
344344

345345
static void bind_to_memnode(int node)
346346
{
347-
unsigned long nodemask;
347+
struct bitmask *node_mask;
348348
int ret;
349349

350350
if (node == NUMA_NO_NODE)
351351
return;
352352

353-
BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
354-
nodemask = 1L << node;
353+
node_mask = numa_allocate_nodemask();
354+
BUG_ON(!node_mask);
355355

356-
ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
357-
dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
356+
numa_bitmask_clearall(node_mask);
357+
numa_bitmask_setbit(node_mask, node);
358358

359+
ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
360+
dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
361+
362+
numa_bitmask_free(node_mask);
359363
BUG_ON(ret);
360364
}
361365

@@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
876880
prctl(0, bytes_worked);
877881
}
878882

879-
#define MAX_NR_NODES 64
880-
881883
/*
882884
* Count the number of nodes a process's threads
883885
* are spread out on.
@@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
888890
*/
889891
static int count_process_nodes(int process_nr)
890892
{
891-
char node_present[MAX_NR_NODES] = { 0, };
893+
char *node_present;
892894
int nodes;
893895
int n, t;
894896

897+
node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
898+
BUG_ON(!node_present);
899+
for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
900+
node_present[nodes] = 0;
901+
895902
for (t = 0; t < g->p.nr_threads; t++) {
896903
struct thread_data *td;
897904
int task_nr;
@@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr)
901908
td = g->threads + task_nr;
902909

903910
node = numa_node_of_cpu(td->curr_cpu);
904-
if (node < 0) /* curr_cpu was likely still -1 */
911+
if (node < 0) /* curr_cpu was likely still -1 */ {
912+
free(node_present);
905913
return 0;
914+
}
906915

907916
node_present[node] = 1;
908917
}
909918

910919
nodes = 0;
911920

912-
for (n = 0; n < MAX_NR_NODES; n++)
921+
for (n = 0; n < g->p.nr_nodes; n++)
913922
nodes += node_present[n];
914923

924+
free(node_present);
915925
return nodes;
916926
}
917927

@@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
980990
{
981991
unsigned int loops_done_min, loops_done_max;
982992
int process_groups;
983-
int nodes[MAX_NR_NODES];
993+
int *nodes;
984994
int distance;
985995
int nr_min;
986996
int nr_max;
@@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
9941004
if (!g->p.show_convergence && !g->p.measure_convergence)
9951005
return;
9961006

1007+
nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
1008+
BUG_ON(!nodes);
9971009
for (node = 0; node < g->p.nr_nodes; node++)
9981010
nodes[node] = 0;
9991011

@@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
10351047

10361048
BUG_ON(sum > g->p.nr_tasks);
10371049

1038-
if (0 && (sum < g->p.nr_tasks))
1050+
if (0 && (sum < g->p.nr_tasks)) {
1051+
free(nodes);
10391052
return;
1053+
}
10401054

10411055
/*
10421056
* Count the number of distinct process groups present
@@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
10881102
}
10891103
tprintf("\n");
10901104
}
1105+
1106+
free(nodes);
10911107
}
10921108

10931109
static void show_summary(double runtime_ns_max, int l, double *convergence)
@@ -1413,7 +1429,7 @@ static int init(void)
14131429
g->p.nr_nodes = numa_max_node() + 1;
14141430

14151431
/* char array in count_process_nodes(): */
1416-
BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
1432+
BUG_ON(g->p.nr_nodes < 0);
14171433

14181434
if (g->p.show_quiet && !g->p.show_details)
14191435
g->p.show_details = -1;

0 commit comments

Comments
 (0)