Skip to content

Commit 3637443

Browse files
committed
adding NUMA_RANK to process metadata
adding PMIX_NUMA_RANK info to process metadata so that the local NUMA rank can be accessed through the opal_process_info object. Signed-off-by: Nikola Dancejic <dancejic@amazon.com>
1 parent 1a46e95 commit 3637443

File tree

4 files changed

+36
-0
lines changed

4 files changed

+36
-0
lines changed

ompi/runtime/ompi_rte.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
* reserved.
1313
* Copyright (c) 2019 Research Organization for Information Science
1414
* and Technology (RIST). All rights reserved.
15+
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
16+
* reserved.
1517
* $COPYRIGHT$
1618
*/
1719
#include "ompi_config.h"
@@ -70,6 +72,7 @@ pmix_process_info_t pmix_process_info = {
7072
.proc_session_dir = NULL,
7173
.my_local_rank = 0,
7274
.my_node_rank = 0,
75+
.my_numa_rank = UINT16_MAX, /* Assume invalid NUMA rank, set to UINT16_MAX */
7376
.num_local_peers = 0,
7477
.num_procs = 0,
7578
.app_num = 0,
@@ -777,6 +780,28 @@ int ompi_rte_init(int *pargc, char ***pargv)
777780
pmix_proc_is_bound = false;
778781
}
779782

783+
/* get our numa rank from PMIx */
784+
if (pmix_proc_is_bound) {
785+
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_NUMA_RANK,
786+
&pmix_process_info.my_name, &u16ptr, PMIX_UINT16);
787+
if (PMIX_SUCCESS != rc) {
788+
if (ompi_singleton) {
789+
/* just assume the numa_rank is invalid, set to UINT16_MAX */
790+
u16 = UINT16_MAX;
791+
} else {
792+
ret = opal_pmix_convert_status(rc);
793+
error = "numa rank";
794+
goto error;
795+
}
796+
}
797+
pmix_process_info.my_numa_rank = u16;
798+
} else {
799+
/* If processes are not bound, the numa_rank is not available
800+
* Assign UINT16_MAX to the numa_rank to indicate an invalid value
801+
*/
802+
pmix_process_info.my_numa_rank = UINT16_MAX;
803+
}
804+
780805
/* get our local peers */
781806
if (0 < pmix_process_info.num_local_peers) {
782807
/* if my local rank if too high, then that's an error */
@@ -866,6 +891,7 @@ int ompi_rte_init(int *pargc, char ***pargv)
866891
opal_process_info.proc_session_dir = pmix_process_info.proc_session_dir;
867892
opal_process_info.num_local_peers = (int32_t)pmix_process_info.num_local_peers;
868893
opal_process_info.my_local_rank = (int32_t)pmix_process_info.my_local_rank;
894+
opal_process_info.my_numa_rank = pmix_process_info.my_numa_rank;
869895
opal_process_info.cpuset = pmix_process_info.cpuset;
870896

871897
return OPAL_SUCCESS;

ompi/runtime/ompi_rte.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
* and Technology (RIST). All rights reserved.
99
* Copyright (c) 2020 Triad National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
12+
* reserved.
1113
*
1214
* $COPYRIGHT$
1315
*
@@ -253,6 +255,8 @@ typedef struct {
253255
char *proc_session_dir;
254256
uint16_t my_local_rank;
255257
uint16_t my_node_rank;
258+
/* process rank on local NUMA node. Set to UINT16_MAX if NUMA rank is unavailable */
259+
uint16_t my_numa_rank;
256260
int32_t num_local_peers;
257261
uint32_t num_procs;
258262
uint32_t app_num;

opal/util/proc.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
* and Technology (RIST). All rights reserved.
1010
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
13+
* reserved.
1214
* $COPYRIGHT$
1315
*
1416
* Additional copyrights may follow
@@ -35,6 +37,7 @@ opal_process_info_t opal_process_info = {
3537
.proc_session_dir = NULL,
3638
.num_local_peers = 0, /* there is nobody else but me */
3739
.my_local_rank = 0, /* I'm the only process around here */
40+
.my_numa_rank = UINT16_MAX, /* Assume numa_rank is unavailable, set to UINT16_MAX */
3841
.cpuset = NULL,
3942
};
4043

opal/util/proc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
* Copyright (c) 2014-2016 Research Organization for Information Science
88
* and Technology (RIST). All rights reserved.
99
* Copyright (c) 2017 Cisco Systems, Inc. All rights reserved
10+
* Copyright (c) 2020 Amazon.com, Inc. or its affiliates. All Rights
11+
* reserved.
1012
* $COPYRIGHT$
1113
*
1214
* Additional copyrights may follow
@@ -108,6 +110,7 @@ typedef struct opal_process_info_t {
108110
char *proc_session_dir; /**< Session directory for the process */
109111
int32_t num_local_peers; /**< number of procs from my job that share my node with me */
110112
int32_t my_local_rank; /**< local rank on this node within my job */
113+
int16_t my_numa_rank; /**< rank on this processes NUMA node. A value of UINT16_MAX indicates unavailable numa_rank */
111114
char *cpuset; /**< String-representation of bitmap where we are bound */
112115
} opal_process_info_t;
113116
OPAL_DECLSPEC extern opal_process_info_t opal_process_info;

0 commit comments

Comments
 (0)