Skip to content

Commit 01e9aca

Browse files
committed
Add topology support for hetero systems
Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent 88ac05f commit 01e9aca

File tree

5 files changed

+621
-99
lines changed

5 files changed

+621
-99
lines changed

orte/mca/odls/odls_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2018 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
1818
* $COPYRIGHT$
@@ -93,6 +93,9 @@ typedef uint8_t orte_daemon_cmd_flag_t;
9393
/* tell DVM daemons to cleanup resources from job */
9494
#define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34
9595

96+
/* pass node info (an optional nidmap, plus topologies and #slots) to the daemons */
97+
#define ORTE_DAEMON_PASS_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 35
98+
9699
/*
97100
* Struct written up the pipe from the child to the parent.
98101
*/

orte/mca/plm/base/plm_base_launch_support.c

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,11 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
130130
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
131131
orte_topology_t *t;
132132
orte_node_t *node;
133-
int i;
133+
int i, rc;
134+
uint8_t u8;
135+
opal_buffer_t buf;
136+
orte_grpcomm_signature_t *sig;
137+
orte_daemon_cmd_flag_t command = ORTE_DAEMON_PASS_NODE_INFO_CMD;
134138

135139
ORTE_ACQUIRE_OBJECT(caddy);
136140

@@ -177,6 +181,78 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
177181
/* ensure we update the routing plan */
178182
orte_routed.update_routing_plan(NULL);
179183

184+
/* prep the buffer */
185+
OBJ_CONSTRUCT(&buf, opal_buffer_t);
186+
/* load the command */
187+
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) {
188+
ORTE_ERROR_LOG(rc);
189+
OBJ_DESTRUCT(&buf);
190+
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
191+
OBJ_RELEASE(caddy);
192+
return;
193+
}
194+
195+
196+
/* if we did not execute a tree-spawn, then the daemons do
197+
* not currently have a nidmap for the job - in that case,
198+
* send one to them */
199+
if (!orte_nidmap_communicated) {
200+
u8 = 1;
201+
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
202+
ORTE_ERROR_LOG(rc);
203+
OBJ_DESTRUCT(&buf);
204+
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
205+
OBJ_RELEASE(caddy);
206+
return;
207+
}
208+
if (OPAL_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &buf))) {
209+
ORTE_ERROR_LOG(rc);
210+
OBJ_DESTRUCT(&buf);
211+
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
212+
OBJ_RELEASE(caddy);
213+
return;
214+
}
215+
orte_nidmap_communicated = true;
216+
} else {
217+
u8 = 0;
218+
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) {
219+
ORTE_ERROR_LOG(rc);
220+
OBJ_DESTRUCT(&buf);
221+
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
222+
OBJ_RELEASE(caddy);
223+
return;
224+
}
225+
}
226+
227+
/* we always send the topologies and the #slots on each node. Note
228+
* that we cannot send the #slots until after the above step since,
229+
* for unmanaged allocations, we might have just determined it! */
230+
if (OPAL_SUCCESS != (rc = orte_util_pass_node_info(&buf))) {
231+
ORTE_ERROR_LOG(rc);
232+
OBJ_DESTRUCT(&buf);
233+
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
234+
OBJ_RELEASE(caddy);
235+
return;
236+
}
237+
238+
/* goes to all daemons */
239+
sig = OBJ_NEW(orte_grpcomm_signature_t);
240+
sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t));
241+
sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid;
242+
sig->signature[0].vpid = ORTE_VPID_WILDCARD;
243+
sig->sz = 1;
244+
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf))) {
245+
ORTE_ERROR_LOG(rc);
246+
OBJ_RELEASE(sig);
247+
OBJ_DESTRUCT(&buf);
248+
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
249+
OBJ_RELEASE(caddy);
250+
return;
251+
}
252+
OBJ_DESTRUCT(&buf);
253+
/* maintain accounting - release our reference to the signature */
254+
OBJ_RELEASE(sig);
255+
180256
/* progress the job */
181257
caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
182258
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY);

orte/orted/orted_comm.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include "orte/util/proc_info.h"
6060
#include "orte/util/session_dir.h"
6161
#include "orte/util/name_fns.h"
62+
#include "orte/util/nidmap.h"
6263

6364
#include "orte/mca/errmgr/errmgr.h"
6465
#include "orte/mca/grpcomm/base/base.h"
@@ -126,7 +127,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
126127
char *coprocessors;
127128
orte_job_map_t *map;
128129
int8_t flag;
129-
uint8_t *cmpdata;
130+
uint8_t *cmpdata, u8;
130131
size_t cmplen;
131132

132133
/* unpack the command */
@@ -241,6 +242,32 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
241242
}
242243
break;
243244

245+
246+
case ORTE_DAEMON_PASS_NODE_INFO_CMD:
247+
if (orte_debug_daemons_flag) {
248+
opal_output(0, "%s orted_cmd: received pass_node_info",
249+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
250+
}
251+
if (!ORTE_PROC_IS_HNP) {
252+
n = 1;
253+
if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &u8, &n, OPAL_UINT8))) {
254+
ORTE_ERROR_LOG(ret);
255+
goto CLEANUP;
256+
}
257+
if (1 == u8) {
258+
if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(buffer))) {
259+
ORTE_ERROR_LOG(ret);
260+
goto CLEANUP;
261+
}
262+
}
263+
if (ORTE_SUCCESS != (ret = orte_util_parse_node_info(buffer))) {
264+
ORTE_ERROR_LOG(ret);
265+
goto CLEANUP;
266+
}
267+
}
268+
break;
269+
270+
244271
/**** ADD_LOCAL_PROCS ****/
245272
case ORTE_DAEMON_ADD_LOCAL_PROCS:
246273
case ORTE_DAEMON_DVM_ADD_PROCS:

0 commit comments

Comments
 (0)