Skip to content

Commit 35a5971

Browse files
committed
Ensure that nodes are always used in the order provided
If a user provides a list of nodes to use via -host or -hostfile, then ensure that the ranks are placed according to that order. Also fix a bug where the number of slots on a node was incorrectly computed for localhost if the given name didn't exactly match the value returned by get_hostname. Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent ad8c842 commit 35a5971

File tree

2 files changed

+21
-44
lines changed

2 files changed

+21
-44
lines changed

orte/mca/rmaps/base/rmaps_base_support_fns.c

Lines changed: 18 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
1313
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
1414
* All rights reserved.
15-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
15+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2016 IBM Corporation. All rights reserved.
1717
* $COPYRIGHT$
1818
*
@@ -210,18 +210,17 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
210210
return ORTE_ERR_SILENT;
211211
}
212212
/* find the nodes in our node array and assemble them
213-
* in daemon order if the vm was launched
213+
* in list order as that is what the user specified
214214
*/
215-
for (i=0; i < orte_node_pool->size; i++) {
216-
nd = NULL;
217-
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
218-
continue;
219-
}
220-
/* ignore nodes that are non-usable */
221-
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
222-
continue;
223-
}
224-
OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) {
215+
OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) {
216+
for (i=0; i < orte_node_pool->size; i++) {
217+
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
218+
continue;
219+
}
220+
/* ignore nodes that are non-usable */
221+
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
222+
continue;
223+
}
225224
if (0 != strcmp(node->name, nptr->name)) {
226225
OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
227226
"NODE %s DOESNT MATCH NODE %s",
@@ -266,37 +265,14 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
266265
*/
267266
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
268267
}
269-
if (NULL == nd || NULL == nd->daemon ||
270-
NULL == node->daemon ||
271-
nd->daemon->name.vpid < node->daemon->name.vpid) {
272-
/* just append to end */
273-
opal_list_append(allocated_nodes, &node->super);
274-
nd = node;
275-
} else {
276-
/* starting from end, put this node in daemon-vpid order */
277-
while (node->daemon->name.vpid < nd->daemon->name.vpid) {
278-
if (opal_list_get_begin(allocated_nodes) == opal_list_get_prev(&nd->super)) {
279-
/* insert at beginning */
280-
opal_list_prepend(allocated_nodes, &node->super);
281-
goto moveon1;
282-
}
283-
nd = (orte_node_t*)opal_list_get_prev(&nd->super);
284-
}
285-
item = opal_list_get_next(&nd->super);
286-
if (item == opal_list_get_end(allocated_nodes)) {
287-
/* we are at the end - just append */
288-
opal_list_append(allocated_nodes, &node->super);
289-
} else {
290-
nd = (orte_node_t*)item;
291-
opal_list_insert_pos(allocated_nodes, item, &node->super);
292-
}
293-
moveon1:
294-
/* reset us back to the end for the next node */
295-
nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
296-
}
297-
opal_list_remove_item(&nodes, (opal_list_item_t*)nptr);
298-
OBJ_RELEASE(nptr);
268+
/* the list is ordered as per user direction using -host
269+
* or the listing in -hostfile - preserve that ordering */
270+
opal_list_append(allocated_nodes, &node->super);
271+
break;
299272
}
273+
/* remove the item from the list as we have allocated it */
274+
opal_list_remove_item(&nodes, (opal_list_item_t*)nptr);
275+
OBJ_RELEASE(nptr);
300276
}
301277
OBJ_DESTRUCT(&nodes);
302278
/* now prune for usage and compute total slots */

orte/util/dash_host/dash_host.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Copyright (c) 2004-2005 The Regents of the University of California.
1111
* All rights reserved.
1212
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
13-
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
13+
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
1414
* Copyright (c) 2015 Research Organization for Information Science
1515
* and Technology (RIST). All rights reserved.
1616
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@@ -52,7 +52,8 @@ int orte_util_dash_host_compute_slots(orte_node_t *node, char *hosts)
5252

5353
/* see if this node appears in the list */
5454
for (n=0; NULL != specs[n]; n++) {
55-
if (0 == strncmp(node->name, specs[n], strlen(node->name))) {
55+
if (0 == strncmp(node->name, specs[n], strlen(node->name)) ||
56+
(orte_ifislocal(node->name) && orte_ifislocal(specs[n]))) {
5657
/* check if the #slots was specified */
5758
if (NULL != (cptr = strchr(specs[n], ':'))) {
5859
*cptr = '\0';

0 commit comments

Comments
 (0)