Skip to content

Commit d6a81ec

Browse files
author
Ralph Castain
authored
Merge pull request #5287 from rhc54/topic/tools
Correct accounting for tools
2 parents bb15224 + 081a0d9 commit d6a81ec

File tree

8 files changed

+181
-96
lines changed

8 files changed

+181
-96
lines changed

contrib/platform/intel/bend/linux.conf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@ mca_base_component_show_load_errors = 1
6464
orte_abort_timeout = 10
6565
hwloc_base_mem_bind_failure_action = silent
6666
btl_tcp_if_include=10.10.10.0/24
67+
oob=^ud

opal/mca/pmix/pmix3x/pmix3x_server_north.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,8 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo,
10741074
opal_value_t *oinfo;
10751075
int rc;
10761076
pmix_status_t err;
1077+
opal_pmix3x_jobid_trkr_t *job;
1078+
bool found;
10771079

10781080
/* setup the caddy */
10791081
opalcaddy = OBJ_NEW(pmix3x_opalcaddy_t);
@@ -1085,12 +1087,36 @@ static void server_tool_connection(pmix_info_t *info, size_t ninfo,
10851087
oinfo = OBJ_NEW(opal_value_t);
10861088
opal_list_append(&opalcaddy->info, &oinfo->super);
10871089
oinfo->key = strdup(info[n].key);
1088-
if (OPAL_SUCCESS != (rc = pmix3x_value_unload(oinfo, &info[n].value))) {
1090+
if (0 == strncmp(oinfo->key, PMIX_NSPACE, PMIX_MAX_KEYLEN)) {
1091+
/* will pass it up as a jobid */
1092+
oinfo->type = OPAL_JOBID;
1093+
/* see if this job is in our list of known nspaces */
1094+
found = false;
1095+
OPAL_LIST_FOREACH(job, &mca_pmix_pmix3x_component.jobids, opal_pmix3x_jobid_trkr_t) {
1096+
if (0 == strncmp(job->nspace, info[n].value.data.proc->nspace, PMIX_MAX_NSLEN)) {
1097+
oinfo->data.name.jobid = job->jobid;
1098+
found = true;
1099+
break;
1100+
}
1101+
}
1102+
if (!found) {
1103+
if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&oinfo->data.name.jobid, info[n].value.data.proc->nspace))) {
1104+
OPAL_ERROR_LOG(rc);
1105+
OBJ_RELEASE(opalcaddy);
1106+
err = pmix3x_convert_opalrc(rc);
1107+
if (NULL != cbfunc) {
1108+
cbfunc(err, NULL, cbdata);
1109+
}
1110+
return;
1111+
}
1112+
}
1113+
} else if (OPAL_SUCCESS != (rc = pmix3x_value_unload(oinfo, &info[n].value))) {
10891114
OBJ_RELEASE(opalcaddy);
10901115
err = pmix3x_convert_opalrc(rc);
10911116
if (NULL != cbfunc) {
10921117
cbfunc(err, NULL, cbdata);
10931118
}
1119+
return;
10941120
}
10951121
}
10961122

orte/mca/rmaps/base/rmaps_base_support_fns.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
217217
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
218218
continue;
219219
}
220+
/* ignore nodes that are non-usable */
221+
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
222+
continue;
223+
}
220224
OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) {
221225
if (0 != strcmp(node->name, nptr->name)) {
222226
OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
@@ -320,6 +324,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
320324
}
321325
for (i=1; i < orte_node_pool->size; i++) {
322326
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
327+
/* ignore nodes that are non-usable */
328+
if (ORTE_FLAG_TEST(node, ORTE_NODE_NON_USABLE)) {
329+
continue;
330+
}
323331
/* ignore nodes that are marked as do-not-use for this mapping */
324332
if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
325333
OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,

orte/mca/state/base/state_base_fns.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*
22
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
3-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
3+
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
44
* $COPYRIGHT$
55
*
66
* Additional copyrights may follow
@@ -405,8 +405,10 @@ static void cleanup_node(orte_proc_t *proc)
405405
if (NULL == (node = proc->node)) {
406406
return;
407407
}
408-
node->num_procs--;
409-
node->slots_inuse--;
408+
if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_TOOL)) {
409+
node->num_procs--;
410+
node->slots_inuse--;
411+
}
410412
for (i=0; i < node->procs->size; i++) {
411413
if (NULL == (p = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
412414
continue;

orte/mca/state/dvm/state_dvm.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,11 @@ static void check_complete(int fd, short args, void *cbdata)
511511
/* skip procs from another job */
512512
continue;
513513
}
514-
node->slots_inuse--;
515-
node->num_procs--;
514+
if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_TOOL)) {
515+
node->slots_inuse--;
516+
node->num_procs--;
517+
}
518+
516519
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
517520
"%s state:dvm releasing proc %s from node %s",
518521
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

orte/mca/state/orted/state_orted.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* Copyright (c) 2011-2017 Los Alamos National Security, LLC.
33
* All rights reserved.
4-
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
4+
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
55
* $COPYRIGHT$
66
*
77
* Additional copyrights may follow
@@ -480,8 +480,10 @@ static void track_procs(int fd, short argc, void *cbdata)
480480
/* skip procs from another job */
481481
continue;
482482
}
483-
node->slots_inuse--;
484-
node->num_procs--;
483+
if (!ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_TOOL)) {
484+
node->slots_inuse--;
485+
node->num_procs--;
486+
}
485487
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
486488
"%s state:orted releasing proc %s from node %s",
487489
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),

orte/orted/pmix/pmix_server_gen.c

Lines changed: 128 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -832,121 +832,162 @@ static void _toolconn(int sd, short args, void *cbdata)
832832
orte_job_t *jdata;
833833
orte_app_context_t *app;
834834
orte_proc_t *proc;
835-
orte_node_t *node;
836-
orte_process_name_t tool;
837-
int rc;
835+
orte_node_t *node, *nptr;
836+
char *hostname = NULL;
837+
orte_process_name_t tool = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID};
838+
int rc, i;
838839
opal_value_t *val;
839-
bool flag;
840+
bool flag = false, flag_given = false;;
840841

841842
ORTE_ACQUIRE_OBJECT(cd);
842843

843844
opal_output_verbose(2, orte_pmix_server_globals.output,
844845
"%s TOOL CONNECTION PROCESSING",
845846
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
846847

847-
/* if we are the HNP, we can directly assign the jobid */
848-
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER) {
849-
jdata = OBJ_NEW(orte_job_t);
850-
rc = orte_plm_base_create_jobid(jdata);
851-
if (ORTE_SUCCESS != rc) {
852-
tool.jobid = ORTE_JOBID_INVALID;
848+
/* check for directives */
849+
if (NULL != cd->info) {
850+
OPAL_LIST_FOREACH(val, cd->info, opal_value_t) {
851+
if (0 == strcmp(val->key, OPAL_PMIX_EVENT_SILENT_TERMINATION)) {
852+
if (OPAL_UNDEF == val->type || val->data.flag) {
853+
flag = true;
854+
flag_given = true;
855+
}
856+
} else if (0 == strcmp(val->key, OPAL_PMIX_NSPACE)) {
857+
tool.jobid = val->data.name.jobid;
858+
} else if (0 == strcmp(val->key, OPAL_PMIX_RANK)) {
859+
tool.vpid = val->data.name.vpid;
860+
} else if (0 == strcmp(val->key, OPAL_PMIX_HOSTNAME)) {
861+
hostname = strdup(val->data.string);
862+
}
863+
}
864+
}
865+
866+
/* if we are not the HNP or master, and the tool doesn't
867+
* already have a name (i.e., we didn't spawn it), then
868+
* there is nothing we can currently do.
869+
* Eventually, when we switch to nspace instead of an
870+
* integer jobid, we'll just locally assign this value */
871+
if (ORTE_JOBID_INVALID == tool.jobid ||
872+
ORTE_VPID_INVALID == tool.vpid) {
873+
/* if we are the HNP, we can directly assign the jobid */
874+
if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_MASTER) {
875+
jdata = OBJ_NEW(orte_job_t);
876+
rc = orte_plm_base_create_jobid(jdata);
877+
if (ORTE_SUCCESS != rc) {
878+
OBJ_RELEASE(jdata);
879+
if (NULL != cd->toolcbfunc) {
880+
cd->toolcbfunc(ORTE_ERROR, tool, cd->cbdata);
881+
}
882+
OBJ_RELEASE(cd);
883+
return;
884+
}
885+
tool.jobid = jdata->jobid;
853886
tool.vpid = 0;
887+
} else {
888+
/* we currently do not support connections to non-HNP/master
889+
* daemons from tools that were not spawned by a daemon */
854890
if (NULL != cd->toolcbfunc) {
855-
cd->toolcbfunc(rc, tool, cd->cbdata);
891+
cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
856892
}
857893
OBJ_RELEASE(cd);
858894
return;
859895
}
860-
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
861-
/* setup some required job-level fields in case this
862-
* tool calls spawn, or uses some other functions that
863-
* need them */
864-
/* must create a map for it (even though it has no
865-
* info in it) so that the job info will be picked
866-
* up in subsequent pidmaps or other daemons won't
867-
* know how to route
868-
*/
869-
jdata->map = OBJ_NEW(orte_job_map_t);
870-
871-
/* setup an app_context for the singleton */
872-
app = OBJ_NEW(orte_app_context_t);
873-
app->app = strdup("tool");
874-
app->num_procs = 1;
875-
opal_pointer_array_add(jdata->apps, app);
876-
jdata->num_apps = 1;
877-
878-
/* setup a proc object for the singleton - since we
879-
* -must- be the HNP, and therefore we stored our
880-
* node on the global node pool, and since the singleton
881-
* -must- be on the same node as us, indicate that
882-
*/
883-
proc = OBJ_NEW(orte_proc_t);
884-
proc->name.jobid = jdata->jobid;
885-
proc->name.vpid = 0;
886-
proc->parent = ORTE_PROC_MY_NAME->vpid;
887-
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
888-
proc->state = ORTE_PROC_STATE_RUNNING;
889-
proc->app_idx = 0;
890-
/* obviously, it is on my node */
896+
} else {
897+
jdata = OBJ_NEW(orte_job_t);
898+
jdata->jobid = tool.jobid;
899+
}
900+
901+
opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
902+
/* setup some required job-level fields in case this
903+
* tool calls spawn, or uses some other functions that
904+
* need them */
905+
/* must create a map for it (even though it has no
906+
* info in it) so that the job info will be picked
907+
* up in subsequent pidmaps or other daemons won't
908+
* know how to route
909+
*/
910+
jdata->map = OBJ_NEW(orte_job_map_t);
911+
912+
/* setup an app_context for the singleton */
913+
app = OBJ_NEW(orte_app_context_t);
914+
app->app = strdup("tool");
915+
app->num_procs = 1;
916+
opal_pointer_array_add(jdata->apps, app);
917+
jdata->num_apps = 1;
918+
919+
/* setup a proc object for the singleton - since we
920+
* -must- be the HNP, and therefore we stored our
921+
* node on the global node pool, and since the singleton
922+
* -must- be on the same node as us, indicate that
923+
*/
924+
proc = OBJ_NEW(orte_proc_t);
925+
proc->name.jobid = jdata->jobid;
926+
proc->name.vpid = tool.vpid;
927+
proc->parent = ORTE_PROC_MY_NAME->vpid;
928+
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
929+
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_TOOL);
930+
proc->state = ORTE_PROC_STATE_RUNNING;
931+
/* set the trivial */
932+
proc->local_rank = 0;
933+
proc->node_rank = 0;
934+
proc->app_rank = 0;
935+
proc->app_idx = 0;
936+
if (NULL == hostname) {
937+
/* it is on my node */
891938
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
892-
proc->node = node;
893-
OBJ_RETAIN(node); /* keep accounting straight */
894-
opal_pointer_array_add(jdata->procs, proc);
895-
jdata->num_procs = 1;
896-
/* add the node to the job map */
897-
OBJ_RETAIN(node);
898-
opal_pointer_array_add(jdata->map->nodes, node);
899-
jdata->map->num_nodes++;
900-
/* and it obviously is on the node - note that
901-
* we do _not_ increment the #procs on the node
902-
* as the tool doesn't count against the slot
903-
* allocation */
904-
OBJ_RETAIN(proc);
905-
opal_pointer_array_add(node->procs, proc);
906-
/* set the trivial */
907-
proc->local_rank = 0;
908-
proc->node_rank = 0;
909-
proc->app_rank = 0;
910-
proc->state = ORTE_PROC_STATE_RUNNING;
911-
proc->app_idx = 0;
912939
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_LOCAL);
913-
914-
/* check for directives */
915-
if (NULL != cd->info) {
916-
OPAL_LIST_FOREACH(val, cd->info, opal_value_t) {
917-
if (0 == strcmp(val->key, OPAL_PMIX_EVENT_SILENT_TERMINATION)) {
918-
if (OPAL_UNDEF == val->type || val->data.flag) {
919-
flag = true;
920-
orte_set_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION,
921-
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
922-
}
923-
}
940+
} else {
941+
/* we need to locate it */
942+
node = NULL;
943+
for (i=0; i < orte_node_pool->size; i++) {
944+
if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
945+
continue;
924946
}
947+
if (0 == strcmp(hostname, nptr->name)) {
948+
node = nptr;
949+
break;
950+
}
951+
}
952+
if (NULL == node) {
953+
/* not in our allocation - which is still okay */
954+
node = OBJ_NEW(orte_node_t);
955+
node->name = strdup(hostname);
956+
ORTE_FLAG_SET(node, ORTE_NODE_NON_USABLE);
957+
opal_pointer_array_add(orte_node_pool, node);
925958
}
959+
}
960+
proc->node = node;
961+
OBJ_RETAIN(node); /* keep accounting straight */
962+
opal_pointer_array_add(jdata->procs, proc);
963+
jdata->num_procs = 1;
964+
/* add the node to the job map */
965+
OBJ_RETAIN(node);
966+
opal_pointer_array_add(jdata->map->nodes, node);
967+
jdata->map->num_nodes++;
968+
/* and it obviously is on the node - note that
969+
* we do _not_ increment the #procs on the node
970+
* as the tool doesn't count against the slot
971+
* allocation */
972+
OBJ_RETAIN(proc);
973+
opal_pointer_array_add(node->procs, proc);
974+
/* if they indicated a preference for termination, set it */
975+
if (flag_given) {
976+
orte_set_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION,
977+
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
978+
} else {
979+
/* we default to silence */
926980
flag = true;
927981
orte_set_attribute(&jdata->attributes, ORTE_JOB_SILENT_TERMINATION,
928982
ORTE_ATTR_GLOBAL, &flag, OPAL_BOOL);
929-
930-
/* pass back the assigned jobid */
931-
tool.jobid = jdata->jobid;
932-
tool.vpid = 0;
933-
if (NULL != cd->toolcbfunc) {
934-
cd->toolcbfunc(rc, tool, cd->cbdata);
935-
}
936-
OBJ_RELEASE(cd);
937-
return;
938983
}
939984

940-
/* otherwise, we have to send the request to the HNP.
941-
* Eventually, when we switch to nspace instead of an
942-
* integer jobid, we'll just locally assign this value */
943-
tool.jobid = ORTE_JOBID_INVALID;
944-
tool.vpid = ORTE_VPID_INVALID;
945985
if (NULL != cd->toolcbfunc) {
946986
cd->toolcbfunc(ORTE_ERR_NOT_SUPPORTED, tool, cd->cbdata);
947987
}
948988
OBJ_RELEASE(cd);
949989
}
990+
950991
void pmix_tool_connected_fn(opal_list_t *info,
951992
opal_pmix_tool_connection_cbfunc_t cbfunc,
952993
void *cbdata)

orte/util/attr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ typedef uint8_t orte_node_flags_t;
6464
#define ORTE_NODE_FLAG_OVERSUBSCRIBED 0x04 // whether or not this node is oversubscribed
6565
#define ORTE_NODE_FLAG_MAPPED 0x08 // whether we have been added to the current map
6666
#define ORTE_NODE_FLAG_SLOTS_GIVEN 0x10 // the number of slots was specified - used only in non-managed environments
67+
#define ORTE_NODE_NON_USABLE 0x20 // the node is hosting a tool and is NOT to be used for jobs
6768

6869

6970
/*** NODE ATTRIBUTE KEYS - never sent anywhere ***/
@@ -177,6 +178,7 @@ typedef uint16_t orte_proc_flags_t;
177178
#define ORTE_PROC_FLAG_DATA_IN_SM 0x0800 // modex data has been stored in the local shared memory region
178179
#define ORTE_PROC_FLAG_DATA_RECVD 0x1000 // modex data for this proc has been received
179180
#define ORTE_PROC_FLAG_SM_ACCESS 0x2000 // indicate if process can read modex data from shared memory region
181+
#define ORTE_PROC_FLAG_TOOL 0x4000 // proc is a tool and doesn't count against allocations
180182

181183
/*** PROCESS ATTRIBUTE KEYS ***/
182184
#define ORTE_PROC_START_KEY ORTE_JOB_MAX_KEY

0 commit comments

Comments
 (0)