Skip to content

Commit 99bec5a

Browse files
authored
Merge pull request #12960 from hppritcha/fix_for_issue10895
comm: beef up use of PMIx_Group_construct
2 parents 8d71197 + 46ff698 commit 99bec5a

File tree

2 files changed

+37
-12
lines changed

2 files changed

+37
-12
lines changed

ompi/communicator/comm.c

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,22 +1784,22 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
17841784
leader_procs[1] = tmp;
17851785
}
17861786

1787-
/* create a unique tag for allocating the leader communicator. we can eliminate this step
1788-
* if we take a CID from the newly allocated block belonging to local_comm. this is
1789-
* a note to make this change at a later time. */
1790-
opal_asprintf (&sub_tag, "%s-OMPIi-LC", tag);
1791-
if (OPAL_UNLIKELY(NULL == sub_tag)) {
1792-
ompi_comm_free (&local_comm);
1787+
leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2);
1788+
ompi_set_group_rank (leader_group, my_proc);
1789+
if (OPAL_UNLIKELY(NULL == leader_group)) {
17931790
free(leader_procs);
1791+
ompi_comm_free (&local_comm);
17941792
return OMPI_ERR_OUT_OF_RESOURCE;
17951793
}
17961794

1797-
leader_group = ompi_group_allocate_plist_w_procs (NULL, leader_procs, 2);
1798-
ompi_set_group_rank (leader_group, my_proc);
1799-
if (OPAL_UNLIKELY(NULL == leader_group)) {
1800-
free (sub_tag);
1795+
/* create a unique tag for allocating the leader communicator. we can eliminate this step
1796+
* if we take a CID from the newly allocated block belonging to local_comm. this is
1797+
* a note to make this change at a later time. */
1798+
opal_asprintf (&sub_tag, "%s-OMPIi-LC-%s", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (leader_group, 0)));
1799+
if (OPAL_UNLIKELY(NULL == sub_tag)) {
18011800
free(leader_procs);
18021801
ompi_comm_free (&local_comm);
1802+
OBJ_RELEASE(leader_group);
18031803
return OMPI_ERR_OUT_OF_RESOURCE;
18041804
}
18051805

@@ -1809,6 +1809,7 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
18091809
rc = ompi_comm_create_from_group (leader_group, sub_tag, info, errhandler, &leader_comm);
18101810
OBJ_RELEASE(leader_group);
18111811
free (sub_tag);
1812+
sub_tag = NULL;
18121813
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
18131814
free(leader_procs);
18141815
ompi_comm_free (&local_comm);
@@ -1864,7 +1865,16 @@ int ompi_intercomm_create_from_groups (ompi_group_t *local_group, int local_lead
18641865
return rc;
18651866
}
18661867

1867-
rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) tag, NULL, false, OMPI_COMM_CID_GROUP_NEW);
1868+
/*
1869+
* append the pmix CONTEXT_ID obtained when creating the leader comm as discriminator
1870+
*/
1871+
opal_asprintf (&sub_tag, "%s-%ld", tag, data[1]);
1872+
if (OPAL_UNLIKELY(NULL == sub_tag)) {
1873+
return OMPI_ERR_OUT_OF_RESOURCE;
1874+
}
1875+
1876+
rc = ompi_comm_nextcid (newcomp, NULL, NULL, (void *) sub_tag, NULL, false, OMPI_COMM_CID_GROUP_NEW);
1877+
free (sub_tag);
18681878
if ( OMPI_SUCCESS != rc ) {
18691879
OBJ_RELEASE(newcomp);
18701880
return rc;

ompi/mpi/c/comm_create_from_group.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ static const char FUNC_NAME[] = "MPI_Comm_create_from_group";
4646
int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info, MPI_Errhandler errhandler,
4747
MPI_Comm *newcomm) {
4848
int rc;
49+
char *pmix_group_tag = NULL;
4950

5051
MEMCHECKER(
5152
memchecker_comm(comm);
@@ -89,8 +90,22 @@ int MPI_Comm_create_from_group (MPI_Group group, const char *tag, MPI_Info info,
8990
}
9091

9192

92-
rc = ompi_comm_create_from_group ((ompi_group_t *) group, tag, &info->super, errhandler,
93+
/*
94+
* we use PMIx group operations to implement comm/intercomm create from group/groups.
95+
* PMIx group constructors require a unique tag across the processes using the same
96+
* PMIx server. This is not equivalent to the uniqueness requirements of the tag argument
97+
* to MPI_Comm_create_from_group and MPI_Intercomm_create_from_groups, hence an
98+
* additional discriminator needs to be added to the user supplied tag argument.
99+
*/
100+
opal_asprintf (&pmix_group_tag, "%s-%s.%d", tag, OPAL_NAME_PRINT(ompi_group_get_proc_name (group, 0)),
101+
ompi_group_size(group));
102+
if (OPAL_UNLIKELY(NULL == pmix_group_tag)) {
103+
return OMPI_ERR_OUT_OF_RESOURCE;
104+
}
105+
106+
rc = ompi_comm_create_from_group ((ompi_group_t *) group, pmix_group_tag, &info->super, errhandler,
93107
(ompi_communicator_t **) newcomm);
108+
free(pmix_group_tag);
94109
if (MPI_SUCCESS != rc) {
95110
return ompi_errhandler_invoke (errhandler, MPI_COMM_NULL, errhandler->eh_mpi_object_type,
96111
rc, FUNC_NAME);

0 commit comments

Comments
 (0)