Skip to content

Commit 4e3b5e6

Browse files
authored
Merge pull request #11775 from wenduwan/v5.0.x
[v5.0.x] opal/ofi: package rank calculation bugfix
2 parents 25b428b + d9962ae commit 4e3b5e6

File tree

1 file changed

+12 -15 lines changed

1 file changed

+12 -15 lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -720,8 +720,7 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
720720
{
721721
int i;
722722
uint16_t relative_locality, *package_rank_ptr;
723-
uint16_t current_package_rank = 0;
724-
uint16_t package_ranks[process_info->num_local_peers + 1];
723+
uint32_t ranks_on_package = 0;
725724
opal_process_name_t pname;
726725
pmix_status_t rc;
727726
char **peers = NULL;
@@ -750,26 +749,24 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
750749
// Get the local peers
751750
OPAL_MODEX_RECV_VALUE(rc, PMIX_LOCAL_PEERS, &pname, &local_peers, PMIX_STRING);
752751
if (PMIX_SUCCESS != rc || NULL == local_peers) {
753-
// We can't find package_rank, fall back to procid
754-
opal_show_help("help-common-ofi.txt", "package_rank failed", true);
755-
return (uint32_t) process_info->myprocid.rank;
752+
goto err;
756753
}
757754
peers = opal_argv_split(local_peers, ',');
758755
free(local_peers);
759756

760757
for (i = 0; NULL != peers[i]; i++) {
761758
pname.vpid = strtoul(peers[i], NULL, 10);
759+
760+
if ((uint16_t) pname.vpid == process_info->my_local_rank) {
761+
return ranks_on_package;
762+
}
763+
762764
locality_string = NULL;
763765
// Get the LOCALITY_STRING for process[i]
764766
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, PMIX_LOCALITY_STRING, &pname, &locality_string,
765767
PMIX_STRING);
766768
if (PMIX_SUCCESS != rc || NULL == locality_string) {
767-
// If we don't have information about locality, fall back to procid
768-
int level = 10;
769-
if (opal_output_get_verbosity(opal_common_ofi.output) >= level) {
770-
opal_show_help("help-common-ofi.txt", "package_rank failed", true, level);
771-
}
772-
return (uint32_t) process_info->myprocid.rank;
769+
goto err;
773770
}
774771

775772
// compute relative locality
@@ -783,12 +780,12 @@ static uint32_t get_package_rank(opal_process_info_t *process_info)
783780
}
784781

785782
if (relative_locality & OPAL_PROC_ON_SOCKET) {
786-
package_ranks[i] = current_package_rank;
787-
current_package_rank++;
783+
ranks_on_package++;
788784
}
789785
}
790-
791-
return (uint32_t) package_ranks[process_info->my_local_rank];
786+
err:
787+
opal_show_help("help-common-ofi.txt", "package_rank failed", true);
788+
return (uint32_t) process_info->myprocid.rank;
792789
}
793790

794791
struct fi_info *opal_common_ofi_select_provider(struct fi_info *provider_list,

0 commit comments

Comments
 (0)