Skip to content

Commit 1fa904e

Browse files
rhc54 authored and amirshehataornl committed
Do not compute distances when unbound
Unbound procs cannot have a device distance.
Signed-off-by: Ralph Castain <rhc@pmix.org>
(cherry picked from commit 8a446f6)
1 parent 503b169 commit 1fa904e

File tree

1 file changed

+16
-9
lines changed

1 file changed

+16
-9
lines changed

opal/mca/common/ofi/common_ofi.c

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
* Copyright (c) 2020-2022 Triad National Security, LLC. All rights
66
* reserved.
77
* Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved.
8-
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
8+
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
99
* Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights
1010
* reserved.
1111
* Copyright (c) 2023 UT-Battelle, LLC. All rights reserved.
@@ -469,11 +469,11 @@ static int check_provider_attr(struct fi_info *provider_info, struct fi_info *pr
469469
static int compute_dev_distances(pmix_device_distance_t **distances,
470470
size_t *ndist)
471471
{
472-
int ret = 0;
472+
int ret = OPAL_SUCCESS;
473473
size_t ninfo;
474474
pmix_info_t *info;
475475
pmix_cpuset_t cpuset;
476-
pmix_topology_t *pmix_topo;
476+
pmix_topology_t pmix_topo;
477477
pmix_device_type_t type = PMIX_DEVTYPE_OPENFABRICS |
478478
PMIX_DEVTYPE_NETWORK;
479479

@@ -482,10 +482,16 @@ static int compute_dev_distances(pmix_device_distance_t **distances,
482482
if (PMIX_SUCCESS != ret) {
483483
goto out;
484484
}
485+
/* if we are not bound, then we cannot compute distances */
486+
if (hwloc_bitmap_iszero(cpuset.bitmap) ||
487+
hwloc_bitmap_isfull(cpuset.bitmap)) {
488+
return OPAL_ERR_NOT_BOUND;
489+
}
485490

486-
/* load the PMIX topology */
487-
PMIx_Topology_free(pmix_topo, 1);
488-
ret = PMIx_Load_topology(pmix_topo);
491+
/* load the PMIX topology - this just loads a pointer to
492+
* the local topology held in PMIx, so you must not
493+
* free it */
494+
ret = PMIx_Load_topology(&pmix_topo);
489495
if (PMIX_SUCCESS != ret) {
490496
goto out;
491497
}
@@ -497,7 +503,6 @@ static int compute_dev_distances(pmix_device_distance_t **distances,
497503
ndist);
498504
PMIx_Info_free(info, ninfo);
499505

500-
PMIx_Topology_free(pmix_topo, 1);
501506
out:
502507
return ret;
503508
}
@@ -533,8 +538,9 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
533538
PMIx_Info_destruct(&directive);
534539
if (ret != PMIX_SUCCESS || !val) {
535540
ret = compute_dev_distances(&distances, &ndist);
536-
if (ret)
541+
if (ret) {
537542
goto out;
543+
}
538544
goto find_nearest;
539545
}
540546

@@ -554,8 +560,9 @@ get_nearest_nics(int *num_distances, pmix_value_t **valin)
554560

555561
find_nearest:
556562
nearest = calloc(sizeof(*distances), ndist);
557-
if (!nearest)
563+
if (!nearest) {
558564
goto out;
565+
}
559566

560567
for (i = 0; i < ndist; i++) {
561568
if (distances[i].type != PMIX_DEVTYPE_NETWORK &&

0 commit comments

Comments
 (0)