Skip to content

Commit 04b9a4a

Browse files
authored
Merge pull request #8470 from hjelmn/btl_vader_fix_for_dynamic_add_procs
btl/vader: improve support for dynamic add_procs
2 parents 56ed3a6 + 4039ef3 commit 04b9a4a

File tree

4 files changed, with 17 additions and 11 deletions.

opal/mca/btl/sm/btl_sm.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ struct mca_btl_sm_component_t {
125125
char *my_segment; /**< this rank's base pointer */
126126
size_t segment_size; /**< size of my_segment */
127127
int32_t num_smp_procs; /**< current number of smp procs on this host */
128-
opal_atomic_int32_t local_rank; /**< current rank index at add_procs() time */
129128
opal_free_list_t sm_frags_eager; /**< free list of sm send frags */
130129
opal_free_list_t sm_frags_max_send; /**< free list of sm max send frags (large fragments) */
131130
opal_free_list_t sm_frags_user; /**< free list of small inline frags */

opal/mca/btl/sm/btl_sm_component.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,8 +560,6 @@ static mca_btl_base_module_t **mca_btl_sm_component_init (int *num_btls,
560560
/* no fast boxes allocated initially */
561561
component->num_fbox_in_endpoints = 0;
562562

563-
component->local_rank = 0;
564-
565563
mca_btl_sm_check_single_copy ();
566564

567565
if (MCA_BTL_SM_XPMEM != mca_btl_sm_component.single_copy_mechanism) {

opal/mca/btl/sm/btl_sm_endpoint.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ typedef struct mca_btl_base_endpoint_t {
6464
opal_free_list_item_t *fbox; /**< fast-box free list item */
6565
} fbox_out;
6666

67-
int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
67+
uint16_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing
6868
* SMP specific data structures. */
6969
opal_atomic_size_t send_count; /**< number of fragments sent to this peer */
7070
char *segment_base; /**< start of the peer's segment (in the address space

opal/mca/btl/sm/btl_sm_module.c

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,18 +194,30 @@ static int sm_btl_first_time_init(mca_btl_sm_t *sm_btl, int n)
194194
}
195195

196196

197-
static int init_sm_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_proc_t *proc, int remote_rank) {
197+
static int init_sm_endpoint (struct mca_btl_base_endpoint_t **ep_out, struct opal_proc_t *proc) {
198198
mca_btl_sm_component_t *component = &mca_btl_sm_component;
199199
union sm_modex_t *modex;
200200
ino_t my_user_ns_id;
201201
size_t msg_size;
202202
int rc;
203203

204+
uint16_t peer_local_rank;
205+
uint16_t *ptr = &peer_local_rank;
206+
OPAL_MODEX_RECV_VALUE(rc, PMIX_LOCAL_RANK, &proc->proc_name,
207+
&ptr, PMIX_UINT16);
208+
if (OPAL_SUCCESS != rc) {
209+
BTL_VERBOSE(("could not read the local rank for peer. rc=%d", rc));
210+
return rc;
211+
}
212+
213+
mca_btl_base_endpoint_t *ep = component->endpoints + peer_local_rank;
214+
*ep_out = ep;
215+
204216
OBJ_CONSTRUCT(ep, mca_btl_sm_endpoint_t);
205217

206-
ep->peer_smp_rank = remote_rank;
218+
ep->peer_smp_rank = peer_local_rank;
207219

208-
if (remote_rank != MCA_BTL_SM_LOCAL_RANK) {
220+
if (peer_local_rank != MCA_BTL_SM_LOCAL_RANK) {
209221
OPAL_MODEX_RECV_IMMEDIATE(rc, &component->super.btl_version,
210222
&proc->proc_name, (void **) &modex, &msg_size);
211223
if (OPAL_SUCCESS != rc) {
@@ -363,10 +375,7 @@ static int sm_add_procs (struct mca_btl_base_module_t* btl,
363375
}
364376

365377
/* setup endpoint */
366-
int rank = opal_atomic_fetch_add_32(&component -> local_rank, 1);
367-
368-
peers[proc] = component->endpoints + rank;
369-
rc = init_sm_endpoint (peers[proc], procs[proc], rank);
378+
rc = init_sm_endpoint (peers + proc, procs[proc]);
370379
if (OPAL_SUCCESS != rc) {
371380
break;
372381
}

0 commit comments

Comments
 (0)