Skip to content

Commit 9e2db26

Browse files
committed
Fix vader local modex
Restrict the search to the "immediate" range so at worst we check with our local server and don't go up to the host daemon. Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent 0054de0 commit 9e2db26

File tree

4 files changed

+82
-7
lines changed

4 files changed

+82
-7
lines changed

opal/mca/btl/vader/btl_vader_module.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
206206
ep->peer_smp_rank = remote_rank;
207207

208208
if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) {
209-
OPAL_MODEX_RECV(rc, &component->super.btl_version,
210-
&proc->proc_name, (void **) &modex, &msg_size);
209+
OPAL_MODEX_RECV_IMMEDIATE(rc, &component->super.btl_version,
210+
&proc->proc_name, (void **) &modex, &msg_size);
211211
if (OPAL_SUCCESS != rc) {
212212
return rc;
213213
}

opal/mca/pmix/pmix-internal.h

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ typedef struct {
274274
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
275275
PMIX_INFO_LOAD(&_info, PMIX_OPTIONAL, NULL, PMIX_BOOL); \
276276
(r) = PMIx_Get(&(_proc), (s), &(_info), 1, &(_kv)); \
277+
PMIX_INFO_DESTRUCT(&_info); \
277278
if (NULL == _kv) { \
278279
(r) = PMIX_ERR_NOT_FOUND; \
279280
} else if (_kv->type != (t)) { \
@@ -301,8 +302,8 @@ typedef struct {
301302
* is to be returned
302303
* t - the expected data type
303304
*/
304-
#define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t) \
305-
do { \
305+
#define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t) \
306+
do { \
306307
pmix_proc_t _proc; \
307308
pmix_value_t *_kv = NULL; \
308309
pmix_info_t _info; \
@@ -315,6 +316,7 @@ typedef struct {
315316
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
316317
PMIX_INFO_LOAD(&_info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); \
317318
(r) = PMIx_Get(&(_proc), (s), &(_info), 1, &(_kv)); \
319+
PMIX_INFO_DESTRUCT(&_info); \
318320
if (NULL == _kv) { \
319321
(r) = PMIX_ERR_NOT_FOUND; \
320322
} else if (_kv->type != (t)) { \
@@ -349,7 +351,7 @@ typedef struct {
349351
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
350352
__FILE__, __LINE__, \
351353
OPAL_NAME_PRINT(*(p)), (s))); \
352-
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
354+
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
353355
(r) = PMIx_Get(&(_proc), (s), NULL, 0, &(_kv)); \
354356
if (NULL == _kv) { \
355357
(r) = PMIX_ERR_NOT_FOUND; \
@@ -401,6 +403,47 @@ typedef struct {
401403
} \
402404
} while(0);
403405

406+
/**
407+
* Provide a simplified macro for retrieving modex data
408+
* from another process:
409+
*
410+
* r - the integer return status from the modex op (int)
411+
* s - string key (char*)
412+
* p - pointer to the opal_process_name_t of the proc that posted
413+
* the data (opal_process_name_t*)
414+
* d - pointer to a location wherein the data object
415+
* is to be returned (char**)
416+
* sz - pointer to a location wherein the number of bytes
417+
* in the data object can be returned (size_t*)
418+
*/
419+
#define OPAL_MODEX_RECV_STRING_IMMEDIATE(r, s, p, d, sz) \
420+
do { \
421+
pmix_proc_t _proc; \
422+
pmix_value_t *_kv = NULL; \
423+
pmix_info_t _info; \
424+
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
425+
"%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \
426+
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
427+
__FILE__, __LINE__, \
428+
OPAL_NAME_PRINT(*(p)), (s))); \
429+
*(d) = NULL; \
430+
*(sz) = 0; \
431+
OPAL_PMIX_CONVERT_NAME(&_proc, (p)); \
432+
PMIX_INFO_LOAD(&_info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); \
433+
(r) = PMIx_Get(&(_proc), (s), &_info, 1, &(_kv)); \
434+
PMIX_INFO_DESTRUCT(&_info); \
435+
if (NULL == _kv) { \
436+
(r) = PMIX_ERR_NOT_FOUND; \
437+
} else if (PMIX_SUCCESS == (r)) { \
438+
*(d) = (uint8_t*)_kv->data.bo.bytes; \
439+
*(sz) = _kv->data.bo.size; \
440+
_kv->data.bo.bytes = NULL; /* protect the data */ \
441+
} \
442+
if (NULL != _kv) { \
443+
PMIX_VALUE_RELEASE(_kv); \
444+
} \
445+
} while(0);
446+
404447
/**
405448
* Provide a simplified macro for retrieving modex data
406449
* from another process:
@@ -432,6 +475,38 @@ typedef struct {
432475
} \
433476
} while(0);
434477

478+
/**
479+
* Provide a simplified macro for retrieving modex data
480+
* from another process:
481+
*
482+
* r - the integer return status from the modex op (int)
483+
* s - the MCA component that posted the data (mca_base_component_t*)
484+
* p - pointer to the opal_process_name_t of the proc that posted
485+
* the data (opal_process_name_t*)
486+
* d - pointer to a location wherein the data object
487+
* is to be returned (char**)
488+
* sz - pointer to a location wherein the number of bytes
489+
* in the data object can be returned (size_t*)
490+
*/
491+
#define OPAL_MODEX_RECV_IMMEDIATE(r, s, p, d, sz) \
492+
do { \
493+
char *_key; \
494+
_key = mca_base_component_to_string((s)); \
495+
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
496+
"%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \
497+
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
498+
__FILE__, __LINE__, \
499+
OPAL_NAME_PRINT(*(p)), _key)); \
500+
if (NULL == _key) { \
501+
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \
502+
(r) = OPAL_ERR_OUT_OF_RESOURCE; \
503+
} else { \
504+
OPAL_MODEX_RECV_STRING_IMMEDIATE((r), _key, (p), (d), (sz)); \
505+
free(_key); \
506+
} \
507+
} while(0);
508+
509+
435510
#define PMIX_ERROR_LOG(r) \
436511
opal_output(0, "[%s:%d] PMIx Error: %s", __FILE__, __LINE__, PMIx_Error_string((r)))
437512

prrte

0 commit comments

Comments
 (0)