Skip to content

Commit 6d383a0

Browse files
committed
osc/rdma: Add checks for gpu support during dynamic attach
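Previously, GPU (accelerator) support was checked only during window creation and not during the dynamic attach process. This commit adds the check to the attach path and moves the BTL accelerator-support scan into a shared helper so both paths use the same logic. Signed-off-by: William Zhang <wilzhang@amazon.com>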
1 parent ebd3f64 commit 6d383a0

File tree

3 files changed

+48
-27
lines changed

3 files changed

+48
-27
lines changed

ompi/mca/osc/rdma/osc_rdma.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "ompi/mca/osc/osc.h"
4545
#include "ompi/mca/osc/base/base.h"
4646
#include "opal/mca/btl/btl.h"
47+
#include "opal/mca/btl/base/base.h"
4748
#include "opal/mca/btl/base/btl_base_am_rdma.h"
4849
#include "ompi/memchecker.h"
4950
#include "ompi/op/op.h"
@@ -758,4 +759,36 @@ static inline int osc_rdma_is_accel(void *buf)
758759
uint64_t flags;
759760
return opal_accelerator.check_addr(buf, &dev_id, &flags);
760761
}
762+
763+
/**
764+
* @brief Checks whether any btl's are capable of accelerator support.
765+
*
766+
* @param[in] btls The modules to check for btl accelerator support
767+
*
768+
* @returns true If any btl module has accelerator support
769+
* @returns false Otherwise
770+
*/
771+
static inline bool osc_rdma_btl_accel_support(opal_list_t *btls)
772+
{
773+
mca_btl_base_selected_module_t* selected_btl;
774+
/* verify if we have any btls available. Since we do not verify
775+
* connectivity across all btls in the alternate case, this is as
776+
* good a test as we are going to have for success. */
777+
if (opal_list_is_empty(btls)) {
778+
return false;
779+
}
780+
781+
/* If we have any btls, we check again if any btl supports
782+
* MCA_BTL_FLAGS_ACCELERATOR_RDMA */
783+
OPAL_LIST_FOREACH(selected_btl, btls, mca_btl_base_selected_module_t) {
784+
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
785+
"osc_rdma_component_query: check ACCELERATOR_RDMA flag: %s",
786+
selected_btl->btl_component->btl_version.mca_component_name);
787+
mca_btl_base_module_t *btl = selected_btl->btl_module;
788+
if (btl->btl_flags & MCA_BTL_FLAGS_ACCELERATOR_RDMA) {
789+
return true;
790+
}
791+
}
792+
return false;
793+
}
761794
#endif /* OMPI_OSC_RDMA_H */

ompi/mca/osc/rdma/osc_rdma_component.c

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -373,10 +373,6 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
373373
int flavor)
374374
{
375375

376-
opal_list_t *btls = NULL;
377-
mca_btl_base_selected_module_t* selected_btl;
378-
int gpu_check = 0;
379-
380376
if (MPI_WIN_FLAVOR_SHARED == flavor) {
381377
return -1;
382378
}
@@ -385,7 +381,9 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
385381
uint64_t flags;
386382
int dev_id;
387383
if (opal_accelerator.check_addr(*base, &dev_id, &flags)) {
388-
gpu_check = 1;
384+
if (!osc_rdma_btl_accel_support(&mca_btl_base_modules_initialized)) {
385+
return -1;
386+
}
389387
}
390388
}
391389

@@ -396,28 +394,6 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
396394
return -1;
397395
}
398396

399-
/* Not on GPU at all, skip the check */
400-
if (!gpu_check) {
401-
goto ok;
402-
}
403-
404-
/* If we have any btls, we check again if any btl supports
405-
* MCA_BTL_FLAGS_ACCELERATOR_RDMA */
406-
btls = &mca_btl_base_modules_initialized;
407-
OPAL_LIST_FOREACH(selected_btl, btls, mca_btl_base_selected_module_t) {
408-
opal_output_verbose(MCA_BASE_VERBOSE_INFO, ompi_osc_base_framework.framework_output,
409-
"osc_rdma_component_query: check ACCELERATOR_RDMA flag: %s",
410-
selected_btl->btl_component->btl_version.mca_component_name);
411-
mca_btl_base_module_t *btl = selected_btl->btl_module;
412-
// Check flag: MCA_BTL_FLAGS_ACCELERATOR_RDMA
413-
if (btl->btl_flags & MCA_BTL_FLAGS_ACCELERATOR_RDMA) {
414-
goto ok;
415-
}
416-
}
417-
/* No BTL supports the accelerator flag */
418-
return -1;
419-
420-
ok:
421397
return mca_osc_rdma_component.priority;
422398
}
423399

ompi/mca/osc/rdma/osc_rdma_dynamic.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
172172
intptr_t page_size = opal_getpagesize ();
173173
int region_index, ret;
174174
size_t aligned_len;
175+
int dev_id;
176+
uint64_t flags;
175177

176178
if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
177179
return OMPI_ERR_RMA_FLAVOR;
@@ -182,6 +184,16 @@ int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
182184
return OMPI_SUCCESS;
183185
}
184186

187+
/* Component query check doesn't apply for dynamic window types.
188+
* We have to check whether a GPU window is supported here. */
189+
if (opal_accelerator.check_addr(base, &dev_id, &flags)) {
190+
if (!osc_rdma_btl_accel_support(&mca_btl_base_modules_initialized)) {
191+
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "Failed to attach %p. No components capable of attaching accelerator buffers.",
192+
base);
193+
return OMPI_ERR_NOT_SUPPORTED;
194+
}
195+
}
196+
185197
OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);
186198

187199
OPAL_THREAD_LOCK(&module->lock);

0 commit comments

Comments
 (0)