Skip to content

Commit de6a52d

Browse files
authored
Merge pull request #7789 from hoopoepg/topic/ucx-test-external-events
COMMON/UCX: improved missing events test
2 parents 283cfbf + d6bff6f commit de6a52d

File tree

2 files changed

+31
-8
lines changed

2 files changed

+31
-8
lines changed

config/ompi_check_ucx.m4

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
112112
ucp_request_check_status, ucp_put_nb, ucp_get_nb],
113113
[], [],
114114
[#include <ucp/api/ucp.h>])
115-
AC_CHECK_DECLS([ucm_test_events],
115+
AC_CHECK_DECLS([ucm_test_events,
116+
ucm_test_external_events],
116117
[], [],
117118
[#include <ucm/api/ucm.h>])
118119
AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND,

opal/mca/common/ucx/common_ucx.c

Lines changed: 29 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -135,24 +135,46 @@ static void opal_common_ucx_mca_fence_complete_cb(int status, void *fenced)
135135
*(int*)fenced = 1;
136136
}
137137

138-
void opal_common_ucx_mca_proc_added(void)
139-
{
140138
#if HAVE_DECL_UCM_TEST_EVENTS
139+
/*
 * Run the UCM event self-test for the given event mask.
 *
 * Uses ucm_test_external_events() when configure detected its declaration
 * (HAVE_DECL_UCM_TEST_EXTERNAL_EVENTS) and falls back to ucm_test_events()
 * otherwise.
 *
 * events: event mask forwarded unchanged to the UCM test routine (the
 *         caller visible in this file passes UCM_EVENT_VM_UNMAPPED).
 *
 * Returns the ucs_status_t reported by the UCM test routine.
 */
static ucs_status_t opal_common_ucx_mca_test_external_events(int events)
140+
{
141+
#if HAVE_DECL_UCM_TEST_EXTERNAL_EVENTS
142+
/* Forward the caller's mask instead of a hard-coded event. */
return ucm_test_external_events(events);
143+
#else
144+
return ucm_test_events(events);
145+
#endif
146+
}
147+
148+
/*
 * Check whether UCX can observe VM_UNMAP events and emit a warning if the
 * check fails.
 *
 * When opal_common_ucx.opal_mem_hooks is set, the check goes through
 * opal_common_ucx_mca_test_external_events() and the warning advises
 * checking the OPAL memory events infrastructure; otherwise
 * ucm_test_events() is called directly and the warning suggests enabling
 * the opal_mem_hooks MCA parameter.
 *
 * NOTE(review): this is a diff listing. Gutter values ending in '-' appear
 * to mark lines deleted by the commit (the mem_hooks_suggestion static, the
 * unconditional ucm_test_events() call and the old WARN arguments), so they
 * would not be part of the resulting function - confirm against the tree.
 */
static void opal_common_ucx_mca_test_events(void)
149+
{
141150
static int warned = 0;
142-
static char *mem_hooks_suggestion = "Pls try adding --mca opal_common_ucx_opal_mem_hooks 1 "
143-
"to mpirun/oshrun command line to resolve this issue.";
151+
const char *suggestion;
144152
ucs_status_t status;
145153

146154
/* Once a warning has been issued, later calls skip the test entirely. */
if (!warned) {
147-
status = ucm_test_events(UCM_EVENT_VM_UNMAPPED);
155+
if (opal_common_ucx.opal_mem_hooks) {
156+
suggestion = "Please check OPAL memory events infrastructure.";
157+
status = opal_common_ucx_mca_test_external_events(UCM_EVENT_VM_UNMAPPED);
158+
} else {
159+
suggestion = "Pls try adding --mca opal_common_ucx_opal_mem_hooks 1 "
160+
"to mpirun/oshrun command line to resolve this issue.";
161+
status = ucm_test_events(UCM_EVENT_VM_UNMAPPED);
162+
}
163+
148164
if (status != UCS_OK) {
149165
MCA_COMMON_UCX_WARN("UCX is unable to handle VM_UNMAP event. "
150166
"This may cause performance degradation or data "
151-
"corruption. %s",
152-
opal_common_ucx.opal_mem_hooks ? "" : mem_hooks_suggestion);
167+
"corruption. %s", suggestion);
153168
/* Latched only on failure: while the test passes it is re-run on every call. */
warned = 1;
154169
}
155170
}
171+
}
172+
#endif
173+
174+
/*
 * Runs the VM_UNMAP event self-test (opal_common_ucx_mca_test_events) when
 * configure found a declaration of ucm_test_events; otherwise this function
 * does nothing.
 *
 * NOTE(review): presumably invoked each time a peer process is added -
 * confirm against the callers, which are not visible here.
 */
void opal_common_ucx_mca_proc_added(void)
175+
{
176+
#if HAVE_DECL_UCM_TEST_EVENTS
177+
opal_common_ucx_mca_test_events();
156178
#endif
157179
}
158180

0 commit comments

Comments
 (0)