Skip to content

Commit 772cf75

Browse files
authored
Merge pull request #9635 from Artemy-Mellanox/topic/multi_send-v5.0
pml/ucx: pml_ucx_multi_send_nb support
2 parents 382ec50 + 0adbecc commit 772cf75

File tree

6 files changed

+28
-7
lines changed

6 files changed

+28
-7
lines changed

config/ompi_check_ucx.m4

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[
138138
UCP_ATOMIC_FETCH_OP_FOR,
139139
UCP_ATOMIC_FETCH_OP_FXOR,
140140
UCP_PARAM_FIELD_ESTIMATED_NUM_PPN,
141-
UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK],
141+
UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK,
142+
UCP_OP_ATTR_FLAG_MULTI_SEND],
142143
[], [],
143144
[#include <ucp/api/ucp.h>])
144145
AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS],

ompi/mca/pml/ucx/pml_ucx.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -566,7 +566,7 @@ int mca_pml_ucx_irecv(void *buf, size_t count, ompi_datatype_t *datatype,
566566
{
567567
#if HAVE_DECL_UCP_TAG_RECV_NBX
568568
pml_ucx_datatype_t *op_data = mca_pml_ucx_get_op_data(datatype);
569-
ucp_request_param_t *param = &op_data->op_param.recv;
569+
ucp_request_param_t *param = &op_data->op_param.irecv;
570570
#endif
571571

572572
ucp_tag_t ucp_tag, ucp_tag_mask;
@@ -834,7 +834,7 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype,
834834
#if HAVE_DECL_UCP_TAG_SEND_NBX
835835
req = (ompi_request_t*)mca_pml_ucx_common_send_nbx(ep, buf, count, datatype,
836836
PML_UCX_MAKE_SEND_TAG(tag, comm), mode,
837-
&mca_pml_ucx_get_op_data(datatype)->op_param.send);
837+
&mca_pml_ucx_get_op_data(datatype)->op_param.isend);
838838
#else
839839
req = (ompi_request_t*)mca_pml_ucx_common_send(ep, buf, count, datatype,
840840
mca_pml_ucx_get_datatype(datatype),

ompi/mca/pml/ucx/pml_ucx.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ struct mca_pml_ucx_module {
5959
int priority;
6060
bool cuda_initialized;
6161
bool request_leak_check;
62+
uint32_t op_attr_nonblocking;
6263
};
6364

6465
extern mca_pml_base_component_2_1_0_t mca_pml_ucx_component;

ompi/mca/pml/ucx/pml_ucx_component.c

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,8 @@ mca_pml_base_component_2_1_0_t mca_pml_ucx_component = {
4949

5050
static int mca_pml_ucx_component_register(void)
5151
{
52+
int multi_send_op_attr_enable;
53+
5254
ompi_pml_ucx.priority = 51;
5355
(void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "priority",
5456
"Priority of the UCX component",
@@ -79,6 +81,20 @@ static int mca_pml_ucx_component_register(void)
7981
ompi_pml_ucx.request_leak_check = true;
8082
#endif
8183

84+
ompi_pml_ucx.op_attr_nonblocking = 0;
85+
#if HAVE_DECL_UCP_OP_ATTR_FLAG_MULTI_SEND
86+
multi_send_op_attr_enable = 0;
87+
(void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "multi_send_nb",
88+
"Enable passing multi-send optimization flag for nonblocking operations",
89+
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
90+
OPAL_INFO_LVL_3,
91+
MCA_BASE_VAR_SCOPE_LOCAL,
92+
&multi_send_op_attr_enable);
93+
if (multi_send_op_attr_enable) {
94+
ompi_pml_ucx.op_attr_nonblocking = UCP_OP_ATTR_FLAG_MULTI_SEND;
95+
}
96+
#endif
97+
8298
opal_common_ucx_mca_var_register(&mca_pml_ucx_component.pmlm_version);
8399
return 0;
84100
}

ompi/mca/pml/ucx/pml_ucx_datatype.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@
2424
#ifdef HAVE_UCP_REQUEST_PARAM_T
2525
#define PML_UCX_DATATYPE_SET_VALUE(_datatype, _val) \
2626
(_datatype)->op_param.send._val; \
27-
(_datatype)->op_param.bsend._val; \
2827
(_datatype)->op_param.recv._val;
2928
#endif
3029

@@ -190,8 +189,6 @@ pml_ucx_datatype_t *mca_pml_ucx_init_nbx_datatype(ompi_datatype_t *datatype,
190189
pml_datatype->datatype = ucp_datatype;
191190
pml_datatype->op_param.send.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK;
192191
pml_datatype->op_param.send.cb.send = mca_pml_ucx_send_nbx_completion;
193-
pml_datatype->op_param.bsend.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK;
194-
pml_datatype->op_param.bsend.cb.send = mca_pml_ucx_bsend_nbx_completion;
195192
pml_datatype->op_param.recv.op_attr_mask = UCP_OP_ATTR_FIELD_CALLBACK |
196193
UCP_OP_ATTR_FLAG_NO_IMM_CMPL;
197194
pml_datatype->op_param.recv.cb.recv = mca_pml_ucx_recv_nbx_completion;
@@ -206,6 +203,11 @@ pml_ucx_datatype_t *mca_pml_ucx_init_nbx_datatype(ompi_datatype_t *datatype,
206203
PML_UCX_DATATYPE_SET_VALUE(pml_datatype, datatype = ucp_datatype);
207204
}
208205

206+
pml_datatype->op_param.isend = pml_datatype->op_param.send;
207+
pml_datatype->op_param.irecv = pml_datatype->op_param.recv;
208+
pml_datatype->op_param.isend.op_attr_mask |= ompi_pml_ucx.op_attr_nonblocking;
209+
pml_datatype->op_param.irecv.op_attr_mask |= ompi_pml_ucx.op_attr_nonblocking;
210+
209211
return pml_datatype;
210212
}
211213
#endif

ompi/mca/pml/ucx/pml_ucx_datatype.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,9 @@ typedef struct {
2121
int size_shift;
2222
struct {
2323
ucp_request_param_t send;
24-
ucp_request_param_t bsend;
24+
ucp_request_param_t isend;
2525
ucp_request_param_t recv;
26+
ucp_request_param_t irecv;
2627
} op_param;
2728
} pml_ucx_datatype_t;
2829
#endif

0 commit comments

Comments
 (0)