Skip to content

Commit 619e5c6

Browse files
authored
Merge pull request #11549 from devreal/btl_check_device
Have btls check for whether convertor involves device memory
2 parents 1a7a82a + 2453349 commit 619e5c6

File tree

9 files changed

+37
-12
lines changed

9 files changed

+37
-12
lines changed

ompi/mca/mtl/base/mtl_base_datatype.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,19 @@ ompi_mtl_datatype_pack(struct opal_convertor_t *convertor,
3939
{
4040
struct iovec iov;
4141
uint32_t iov_count = 1;
42+
bool is_accelerator = opal_convertor_on_device(convertor);
4243
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
4344
if (convertor->pDesc &&
4445
!(convertor->flags & CONVERTOR_COMPLETED) &&
4546
opal_datatype_is_contiguous_memory_layout(convertor->pDesc,
4647
convertor->count) &&
47-
!(convertor->flags & CONVERTOR_ACCELERATOR)) {
48+
!is_accelerator) {
4849
*free_after = false;
4950
*buffer = convertor->pBaseBuf + convertor->bConverted + convertor->pDesc->true_lb;
5051
*buffer_len = convertor->local_size;
5152
return OPAL_SUCCESS;
5253
}
5354
#endif
54-
bool is_accelerator = convertor->flags & CONVERTOR_ACCELERATOR;
5555

5656
opal_convertor_get_packed_size(convertor, buffer_len);
5757
*free_after = false;
@@ -94,7 +94,7 @@ ompi_mtl_datatype_recv_buf(struct opal_convertor_t *convertor,
9494
*buffer_len = 0;
9595
return OMPI_SUCCESS;
9696
}
97-
bool is_accelerator = convertor->flags & CONVERTOR_ACCELERATOR;
97+
bool is_accelerator = opal_convertor_on_device(convertor);;
9898

9999
/* If we need buffers or we don't have accelerator support and it is a device buffer, we will need to copy */
100100
if (opal_convertor_need_buffers(convertor) || (is_accelerator && false == ompi_mtl_base_selected_component->accelerator_support)) {
@@ -122,7 +122,7 @@ ompi_mtl_datatype_unpack(struct opal_convertor_t *convertor,
122122
{
123123
struct iovec iov;
124124
uint32_t iov_count = 1;
125-
bool is_accelerator = convertor->flags & CONVERTOR_ACCELERATOR;
125+
bool is_accelerator = opal_convertor_on_device(convertor);
126126

127127
/* If the buffer length is greater than 0 and we allocated buffers previously, we need to unpack them */
128128
if (buffer_len > 0 && (opal_convertor_need_buffers(convertor) || (is_accelerator && false == ompi_mtl_base_selected_component->accelerator_support))) {

opal/datatype/opal_convertor.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,23 @@ static inline int32_t opal_convertor_need_buffers(const opal_convertor_t *pConve
187187
return 1;
188188
}
189189

190+
/**
 * Report whether the CONVERTOR_ACCELERATOR flag is set on a convertor.
 *
 * @param pConvertor  convertor whose flags field is inspected; it is
 *                    dereferenced unconditionally, so it must not be NULL.
 * @return 1 if any CONVERTOR_ACCELERATOR bit is set in pConvertor->flags,
 *         0 otherwise (the double negation collapses the masked value to 0/1).
 *
 * NOTE(review): presumably this flag marks a buffer that lives in
 * accelerator (device) memory -- confirm where the flag is set.
 */
static inline int32_t opal_convertor_on_device(const opal_convertor_t *pConvertor)
191+
{
192+
return !!(pConvertor->flags & CONVERTOR_ACCELERATOR);
193+
}
194+
195+
static inline int32_t opal_convertor_on_discrete_device(const opal_convertor_t *pConvertor)
196+
{
197+
return (CONVERTOR_ACCELERATOR == ((pConvertor->flags & CONVERTOR_ACCELERATOR) |
198+
(pConvertor->flags & CONVERTOR_ACCELERATOR_UNIFIED)));
199+
}
200+
201+
/**
 * Report whether both the CONVERTOR_ACCELERATOR and the
 * CONVERTOR_ACCELERATOR_UNIFIED flags are set on a convertor.
 *
 * Each flag is tested independently and normalized to 0/1 before the two
 * results are combined with a logical AND.
 *
 * @param pConvertor  convertor whose flags field is inspected; it is
 *                    dereferenced unconditionally, so it must not be NULL.
 * @return 1 if pConvertor->flags has at least one bit of each of the two
 *         masks set, 0 otherwise.
 *
 * NOTE(review): presumably "unified" refers to accelerator memory that is
 * also host-accessible -- confirm against the flag's definition.
 */
static inline int32_t opal_convertor_on_unified_device(const opal_convertor_t *pConvertor)
202+
{
203+
return (!!(pConvertor->flags & CONVERTOR_ACCELERATOR) &&
204+
!!(pConvertor->flags & CONVERTOR_ACCELERATOR_UNIFIED));
205+
}
206+
190207
/**
191208
* Update the size of the remote datatype representation. The size will
192209
* depend on the configuration of the master convertor. In homogeneous

opal/mca/btl/portals4/btl_portals4.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,9 @@ mca_btl_base_descriptor_t *mca_btl_portals4_prepare_src(struct mca_btl_base_modu
533533
((struct mca_btl_portals4_module_t *) btl_base)->interface_num,
534534
reserve, *size, max_data));
535535

536-
if (0 != reserve || 0 != opal_convertor_need_buffers(convertor)) {
536+
if (0 != reserve ||
537+
0 != opal_convertor_need_buffers(convertor) ||
538+
0 != opal_convertor_on_device(convertor)) {
537539
OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
538540
"mca_btl_portals4_prepare_src NEED BUFFERS or RESERVE\n"));
539541
frag = (mca_btl_portals4_frag_t *) mca_btl_portals4_alloc(btl_base, peer, MCA_BTL_NO_ORDER,

opal/mca/btl/self/btl_self.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ static struct mca_btl_base_descriptor_t *mca_btl_self_prepare_src(
151151
struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
152152
struct opal_convertor_t *convertor, uint8_t order, size_t reserve, size_t *size, uint32_t flags)
153153
{
154-
bool inline_send = !opal_convertor_need_buffers(convertor);
154+
bool inline_send = !(opal_convertor_need_buffers(convertor) || opal_convertor_on_device(convertor));
155155
size_t buffer_len = reserve + (inline_send ? 0 : *size);
156156
mca_btl_self_frag_t *frag;
157157

@@ -229,7 +229,9 @@ static int mca_btl_self_sendi(struct mca_btl_base_module_t *btl,
229229
{
230230
mca_btl_base_descriptor_t *frag;
231231

232-
if (!payload_size || !opal_convertor_need_buffers(convertor)) {
232+
if (!payload_size ||
233+
!(opal_convertor_need_buffers(convertor) ||
234+
opal_convertor_on_device(convertor))) {
233235
void *data_ptr = NULL;
234236
if (payload_size) {
235237
opal_convertor_get_current_pointer(convertor, &data_ptr);

opal/mca/btl/sm/btl_sm_module.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,10 @@ static struct mca_btl_base_descriptor_t *sm_prepare_src(struct mca_btl_base_modu
429429
assert(NULL != data_ptr);
430430

431431
/* in place send fragment */
432-
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
432+
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor) ||
433+
opal_convertor_on_discrete_device(convertor) ||
434+
(opal_convertor_on_unified_device(convertor) &&
435+
total_size > (size_t) mca_btl_sm_component.max_inline_send))) {
433436
uint32_t iov_count = 1;
434437
struct iovec iov;
435438

opal/mca/btl/sm/btl_sm_sendi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ int mca_btl_sm_sendi(struct mca_btl_base_module_t *btl, struct mca_btl_base_endp
5959
opal_convertor_get_current_pointer(convertor, &data_ptr);
6060
}
6161

62-
if (!(payload_size && opal_convertor_need_buffers(convertor))
62+
if (!(payload_size && (opal_convertor_need_buffers(convertor) || opal_convertor_on_device(convertor)))
6363
&& mca_btl_sm_fbox_sendi(endpoint, tag, header, header_size, data_ptr, payload_size)) {
6464
return OPAL_SUCCESS;
6565
}

opal/mca/btl/tcp/btl_tcp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ mca_btl_base_descriptor_t *mca_btl_tcp_prepare_src(struct mca_btl_base_module_t
250250
frag->segments[0].seg_len = reserve;
251251

252252
frag->base.des_segment_count = 1;
253-
if (opal_convertor_need_buffers(convertor)) {
253+
if (opal_convertor_need_buffers(convertor) || opal_convertor_on_device(convertor)) {
254254

255255
if (max_data + reserve > frag->size) {
256256
max_data = frag->size - reserve;

opal/mca/btl/ugni/btl_ugni_prepare.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ mca_btl_ugni_prepare_src_send(struct mca_btl_base_module_t *btl, mca_btl_base_en
179179

180180
send_in_place = (btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE)
181181
&& !(opal_convertor_need_buffers(convertor)
182+
|| opal_convertor_on_device(convertor)
182183
|| (use_eager_get && ((uintptr_t) data_ptr & 3)));
183184

184185
if (send_in_place) {

opal/mca/btl/usnic/btl_usnic_compat.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ prepare_src_small(struct opal_btl_usnic_module_t *module, struct mca_btl_base_en
9191
* we might still use INLINE for the send, and in that case we do not want
9292
* to copy the data at all.
9393
*/
94-
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
94+
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor) || opal_convertor_on_device(convertor))) {
9595
/* put user data just after end of 1st seg (upper layer header) */
9696
assert(payload_len <= module->max_frag_payload);
9797
usnic_convertor_pack_simple(convertor,
@@ -227,7 +227,7 @@ static opal_btl_usnic_send_frag_t *prepare_src_large(struct opal_btl_usnic_modul
227227
/* make sure upper header small enough */
228228
assert(reserve <= sizeof(lfrag->lsf_ompi_header));
229229

230-
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
230+
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor) || opal_convertor_on_device(convertor))) {
231231
/* threshold == -1 means always pack eagerly */
232232
if (mca_btl_usnic_component.pack_lazy_threshold >= 0
233233
&& *size >= (size_t) mca_btl_usnic_component.pack_lazy_threshold) {

0 commit comments

Comments
 (0)