Skip to content

Commit d172aef

Browse files
authored
Merge pull request #10063 from bosilca/fix/no_chunk_for_cuda_memcpy
No segmenting for datatype copy on CUDA devices.
2 parents 1c32ea5 + 0fc6adf commit d172aef

File tree

2 files changed

+8 -1 lines changed

2 files changed

+8 -1 lines changed

opal/datatype/opal_datatype_copy.c

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,8 @@ static size_t opal_datatype_memop_block_size = 128 * 1024;
4848
/**
4949
* Non overlapping memory regions
5050
*/
51+
#undef MEM_OP_BLOCK_SIZE
52+
#define MEM_OP_BLOCK_SIZE opal_datatype_memop_block_size
5153
#undef MEM_OP_NAME
5254
#define MEM_OP_NAME non_overlap
5355
#undef MEM_OP
@@ -75,6 +77,8 @@ static size_t opal_datatype_memop_block_size = 128 * 1024;
7577
#if OPAL_CUDA_SUPPORT
7678
# include "opal/mca/common/cuda/common_cuda.h"
7779

80+
# undef MEM_OP_BLOCK_SIZE
81+
# define MEM_OP_BLOCK_SIZE total_length
7882
# undef MEM_OP_NAME
7983
# define MEM_OP_NAME non_overlap_cuda
8084
# undef MEM_OP

opal/datatype/opal_datatype_copy.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,9 @@
2323
#if !defined(MEM_OP)
2424
# error
2525
#endif /* !defined(MEM_OP) */
26+
#if !defined(MEM_OP_BLOCK_SIZE)
27+
# error
28+
#endif /* !defined(MEM_OP_BLOCK_SIZE) */
2629

2730
#ifndef STRINGIFY
2831
# define STRINGIFY_(arg) # arg
@@ -136,7 +139,7 @@ static inline int32_t _copy_content_same_ddt(const opal_datatype_t *datatype, in
136139
source += datatype->true_lb;
137140
if ((ptrdiff_t) datatype->size == extent) { /* all contiguous == no gaps around */
138141
size_t total_length = iov_len_local;
139-
size_t memop_chunk = opal_datatype_memop_block_size;
142+
size_t memop_chunk = MEM_OP_BLOCK_SIZE;
140143
OPAL_DATATYPE_SAFEGUARD_POINTER(source, iov_len_local, (unsigned char *) source_base,
141144
datatype, count);
142145
while (total_length > 0) {

0 commit comments

Comments
 (0)