Skip to content

Commit 4c972c1

Browse files
authored
Merge pull request #12315 from edgargabriel/topic/accel-copy-type-fix
accelerator: use correct copy type if known
2 parents 7fc4535 + 943c4b2 commit 4c972c1

File tree

3 files changed

+31
-13
lines changed

3 files changed

+31
-13
lines changed

opal/datatype/opal_datatype_copy.c

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_t size)
5959
{
6060
int res;
61-
int dev_id;
61+
int dst_type, dst_dev, src_type, src_dev;
62+
int copy_type = MCA_ACCELERATOR_TRANSFER_DTOD;
6263
uint64_t flags;
6364
/* If accelerator check addr returns an error, we can only
6465
* assume it is a host buffer. If device buffer checking fails,
@@ -67,12 +68,19 @@ static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_
6768
* and retries are also unlikely to succeed. We identify these
6869
* buffers as host buffers as attempting a memcpy would provide
6970
* a chance to succeed. */
70-
if (0 >= opal_accelerator.check_addr(dest, &dev_id, &flags) &&
71-
0 >= opal_accelerator.check_addr(src, &dev_id, &flags)) {
71+
dst_type = opal_accelerator.check_addr(dest, &dst_dev, &flags);
72+
src_type = opal_accelerator.check_addr(src, &src_dev, &flags);
73+
if (0 >= dst_type && 0 >= src_type) {
7274
return memcpy(dest, src, size);
7375
}
74-
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
75-
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
76+
else if (0 >= dst_type && 0 < src_type) {
77+
copy_type = MCA_ACCELERATOR_TRANSFER_DTOH;
78+
}
79+
else if (0 < dst_type && 0 >= src_type) {
80+
copy_type = MCA_ACCELERATOR_TRANSFER_HTOD;
81+
}
82+
res = opal_accelerator.mem_copy(dst_dev, src_dev,
83+
dest, src, size, copy_type);
7684
if (OPAL_SUCCESS != res) {
7785
opal_output(0, "Error in accelerator memcpy");
7886
abort();
@@ -83,7 +91,8 @@ static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_
8391
static void *opal_datatype_accelerator_memmove(void *dest, const void *src, size_t size)
8492
{
8593
int res;
86-
int dev_id;
94+
int dst_type, dst_dev, src_type, src_dev;
95+
int copy_type = MCA_ACCELERATOR_TRANSFER_DTOD;
8796
uint64_t flags;
8897
/* If accelerator check addr returns an error, we can only
8998
* assume it is a host buffer. If device buffer checking fails,
@@ -92,12 +101,19 @@ static void *opal_datatype_accelerator_memmove(void *dest, const void *src, size
92101
* and retries are also unlikely to succeed. We identify these
93102
* buffers as host buffers as attempting a memmove would provide
94103
* a chance to succeed. */
95-
if (0 >= opal_accelerator.check_addr(dest, &dev_id, &flags) &&
96-
0 >= opal_accelerator.check_addr(src, &dev_id, &flags)) {
104+
dst_type = opal_accelerator.check_addr(dest, &dst_dev, &flags);
105+
src_type = opal_accelerator.check_addr(src, &src_dev, &flags);
106+
if (0 >= dst_type && 0 >= src_type) {
97107
return memmove(dest, src, size);
98108
}
99-
res = opal_accelerator.mem_move(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
100-
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
109+
else if (0 >= dst_type && 0 < src_type) {
110+
copy_type = MCA_ACCELERATOR_TRANSFER_DTOH;
111+
}
112+
else if (0 < dst_type && 0 >= src_type) {
113+
copy_type = MCA_ACCELERATOR_TRANSFER_HTOD;
114+
}
115+
res = opal_accelerator.mem_move(dst_dev, src_dev,
116+
dest, src, size, copy_type);
101117
if (OPAL_SUCCESS != res) {
102118
opal_output(0, "Error in accelerator memmove");
103119
abort();

opal/mca/accelerator/rocm/accelerator_rocm_module.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,9 @@ static int mca_accelerator_rocm_memcpy(int dest_dev_id, int src_dev_id, void *de
334334
return OPAL_SUCCESS;
335335
}
336336

337-
if (type == MCA_ACCELERATOR_TRANSFER_DTOH && size <= opal_accelerator_rocm_memcpyD2H_limit) {
337+
if ((type == MCA_ACCELERATOR_TRANSFER_DTOH ||
338+
type == MCA_ACCELERATOR_TRANSFER_UNSPEC) &&
339+
size <= opal_accelerator_rocm_memcpyD2H_limit) {
338340
memcpy(dest, src, size);
339341
return OPAL_SUCCESS;
340342
}

opal/mca/btl/smcuda/btl_smcuda_accelerator.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ int mca_btl_smcuda_accelerator_init(void)
6565
}
6666
/* Create the events since they can be reused. */
6767
for (i = 0; i < accelerator_event_max; i++) {
68-
rc = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_ipc_array[i], true);
68+
rc = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_ipc_array[i], opal_accelerator_use_sync_memops ? false : true);
6969
if (OPAL_SUCCESS != rc) {
7070
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "Accelerator create event failed.");
7171
rc = OPAL_ERROR;
@@ -215,7 +215,7 @@ int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
215215
}
216216

217217
result = opal_accelerator.mem_copy_async(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
218-
dst, src, amount, ipc_stream, MCA_ACCELERATOR_TRANSFER_UNSPEC);
218+
dst, src, amount, ipc_stream, MCA_ACCELERATOR_TRANSFER_DTOD);
219219
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
220220
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "smcuda: memcpy async failed: %d",
221221
result);

0 commit comments

Comments
 (0)