Skip to content

Commit 943c4b2

Browse files
committed
accelerator: use correct copy type if known
Fix the copy type passed to the accelerator framework if the information is available. (Similarly, use the correct device ID if the information is already available; that might be required at a later stage if we want to support multiple GPUs per process.) Signed-off-by: Edgar Gabriel <Edgar.Gabriel@amd.com>
1 parent bcac86f commit 943c4b2

File tree

3 files changed

+31
-13
lines changed

3 files changed

+31
-13
lines changed

opal/datatype/opal_datatype_copy.c

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@
5858
static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_t size)
5959
{
6060
int res;
61-
int dev_id;
61+
int dst_type, dst_dev, src_type, src_dev;
62+
int copy_type = MCA_ACCELERATOR_TRANSFER_DTOD;
6263
uint64_t flags;
6364
/* If accelerator check addr returns an error, we can only
6465
* assume it is a host buffer. If device buffer checking fails,
@@ -67,12 +68,19 @@ static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_
6768
* and retries are also unlikely to succeed. We identify these
6869
* buffers as host buffers as attempting a memcpy would provide
6970
* a chance to succeed. */
70-
if (0 >= opal_accelerator.check_addr(dest, &dev_id, &flags) &&
71-
0 >= opal_accelerator.check_addr(src, &dev_id, &flags)) {
71+
dst_type = opal_accelerator.check_addr(dest, &dst_dev, &flags);
72+
src_type = opal_accelerator.check_addr(src, &src_dev, &flags);
73+
if (0 >= dst_type && 0 >= src_type) {
7274
return memcpy(dest, src, size);
7375
}
74-
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
75-
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
76+
else if (0 >= dst_type && 0 < src_type) {
77+
copy_type = MCA_ACCELERATOR_TRANSFER_DTOH;
78+
}
79+
else if (0 < dst_type && 0 >= dst_type) {
80+
copy_type = MCA_ACCELERATOR_TRANSFER_HTOD;
81+
}
82+
res = opal_accelerator.mem_copy(dst_dev, src_dev,
83+
dest, src, size, copy_type);
7684
if (OPAL_SUCCESS != res) {
7785
opal_output(0, "Error in accelerator memcpy");
7886
abort();
@@ -83,7 +91,8 @@ static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_
8391
static void *opal_datatype_accelerator_memmove(void *dest, const void *src, size_t size)
8492
{
8593
int res;
86-
int dev_id;
94+
int dst_type, dst_dev, src_type, src_dev;
95+
int copy_type = MCA_ACCELERATOR_TRANSFER_DTOD;
8796
uint64_t flags;
8897
/* If accelerator check addr returns an error, we can only
8998
* assume it is a host buffer. If device buffer checking fails,
@@ -92,12 +101,19 @@ static void *opal_datatype_accelerator_memmove(void *dest, const void *src, size
92101
* and retries are also unlikely to succeed. We identify these
93102
* buffers as host buffers as attempting a memmove would provide
94103
* a chance to succeed. */
95-
if (0 >= opal_accelerator.check_addr(dest, &dev_id, &flags) &&
96-
0 >= opal_accelerator.check_addr(src, &dev_id, &flags)) {
104+
dst_type = opal_accelerator.check_addr(dest, &dst_dev, &flags);
105+
src_type = opal_accelerator.check_addr(src, &src_dev, &flags);
106+
if (0 >= dst_type && 0 >= src_type) {
97107
return memmove(dest, src, size);
98108
}
99-
res = opal_accelerator.mem_move(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
100-
dest, src, size, MCA_ACCELERATOR_TRANSFER_UNSPEC);
109+
else if (0 >= dst_type && 0 < src_type) {
110+
copy_type = MCA_ACCELERATOR_TRANSFER_DTOH;
111+
}
112+
else if (0 < dst_type && 0 >= dst_type) {
113+
copy_type = MCA_ACCELERATOR_TRANSFER_HTOD;
114+
}
115+
res = opal_accelerator.mem_move(dst_dev, src_dev,
116+
dest, src, size, copy_type);
101117
if (OPAL_SUCCESS != res) {
102118
opal_output(0, "Error in accelerator memmove");
103119
abort();

opal/mca/accelerator/rocm/accelerator_rocm_module.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,9 @@ static int mca_accelerator_rocm_memcpy(int dest_dev_id, int src_dev_id, void *de
334334
return OPAL_SUCCESS;
335335
}
336336

337-
if (type == MCA_ACCELERATOR_TRANSFER_DTOH && size <= opal_accelerator_rocm_memcpyD2H_limit) {
337+
if ((type == MCA_ACCELERATOR_TRANSFER_DTOH ||
338+
type == MCA_ACCELERATOR_TRANSFER_UNSPEC) &&
339+
size <= opal_accelerator_rocm_memcpyD2H_limit) {
338340
memcpy(dest, src, size);
339341
return OPAL_SUCCESS;
340342
}

opal/mca/btl/smcuda/btl_smcuda_accelerator.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ int mca_btl_smcuda_accelerator_init(void)
6565
}
6666
/* Create the events since they can be reused. */
6767
for (i = 0; i < accelerator_event_max; i++) {
68-
rc = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_ipc_array[i], true);
68+
rc = opal_accelerator.create_event(MCA_ACCELERATOR_NO_DEVICE_ID, &accelerator_event_ipc_array[i], opal_accelerator_use_sync_memops ? false : true);
6969
if (OPAL_SUCCESS != rc) {
7070
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "Accelerator create event failed.");
7171
rc = OPAL_ERROR;
@@ -215,7 +215,7 @@ int mca_btl_smcuda_memcpy(void *dst, void *src, size_t amount, char *msg,
215215
}
216216

217217
result = opal_accelerator.mem_copy_async(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
218-
dst, src, amount, ipc_stream, MCA_ACCELERATOR_TRANSFER_UNSPEC);
218+
dst, src, amount, ipc_stream, MCA_ACCELERATOR_TRANSFER_DTOD);
219219
if (OPAL_UNLIKELY(OPAL_SUCCESS != result)) {
220220
opal_output_verbose(1, mca_btl_smcuda_component.cuda_ipc_output, "smcuda: memcpy async failed: %d",
221221
result);

0 commit comments

Comments
 (0)