Skip to content

Commit 308d7f7

Browse files
authored
Merge pull request #11393 from wckzhang/is_accel_check
opal/datatype: Account for error possibility in check addr
2 parents 228ca96 + 0ec220e commit 308d7f7

File tree

1 file changed

+22
-14
lines changed

1 file changed

+22
-14
lines changed

opal/datatype/opal_datatype_copy.c

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,22 +55,20 @@
5555
} \
5656
} while (0)
5757

58-
static bool opal_datatype_is_accel(void *dest, const void *src) {
59-
int dev_id;
60-
uint64_t flags;
61-
if (opal_accelerator.check_addr(dest, &dev_id, &flags)) {
62-
return true;
63-
}
64-
if (opal_accelerator.check_addr(src, &dev_id, &flags)) {
65-
return true;
66-
}
67-
return false;
68-
}
69-
7058
static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_t size)
7159
{
7260
int res;
73-
if (!opal_datatype_is_accel(dest, src)) {
61+
int dev_id;
62+
uint64_t flags;
63+
/* If accelerator check addr returns an error, we can only
64+
* assume it is a host buffer. If device buffer checking fails,
65+
* it's also highly likely that a device copy will fail. The best
66+
* we can do is fail as this is not a recoverable/ignorable failure
67+
* and retries are also unlikely to succeed. We identify these
68+
* buffers as host buffers as attempting a memcpy would provide
69+
* a chance to succeed. */
70+
if (0 >= opal_accelerator.check_addr(dest, &dev_id, &flags) &&
71+
0 >= opal_accelerator.check_addr(src, &dev_id, &flags)) {
7472
return memcpy(dest, src, size);
7573
}
7674
res = opal_accelerator.mem_copy(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,
@@ -85,7 +83,17 @@ static void *opal_datatype_accelerator_memcpy(void *dest, const void *src, size_
8583
static void *opal_datatype_accelerator_memmove(void *dest, const void *src, size_t size)
8684
{
8785
int res;
88-
if (!opal_datatype_is_accel(dest, src)) {
86+
int dev_id;
87+
uint64_t flags;
88+
/* If accelerator check addr returns an error, we can only
89+
* assume it is a host buffer. If device buffer checking fails,
90+
* it's also highly likely that a device copy will fail. The best
91+
* we can do is fail as this is not a recoverable/ignorable failure
92+
* and retries are also unlikely to succeed. We identify these
93+
* buffers as host buffers as attempting a memmove would provide
94+
* a chance to succeed. */
95+
if (0 >= opal_accelerator.check_addr(dest, &dev_id, &flags) &&
96+
0 >= opal_accelerator.check_addr(src, &dev_id, &flags)) {
8997
return memmove(dest, src, size);
9098
}
9199
res = opal_accelerator.mem_move(MCA_ACCELERATOR_NO_DEVICE_ID, MCA_ACCELERATOR_NO_DEVICE_ID,

0 commit comments

Comments
 (0)