Skip to content

Commit 624bd83

Browse files
committed
Move the scope of the sync to only device-to-device copies instead of all copies.
1 parent f489829 commit 624bd83

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

source/adapters/cuda/image.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
659659
enqueueEventsWait(hQueue, Stream, numEventsInWaitList, phEventWaitList);
660660

661661
// We have to use a different copy function for each image dimensionality.
662+
// All the async copy functions should be treated as synchronous because of
663+
// the explicit call to cuStreamSynchronize at the end
662664

663665
if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_HOST_TO_DEVICE) {
664666
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
@@ -893,12 +895,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
893895
cpy_desc.Depth = pImageDesc->arraySize;
894896
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
895897
}
898+
// Synchronization is required here to handle the case of copying data
899+
// from
900+
// host to device, then device to device and finally device to host.
901+
// Without it, there is a risk of the copies not being executed in the
902+
// intended order.
903+
cuStreamSynchronize(Stream);
896904
}
897-
// Synchronization is required here to handle the case of copying data from
898-
// host to device, then device to device and finally device to host.
899-
// Without it, there is a risk of the copies not being executed in the
900-
// intended order.
901-
cuStreamSynchronize(Stream);
905+
902906
if (phEvent) {
903907
auto NewEvent = ur_event_handle_t_::makeNative(UR_COMMAND_MEM_IMAGE_COPY,
904908
hQueue, Stream);

0 commit comments

Comments
 (0)