Skip to content

Commit f642b63

Browse files
authored
[flang][cuda] Update condition in descriptor data transfer (#148306)
When the two descriptors have the same number of elements and are contiguous, the transfer can be done via pointers.
1 parent 4ce34f1 commit f642b63

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

flang-rt/lib/cuda/memory.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,12 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
110110
dstDesc->ApplyMold(*srcDesc, dstDesc->rank());
111111
dstDesc->Allocate(/*asyncObject=*/nullptr);
112112
}
113-
if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) {
113+
if ((srcDesc->rank() > 0) && (dstDesc->Elements() <= srcDesc->Elements()) &&
114+
srcDesc->IsContiguous() && dstDesc->IsContiguous()) {
114115
// Special case when rhs has at least as many elements as lhs and both are contiguous arrays.
115116
// In this case we do a simple ptr to ptr transfer with the size of lhs.
116117
// This is allowed in the reference compiler and it avoids the error
117118
// triggered in the Assign runtime function used for the main case below.
118-
if (!srcDesc->IsContiguous() || !dstDesc->IsContiguous())
119-
terminator.Crash("Unsupported data transfer: mismatching element counts "
120-
"with non-contiguous arrays");
121119
RTNAME(CUFDataTransferPtrPtr)(dstDesc->raw().base_addr,
122120
srcDesc->raw().base_addr, dstDesc->Elements() * dstDesc->ElementBytes(),
123121
mode, sourceFile, sourceLine);

0 commit comments

Comments
 (0)