Skip to content

Commit 767cfd1

Browse files
committed
Merge branch 'main' into sanitizer-device-global
2 parents a59dc74 + 91c6068 commit 767cfd1

File tree

2 files changed

+58
-6
lines changed

2 files changed

+58
-6
lines changed

scripts/core/CONTRIB.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ available.
167167

168168
.. code-block:: console
169169
170-
$ cmake build/ -DUR_FORMAT_CPP_STYLE=ON
170+
$ cmake -B build/ -DUR_FORMAT_CPP_STYLE=ON
171171
172172
You can then follow the instructions below to use the ``generate`` target to
173173
regenerate the source.
@@ -207,7 +207,7 @@ equivalent):
207207
Writing YAML
208208
============
209209

210-
Please read the :ref:`core/INTRO:Naming Convention` section prior to making a
210+
Please read the :ref:`core/CONTRIB:Naming Convention` section prior to making a
211211
contribution and refer to the `YAML syntax`_ for specifics of how to define the
212212
required constructs.
213213

@@ -386,7 +386,7 @@ values.
386386

387387

388388
Naming Convention
389-
-----------------
389+
=================
390390

391391
The following naming conventions must be followed:
392392

source/adapters/cuda/image.cpp

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -395,15 +395,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
395395

396396
array_desc.Flags = 0; // No flags required
397397
array_desc.Width = pImageDesc->width;
398-
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
398+
switch (pImageDesc->type) {
399+
case UR_MEM_TYPE_IMAGE1D:
399400
array_desc.Height = 0;
400401
array_desc.Depth = 0;
401-
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
402+
break;
403+
case UR_MEM_TYPE_IMAGE2D:
402404
array_desc.Height = pImageDesc->height;
403405
array_desc.Depth = 0;
404-
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
406+
break;
407+
case UR_MEM_TYPE_IMAGE3D:
405408
array_desc.Height = pImageDesc->height;
406409
array_desc.Depth = pImageDesc->depth;
410+
break;
411+
case UR_MEM_TYPE_IMAGE1D_ARRAY:
412+
array_desc.Height = 0;
413+
array_desc.Depth = pImageDesc->arraySize;
414+
array_desc.Flags |= CUDA_ARRAY3D_LAYERED;
415+
break;
416+
case UR_MEM_TYPE_IMAGE2D_ARRAY:
417+
array_desc.Height = pImageDesc->height;
418+
array_desc.Depth = pImageDesc->arraySize;
419+
array_desc.Flags |= CUDA_ARRAY3D_LAYERED;
420+
break;
421+
default:
422+
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
407423
}
408424

409425
ScopedContext Active(hDevice->getContext());
@@ -698,6 +714,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
698714
cpy_desc.Height = copyExtent.height;
699715
cpy_desc.Depth = copyExtent.depth;
700716
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
717+
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
718+
pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) {
719+
CUDA_MEMCPY3D cpy_desc = {};
720+
cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
721+
cpy_desc.srcY = srcOffset.y;
722+
cpy_desc.srcZ = srcOffset.z;
723+
cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
724+
cpy_desc.dstY = dstOffset.y;
725+
cpy_desc.dstZ = dstOffset.z;
726+
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
727+
cpy_desc.srcHost = pSrc;
728+
cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes;
729+
cpy_desc.srcHeight = hostExtent.height;
730+
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
731+
cpy_desc.dstArray = (CUarray)pDst;
732+
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width;
733+
cpy_desc.Height = std::max(uint64_t{1}, copyExtent.height);
734+
cpy_desc.Depth = pImageDesc->arraySize;
735+
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
701736
}
702737
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
703738
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
@@ -762,6 +797,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
762797
cpy_desc.Height = copyExtent.height;
763798
cpy_desc.Depth = copyExtent.depth;
764799
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
800+
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
801+
pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) {
802+
CUDA_MEMCPY3D cpy_desc = {};
803+
cpy_desc.srcXInBytes = srcOffset.x;
804+
cpy_desc.srcY = srcOffset.y;
805+
cpy_desc.srcZ = srcOffset.z;
806+
cpy_desc.dstXInBytes = dstOffset.x;
807+
cpy_desc.dstY = dstOffset.y;
808+
cpy_desc.dstZ = dstOffset.z;
809+
cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
810+
cpy_desc.srcArray = (CUarray)pSrc;
811+
cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
812+
cpy_desc.dstHost = pDst;
813+
cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width;
814+
cpy_desc.Height = std::max(uint64_t{1}, copyExtent.height);
815+
cpy_desc.Depth = pImageDesc->arraySize;
816+
UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream));
765817
}
766818
} else {
767819
/// imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE

0 commit comments

Comments
 (0)