@@ -395,15 +395,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
395
395
396
396
array_desc.Flags = 0 ; // No flags required
397
397
array_desc.Width = pImageDesc->width ;
398
- if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
398
+ switch (pImageDesc->type ) {
399
+ case UR_MEM_TYPE_IMAGE1D:
399
400
array_desc.Height = 0 ;
400
401
array_desc.Depth = 0 ;
401
- } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
402
+ break ;
403
+ case UR_MEM_TYPE_IMAGE2D:
402
404
array_desc.Height = pImageDesc->height ;
403
405
array_desc.Depth = 0 ;
404
- } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE3D) {
406
+ break ;
407
+ case UR_MEM_TYPE_IMAGE3D:
405
408
array_desc.Height = pImageDesc->height ;
406
409
array_desc.Depth = pImageDesc->depth ;
410
+ break ;
411
+ case UR_MEM_TYPE_IMAGE1D_ARRAY:
412
+ array_desc.Height = 0 ;
413
+ array_desc.Depth = pImageDesc->arraySize ;
414
+ array_desc.Flags |= CUDA_ARRAY3D_LAYERED;
415
+ break ;
416
+ case UR_MEM_TYPE_IMAGE2D_ARRAY:
417
+ array_desc.Height = pImageDesc->height ;
418
+ array_desc.Depth = pImageDesc->arraySize ;
419
+ array_desc.Flags |= CUDA_ARRAY3D_LAYERED;
420
+ break ;
421
+ default :
422
+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
407
423
}
408
424
409
425
ScopedContext Active (hDevice->getContext ());
@@ -698,6 +714,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
698
714
cpy_desc.Height = copyExtent.height ;
699
715
cpy_desc.Depth = copyExtent.depth ;
700
716
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
717
+ } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
718
+ pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) {
719
+ CUDA_MEMCPY3D cpy_desc = {};
720
+ cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes;
721
+ cpy_desc.srcY = srcOffset.y ;
722
+ cpy_desc.srcZ = srcOffset.z ;
723
+ cpy_desc.dstXInBytes = dstOffset.x * PixelSizeBytes;
724
+ cpy_desc.dstY = dstOffset.y ;
725
+ cpy_desc.dstZ = dstOffset.z ;
726
+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
727
+ cpy_desc.srcHost = pSrc;
728
+ cpy_desc.srcPitch = hostExtent.width * PixelSizeBytes;
729
+ cpy_desc.srcHeight = hostExtent.height ;
730
+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
731
+ cpy_desc.dstArray = (CUarray)pDst;
732
+ cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
733
+ cpy_desc.Height = std::max (uint64_t {1 }, copyExtent.height );
734
+ cpy_desc.Depth = pImageDesc->arraySize ;
735
+ UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
701
736
}
702
737
} else if (imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST) {
703
738
if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D) {
@@ -762,6 +797,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
762
797
cpy_desc.Height = copyExtent.height ;
763
798
cpy_desc.Depth = copyExtent.depth ;
764
799
UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
800
+ } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY ||
801
+ pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) {
802
+ CUDA_MEMCPY3D cpy_desc = {};
803
+ cpy_desc.srcXInBytes = srcOffset.x ;
804
+ cpy_desc.srcY = srcOffset.y ;
805
+ cpy_desc.srcZ = srcOffset.z ;
806
+ cpy_desc.dstXInBytes = dstOffset.x ;
807
+ cpy_desc.dstY = dstOffset.y ;
808
+ cpy_desc.dstZ = dstOffset.z ;
809
+ cpy_desc.srcMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_ARRAY;
810
+ cpy_desc.srcArray = (CUarray)pSrc;
811
+ cpy_desc.dstMemoryType = CUmemorytype_enum::CU_MEMORYTYPE_HOST;
812
+ cpy_desc.dstHost = pDst;
813
+ cpy_desc.WidthInBytes = PixelSizeBytes * copyExtent.width ;
814
+ cpy_desc.Height = std::max (uint64_t {1 }, copyExtent.height );
815
+ cpy_desc.Depth = pImageDesc->arraySize ;
816
+ UR_CHECK_ERROR (cuMemcpy3DAsync (&cpy_desc, Stream));
765
817
}
766
818
} else {
767
819
// / imageCopyFlags == UR_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE
0 commit comments