Skip to content

Commit 2d5a79b

Browse files
przemektmalon authored and fabiomestre committed
[SYCL][CUDA][Bindless] Add support for normalized channel types (#11120)
- Support was added for the following image channel types:
  - `unorm_int8`
  - `unorm_int16`
  - `snorm_int8`
  - `snorm_int16`
- Reading these types through `read_image` returns the denormalized floating point data.
- A test was added for these new types.
- Support for the following packed normalized image channel types was removed from the proposal:
  - `unorm_short_565`
  - `unorm_short_555`
  - `unorm_int_101010`
- This was done due to lack of device support. If the need for these types arises in the future, we can revisit support for these types.
1 parent 02383b0 commit 2d5a79b

File tree

2 files changed

+125
-28
lines changed

2 files changed

+125
-28
lines changed

image.cpp

Lines changed: 124 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,8 @@
77
//===----------------------------------------------------------------------===//
88

99
#include <cuda.h>
10+
#include <map>
11+
#include <utility>
1012

1113
#include "common.hpp"
1214
#include "context.hpp"
@@ -52,30 +54,33 @@ ur_result_t urCalculateNumChannels(ur_image_channel_order_t order,
5254
/// Convert a UR image format to a CUDA image format and
5355
/// get the pixel size in bytes.
5456
/// /param image_channel_type is the ur_image_channel_type_t.
57+
/// /param image_channel_order is the ur_image_channel_order_t.
58+
/// this is used for normalized channel formats, as CUDA
59+
/// combines the channel format and order for normalized
60+
/// channel types.
5561
/// /param return_cuda_format will be set to the equivalent cuda
56-
/// format if not nullptr.
57-
/// /param return_pixel_types_size_bytes will be set to the pixel
58-
/// byte size if not nullptr.
62+
/// format if not nullptr.
63+
/// /param return_pixel_size_bytes will be set to the pixel
64+
/// byte size if not nullptr.
5965
ur_result_t
6066
urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type,
67+
ur_image_channel_order_t image_channel_order,
6168
CUarray_format *return_cuda_format,
62-
size_t *return_pixel_types_size_bytes) {
69+
size_t *return_pixel_size_bytes) {
6370

6471
CUarray_format cuda_format;
65-
size_t PixelTypeSizeBytes;
72+
size_t pixel_size_bytes = 0;
73+
unsigned int num_channels = 0;
74+
UR_CHECK_ERROR(urCalculateNumChannels(image_channel_order, &num_channels));
6675

6776
switch (image_channel_type) {
6877
#define CASE(FROM, TO, SIZE) \
6978
case FROM: { \
7079
cuda_format = TO; \
71-
PixelTypeSizeBytes = SIZE; \
80+
pixel_size_bytes = SIZE * num_channels; \
7281
break; \
7382
}
74-
// These new formats were brought in in CUDA 11.5
75-
#if CUDA_VERSION >= 11050
76-
CASE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, CU_AD_FORMAT_UNORM_INT8X1, 1)
77-
CASE(UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, CU_AD_FORMAT_UNORM_INT16X1, 2)
78-
#endif
83+
7984
CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8, CU_AD_FORMAT_UNSIGNED_INT8, 1)
8085
CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT8, CU_AD_FORMAT_SIGNED_INT8, 1)
8186
CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16, CU_AD_FORMAT_UNSIGNED_INT16, 2)
@@ -84,16 +89,73 @@ urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type,
8489
CASE(UR_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32, CU_AD_FORMAT_UNSIGNED_INT32, 4)
8590
CASE(UR_IMAGE_CHANNEL_TYPE_SIGNED_INT32, CU_AD_FORMAT_SIGNED_INT32, 4)
8691
CASE(UR_IMAGE_CHANNEL_TYPE_FLOAT, CU_AD_FORMAT_FLOAT, 4)
92+
8793
#undef CASE
8894
default:
89-
return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
95+
break;
96+
}
97+
98+
// These new formats were brought in in CUDA 11.5
99+
#if CUDA_VERSION >= 11050
100+
101+
// If none of the above channel types were passed, check those below
102+
if (pixel_size_bytes == 0) {
103+
104+
// We can't use a switch statement here because these single
105+
// UR_IMAGE_CHANNEL_TYPEs can correspond to multiple [u/s]norm CU_AD_FORMATs
106+
// depending on the number of channels. We use a std::map instead to
107+
// retrieve the correct CUDA format
108+
109+
// map < <channel type, num channels> , <CUDA format, data type byte size> >
110+
const std::map<std::pair<ur_image_channel_type_t, uint32_t>,
111+
std::pair<CUarray_format, uint32_t>>
112+
norm_channel_type_map{
113+
{{UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, 1},
114+
{CU_AD_FORMAT_UNORM_INT8X1, 1}},
115+
{{UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, 2},
116+
{CU_AD_FORMAT_UNORM_INT8X2, 2}},
117+
{{UR_IMAGE_CHANNEL_TYPE_UNORM_INT8, 4},
118+
{CU_AD_FORMAT_UNORM_INT8X4, 4}},
119+
120+
{{UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, 1},
121+
{CU_AD_FORMAT_SNORM_INT8X1, 1}},
122+
{{UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, 2},
123+
{CU_AD_FORMAT_SNORM_INT8X2, 2}},
124+
{{UR_IMAGE_CHANNEL_TYPE_SNORM_INT8, 4},
125+
{CU_AD_FORMAT_SNORM_INT8X4, 4}},
126+
127+
{{UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, 1},
128+
{CU_AD_FORMAT_UNORM_INT16X1, 2}},
129+
{{UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, 2},
130+
{CU_AD_FORMAT_UNORM_INT16X2, 4}},
131+
{{UR_IMAGE_CHANNEL_TYPE_UNORM_INT16, 4},
132+
{CU_AD_FORMAT_UNORM_INT16X4, 8}},
133+
134+
{{UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, 1},
135+
{CU_AD_FORMAT_SNORM_INT16X1, 2}},
136+
{{UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, 2},
137+
{CU_AD_FORMAT_SNORM_INT16X2, 4}},
138+
{{UR_IMAGE_CHANNEL_TYPE_SNORM_INT16, 4},
139+
{CU_AD_FORMAT_SNORM_INT16X4, 8}},
140+
};
141+
142+
try {
143+
auto cuda_format_and_size = norm_channel_type_map.at(
144+
std::make_pair(image_channel_type, num_channels));
145+
cuda_format = cuda_format_and_size.first;
146+
pixel_size_bytes = cuda_format_and_size.second;
147+
} catch (std::out_of_range &e) {
148+
return UR_RESULT_ERROR_IMAGE_FORMAT_NOT_SUPPORTED;
149+
}
90150
}
91151

152+
#endif
153+
92154
if (return_cuda_format) {
93155
*return_cuda_format = cuda_format;
94156
}
95-
if (return_pixel_types_size_bytes) {
96-
*return_pixel_types_size_bytes = PixelTypeSizeBytes;
157+
if (return_pixel_size_bytes) {
158+
*return_pixel_size_bytes = pixel_size_bytes;
97159
}
98160
return UR_RESULT_SUCCESS;
99161
}
@@ -125,10 +187,42 @@ cudaToUrImageChannelFormat(CUarray_format cuda_format,
125187
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_FLOAT,
126188
UR_IMAGE_CHANNEL_TYPE_FLOAT);
127189
#if CUDA_VERSION >= 11050
190+
191+
// Note that the CUDA UNORM and SNORM formats also encode the number of
192+
// channels.
193+
// Since UR does not encode this, we map different CUDA formats to the same
194+
// UR channel type.
195+
// Since this function is only called from `urBindlessImagesImageGetInfoExp`
196+
// which has access to `CUDA_ARRAY3D_DESCRIPTOR`, we can determine the
197+
// number of channels in the calling function.
198+
128199
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X1,
129200
UR_IMAGE_CHANNEL_TYPE_UNORM_INT8);
201+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X2,
202+
UR_IMAGE_CHANNEL_TYPE_UNORM_INT8);
203+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT8X4,
204+
UR_IMAGE_CHANNEL_TYPE_UNORM_INT8);
205+
130206
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X1,
131207
UR_IMAGE_CHANNEL_TYPE_UNORM_INT16);
208+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X2,
209+
UR_IMAGE_CHANNEL_TYPE_UNORM_INT16);
210+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_UNORM_INT16X4,
211+
UR_IMAGE_CHANNEL_TYPE_UNORM_INT16);
212+
213+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT8X1,
214+
UR_IMAGE_CHANNEL_TYPE_SNORM_INT8);
215+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT8X2,
216+
UR_IMAGE_CHANNEL_TYPE_SNORM_INT8);
217+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT8X4,
218+
UR_IMAGE_CHANNEL_TYPE_SNORM_INT8);
219+
220+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT16X1,
221+
UR_IMAGE_CHANNEL_TYPE_SNORM_INT16);
222+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT16X2,
223+
UR_IMAGE_CHANNEL_TYPE_SNORM_INT16);
224+
CUDA_TO_UR_IMAGE_CHANNEL_TYPE(CU_AD_FORMAT_SNORM_INT16X4,
225+
UR_IMAGE_CHANNEL_TYPE_SNORM_INT16);
132226
#endif
133227
#undef MAP
134228
default:
@@ -283,6 +377,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
283377
&array_desc.NumChannels));
284378

285379
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType,
380+
pImageFormat->channelOrder,
286381
&array_desc.Format, nullptr));
287382

288383
array_desc.Flags = 0; // No flags required
@@ -365,9 +460,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp(
365460
urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels));
366461

367462
CUarray_format format;
368-
size_t PixelTypeSizeBytes;
369-
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, &format,
370-
&PixelTypeSizeBytes));
463+
size_t PixelSizeBytes;
464+
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType,
465+
pImageFormat->channelOrder, &format,
466+
&PixelSizeBytes));
371467

372468
try {
373469

@@ -418,9 +514,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
418514
urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels));
419515

420516
CUarray_format format;
421-
size_t PixelTypeSizeBytes;
422-
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, &format,
423-
&PixelTypeSizeBytes));
517+
size_t PixelSizeBytes;
518+
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType,
519+
pImageFormat->channelOrder, &format,
520+
&PixelSizeBytes));
424521

425522
try {
426523
CUDA_RESOURCE_DESC image_res_desc = {};
@@ -451,7 +548,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp(
451548
image_res_desc.res.linear.format = format;
452549
image_res_desc.res.linear.numChannels = NumChannels;
453550
image_res_desc.res.linear.sizeInBytes =
454-
pImageDesc->width * PixelTypeSizeBytes * NumChannels;
551+
pImageDesc->width * PixelSizeBytes;
455552
} else if (pImageDesc->type == UR_MEM_TYPE_IMAGE2D) {
456553
image_res_desc.resType = CU_RESOURCE_TYPE_PITCH2D;
457554
image_res_desc.res.pitch2D.devPtr = (CUdeviceptr)hImageMem;
@@ -503,17 +600,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp(
503600
UR_RESULT_ERROR_INVALID_VALUE);
504601

505602
unsigned int NumChannels = 0;
506-
size_t PixelTypeSizeBytes = 0;
603+
size_t PixelSizeBytes = 0;
507604

508605
UR_CHECK_ERROR(
509606
urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels));
510607

511608
// We need to get this now in bytes for calculating the total image size
512609
// later.
513-
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType, nullptr,
514-
&PixelTypeSizeBytes));
515-
516-
size_t PixelSizeBytes = PixelTypeSizeBytes * NumChannels;
610+
UR_CHECK_ERROR(urToCudaImageChannelFormat(pImageFormat->channelType,
611+
pImageFormat->channelOrder, nullptr,
612+
&PixelSizeBytes));
517613

518614
try {
519615
ScopedContext Active(hQueue->getContext());
@@ -789,8 +885,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp(
789885
urCalculateNumChannels(pImageFormat->channelOrder, &NumChannels));
790886

791887
CUarray_format format;
792-
UR_CHECK_ERROR(
793-
urToCudaImageChannelFormat(pImageFormat->channelType, &format, nullptr));
888+
UR_CHECK_ERROR(urToCudaImageChannelFormat(
889+
pImageFormat->channelType, pImageFormat->channelOrder, &format, nullptr));
794890

795891
try {
796892
ScopedContext Active(hDevice->getContext());

image.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ ur_result_t urCalculateNumChannels(ur_image_channel_order_t order,
1717

1818
ur_result_t
1919
urToCudaImageChannelFormat(ur_image_channel_type_t image_channel_type,
20+
ur_image_channel_order_t image_channel_order,
2021
CUarray_format *return_cuda_format,
2122
size_t *return_pixel_types_size_bytes);
2223

0 commit comments

Comments
 (0)