Skip to content

Commit 63d688c

Browse files
authored
[SYCL][Bindless][E2E] Workaround backend/type divergent integer rounding (#17017)
Enable read_sampled.cpp and read_sampled_array.cpp to handle divergent rounding when performing linear sampling depending on the backend and integer type.
1 parent 4953e53 commit 63d688c

File tree

3 files changed

+215
-37
lines changed

3 files changed

+215
-37
lines changed

sycl/test-e2e/bindless_images/array/read_sampled_array.cpp

Lines changed: 78 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
// REQUIRES: aspect-ext_oneapi_image_array
1+
// REQUIRES: aspect-ext_oneapi_bindless_images
2+
3+
// UNSUPPORTED: hip
4+
// UNSUPPORTED-INTENDED: Image arrays are unimplemented in the HIP adapter.
25

36
// RUN: %{build} -o %t.out
47
// RUN: %{run} %t.out
@@ -30,7 +33,8 @@ static void
3033
runNDimTestHost(sycl::range<NDims> globalSize, float offset,
3134
syclexp::bindless_image_sampler &samp,
3235
std::vector<sycl::vec<DType, NChannels>> &inputImage,
33-
std::vector<sycl::vec<DType, NChannels>> &output) {
36+
std::vector<sycl::vec<DType, NChannels>> &output,
37+
sycl::backend backend) {
3438

3539
using VecType = sycl::vec<DType, NChannels>;
3640
bool isNorm =
@@ -67,7 +71,7 @@ runNDimTestHost(sycl::range<NDims> globalSize, float offset,
6771
inputImage.begin() + arr_idx * globalSizeTwoComp.size(),
6872
inputImage.begin() + (arr_idx + 1) * globalSizeTwoComp.size());
6973
VecType result = sampling_helpers::read<NDims - 1, DType, NChannels>(
70-
globalSizeTwoComp, coords, offset, samp, layer);
74+
globalSizeTwoComp, coords, offset, samp, layer, backend);
7175

7276
output[arr_idx * globalSizeTwoComp.size() + i + (globalSize[0] * j)] =
7377
result;
@@ -151,6 +155,8 @@ static bool runTest(sycl::range<NDims> dims, sycl::range<NDims> localSize,
151155
sycl::queue q(dev);
152156
auto ctxt = q.get_context();
153157

158+
sycl::backend backend = dev.get_backend();
159+
154160
size_t numElems = dims.size();
155161
auto image_array_dims = bindless_helpers::ImageArrayDims<NDims>(dims);
156162

@@ -164,7 +170,7 @@ static bool runTest(sycl::range<NDims> dims, sycl::range<NDims> localSize,
164170
{
165171
sycl::range<NDims> globalSize = dims;
166172
runNDimTestHost<NDims, DType, NChannels>(globalSize, offset, samp, input,
167-
expected);
173+
expected, backend);
168174
}
169175

170176
try {
@@ -319,9 +325,42 @@ bool runTests(sycl::range<2> dims, sycl::range<2> localSize, float offset,
319325
syclexp::bindless_image_sampler samp(addrMode, normMode, filtMode);
320326

321327
#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3)
322-
util::printTestInfo(samp, offset);
328+
sampling_helpers::printTestInfo(samp, offset);
323329
#endif
324330

331+
bindless_helpers::printTestName<NDims>("Running 1D int", dims, localSize);
332+
failed |=
333+
util::runTest<NDims, int, 1, sycl::image_channel_type::signed_int32,
334+
class int_1d>(dims, localSize, offset, samp, seed);
335+
bindless_helpers::printTestName<NDims>("Running 1D int2", dims,
336+
localSize);
337+
failed |=
338+
util::runTest<NDims, int, 2, sycl::image_channel_type::signed_int32,
339+
class int2_1d>(dims, localSize, offset, samp, seed);
340+
bindless_helpers::printTestName<NDims>("Running 1D int4", dims,
341+
localSize);
342+
failed |=
343+
util::runTest<NDims, int, 4, sycl::image_channel_type::signed_int32,
344+
class int4_1d>(dims, localSize, offset, samp, seed);
345+
346+
bindless_helpers::printTestName<NDims>("Running 1D uint", dims,
347+
localSize);
348+
failed |=
349+
util::runTest<NDims, unsigned int, 1,
350+
sycl::image_channel_type::unsigned_int32,
351+
class uint_1d>(dims, localSize, offset, samp, seed);
352+
bindless_helpers::printTestName<NDims>("Running 1D uint2", dims,
353+
localSize);
354+
failed |=
355+
util::runTest<NDims, unsigned int, 2,
356+
sycl::image_channel_type::unsigned_int32,
357+
class uint2_1d>(dims, localSize, offset, samp, seed);
358+
bindless_helpers::printTestName<NDims>("Running 1D uint4", dims,
359+
localSize);
360+
failed |=
361+
util::runTest<NDims, int, 4, sycl::image_channel_type::signed_int32,
362+
class uint4_1d>(dims, localSize, offset, samp, seed);
363+
325364
bindless_helpers::printTestName<NDims>("Running 1D short", dims,
326365
localSize);
327366
failed |=
@@ -468,9 +507,42 @@ bool runTests(sycl::range<3> dims, sycl::range<3> localSize, float offset,
468507
syclexp::bindless_image_sampler samp(addrMode, normMode, filtMode);
469508

470509
#if defined(VERBOSE_LV2) || defined(VERBOSE_LV3)
471-
util::printTestInfo(samp, offset);
510+
sampling_helpers::printTestInfo(samp, offset);
472511
#endif
473512

513+
bindless_helpers::printTestName<NDims>("Running 2D int", dims, localSize);
514+
failed |=
515+
util::runTest<NDims, int, 1, sycl::image_channel_type::signed_int32,
516+
class int_2d>(dims, localSize, offset, samp, seed);
517+
bindless_helpers::printTestName<NDims>("Running 2D int2", dims,
518+
localSize);
519+
failed |=
520+
util::runTest<NDims, int, 2, sycl::image_channel_type::signed_int32,
521+
class int2_2d>(dims, localSize, offset, samp, seed);
522+
bindless_helpers::printTestName<NDims>("Running 2D int4", dims,
523+
localSize);
524+
failed |=
525+
util::runTest<NDims, int, 4, sycl::image_channel_type::signed_int32,
526+
class int4_2d>(dims, localSize, offset, samp, seed);
527+
528+
bindless_helpers::printTestName<NDims>("Running 2D uint", dims,
529+
localSize);
530+
failed |=
531+
util::runTest<NDims, unsigned int, 1,
532+
sycl::image_channel_type::unsigned_int32,
533+
class uint_2d>(dims, localSize, offset, samp, seed);
534+
bindless_helpers::printTestName<NDims>("Running 2D uint2", dims,
535+
localSize);
536+
failed |=
537+
util::runTest<NDims, unsigned int, 2,
538+
sycl::image_channel_type::unsigned_int32,
539+
class uint2_2d>(dims, localSize, offset, samp, seed);
540+
bindless_helpers::printTestName<NDims>("Running 2D uint4", dims,
541+
localSize);
542+
failed |=
543+
util::runTest<NDims, int, 4, sycl::image_channel_type::signed_int32,
544+
class uint4_2d>(dims, localSize, offset, samp, seed);
545+
474546
bindless_helpers::printTestName<NDims>("Running 2D short", dims,
475547
localSize);
476548
failed |=

sycl/test-e2e/bindless_images/helpers/sampling.hpp

Lines changed: 63 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ template <int NChannels, typename DType>
5555
static sycl::vec<DType, NChannels>
5656
linearOp(sycl::vec<DType, NChannels> pix1, sycl::vec<DType, NChannels> pix2,
5757
sycl::vec<DType, NChannels> pix3, sycl::vec<DType, NChannels> pix4,
58-
float weight1, float weight2) {
58+
float weight1, float weight2, sycl::backend backend) {
5959

6060
sycl::vec<float, NChannels> weightArr1(weight1);
6161
sycl::vec<float, NChannels> weightArr2(weight2);
@@ -73,14 +73,41 @@ linearOp(sycl::vec<DType, NChannels> pix1, sycl::vec<DType, NChannels> pix2,
7373
(one - weightArr1) * weightArr2 * Ti0j1 +
7474
weightArr1 * weightArr2 * Ti1j1));
7575

76-
// Round to nearest whole number.
77-
// There is no option to do this via sycl::rounding_mode.
78-
if constexpr (std::is_same_v<DType, short> ||
79-
std::is_same_v<DType, unsigned short> ||
80-
std::is_same_v<DType, signed char> ||
81-
std::is_same_v<DType, unsigned char>) {
82-
for (int i = 0; i < NChannels; i++) {
83-
result[i] = std::round(result[i]);
76+
if (backend == sycl::backend::ext_oneapi_cuda) {
77+
// On Nvidia devices, if the image being accessed contains smaller than
78+
// 32-bit integer data, then the fractional result of linear interpolation
79+
// is rounded to the nearest number.
80+
if constexpr (std::is_same_v<DType, short> ||
81+
std::is_same_v<DType, unsigned short> ||
82+
std::is_same_v<DType, signed char> ||
83+
std::is_same_v<DType, unsigned char>) {
84+
for (int i = 0; i < NChannels; i++) {
85+
result[i] = std::round(result[i]);
86+
}
87+
}
88+
89+
// On Nvidia devices, if the image being accessed contains 32-bit integer
90+
// data, then the fractional result of linear interpolation is rounded down.
91+
if constexpr (std::is_same_v<DType, int> ||
92+
std::is_same_v<DType, unsigned int>) {
93+
for (int i = 0; i < NChannels; i++) {
94+
result[i] = std::floor(result[i]);
95+
}
96+
}
97+
}
98+
99+
if (backend == sycl::backend::ext_oneapi_level_zero) {
100+
// On Intel devices, if the image being accessed contains integer data, then
101+
// the fractional result of linear interpolation is rounded down.
102+
if constexpr (std::is_same_v<DType, short> ||
103+
std::is_same_v<DType, unsigned short> ||
104+
std::is_same_v<DType, signed char> ||
105+
std::is_same_v<DType, unsigned char> ||
106+
std::is_same_v<DType, int> ||
107+
std::is_same_v<DType, unsigned int>) {
108+
for (int i = 0; i < NChannels; i++) {
109+
result[i] = std::floor(result[i]);
110+
}
84111
}
85112
}
86113

@@ -360,7 +387,8 @@ struct InterpolRes {
360387
template <typename DType, int NChannels>
361388
static sycl::vec<DType, NChannels>
362389
clampLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
363-
const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
390+
const std::vector<sycl::vec<DType, NChannels>> &inputImage,
391+
sycl::backend backend) {
364392
using VecType = sycl::vec<DType, NChannels>;
365393

366394
float coordX = coords[0];
@@ -391,14 +419,16 @@ clampLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
391419
clampLinearCheckBounds<VecType>(i1, j1, width, height, inputImage);
392420

393421
// Perform linear sampling
394-
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
422+
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
423+
backend);
395424
}
396425

397426
// Out of range coords are clamped to the extent.
398427
template <typename DType, int NChannels>
399428
static sycl::vec<DType, NChannels>
400429
clampToEdgeLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
401-
const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
430+
const std::vector<sycl::vec<DType, NChannels>> &inputImage,
431+
sycl::backend backend) {
402432
using VecType = sycl::vec<DType, NChannels>;
403433

404434
float coordX = coords[0];
@@ -428,7 +458,8 @@ clampToEdgeLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
428458
VecType pix4 = inputImage[i1 + (width * j1)];
429459

430460
// Perform linear sampling
431-
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
461+
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
462+
backend);
432463
}
433464

434465
// Out of range coords return a border color
@@ -451,7 +482,8 @@ static InterpolRes repeatLinearCoord(float coord, int dimSize) {
451482
template <typename DType, int NChannels>
452483
static sycl::vec<DType, NChannels>
453484
repeatLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
454-
const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
485+
const std::vector<sycl::vec<DType, NChannels>> &inputImage,
486+
sycl::backend backend) {
455487
using VecType = sycl::vec<DType, NChannels>;
456488

457489
float coordX = coords[0];
@@ -482,7 +514,8 @@ repeatLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
482514
VecType pix4 = inputImage[i1 + (width * j1)];
483515

484516
// Perform linear sampling
485-
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
517+
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
518+
backend);
486519
}
487520

488521
// Out of range coordinates are flipped at every integer junction
@@ -517,9 +550,10 @@ static InterpolRes mirroredRepeatLinearCoord(float coord, int dimSize) {
517550

518551
// Out of range coordinates are flipped at every integer junction
519552
template <typename DType, int NChannels>
520-
static sycl::vec<DType, NChannels> mirroredRepeatLinear(
521-
sycl::vec<float, 2> coords, sycl::range<2> globalSize,
522-
const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
553+
static sycl::vec<DType, NChannels>
554+
mirroredRepeatLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
555+
const std::vector<sycl::vec<DType, NChannels>> &inputImage,
556+
sycl::backend backend) {
523557
using VecType = sycl::vec<DType, NChannels>;
524558

525559
float coordX = coords[0];
@@ -551,7 +585,8 @@ static sycl::vec<DType, NChannels> mirroredRepeatLinear(
551585
VecType pix4 = inputImage[i1 + (width * j1)];
552586

553587
// Perform linear sampling
554-
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
588+
return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
589+
backend);
555590
}
556591

557592
// Some vector sizes here are hardcoded because the sampling functions are
@@ -560,7 +595,8 @@ template <int NDims, typename DType, int NChannels>
560595
static sycl::vec<DType, NChannels>
561596
read(sycl::range<2> globalSize, sycl::vec<float, 2> coords, float offset,
562597
const sycl::ext::oneapi::experimental::bindless_image_sampler &samp,
563-
const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
598+
const std::vector<sycl::vec<DType, NChannels>> &inputImage,
599+
sycl::backend backend) {
564600
using VecType = sycl::vec<DType, NChannels>;
565601

566602
// Add offset to coords
@@ -624,26 +660,28 @@ read(sycl::range<2> globalSize, sycl::vec<float, 2> coords, float offset,
624660
} else { // linear
625661
sycl::addressing_mode SampAddrMode = samp.addressing[0];
626662
if (SampAddrMode == sycl::addressing_mode::ext_oneapi_clamp_to_border) {
627-
return clampLinear<DType, NChannels>(coords, globalSize, inputImage);
663+
return clampLinear<DType, NChannels>(coords, globalSize, inputImage,
664+
backend);
628665
}
629666
if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) {
630-
return clampToEdgeLinear<DType, NChannels>(coords, globalSize,
631-
inputImage);
667+
return clampToEdgeLinear<DType, NChannels>(coords, globalSize, inputImage,
668+
backend);
632669
}
633670
if (SampAddrMode == sycl::addressing_mode::repeat) {
634671
if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) {
635672
assert(false &&
636673
"Repeat addressing mode must be used with normalized coords");
637674
}
638-
return repeatLinear<DType, NChannels>(coords, globalSize, inputImage);
675+
return repeatLinear<DType, NChannels>(coords, globalSize, inputImage,
676+
backend);
639677
}
640678
if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) {
641679
if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) {
642680
assert(false && "Mirrored repeat addressing mode must be used with "
643681
"normalized coords");
644682
}
645683
return mirroredRepeatLinear<DType, NChannels>(coords, globalSize,
646-
inputImage);
684+
inputImage, backend);
647685
}
648686
if (SampAddrMode == sycl::addressing_mode::none) {
649687
// Ensure no access out of bounds when addressing_mode is none

0 commit comments

Comments
 (0)