@@ -55,7 +55,7 @@ template <int NChannels, typename DType>
55
55
static sycl::vec<DType, NChannels>
56
56
linearOp (sycl::vec<DType, NChannels> pix1, sycl::vec<DType, NChannels> pix2,
57
57
sycl::vec<DType, NChannels> pix3, sycl::vec<DType, NChannels> pix4,
58
- float weight1, float weight2) {
58
+ float weight1, float weight2, sycl::backend backend ) {
59
59
60
60
sycl::vec<float , NChannels> weightArr1 (weight1);
61
61
sycl::vec<float , NChannels> weightArr2 (weight2);
@@ -73,14 +73,41 @@ linearOp(sycl::vec<DType, NChannels> pix1, sycl::vec<DType, NChannels> pix2,
73
73
(one - weightArr1) * weightArr2 * Ti0j1 +
74
74
weightArr1 * weightArr2 * Ti1j1));
75
75
76
- // Round to nearest whole number.
77
- // There is no option to do this via sycl::rounding_mode.
78
- if constexpr (std::is_same_v<DType, short > ||
79
- std::is_same_v<DType, unsigned short > ||
80
- std::is_same_v<DType, signed char > ||
81
- std::is_same_v<DType, unsigned char >) {
82
- for (int i = 0 ; i < NChannels; i++) {
83
- result[i] = std::round (result[i]);
76
+ if (backend == sycl::backend::ext_oneapi_cuda) {
77
+ // On Nvidia devices, if the image being accessed contains smaller than
78
+ // 32-bit integer data, then the fractional result of linear interpolation
79
+ // is rounded to the nearest number.
80
+ if constexpr (std::is_same_v<DType, short > ||
81
+ std::is_same_v<DType, unsigned short > ||
82
+ std::is_same_v<DType, signed char > ||
83
+ std::is_same_v<DType, unsigned char >) {
84
+ for (int i = 0 ; i < NChannels; i++) {
85
+ result[i] = std::round (result[i]);
86
+ }
87
+ }
88
+
89
+ // On Nvidia devices, if the image being accessed contains 32-bit integer
90
+ // data, then the fractional result of linear interpolation is rounded down.
91
+ if constexpr (std::is_same_v<DType, int > ||
92
+ std::is_same_v<DType, unsigned int >) {
93
+ for (int i = 0 ; i < NChannels; i++) {
94
+ result[i] = std::floor (result[i]);
95
+ }
96
+ }
97
+ }
98
+
99
+ if (backend == sycl::backend::ext_oneapi_level_zero) {
100
+ // On Intel devices, if the image being accessed contains integer data, then
101
+ // the fractional result of linear interpolation is rounded down.
102
+ if constexpr (std::is_same_v<DType, short > ||
103
+ std::is_same_v<DType, unsigned short > ||
104
+ std::is_same_v<DType, signed char > ||
105
+ std::is_same_v<DType, unsigned char > ||
106
+ std::is_same_v<DType, int > ||
107
+ std::is_same_v<DType, unsigned int >) {
108
+ for (int i = 0 ; i < NChannels; i++) {
109
+ result[i] = std::floor (result[i]);
110
+ }
84
111
}
85
112
}
86
113
@@ -360,7 +387,8 @@ struct InterpolRes {
360
387
template <typename DType, int NChannels>
361
388
static sycl::vec<DType, NChannels>
362
389
clampLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
363
- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
390
+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
391
+ sycl::backend backend) {
364
392
using VecType = sycl::vec<DType, NChannels>;
365
393
366
394
float coordX = coords[0 ];
@@ -391,14 +419,16 @@ clampLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
391
419
clampLinearCheckBounds<VecType>(i1, j1, width, height, inputImage);
392
420
393
421
// Perform linear sampling
394
- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
422
+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
423
+ backend);
395
424
}
396
425
397
426
// Out of range coords are clamped to the extent.
398
427
template <typename DType, int NChannels>
399
428
static sycl::vec<DType, NChannels>
400
429
clampToEdgeLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
401
- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
430
+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
431
+ sycl::backend backend) {
402
432
using VecType = sycl::vec<DType, NChannels>;
403
433
404
434
float coordX = coords[0 ];
@@ -428,7 +458,8 @@ clampToEdgeLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
428
458
VecType pix4 = inputImage[i1 + (width * j1)];
429
459
430
460
// Perform linear sampling
431
- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
461
+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
462
+ backend);
432
463
}
433
464
434
465
// Out of range coords return a border color
@@ -451,7 +482,8 @@ static InterpolRes repeatLinearCoord(float coord, int dimSize) {
451
482
template <typename DType, int NChannels>
452
483
static sycl::vec<DType, NChannels>
453
484
repeatLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
454
- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
485
+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
486
+ sycl::backend backend) {
455
487
using VecType = sycl::vec<DType, NChannels>;
456
488
457
489
float coordX = coords[0 ];
@@ -482,7 +514,8 @@ repeatLinear(sycl::vec<float, 2> coords, sycl::range<2> globalSize,
482
514
VecType pix4 = inputImage[i1 + (width * j1)];
483
515
484
516
// Perform linear sampling
485
- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
517
+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
518
+ backend);
486
519
}
487
520
488
521
// Out of range coordinates are flipped at every integer junction
@@ -517,9 +550,10 @@ static InterpolRes mirroredRepeatLinearCoord(float coord, int dimSize) {
517
550
518
551
// Out of range coordinates are flipped at every integer junction
519
552
template <typename DType, int NChannels>
520
- static sycl::vec<DType, NChannels> mirroredRepeatLinear (
521
- sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
522
- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
553
+ static sycl::vec<DType, NChannels>
554
+ mirroredRepeatLinear (sycl::vec<float , 2 > coords, sycl::range<2 > globalSize,
555
+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
556
+ sycl::backend backend) {
523
557
using VecType = sycl::vec<DType, NChannels>;
524
558
525
559
float coordX = coords[0 ];
@@ -551,7 +585,8 @@ static sycl::vec<DType, NChannels> mirroredRepeatLinear(
551
585
VecType pix4 = inputImage[i1 + (width * j1)];
552
586
553
587
// Perform linear sampling
554
- return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY);
588
+ return linearOp<NChannels, DType>(pix1, pix2, pix3, pix4, weightX, weightY,
589
+ backend);
555
590
}
556
591
557
592
// Some vector sizes here are hardcoded because the sampling functions are
@@ -560,7 +595,8 @@ template <int NDims, typename DType, int NChannels>
560
595
static sycl::vec<DType, NChannels>
561
596
read (sycl::range<2 > globalSize, sycl::vec<float , 2 > coords, float offset,
562
597
const sycl::ext::oneapi::experimental::bindless_image_sampler &samp,
563
- const std::vector<sycl::vec<DType, NChannels>> &inputImage) {
598
+ const std::vector<sycl::vec<DType, NChannels>> &inputImage,
599
+ sycl::backend backend) {
564
600
using VecType = sycl::vec<DType, NChannels>;
565
601
566
602
// Add offset to coords
@@ -624,26 +660,28 @@ read(sycl::range<2> globalSize, sycl::vec<float, 2> coords, float offset,
624
660
} else { // linear
625
661
sycl::addressing_mode SampAddrMode = samp.addressing [0 ];
626
662
if (SampAddrMode == sycl::addressing_mode::ext_oneapi_clamp_to_border) {
627
- return clampLinear<DType, NChannels>(coords, globalSize, inputImage);
663
+ return clampLinear<DType, NChannels>(coords, globalSize, inputImage,
664
+ backend);
628
665
}
629
666
if (SampAddrMode == sycl::addressing_mode::clamp_to_edge) {
630
- return clampToEdgeLinear<DType, NChannels>(coords, globalSize,
631
- inputImage );
667
+ return clampToEdgeLinear<DType, NChannels>(coords, globalSize, inputImage,
668
+ backend );
632
669
}
633
670
if (SampAddrMode == sycl::addressing_mode::repeat) {
634
671
if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) {
635
672
assert (false &&
636
673
" Repeat addressing mode must be used with normalized coords" );
637
674
}
638
- return repeatLinear<DType, NChannels>(coords, globalSize, inputImage);
675
+ return repeatLinear<DType, NChannels>(coords, globalSize, inputImage,
676
+ backend);
639
677
}
640
678
if (SampAddrMode == sycl::addressing_mode::mirrored_repeat) {
641
679
if (SampNormMode == sycl::coordinate_normalization_mode::unnormalized) {
642
680
assert (false && " Mirrored repeat addressing mode must be used with "
643
681
" normalized coords" );
644
682
}
645
683
return mirroredRepeatLinear<DType, NChannels>(coords, globalSize,
646
- inputImage);
684
+ inputImage, backend );
647
685
}
648
686
if (SampAddrMode == sycl::addressing_mode::none) {
649
687
// Ensure no access out of bounds when addressing_mode is none
0 commit comments