Skip to content

Commit 84861f6

Browse files
authored
[SYCL][ESIMD][E2E] Fix linear test on new GPU driver (#15790)
The root cause of this test failure on the new driver is that the test, by design, reads beyond the bounds of the input image and uses the out-of-bounds data in the calculations for the output. The new driver returns 0s for all data read beyond the image boundaries, whereas the old driver replicates the last read data. I am not sure whether this is a driver issue or an issue with the image/accessor; therefore, the safest solution is to replicate the old driver's behavior in the test itself, so that it works with both the old and the new drivers.
1 parent 573470f commit 84861f6

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

sycl/test-e2e/ESIMD/linear/linear.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,16 +96,26 @@ int main(int argc, char *argv[]) {
9696
auto in = vin.bit_cast_view<unsigned char, 8, 32>();
9797

9898
simd<unsigned char, 6 * 24> vout;
99-
auto out = vout.bit_cast_view<uchar, 6, 24>();
10099

101100
simd<float, 6 * 24> vm;
102101
auto m = vm.bit_cast_view<float, 6, 24>();
103102

104103
uint h_pos = it.get_id(0);
105104
uint v_pos = it.get_id(1);
106105

107-
in = media_block_load<unsigned char, 8, 32>(accInput, h_pos * 24,
108-
v_pos * 6);
106+
vin = media_block_load<unsigned char, 8, 32>(accInput, h_pos * 24,
107+
v_pos * 6);
108+
if (h_pos == range_width - 1) {
109+
#pragma unroll
110+
for (int i = 0; i < 8; i++) {
111+
vin.select<4, 1>(i * 32 + 24) = vin.select<4, 1>(i * 32 + 20);
112+
vin.select<4, 1>(i * 32 + 28) = vin.select<4, 1>(i * 32 + 20);
113+
}
114+
}
115+
if (v_pos == range_height - 1) {
116+
vin.select<32, 1>(7 * 32) = vin.select<32, 1>(5 * 32);
117+
vin.select<32, 1>(6 * 32) = vin.select<32, 1>(5 * 32);
118+
}
109119

110120
m = in.select<6, 1, 24, 1>(1, 3);
111121
m += in.select<6, 1, 24, 1>(0, 0);
@@ -121,7 +131,7 @@ int main(int argc, char *argv[]) {
121131
vout = convert<unsigned char>(vm);
122132

123133
media_block_store<unsigned char, 6, 24>(accOutput, h_pos * 24,
124-
v_pos * 6, out);
134+
v_pos * 6, vout);
125135
});
126136
});
127137
e.wait();

0 commit comments

Comments
 (0)