1
1
// REQUIRES: aspect-ext_intel_legacy_image
2
- // RUN: %{build} -o %t.out
2
+
3
+ // %O0 added because of GSD-10960. Without it, IGC will fail with
4
+ // an access violation error.
5
+ // RUN: %{build} %O0 -o %t.out
3
6
// RUN: %{run} %t.out
4
7
5
8
// UNSUPPORTED: cuda
13
16
#include < sycl/detail/core.hpp>
14
17
using namespace sycl ;
15
18
16
- void init (uint32_t *A, uint32_t *B, size_t NumI32Elts) {
17
- for (int I = 0 ; I < NumI32Elts; I++) {
18
- A[I] = I;
19
- B[I] = 0 ;
20
- }
21
- }
19
+ template <int Dimensions> class CopyKernel ;
22
20
23
- int check (uint32_t *B, size_t NumI32Elts) {
24
- for (int I = 0 ; I < NumI32Elts; I++) {
25
- if (B[I] != I) {
26
- std::cout << " Failed" << std::endl;
27
- std::cerr << " Error for the index: " << I << " , computed: " << B[I]
28
- << std::endl;
29
- return 1 ;
30
- }
31
- }
32
- std::cout << " Passed" << std::endl;
33
- return 0 ;
34
- }
21
+ template <int Dimensions>
22
+ bool testND (queue &Q, size_t XSize, size_t YSize, size_t ZSize = 1 ) {
35
23
36
- int test2D (queue &Q, size_t XSize, size_t YSize) {
37
- std::cout << " Starting the test with size = {" << XSize << " , " << YSize
38
- << " } ... " ;
39
- size_t NumI32Elts = XSize * YSize * 4 ;
40
- uint32_t *A = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
41
- uint32_t *B = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
42
- init (A, B, NumI32Elts);
24
+ static_assert (Dimensions == 2 || Dimensions == 3 ,
25
+ " Only 2D and 3D images are supported." );
43
26
44
- try {
45
- image<2 > ImgA (A, image_channel_order::rgba,
46
- image_channel_type::unsigned_int32, range<2 >{XSize, YSize});
47
- image<2 > ImgB (B, image_channel_order::rgba,
48
- image_channel_type::unsigned_int32, range<2 >{XSize, YSize});
27
+ if constexpr (Dimensions == 2 )
28
+ std::cout << " Starting the test with size = {" << XSize << " , " << YSize
29
+ << " } ... " ;
30
+ else
31
+ std::cout << " Starting the test with size = {" << XSize << " , " << YSize
32
+ << " , " << ZSize << " } ... " ;
49
33
50
- Q.submit ([&](handler &CGH) {
51
- auto AAcc = ImgA.get_access <uint4, access::mode::read>(CGH);
52
- auto BAcc = ImgB.get_access <uint4, access::mode::write>(CGH);
53
- CGH.parallel_for <class I2D >(range<2 >{XSize, YSize}, [=](id<2 > Id) {
54
- sycl::int2 Coord (Id[0 ], Id[1 ]);
55
- BAcc.write (Coord, AAcc.read (Coord));
56
- });
57
- }).wait ();
58
- } catch (exception const &e) {
59
- std::cout << " Failed" << std::endl;
60
- std::cerr << " SYCL Exception caught: " << e.what ();
61
- return 1 ;
62
- }
34
+ const size_t NumI32Elts = XSize * YSize * ZSize * 4 ;
35
+ range<Dimensions> ImgRange;
36
+ if constexpr (Dimensions == 2 )
37
+ ImgRange = range<Dimensions>{XSize, YSize};
38
+ else
39
+ ImgRange = range<Dimensions>{XSize, YSize, ZSize};
63
40
64
- int NumErrors = check (B, NumI32Elts);
65
- free (A);
66
- free (B);
67
- return NumErrors;
68
- }
41
+ // Allocate input buffer and initialize it with some values.
42
+ uint32_t *Input = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
43
+ for (int i = 0 ; i < NumI32Elts; i++)
44
+ Input[i] = i;
69
45
70
- int test3D (queue &Q, size_t XSize, size_t YSize, size_t ZSize) {
71
- std::cout << " Starting the test with size = {" << XSize << " , " << YSize
72
- << " , " << ZSize << " } ... " ;
73
- size_t NumI32Elts = XSize * YSize * ZSize * 4 ;
74
- uint32_t *A = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
75
- uint32_t *B = (uint32_t *)malloc (NumI32Elts * sizeof (uint32_t ));
76
- init (A, B, NumI32Elts);
46
+ // calloc to ensure that the output buffer is initialized to zero.
47
+ uint32_t *Output = (uint32_t *)calloc (NumI32Elts, sizeof (uint32_t ));
77
48
49
+ // Create the image and submit the copy kernel.
78
50
try {
79
- image<3 > ImgA (A, image_channel_order::rgba,
80
- image_channel_type::unsigned_int32,
81
- range<3 >{XSize, YSize, ZSize});
82
- image<3 > ImgB (B, image_channel_order::rgba,
83
- image_channel_type::unsigned_int32,
84
- range<3 >{XSize, YSize, ZSize});
51
+ image<Dimensions> ImgA (Input, image_channel_order::rgba,
52
+ image_channel_type::unsigned_int32, ImgRange);
53
+ image<Dimensions> ImgB (Output, image_channel_order::rgba,
54
+ image_channel_type::unsigned_int32, ImgRange);
85
55
86
56
Q.submit ([&](handler &CGH) {
87
- auto AAcc = ImgA.get_access <uint4, access::mode::read>(CGH);
88
- auto BAcc = ImgB.get_access <uint4, access::mode::write>(CGH);
89
- CGH.parallel_for <class I3D >(range<3 >{XSize, YSize, ZSize},
90
- [=](id<3 > Id) {
91
- sycl::int4 Coord (Id[0 ], Id[1 ], Id[2 ], 0 );
92
- BAcc.write (Coord, AAcc.read (Coord));
93
- });
57
+ auto AAcc = ImgA.template get_access <uint4, access::mode::read>(CGH);
58
+ auto BAcc = ImgB.template get_access <uint4, access::mode::write>(CGH);
59
+ CGH.parallel_for <CopyKernel<Dimensions>>(
60
+ ImgRange, [=](id<Dimensions> Id) {
61
+ // Use int2 for 2D and int4 for 3D images.
62
+ if constexpr (Dimensions == 3 ) {
63
+ sycl::int4 Coord (Id[0 ], Id[1 ], Id[2 ], 0 );
64
+ BAcc.write (Coord, AAcc.read (Coord));
65
+ } else {
66
+ sycl::int2 Coord (Id[0 ], Id[1 ]);
67
+ BAcc.write (Coord, AAcc.read (Coord));
68
+ }
69
+ });
94
70
}).wait ();
95
71
} catch (exception const &e) {
72
+
96
73
std::cout << " Failed" << std::endl;
97
74
std::cerr << " SYCL Exception caught: " << e.what ();
75
+ free (Input);
76
+ free (Output);
98
77
return 1 ;
99
78
}
100
79
101
- int NumErrors = check (B, NumI32Elts);
102
- free (A);
103
- free (B);
104
- return NumErrors;
80
+ // Check the output buffer.
81
+ bool HasError = false ;
82
+ for (int i = 0 ; i < NumI32Elts; i++) {
83
+ if (Output[i] != i) {
84
+ HasError = true ;
85
+ break ;
86
+ }
87
+ }
88
+
89
+ if (!HasError) {
90
+ std::cout << " Passed" << std::endl;
91
+ } else {
92
+ std::cout << " Failed" << std::endl;
93
+ }
94
+
95
+ free (Input);
96
+ free (Output);
97
+ return HasError;
105
98
}
106
99
107
100
int main () {
108
- int NumErrors = 0 ;
109
-
110
101
queue Q;
111
102
device Dev = Q.get_device ();
112
103
std::cout << " Running on " << Dev.get_info <info::device::name>()
@@ -127,17 +118,18 @@ int main() {
127
118
128
119
// Using max sizes in one image may require too much memory.
129
120
// Check them one by one.
130
- NumErrors += test2D (Q, MaxWidth2D, 2 );
131
- NumErrors += test2D (Q, 2 , MaxHeight2D);
121
+ bool HasError = false ;
122
+ HasError |= testND<2 >(Q, MaxWidth2D, 2 );
123
+ HasError |= testND<2 >(Q, 2 , MaxHeight2D);
132
124
133
- NumErrors += test3D (Q, MaxWidth3D, 2 , 3 );
134
- NumErrors += test3D (Q, 2 , MaxHeight3D, 3 );
135
- NumErrors += test3D (Q, 2 , 3 , MaxDepth3D);
125
+ HasError |= testND< 3 > (Q, MaxWidth3D, 2 , 3 );
126
+ HasError |= testND< 3 > (Q, 2 , MaxHeight3D, 3 );
127
+ HasError |= testND< 3 > (Q, 2 , 3 , MaxDepth3D);
136
128
137
- if (NumErrors )
138
- std::cerr << " Test failed." << std::endl;
129
+ if (HasError )
130
+ std::cout << " Test failed." << std::endl;
139
131
else
140
132
std::cout << " Test passed." << std::endl;
141
133
142
- return NumErrors ;
134
+ return HasError ? 1 : 0 ;
143
135
}
0 commit comments