Skip to content

Commit ab56658

Browse files
authored
FastTrack code and slides update for ISC25 (#398)
* fast track, slide 13 typo * Updated base source and solution for AsynchronousExecution: replaced auto with actual types, namespace sycl, removed the two codes with USM * updated README with links to CompilerExplorer * updated README table * remove/comment convolution example * slide update: adding reference to Compiler Explorer * minor fix on FastTrack slide * typo in FastTrack CompilerExplorer slide * fixing format
1 parent 9953a8b commit ab56658

File tree

4 files changed

+85
-202
lines changed

4 files changed

+85
-202
lines changed

Code_Exercises/Asynchronous_Execution/solution.cpp

Lines changed: 46 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,7 @@
1111
#include <sycl/sycl.hpp>
1212

1313
#include "../helpers.hpp"
14-
15-
class vector_add_1;
16-
class vector_add_2;
17-
class vector_add_3;
18-
class vector_add_4;
19-
class vector_add_5;
20-
class vector_add_6;
21-
22-
int usm_selector(const sycl::device& dev) {
23-
if (dev.has(sycl::aspect::usm_device_allocations)) {
24-
if (dev.has(sycl::aspect::gpu)) return 2;
25-
return 1;
26-
}
27-
return -1;
28-
}
14+
using namespace sycl;
2915

3016
void test_buffer_event_wait() {
3117
constexpr size_t dataSize = 1024;
@@ -38,26 +24,26 @@ void test_buffer_event_wait() {
3824
}
3925

4026
try {
41-
auto defaultQueue = sycl::queue{};
27+
queue defaultQueue = queue{};
4228

43-
auto bufA = sycl::buffer{a, sycl::range{dataSize}};
44-
auto bufB = sycl::buffer{b, sycl::range{dataSize}};
45-
auto bufR = sycl::buffer{r, sycl::range{dataSize}};
29+
buffer bufA = buffer{a, range{dataSize}};
30+
buffer bufB = buffer{b, range{dataSize}};
31+
buffer bufR = buffer{r, range{dataSize}};
4632

4733
defaultQueue
48-
.submit([&](sycl::handler& cgh) {
49-
auto accA = sycl::accessor{bufA, cgh, sycl::read_only};
50-
auto accB = sycl::accessor{bufB, cgh, sycl::read_only};
51-
auto accR = sycl::accessor{bufR, cgh, sycl::write_only};
52-
53-
cgh.parallel_for<vector_add_1>(
54-
sycl::range{dataSize},
55-
[=](sycl::id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
34+
.submit([&](handler& cgh) {
35+
accessor accA = accessor{bufA, cgh, read_only};
36+
accessor accB = accessor{bufB, cgh, read_only};
37+
accessor accR = accessor{bufR, cgh, write_only};
38+
39+
cgh.parallel_for(range{dataSize}, [=](id<1> idx) {
40+
accR[idx] = accA[idx] + accB[idx];
41+
});
5642
})
5743
.wait(); // Synchronize
5844

5945
defaultQueue.throw_asynchronous();
60-
} catch (const sycl::exception& e) { // Copy back
46+
} catch (const exception& e) { // Copy back
6147
std::cout << "Exception caught: " << e.what() << std::endl;
6248
}
6349

@@ -75,20 +61,19 @@ void test_buffer_queue_wait() {
7561
}
7662

7763
try {
78-
auto defaultQueue = sycl::queue{};
64+
queue defaultQueue = queue{};
7965

80-
auto bufA = sycl::buffer{a, sycl::range{dataSize}};
81-
auto bufB = sycl::buffer{b, sycl::range{dataSize}};
82-
auto bufR = sycl::buffer{r, sycl::range{dataSize}};
66+
buffer bufA = buffer{a, range{dataSize}};
67+
buffer bufB = buffer{b, range{dataSize}};
68+
buffer bufR = buffer{r, range{dataSize}};
8369

84-
defaultQueue.submit([&](sycl::handler& cgh) {
85-
auto accA = sycl::accessor{bufA, cgh, sycl::read_only};
86-
auto accB = sycl::accessor{bufB, cgh, sycl::read_only};
87-
auto accR = sycl::accessor{bufR, cgh, sycl::write_only};
70+
defaultQueue.submit([&](handler& cgh) {
71+
accessor accA = accessor{bufA, cgh, read_only};
72+
accessor accB = accessor{bufB, cgh, read_only};
73+
accessor accR = accessor{bufR, cgh, write_only};
8874

89-
cgh.parallel_for<vector_add_2>(
90-
sycl::range{dataSize},
91-
[=](sycl::id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
75+
cgh.parallel_for(range{dataSize},
76+
[=](id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
9277
});
9378

9479
defaultQueue.wait_and_throw(); // Synchronize
@@ -110,21 +95,20 @@ void test_buffer_buffer_destruction() {
11095
}
11196

11297
try {
113-
auto defaultQueue = sycl::queue{};
98+
queue defaultQueue = queue{};
11499

115100
{
116-
auto bufA = sycl::buffer{a, sycl::range{dataSize}};
117-
auto bufB = sycl::buffer{b, sycl::range{dataSize}};
118-
auto bufR = sycl::buffer{r, sycl::range{dataSize}};
101+
buffer bufA = buffer{a, range{dataSize}};
102+
buffer bufB = buffer{b, range{dataSize}};
103+
buffer bufR = buffer{r, range{dataSize}};
119104

120105
defaultQueue.submit([&](sycl::handler& cgh) {
121-
auto accA = sycl::accessor{bufA, cgh, sycl::read_only};
122-
auto accB = sycl::accessor{bufB, cgh, sycl::read_only};
123-
auto accR = sycl::accessor{bufR, cgh, sycl::write_only};
106+
accessor accA = accessor{bufA, cgh, read_only};
107+
accessor accB = accessor{bufB, cgh, read_only};
108+
accessor accR = accessor{bufR, cgh, write_only};
124109

125-
cgh.parallel_for<vector_add_3>(
126-
sycl::range{dataSize},
127-
[=](sycl::id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
110+
cgh.parallel_for(range{dataSize},
111+
[=](id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
128112
});
129113
} // Synchronize and copy-back
130114

@@ -136,104 +120,6 @@ void test_buffer_buffer_destruction() {
136120
SYCLACADEMY_ASSERT_EQUAL(r, [](size_t i) { return i * 2; });
137121
}
138122

139-
void test_usm_event_wait() {
140-
constexpr size_t dataSize = 1024;
141-
142-
float a[dataSize], b[dataSize], r[dataSize];
143-
for (int i = 0; i < dataSize; ++i) {
144-
a[i] = static_cast<float>(i);
145-
b[i] = static_cast<float>(i);
146-
r[i] = 0.0f;
147-
}
148-
149-
try {
150-
auto usmQueue = sycl::queue{usm_selector};
151-
152-
auto devicePtrA = sycl::malloc_device<float>(dataSize, usmQueue);
153-
auto devicePtrB = sycl::malloc_device<float>(dataSize, usmQueue);
154-
auto devicePtrR = sycl::malloc_device<float>(dataSize, usmQueue);
155-
156-
usmQueue.memcpy(devicePtrA, a,
157-
sizeof(float) * dataSize)
158-
.wait(); // Synchronize
159-
usmQueue.memcpy(devicePtrB, b,
160-
sizeof(float) * dataSize)
161-
.wait(); // Synchronize
162-
163-
usmQueue
164-
.parallel_for<vector_add_4>(sycl::range{dataSize},
165-
[=](sycl::id<1> idx) {
166-
auto globalId = idx[0];
167-
devicePtrR[globalId] =
168-
devicePtrA[globalId] +
169-
devicePtrB[globalId];
170-
})
171-
.wait(); // Synchronize
172-
173-
usmQueue.memcpy(r, devicePtrR,
174-
sizeof(float) * dataSize)
175-
.wait(); // Synchronize and copy-back
176-
177-
sycl::free(devicePtrA, usmQueue);
178-
sycl::free(devicePtrB, usmQueue);
179-
sycl::free(devicePtrR, usmQueue);
180-
181-
usmQueue.throw_asynchronous();
182-
} catch (const sycl::exception& e) {
183-
std::cout << "Exception caught: " << e.what() << std::endl;
184-
}
185-
186-
SYCLACADEMY_ASSERT_EQUAL(r, [](size_t i) { return i * 2; });
187-
}
188-
189-
void test_usm_queue_wait() {
190-
constexpr size_t dataSize = 1024;
191-
192-
float a[dataSize], b[dataSize], r[dataSize];
193-
for (int i = 0; i < dataSize; ++i) {
194-
a[i] = static_cast<float>(i);
195-
b[i] = static_cast<float>(i);
196-
r[i] = 0.0f;
197-
}
198-
199-
try {
200-
auto usmQueue = sycl::queue{usm_selector};
201-
202-
auto devicePtrA = sycl::malloc_device<float>(dataSize, usmQueue);
203-
auto devicePtrB = sycl::malloc_device<float>(dataSize, usmQueue);
204-
auto devicePtrR = sycl::malloc_device<float>(dataSize, usmQueue);
205-
206-
usmQueue.memcpy(devicePtrA, a, sizeof(float) * dataSize);
207-
usmQueue.memcpy(devicePtrB, b, sizeof(float) * dataSize);
208-
209-
usmQueue.wait(); // Synchronize
210-
211-
usmQueue.parallel_for<vector_add_5>(
212-
sycl::range{dataSize}, [=](sycl::id<1> idx) {
213-
auto globalId = idx[0];
214-
devicePtrR[globalId] = devicePtrA[globalId] + devicePtrB[globalId];
215-
});
216-
217-
usmQueue.wait(); // Synchronize
218-
219-
usmQueue.memcpy(r, devicePtrR,
220-
sizeof(float) * dataSize)
221-
.wait(); // Copy-back
222-
223-
usmQueue.wait(); // Synchronize
224-
225-
sycl::free(devicePtrA, usmQueue);
226-
sycl::free(devicePtrB, usmQueue);
227-
sycl::free(devicePtrR, usmQueue);
228-
229-
usmQueue.throw_asynchronous();
230-
} catch (const sycl::exception& e) {
231-
std::cout << "Exception caught: " << e.what() << std::endl;
232-
}
233-
234-
SYCLACADEMY_ASSERT_EQUAL(r, [](size_t i) { return i * 2; });
235-
}
236-
237123
void test_buffer_host_accessor() {
238124
constexpr size_t dataSize = 1024;
239125

@@ -245,35 +131,36 @@ void test_buffer_host_accessor() {
245131
}
246132

247133
try {
248-
auto defaultQueue = sycl::queue{};
134+
queue defaultQueue = queue{};
249135

250136
{
251-
auto bufA = sycl::buffer{a, sycl::range{dataSize}};
252-
auto bufB = sycl::buffer{b, sycl::range{dataSize}};
253-
auto bufR = sycl::buffer{r, sycl::range{dataSize}};
137+
buffer bufA = buffer{a, range{dataSize}};
138+
buffer bufB = buffer{b, range{dataSize}};
139+
buffer bufR = buffer{r, range{dataSize}};
254140

255141
defaultQueue.submit([&](sycl::handler& cgh) {
256-
auto accA = sycl::accessor{bufA, cgh, sycl::read_only};
257-
auto accB = sycl::accessor{bufB, cgh, sycl::read_only};
258-
auto accR = sycl::accessor{bufR, cgh, sycl::write_only};
142+
accessor accA = accessor{bufA, cgh, read_only};
143+
accessor accB = accessor{bufB, cgh, read_only};
144+
accessor accR = accessor{bufR, cgh, write_only};
259145

260-
cgh.parallel_for<vector_add_6>(
261-
sycl::range{dataSize},
262-
[=](sycl::id<1> idx) { accR[idx] = accA[idx] + accB[idx]; });
146+
cgh.parallel_for(range{dataSize}, [=](sycl::id<1> idx) {
147+
accR[idx] = accA[idx] + accB[idx];
148+
});
263149
});
264150

265151
defaultQueue.wait(); // Synchronize
266152

267153
{
268-
auto hostAccR = bufR.get_host_access(sycl::read_only); // Copy-to-host
154+
host_accessor hostAccR =
155+
bufR.get_host_access(read_only); // Copy-to-host
269156

270157
SYCLACADEMY_ASSERT_EQUAL(hostAccR, [](size_t i) { return i * 2; });
271158
}
272159

273160
} // Copy-back
274161

275162
defaultQueue.throw_asynchronous();
276-
} catch (const sycl::exception& e) {
163+
} catch (const exception& e) {
277164
std::cout << "Exception caught: " << e.what() << std::endl;
278165
}
279166
}
@@ -282,7 +169,5 @@ int main() {
282169
test_buffer_event_wait();
283170
test_buffer_queue_wait();
284171
test_buffer_buffer_destruction();
285-
test_usm_event_wait();
286-
test_usm_queue_wait();
287172
test_buffer_host_accessor();
288173
}

Code_Exercises/Asynchronous_Execution/source.cpp

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,10 @@
1111
* ~~~~~~~~~~~~~~~~~~~~
1212
*
1313
* // Default construct a queue
14-
* auto q = sycl::queue{};
14+
* sycl::queue q = sycl::queue{};
1515
*
1616
* // Declare a buffer pointing to ptr
17-
* auto buf = sycl::buffer{ptr, sycl::range{n}};
18-
*
19-
* // Do a USM malloc_device
20-
* auto ptr = sycl::malloc_device<T>(n, q);
21-
*
22-
* // Do a USM memcpy
23-
* q.memcpy(dst_ptr, src_ptr, sizeof(T)*n);
17+
* sycl::buffer buf = sycl::buffer{ptr, sycl::range{n}};
2418
*
2519
* // Wait on a queue
2620
* q.wait();
@@ -33,10 +27,10 @@
3327
*
3428
* // Within the command group you can
3529
* // 1. Declare an accessor to a buffer
36-
* auto read_write_acc = sycl::accessor{buf, cgh};
37-
* auto read_acc = sycl::accessor{buf, cgh, sycl::read_only};
38-
* auto write_acc = sycl::accessor{buf, cgh, sycl::write_only};
39-
* auto no_init_acc = sycl::accessor{buf, cgh, sycl::no_init};
30+
* sycl::accessor read_write_acc = sycl::accessor{buf, cgh};
31+
* sycl::accessor read_acc = sycl::accessor{buf, cgh, sycl::read_only};
32+
* sycl::accessor write_acc = sycl::accessor{buf, cgh, sycl::write_only};
33+
* sycl::accessor no_init_acc = sycl::accessor{buf, cgh, sycl::no_init};
4034
* // 2. Enqueue a parallel for:
4135
* cgh.parallel_for<class mykernel>(sycl::range{n},
4236
* [=](sycl::id<1> i) { // Do something });
@@ -46,17 +40,9 @@
4640

4741
#include "../helpers.hpp"
4842

49-
void test_usm() {
50-
// Use your code from the "Data Parallelism" exercise to start
51-
SYCLACADEMY_ASSERT_EQUAL(/*output data*/ 0, /*expected data*/ 0);
52-
}
53-
5443
void test_buffer() {
5544
// Use your code from the "Data Parallelism" exercise to start
5645
SYCLACADEMY_ASSERT_EQUAL(/*output data*/ 0, /*expected data*/ 0);
5746
}
5847

59-
int main() {
60-
test_usm();
61-
test_buffer();
62-
}
48+
int main() { test_buffer(); }

0 commit comments

Comments
 (0)