Skip to content

Commit b56f941

Browse files
authored
[SYCL] [Graph] Add E2E tests for Graphs using sycl_ext_oneapi_work_group_static extension (#16644)
Two e2e tests were added: - `Inputs/work_group_static_memory.cpp` checks if using the extension in a static graph works, - `Update/work_group_static_memory_with_dyn_cgf_and_dyn_params.cpp` tests the work_group extension against graph with dynamic CGF which at the same time, uses one dynamic parameter
1 parent 160509b commit b56f941

File tree

4 files changed

+176
-0
lines changed

4 files changed

+176
-0
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// UNSUPPORTED: hip
9+
// UNSUPPORTED-INTENDED: sycl_ext_oneapi_work_group_static is not supported on
10+
// AMD
11+
12+
#define GRAPH_E2E_EXPLICIT
13+
14+
#include "../Inputs/work_group_static_memory.cpp"
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Tests using sycl_ext_oneapi_work_group_static in a graph node
2+
3+
#include "../graph_common.hpp"
4+
#include <sycl/ext/oneapi/work_group_static.hpp>
5+
6+
constexpr size_t WgSize = 32;
7+
8+
// Local mem used in kernel
9+
sycl::ext::oneapi::experimental::work_group_static<int[WgSize]> LocalIDBuff;
10+
11+
int main() {
12+
queue Queue;
13+
exp_ext::command_graph Graph{Queue};
14+
15+
std::vector<int> HostData(Size, 0);
16+
17+
int *Ptr = malloc_device<int>(Size, Queue);
18+
Queue.copy(HostData.data(), Ptr, Size).wait();
19+
20+
auto node = add_node(Graph, Queue, [&](handler &CGH) {
21+
CGH.parallel_for(nd_range({Size}, {WgSize}), [=](nd_item<1> Item) {
22+
LocalIDBuff[Item.get_local_linear_id()] = Item.get_local_linear_id();
23+
24+
Item.barrier();
25+
26+
// Check that the memory is accessible from other work-items
27+
size_t LocalIdx = Item.get_local_linear_id() ^ 1;
28+
size_t GlobalIdx = Item.get_global_linear_id() ^ 1;
29+
Ptr[GlobalIdx] = LocalIDBuff[LocalIdx];
30+
});
31+
});
32+
33+
auto GraphExec = Graph.finalize();
34+
35+
for (unsigned N = 0; N < Iterations; N++) {
36+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
37+
}
38+
Queue.wait_and_throw();
39+
40+
Queue.copy(Ptr, HostData.data(), Size);
41+
Queue.wait_and_throw();
42+
43+
for (size_t i = 0; i < Size; i++) {
44+
int Ref = i % WgSize;
45+
assert(check_value(i, Ref, HostData[i], "Ptr"));
46+
}
47+
48+
free(Ptr, Queue);
49+
return 0;
50+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// UNSUPPORTED: hip
9+
// UNSUPPORTED-INTENDED: sycl_ext_oneapi_work_group_static is not supported on
10+
// AMD
11+
12+
#define GRAPH_E2E_RECORD_REPLAY
13+
14+
#include "../Inputs/work_group_static_memory.cpp"
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
// UNSUPPORTED: hip
9+
// UNSUPPORTED-INTENDED: sycl_ext_oneapi_work_group_static is not supported on
10+
// AMD
11+
12+
// Tests using sycl_ext_oneapi_work_group_static in a graph node with dynamic
13+
// cgf and dynamic parameter
14+
15+
#include "../graph_common.hpp"
16+
#include <sycl/ext/oneapi/work_group_static.hpp>
17+
18+
constexpr size_t WgSize = 32;
19+
20+
// Local mem used in kernel
21+
sycl::ext::oneapi::experimental::work_group_static<int[WgSize]> LocalIDBuff;
22+
23+
int main() {
24+
queue Queue;
25+
exp_ext::command_graph Graph{Queue};
26+
27+
int *PtrA = malloc_device<int>(Size, Queue);
28+
int *PtrB = malloc_device<int>(Size, Queue);
29+
30+
std::vector<int> HostDataA(Size);
31+
std::vector<int> HostDataB(Size);
32+
33+
exp_ext::dynamic_parameter DynParam(Graph, PtrA);
34+
35+
auto CGFA = [&](handler &CGH) {
36+
CGH.set_arg(0, DynParam);
37+
CGH.parallel_for(nd_range({Size}, {WgSize}), [=](nd_item<1> Item) {
38+
LocalIDBuff[Item.get_local_linear_id()] = Item.get_local_linear_id();
39+
40+
Item.barrier();
41+
42+
// Check that the memory is accessible from other work-items
43+
size_t LocalIdx = Item.get_local_linear_id() ^ 1;
44+
size_t GlobalIdx = Item.get_global_linear_id() ^ 1;
45+
PtrA[GlobalIdx] = LocalIDBuff[LocalIdx];
46+
});
47+
};
48+
49+
auto CGFB = [&](handler &CGH) {
50+
CGH.set_arg(0, DynParam);
51+
CGH.parallel_for(nd_range({Size}, {WgSize}), [=](nd_item<1> Item) {
52+
LocalIDBuff[Item.get_local_linear_id()] = Item.get_local_linear_id();
53+
54+
Item.barrier();
55+
56+
// Check that the memory is accessible from other work-items
57+
size_t LocalIdx = Item.get_local_linear_id() ^ 1;
58+
size_t GlobalIdx = Item.get_global_linear_id() ^ 1;
59+
PtrA[GlobalIdx] = LocalIDBuff[LocalIdx] - 1;
60+
});
61+
};
62+
63+
auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB});
64+
auto DynamicCGNode = Graph.add(DynamicCG);
65+
auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{});
66+
67+
auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, bool nextCGF) {
68+
Queue.memset(PtrA, 0, Size * sizeof(int));
69+
Queue.memset(PtrB, 0, Size * sizeof(int));
70+
Queue.wait();
71+
72+
Queue.ext_oneapi_graph(ExecGraph).wait();
73+
74+
Queue.copy(PtrA, HostDataA.data(), Size);
75+
Queue.copy(PtrB, HostDataB.data(), Size);
76+
Queue.wait();
77+
78+
for (size_t i = 0; i < Size; i++) {
79+
int Ref = nextCGF ? (i % WgSize) - 1 : i % WgSize;
80+
assert(HostDataA[i] == (A ? Ref : 0));
81+
assert(HostDataB[i] == (B ? Ref : 0));
82+
}
83+
};
84+
85+
ExecuteGraphAndVerifyResults(true, false, false);
86+
87+
DynParam.update(PtrB);
88+
ExecGraph.update(DynamicCGNode);
89+
ExecuteGraphAndVerifyResults(false, true, false);
90+
91+
DynamicCG.set_active_index(1);
92+
ExecGraph.update(DynamicCGNode);
93+
ExecuteGraphAndVerifyResults(false, true, true);
94+
95+
free(PtrA, Queue);
96+
free(PtrB, Queue);
97+
return 0;
98+
}

0 commit comments

Comments
 (0)