Skip to content

Commit cda38de

Browse files
Bensuo, callumfare, EwanC
authored
[SYCL][Graph] Fix and add E2E tests for using local accessors in graphs (#15920)
- Update the UR tag to pick up the fix for updating local accessors on CUDA/HIP. - Add E2E tests covering local accessor usage. --------- Co-authored-by: Callum Fare <callum@codeplay.com> Co-authored-by: Ewan Crawford <ewan@codeplay.com>
1 parent 37b339e commit cda38de

File tree

7 files changed

+194
-7
lines changed

7 files changed

+194
-7
lines changed
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# commit f01741af022cfe82afcb026b9aa0be251eb6a497
2-
# Merge: 004d2474 85bb5f62
3-
# Author: Callum Fare <callum@codeplay.com>
4-
# Date: Tue Nov 5 13:39:53 2024 +0000
5-
# Merge pull request #2260 from nrspruit/refactor_l0_default_init
6-
# [L0] Refactor to remove default constructor inits
7-
set(UNIFIED_RUNTIME_TAG f01741af022cfe82afcb026b9aa0be251eb6a497)
1+
# commit 3edf99755ce2af3b53102a7d8438e0fe969efac3
2+
# Merge: 5955bad3 0b968661
3+
# Author: Ross Brunton <ross@codeplay.com>
4+
# Date: Wed Nov 6 11:07:29 2024 +0000
5+
# Merge pull request #2082 from RossBrunton/ross/multiadapt
6+
# [CI] Add "loader" support to conformance testing
7+
set(UNIFIED_RUNTIME_TAG 3edf99755ce2af3b53102a7d8438e0fe969efac3)
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_EXPLICIT
9+
10+
#include "../Inputs/local_accessor.cpp"
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Tests basic adding of nodes with local accessors,
2+
// and submission of the graph.
3+
4+
#include "../graph_common.hpp"
5+
6+
int main() {
7+
queue Queue{};
8+
9+
using T = int;
10+
11+
const size_t LocalSize = 128;
12+
13+
std::vector<T> DataA(Size), DataB(Size), DataC(Size);
14+
15+
std::iota(DataA.begin(), DataA.end(), 10);
16+
17+
std::vector<T> ReferenceA(DataA);
18+
19+
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
20+
21+
T *PtrA = malloc_device<T>(Size, Queue);
22+
23+
Queue.copy(DataA.data(), PtrA, Size);
24+
Queue.wait_and_throw();
25+
26+
auto node = add_node(Graph, Queue, [&](handler &CGH) {
27+
local_accessor<T, 1> LocalMem(LocalSize, CGH);
28+
29+
CGH.parallel_for(nd_range({Size}, {LocalSize}), [=](nd_item<1> Item) {
30+
LocalMem[Item.get_local_linear_id()] = Item.get_global_linear_id() * 2;
31+
PtrA[Item.get_global_linear_id()] += LocalMem[Item.get_local_linear_id()];
32+
});
33+
});
34+
35+
auto GraphExec = Graph.finalize();
36+
37+
for (unsigned n = 0; n < Iterations; n++) {
38+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
39+
}
40+
41+
Queue.wait_and_throw();
42+
43+
Queue.copy(PtrA, DataA.data(), Size);
44+
Queue.wait_and_throw();
45+
46+
free(PtrA, Queue);
47+
48+
for (size_t i = 0; i < Size; i++) {
49+
T Ref = 10 + i + (i * 2);
50+
check_value(i, Ref, ReferenceA[i], "PtrA");
51+
}
52+
53+
return 0;
54+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Tests whole graph update of nodes with local accessors,
2+
// and submission of the graph.
3+
4+
#include "../graph_common.hpp"
5+
6+
using T = int;
7+
8+
// Adds one kernel node to Graph through add_node and returns whatever
// add_node returns.
//
// The kernel runs over Size work-items in work-groups of LocalSize and uses a
// local accessor of LocalSize elements. Every work-item stores twice its
// global linear id in its local slot, then adds that value plus the
// work-group size to Ptr at its global linear id.
auto add_graph_node(
    exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
    queue &Queue, size_t Size, size_t LocalSize, T *Ptr) {
  return add_node(Graph, Queue, [&](handler &CGH) {
    local_accessor<T, 1> Scratch(LocalSize, CGH);
    const nd_range<1> ExecRange{range<1>{Size}, range<1>{LocalSize}};

    CGH.parallel_for(ExecRange, [=](nd_item<1> Item) {
      const size_t LocalId = Item.get_local_linear_id();
      const size_t GlobalId = Item.get_global_linear_id();

      Scratch[LocalId] = GlobalId * 2;
      Ptr[GlobalId] += Scratch[LocalId] + Item.get_local_range(0);
    });
  });
}
21+
int main() {
22+
queue Queue{};
23+
24+
const size_t LocalSize = 128;
25+
26+
std::vector<T> DataA(Size), DataB(Size);
27+
28+
std::iota(DataA.begin(), DataA.end(), 10);
29+
std::iota(DataB.begin(), DataB.end(), 10);
30+
31+
std::vector<T> ReferenceA(DataA), ReferenceB(DataB);
32+
33+
exp_ext::command_graph GraphA{Queue.get_context(), Queue.get_device()};
34+
35+
T *PtrA = malloc_device<T>(Size, Queue);
36+
T *PtrB = malloc_device<T>(Size, Queue);
37+
38+
Queue.copy(DataA.data(), PtrA, Size);
39+
Queue.copy(DataB.data(), PtrB, Size);
40+
Queue.wait_and_throw();
41+
42+
auto NodeA = add_graph_node(GraphA, Queue, Size, LocalSize / 2, PtrA);
43+
44+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
45+
46+
// Create second graph for whole graph update with a different local size
47+
exp_ext::command_graph GraphB{Queue.get_context(), Queue.get_device()};
48+
auto NodeB = add_graph_node(GraphB, Queue, Size, LocalSize, PtrB);
49+
50+
// Execute graphs before updating and check outputs
51+
for (unsigned n = 0; n < Iterations; n++) {
52+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
53+
}
54+
55+
Queue.wait_and_throw();
56+
57+
Queue.copy(PtrA, DataA.data(), Size);
58+
Queue.copy(PtrB, DataB.data(), Size);
59+
Queue.wait_and_throw();
60+
61+
for (size_t i = 0; i < Size; i++) {
62+
T RefA = 10 + i + (i * 2) + LocalSize / 2;
63+
T RefB = 10 + i;
64+
check_value(i, RefA, ReferenceA[i], "PtrA");
65+
check_value(i, RefB, ReferenceB[i], "PtrB");
66+
}
67+
68+
// Update GraphExecA using whole graph update
69+
70+
GraphExecA.update(GraphB);
71+
72+
// Execute graphs again and check outputs
73+
for (unsigned n = 0; n < Iterations; n++) {
74+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExecA); });
75+
}
76+
77+
Queue.wait_and_throw();
78+
79+
Queue.copy(PtrA, DataA.data(), Size);
80+
Queue.copy(PtrB, DataB.data(), Size);
81+
Queue.wait_and_throw();
82+
83+
for (size_t i = 0; i < Size; i++) {
84+
T RefA = 10 + i + (i * 2) + LocalSize / 2;
85+
T RefB = 10 + i + (i * 2) + LocalSize;
86+
check_value(i, RefA, ReferenceA[i], "PtrA");
87+
check_value(i, RefB, ReferenceB[i], "PtrB");
88+
}
89+
90+
free(PtrA, Queue);
91+
free(PtrB, Queue);
92+
return 0;
93+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_RECORD_REPLAY
9+
10+
#include "../Inputs/local_accessor.cpp"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_EXPLICIT
9+
10+
#include "../../Inputs/whole_update_local_acc.cpp"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
#define GRAPH_E2E_RECORD_REPLAY
9+
10+
#include "../../Inputs/whole_update_local_acc.cpp"

0 commit comments

Comments
 (0)