Skip to content

Commit f33b941

Browse files
authored
Merge pull request #1335 from fabiomestre/fabio/queue_e2e_port
[CTS] Add extra tests for Queues
2 parents 803dcce + cc74d71 commit f33b941

19 files changed

+708
-96
lines changed

test/conformance/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ if(UR_DPCXX)
135135
add_subdirectory(kernel)
136136
add_subdirectory(program)
137137
add_subdirectory(enqueue)
138+
add_subdirectory(integration)
138139
add_subdirectory(exp_command_buffer)
139140
add_subdirectory(exp_usm_p2p)
140141
else()

test/conformance/device_code/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/fill_usm.cpp)
106106
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/foo.cpp)
107107
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/image_copy.cpp)
108108
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/mean.cpp)
109+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/cpy_and_mult.cpp)
110+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/cpy_and_mult_usm.cpp)
111+
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/multiply.cpp)
109112
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant.cpp)
110113
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/spec_constant_multiple.cpp)
111114
add_device_binary(${CMAKE_CURRENT_SOURCE_DIR}/usm_ll.cpp)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <CL/sycl.hpp>
7+
8+
int main() {
9+
size_t array_size = 16;
10+
cl::sycl::queue sycl_queue;
11+
std::vector<uint32_t> src(array_size, 1);
12+
std::vector<uint32_t> dst(array_size, 1);
13+
auto src_buff =
14+
cl::sycl::buffer<uint32_t>(src.data(), cl::sycl::range<1>(array_size));
15+
auto dst_buff =
16+
cl::sycl::buffer<uint32_t>(dst.data(), cl::sycl::range<1>(array_size));
17+
18+
sycl_queue.submit([&](cl::sycl::handler &cgh) {
19+
auto src_acc = src_buff.get_access<cl::sycl::access::mode::read>(cgh);
20+
auto dst_acc = dst_buff.get_access<cl::sycl::access::mode::write>(cgh);
21+
cgh.parallel_for<class cpy_and_mult>(
22+
cl::sycl::range<1>{array_size},
23+
[src_acc, dst_acc](cl::sycl::item<1> itemId) {
24+
auto id = itemId.get_id(0);
25+
dst_acc[id] = src_acc[id] * 2;
26+
});
27+
});
28+
return 0;
29+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <CL/sycl.hpp>
7+
8+
int main() {
9+
size_t array_size = 16;
10+
cl::sycl::queue sycl_queue;
11+
uint32_t *src = cl::sycl::malloc_device<uint32_t>(array_size, sycl_queue);
12+
uint32_t *dst = cl::sycl::malloc_device<uint32_t>(array_size, sycl_queue);
13+
sycl_queue.submit([&](cl::sycl::handler &cgh) {
14+
cgh.parallel_for<class cpy_and_mult_usm>(
15+
cl::sycl::range<1>{array_size},
16+
[src, dst](cl::sycl::item<1> itemId) {
17+
auto id = itemId.get_id(0);
18+
dst[id] = src[id] * 2;
19+
});
20+
});
21+
return 0;
22+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <sycl/sycl.hpp>
7+
8+
int main() {
9+
10+
const size_t inputSize = 1;
11+
sycl::queue sycl_queue;
12+
uint32_t *inputArray = sycl::malloc_shared<uint32_t>(inputSize, sycl_queue);
13+
14+
sycl_queue.submit([&](sycl::handler &cgh) {
15+
cgh.parallel_for<class MultiplyBy2>(
16+
sycl::range<1>(inputSize),
17+
[=](sycl::id<1> itemID) { inputArray[itemID] *= 2; });
18+
});
19+
return 0;
20+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
# See LICENSE.TXT
4+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
add_conformance_test_with_kernels_environment(integration
7+
QueueEmptyStatus.cpp
8+
QueueUSM.cpp
9+
QueueBuffer.cpp
10+
)
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include "fixtures.h"
7+
#include <chrono>
8+
#include <thread>
9+
10+
struct QueueBufferTestWithParam : uur::IntegrationQueueTestWithParam {
11+
void SetUp() override {
12+
program_name = "cpy_and_mult";
13+
UUR_RETURN_ON_FATAL_FAILURE(
14+
uur::IntegrationQueueTestWithParam::SetUp());
15+
}
16+
17+
void TearDown() override { uur::IntegrationQueueTestWithParam::TearDown(); }
18+
19+
void verifyResults(ur_mem_handle_t Buffer, uint32_t ExpectedValue) {
20+
uint32_t HostMem[ArraySize] = {};
21+
ASSERT_SUCCESS(urEnqueueMemBufferRead(Queue, Buffer, true, 0,
22+
sizeof(uint32_t) * ArraySize,
23+
HostMem, 0, nullptr, nullptr));
24+
25+
for (uint32_t i : HostMem) {
26+
ASSERT_EQ(i, ExpectedValue);
27+
}
28+
}
29+
30+
ur_mem_handle_t Buffer1 = nullptr;
31+
ur_mem_handle_t Buffer2 = nullptr;
32+
};
33+
34+
UUR_TEST_SUITE_P(QueueBufferTestWithParam,
35+
testing::Values(0, /* In-Order */
36+
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE),
37+
uur::IntegrationQueueTestWithParam::paramPrinter);
38+
39+
/* Submits multiple kernels that interact with each other by accessing and
40+
* writing to the same buffers.
41+
* Checks that when using an IN_ORDER queue, no synchronization is needed
42+
* between calls to urEnqueueKernelLaunch.
43+
* Checks that when using an OUT_OF_ORDER queue, synchronizing using only
44+
* event barriers is enough. */
45+
TEST_P(QueueBufferTestWithParam, QueueBufferTest) {
46+
47+
std::vector<ur_event_handle_t> EventsFill;
48+
ur_event_handle_t Event;
49+
50+
size_t Buffer1Index;
51+
size_t Buffer2Index;
52+
ASSERT_NO_FATAL_FAILURE(
53+
AddBuffer1DArg(ArraySize * sizeof(uint32_t), &Buffer1, &Buffer1Index));
54+
ASSERT_NO_FATAL_FAILURE(
55+
AddBuffer1DArg(ArraySize * sizeof(uint32_t), &Buffer2, &Buffer2Index));
56+
57+
ASSERT_SUCCESS(urEnqueueMemBufferFill(
58+
Queue, Buffer1, &InitialValue, sizeof(uint32_t), 0,
59+
ArraySize * sizeof(uint32_t), 0, nullptr, &Event));
60+
EventsFill.push_back(Event);
61+
62+
ASSERT_SUCCESS(urEnqueueMemBufferFill(
63+
Queue, Buffer2, &InitialValue, sizeof(uint32_t), 0,
64+
ArraySize * sizeof(uint32_t), 0, nullptr, &Event));
65+
EventsFill.push_back(Event);
66+
67+
ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(EventsFill));
68+
69+
constexpr size_t GlobalOffset = 0;
70+
constexpr size_t NDimensions = 1;
71+
constexpr uint32_t NumIterations = 5;
72+
73+
uint32_t CurValueMem1 = InitialValue;
74+
uint32_t CurValueMem2 = InitialValue;
75+
for (uint32_t i = 0; i < NumIterations; ++i) {
76+
77+
/* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */
78+
ASSERT_SUCCESS(
79+
urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer2));
80+
ASSERT_SUCCESS(
81+
urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer1));
82+
83+
ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions,
84+
&GlobalOffset, &ArraySize, nullptr,
85+
0, nullptr, &Event));
86+
ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
87+
88+
CurValueMem2 = CurValueMem1 * 2;
89+
90+
/* Copy from DeviceMem1 to DeviceMem2 and multiply by 2 */
91+
ASSERT_SUCCESS(
92+
urKernelSetArgMemObj(kernel, Buffer1Index, nullptr, Buffer2));
93+
ASSERT_SUCCESS(
94+
urKernelSetArgMemObj(kernel, Buffer2Index, nullptr, Buffer1));
95+
96+
ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, NDimensions,
97+
&GlobalOffset, &ArraySize, nullptr,
98+
0, nullptr, &Event));
99+
ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
100+
101+
CurValueMem1 = CurValueMem2 * 2;
102+
}
103+
104+
ASSERT_SUCCESS(urQueueFinish(Queue));
105+
106+
ASSERT_NO_FATAL_FAILURE(verifyResults(Buffer1, CurValueMem1));
107+
ASSERT_NO_FATAL_FAILURE(verifyResults(Buffer2, CurValueMem2));
108+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include "fixtures.h"
7+
#include <chrono>
8+
#include <thread>
9+
10+
struct QueueEmptyStatusTestWithParam : uur::IntegrationQueueTestWithParam {
11+
12+
void SetUp() override {
13+
14+
program_name = "multiply";
15+
UUR_RETURN_ON_FATAL_FAILURE(
16+
uur::IntegrationQueueTestWithParam::SetUp());
17+
18+
ur_device_usm_access_capability_flags_t shared_usm_flags = 0;
19+
ASSERT_SUCCESS(
20+
uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags));
21+
if (!(shared_usm_flags & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
22+
GTEST_SKIP() << "Shared USM is not supported.";
23+
}
24+
25+
ASSERT_SUCCESS(urUSMSharedAlloc(context, device, nullptr, nullptr,
26+
ArraySize * sizeof(uint32_t),
27+
&SharedMem));
28+
}
29+
30+
void TearDown() override {
31+
ASSERT_SUCCESS(urUSMFree(context, SharedMem));
32+
uur::IntegrationQueueTestWithParam::TearDown();
33+
}
34+
35+
void submitWorkToQueue() {
36+
ur_event_handle_t Event;
37+
ASSERT_SUCCESS(
38+
urEnqueueUSMFill(Queue, SharedMem, sizeof(uint32_t), &InitialValue,
39+
ArraySize * sizeof(uint32_t), 0, nullptr, &Event));
40+
ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
41+
42+
ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, &SharedMem));
43+
44+
constexpr size_t global_offset = 0;
45+
constexpr size_t n_dimensions = 1;
46+
constexpr uint32_t num_iterations = 5;
47+
for (uint32_t i = 0; i < num_iterations; ++i) {
48+
ASSERT_SUCCESS(urEnqueueKernelLaunch(Queue, kernel, n_dimensions,
49+
&global_offset, &ArraySize,
50+
nullptr, 0, nullptr, &Event));
51+
ASSERT_NO_FATAL_FAILURE(submitBarrierIfNeeded(Event));
52+
}
53+
54+
ASSERT_SUCCESS(urQueueFlush(Queue));
55+
}
56+
57+
void waitUntilQueueEmpty() const {
58+
59+
using namespace std::chrono_literals;
60+
61+
constexpr auto step = 500ms;
62+
constexpr auto maxWait = 5000ms;
63+
64+
/* Wait a bit until work finishes running. We don't synchronize with
65+
* urQueueFinish() because we want to check if the status is set without
66+
* calling it explicitly. */
67+
for (auto currentWait = 0ms; currentWait < maxWait;
68+
currentWait += step) {
69+
std::this_thread::sleep_for(step);
70+
71+
ur_bool_t is_queue_empty;
72+
ASSERT_SUCCESS(urQueueGetInfo(Queue, UR_QUEUE_INFO_EMPTY,
73+
sizeof(ur_bool_t), &is_queue_empty,
74+
nullptr));
75+
if (is_queue_empty) {
76+
return;
77+
}
78+
}
79+
80+
/* If we are here, the test failed. Let's call queue finish to avoid
81+
* issues when freeing memory */
82+
ASSERT_SUCCESS(urQueueFinish(Queue));
83+
GTEST_FAIL();
84+
}
85+
86+
void *SharedMem = nullptr;
87+
};
88+
89+
UUR_TEST_SUITE_P(QueueEmptyStatusTestWithParam,
90+
testing::Values(0, /* In-Order */
91+
UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE),
92+
uur::IntegrationQueueTestWithParam::paramPrinter);
93+
94+
/* Submits kernels that have a dependency on each other and checks that the
95+
* queue submits all the work in the correct order to the device.
96+
* Explicit synchronization (except for barriers) is avoided in these tests to
97+
* check that the properties of In-Order and OutOfOrder queues are working as
98+
* expected */
99+
TEST_P(QueueEmptyStatusTestWithParam, QueueEmptyStatusTest) {
100+
ASSERT_NO_FATAL_FAILURE(submitWorkToQueue());
101+
ASSERT_NO_FATAL_FAILURE(waitUntilQueueEmpty());
102+
103+
constexpr size_t expected_value = 3200;
104+
for (uint32_t i = 0; i < ArraySize; ++i) {
105+
ASSERT_EQ(reinterpret_cast<uint32_t *>(SharedMem)[i], expected_value);
106+
}
107+
}

0 commit comments

Comments
 (0)