Skip to content

Commit fb342f0

Browse files
authored
Merge pull request #1549 from igchor/event_fix
Fix adding event to queue cache
2 parents a44e81b + 7186d6c commit fb342f0

File tree

7 files changed

+208
-3
lines changed

7 files changed

+208
-3
lines changed

source/adapters/level_zero/common.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,11 @@ bool setEnvVar(const char *name, const char *value) {
8888
ZeUSMImportExtension ZeUSMImport;
8989

9090
// This will count the calls to Level-Zero
91+
// TODO: remove the ifdef once
92+
// https://github.com/oneapi-src/unified-runtime/issues/1454 is implemented
93+
#ifndef UR_L0_CALL_COUNT_IN_TESTS
9194
std::map<std::string, int> *ZeCallCount = nullptr;
95+
#endif
9296

9397
inline void zeParseError(ze_result_t ZeError, const char *&ErrorString) {
9498
switch (ZeError) {

source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1514,8 +1514,7 @@ ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
15141514
}
15151515

15161516
ur_result_t ur_queue_handle_t_::addEventToQueueCache(ur_event_handle_t Event) {
1517-
if (!Event->IsMultiDevice && Event->UrQueue) {
1518-
auto Device = Event->UrQueue->Device;
1517+
if (!Event->IsMultiDevice) {
15191518
auto EventCachesMap = Event->isHostVisible() ? &EventCachesDeviceMap[0]
15201519
: &EventCachesDeviceMap[1];
15211520
if (EventCachesMap->find(Device) == EventCachesMap->end()) {

test/adapters/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (C) 2023 Intel Corporation
1+
# Copyright (C) 2023-2024 Intel Corporation
22
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
33
# See LICENSE.TXT
44
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

test/adapters/level_zero/CMakeLists.txt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,22 @@ else()
2525
add_dependencies(test-adapter-level_zero
2626
generate_device_binaries kernel_names_header)
2727
endif()
28+
29+
if(LINUX)
30+
# Make L0 use CallMap from a seprate shared lib so that we can access the map
31+
# from the tests. This only seems to work on linux
32+
add_library(zeCallMap SHARED zeCallMap.cpp)
33+
target_compile_definitions(ur_adapter_level_zero PRIVATE UR_L0_CALL_COUNT_IN_TESTS)
34+
target_link_libraries(ur_adapter_level_zero PRIVATE zeCallMap)
35+
36+
add_adapter_test(level_zero
37+
FIXTURE DEVICES
38+
SOURCES
39+
event_cache_tests.cpp
40+
ENVIRONMENT
41+
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
42+
"UR_L0_LEAKS_DEBUG=1"
43+
)
44+
45+
target_link_libraries(test-adapter-level_zero PRIVATE zeCallMap)
46+
endif()
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include "ur_print.hpp"
7+
#include "uur/fixtures.h"
8+
#include "uur/raii.h"
9+
10+
#include <map>
11+
#include <string>
12+
13+
template <typename... Args> auto combineFlags(std::tuple<Args...> tuple) {
14+
return std::apply([](auto... args) { return (... |= args); }, tuple);
15+
}
16+
17+
extern std::map<std::string, int> *ZeCallCount;
18+
19+
using FlagsTupleType = std::tuple<ur_queue_flags_t, ur_queue_flags_t,
20+
ur_queue_flags_t, ur_queue_flags_t>;
21+
22+
struct urEventCacheTest : uur::urContextTestWithParam<FlagsTupleType> {
23+
void SetUp() override {
24+
UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam::SetUp());
25+
26+
flags = combineFlags(getParam());
27+
28+
ur_queue_properties_t props;
29+
props.flags = flags;
30+
ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue));
31+
ASSERT_NE(queue, nullptr);
32+
33+
ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_WRITE_ONLY, size,
34+
nullptr, &buffer));
35+
36+
(*ZeCallCount)["zeEventCreate"] = 0;
37+
(*ZeCallCount)["zeEventDestroy"] = 0;
38+
}
39+
40+
void TearDown() override {
41+
if (buffer) {
42+
EXPECT_SUCCESS(urMemRelease(buffer));
43+
}
44+
if (queue) {
45+
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueRelease(queue));
46+
}
47+
UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam::TearDown());
48+
}
49+
50+
auto enqueueWork(ur_event_handle_t *hEvent, int data) {
51+
input.assign(count, data);
52+
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urEnqueueMemBufferWrite(
53+
queue, buffer, false, 0, size, input.data(), 0, nullptr, hEvent));
54+
}
55+
56+
void verifyData() {
57+
std::vector<uint32_t> output(count, 1);
58+
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urEnqueueMemBufferRead(
59+
queue, buffer, true, 0, size, output.data(), 0, nullptr, nullptr));
60+
61+
if (!(flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
62+
ASSERT_EQ(input, output);
63+
}
64+
}
65+
66+
const size_t count = 1024;
67+
const size_t size = sizeof(uint32_t) * count;
68+
ur_mem_handle_t buffer = nullptr;
69+
ur_queue_handle_t queue = nullptr;
70+
std::vector<uint32_t> input;
71+
ur_queue_flags_t flags;
72+
};
73+
74+
TEST_P(urEventCacheTest, eventsReuseNoVisibleEvent) {
75+
static constexpr int numIters = 16;
76+
static constexpr int numEnqueues = 128;
77+
78+
for (int i = 0; i < numIters; i++) {
79+
for (int j = 0; j < numEnqueues; j++) {
80+
enqueueWork(nullptr, i * numEnqueues + j);
81+
}
82+
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
83+
verifyData();
84+
}
85+
86+
// TODO: why events are not reused for UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE?
87+
if ((flags & UR_QUEUE_FLAG_DISCARD_EVENTS) &&
88+
!(flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
89+
ASSERT_EQ((*ZeCallCount)["zeEventCreate"], 2);
90+
} else {
91+
ASSERT_GE((*ZeCallCount)["zeEventCreate"], numIters * numEnqueues);
92+
}
93+
}
94+
95+
TEST_P(urEventCacheTest, eventsReuseWithVisibleEvent) {
96+
static constexpr int numIters = 16;
97+
static constexpr int numEnqueues = 128;
98+
99+
for (int i = 0; i < numIters; i++) {
100+
std::vector<uur::raii::Event> events(numEnqueues);
101+
for (int j = 0; j < numEnqueues; j++) {
102+
enqueueWork(events[j].ptr(), i * numEnqueues + j);
103+
}
104+
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
105+
verifyData();
106+
}
107+
108+
ASSERT_LT((*ZeCallCount)["zeEventCreate"], numIters * numEnqueues);
109+
}
110+
111+
TEST_P(urEventCacheTest, eventsReuseWithVisibleEventAndWait) {
112+
static constexpr int numIters = 16;
113+
static constexpr int numEnqueues = 128;
114+
static constexpr int waitEveryN = 16;
115+
116+
for (int i = 0; i < numIters; i++) {
117+
std::vector<uur::raii::Event> events;
118+
for (int j = 0; j < numEnqueues; j++) {
119+
events.emplace_back();
120+
enqueueWork(events.back().ptr(), i * numEnqueues + j);
121+
122+
if (j > 0 && j % waitEveryN == 0) {
123+
ASSERT_SUCCESS(urEventWait(waitEveryN,
124+
(ur_event_handle_t *)events.data()));
125+
verifyData();
126+
events.clear();
127+
}
128+
}
129+
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
130+
}
131+
132+
ASSERT_GE((*ZeCallCount)["zeEventCreate"], waitEveryN);
133+
// TODO: why there are more events than this?
134+
// ASSERT_LE((*ZeCallCount)["zeEventCreate"], waitEveryN * 2 + 2);
135+
}
136+
137+
template <typename T>
138+
inline std::string
139+
printFlags(const testing::TestParamInfo<typename T::ParamType> &info) {
140+
const auto device_handle = std::get<0>(info.param);
141+
const auto platform_device_name =
142+
uur::GetPlatformAndDeviceName(device_handle);
143+
auto flags = combineFlags(std::get<1>(info.param));
144+
145+
std::stringstream ss;
146+
ur::details::printFlag<ur_queue_flag_t>(ss, flags);
147+
148+
auto str = ss.str();
149+
std::replace(str.begin(), str.end(), ' ', '_');
150+
std::replace(str.begin(), str.end(), '|', '_');
151+
return platform_device_name + "__" + str;
152+
}
153+
154+
UUR_TEST_SUITE_P(
155+
urEventCacheTest,
156+
::testing::Combine(
157+
testing::Values(0, UR_QUEUE_FLAG_DISCARD_EVENTS),
158+
testing::Values(0, UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE),
159+
// TODO: why the test fails with UR_QUEUE_FLAG_SUBMISSION_BATCHED?
160+
testing::Values(
161+
UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE /*, UR_QUEUE_FLAG_SUBMISSION_BATCHED */),
162+
testing::Values(0, UR_QUEUE_FLAG_PROFILING_ENABLE)),
163+
printFlags<urEventCacheTest>);
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
// See LICENSE.TXT
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
#include <map>
7+
#include <string>
8+
9+
// Map used by L0 adapter to count the number of calls to each L0 function
10+
// Lifetime is managed by the adapter, this variable is defined here
11+
// only so that we can read it from the tests.
12+
std::map<std::string, int> *ZeCallCount = nullptr;

test/conformance/testing/include/uur/fixtures.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@
2020
} \
2121
(void)0
2222

23+
#define UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(ret) \
24+
auto status = ret; \
25+
if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { \
26+
GTEST_SKIP(); \
27+
} else { \
28+
ASSERT_EQ(status, UR_RESULT_SUCCESS); \
29+
}
30+
2331
namespace uur {
2432

2533
struct urPlatformTest : ::testing::Test {

0 commit comments

Comments
 (0)