Skip to content

Commit 3dd675c

Browse files
committed
[L0 v2] Initialize command lists in queue
by requesting them from the command list cache; add a test for command list reuse.
1 parent ce3ba4d commit 3dd675c

16 files changed

+194
-667
lines changed

source/adapters/level_zero/queue.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
503503

504504
// optimized path for immediate, in-order command lists
505505
if (v2::shouldUseQueueV2(Device, Flags)) {
506-
*Queue = v2::createQueue(Context, Device, Flags);
506+
*Queue = v2::createQueue(Context, Device, Props);
507507
return UR_RESULT_SUCCESS;
508508
}
509509

source/adapters/level_zero/v2/command_list_cache.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,4 +140,25 @@ void command_list_cache_t::addCommandList(const command_list_descriptor_t &desc,
140140
auto [it, _] = ZeCommandListCache.try_emplace(desc);
141141
it->second.emplace(std::move(cmdList));
142142
}
143+
144+
size_t command_list_cache_t::getNumImmediateCommandLists() {
145+
std::unique_lock<ur_mutex> Lock(ZeCommandListCacheMutex);
146+
size_t NumLists = 0;
147+
for (auto &Pair : ZeCommandListCache) {
148+
if (std::holds_alternative<immediate_command_list_descriptor_t>(Pair.first))
149+
NumLists += Pair.second.size();
150+
}
151+
return NumLists;
152+
}
153+
154+
size_t command_list_cache_t::getNumRegularCommandLists() {
155+
std::unique_lock<ur_mutex> Lock(ZeCommandListCacheMutex);
156+
size_t NumLists = 0;
157+
for (auto &Pair : ZeCommandListCache) {
158+
if (std::holds_alternative<regular_command_list_descriptor_t>(Pair.first))
159+
NumLists += Pair.second.size();
160+
}
161+
return NumLists;
162+
}
163+
143164
} // namespace v2

source/adapters/level_zero/v2/command_list_cache.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ struct command_list_cache_t {
6464
getRegularCommandList(ze_device_handle_t ZeDevice, bool IsInOrder,
6565
uint32_t Ordinal);
6666

67+
// For testing purposes: report how many immediate / regular command lists
// are currently stored in the cache.
68+
size_t getNumImmediateCommandLists();
69+
size_t getNumRegularCommandLists();
70+
6771
private:
6872
ze_context_handle_t ZeContext;
6973
std::unordered_map<command_list_descriptor_t,

source/adapters/level_zero/v2/queue_factory.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ inline ur_queue_handle_t createQueue(::ur_context_handle_t hContext,
3434
}
3535
// TODO: For now, always use immediate, in-order
3636
return new ur_queue_immediate_in_order_t(
37-
static_cast<v2::ur_context_handle_t>(hContext), hDevice, pProps->flags);
37+
static_cast<v2::ur_context_handle_t>(hContext), hDevice, pProps);
3838
}
3939

4040
} // namespace v2

source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,49 @@
1111
#include "queue_immediate_in_order.hpp"
1212

1313
namespace v2 {
14+
15+
// Picks the Level Zero queue-group ordinal for the requested group type. The
// main copy group is used only when it is requested and the device reports a
// main copy engine; every other case resolves to the compute group.
static int32_t getZeOrdinal(ur_device_handle_t hDevice, queue_group_type type) {
  const bool useMainCopy =
      type == queue_group_type::MainCopy && hDevice->hasMainCopyEngine();
  const queue_group_type group =
      useMainCopy ? queue_group_type::MainCopy : queue_group_type::Compute;
  return hDevice->QueueGroup[group].ZeOrdinal;
}
21+
22+
static std::optional<int32_t> getZeIndex(const ur_queue_properties_t *pProps) {
23+
if (pProps && pProps->pNext) {
24+
const ur_base_properties_t *extendedDesc =
25+
reinterpret_cast<const ur_base_properties_t *>(pProps->pNext);
26+
if (extendedDesc->stype == UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES) {
27+
const ur_queue_index_properties_t *indexProperties =
28+
reinterpret_cast<const ur_queue_index_properties_t *>(extendedDesc);
29+
return indexProperties->computeIndex;
30+
}
31+
}
32+
return std::nullopt;
33+
}
34+
35+
// Translates UR queue priority flags into a Level Zero command queue
// priority. The low-priority flag is checked first, then the high-priority
// flag; when neither is set the normal priority is used.
static ze_command_queue_priority_t getZePriority(ur_queue_flags_t flags) {
  const bool wantsLow = (flags & UR_QUEUE_FLAG_PRIORITY_LOW) != 0;
  const bool wantsHigh = (flags & UR_QUEUE_FLAG_PRIORITY_HIGH) != 0;

  if (wantsLow)
    return ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
  return wantsHigh ? ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH
                   : ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
}
42+
43+
// Builds a handler by requesting an immediate command list from the
// context's command list cache. The ordinal comes from getZeOrdinal() for the
// given queue group type, the priority from the queue flags (an empty flag
// set when pProps is null), and the optional index from getZeIndex().
// NOTE(review): the literal `true` is presumably the in-order argument of
// getImmediateCommandList — confirm against its declaration.
ur_command_list_handler_t::ur_command_list_handler_t(
44+
v2::ur_context_handle_t hContext, ur_device_handle_t hDevice,
45+
const ur_queue_properties_t *pProps, queue_group_type type)
46+
: commandList(hContext->commandListCache.getImmediateCommandList(
47+
hDevice->ZeDevice, true, getZeOrdinal(hDevice, type),
48+
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
49+
getZePriority(pProps ? pProps->flags : ur_queue_flags_t{}),
50+
getZeIndex(pProps))) {}
51+
1452
// Constructs the queue with two command list handlers built from the same
// context, device and properties: one for the MainCopy queue group type and
// one for the Compute queue group type. The added signature takes the full
// queue properties pointer instead of only the flags.
ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t(
15-
v2::ur_context_handle_t, ur_device_handle_t, ur_queue_flags_t) {}
53+
v2::ur_context_handle_t hContext, ur_device_handle_t hDevice,
54+
const ur_queue_properties_t *pProps)
55+
: copyHandler(hContext, hDevice, pProps, queue_group_type::MainCopy),
56+
computeHandler(hContext, hDevice, pProps, queue_group_type::Compute) {}
1657

1758
ur_result_t
1859
ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
@@ -26,11 +67,16 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName,
2667
}
2768

2869
// Adds one reference to the queue by incrementing RefCount and reports
// success (the added lines replace the UNSUPPORTED_FEATURE stub).
ur_result_t ur_queue_immediate_in_order_t::queueRetain() {
29-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
70+
RefCount.increment();
71+
return UR_RESULT_SUCCESS;
3072
}
3173

3274
// Drops one reference to the queue. When RefCount.decrementAndTest() reports
// true the queue object deletes itself; success is returned in both cases
// (the added lines replace the UNSUPPORTED_FEATURE stub).
ur_result_t ur_queue_immediate_in_order_t::queueRelease() {
33-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
75+
if (!RefCount.decrementAndTest())
76+
return UR_RESULT_SUCCESS;
77+
78+
// The object must not be touched after this point.
delete this;
79+
return UR_RESULT_SUCCESS;
3480
}
3581

3682
ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle(

source/adapters/level_zero/v2/queue_immediate_in_order.hpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,26 @@
1616
#include "ur/ur.hpp"
1717

1818
namespace v2 {
19+
20+
using queue_group_type = ur_device_handle_t_::queue_group_info_t::type;
21+
22+
// Holds a single command list obtained from the context's command list cache.
struct ur_command_list_handler_t {
23+
// Requests the command list for the given context, device, queue properties
// and queue group type.
ur_command_list_handler_t(v2::ur_context_handle_t hContext,
24+
ur_device_handle_t hDevice,
25+
const ur_queue_properties_t *pProps,
26+
queue_group_type type);
27+
28+
// Command list initialized by the constructor.
// NOTE(review): presumably handed back to the cache on destruction, going by
// the type name — confirm against raii::cache_borrowed_command_list_t.
raii::cache_borrowed_command_list_t commandList;
29+
};
30+
1931
struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ {
32+
private:
33+
ur_command_list_handler_t copyHandler;
34+
ur_command_list_handler_t computeHandler;
35+
36+
public:
2037
ur_queue_immediate_in_order_t(v2::ur_context_handle_t, ur_device_handle_t,
21-
ur_queue_flags_t);
38+
const ur_queue_properties_t *);
2239

2340
ur_result_t queueGetInfo(ur_queue_info_t propName, size_t propSize,
2441
void *pPropValue, size_t *pPropSizeRet) override;

test/adapters/level_zero/v2/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ function(add_unittest name)
99
FIXTURE DEVICES
1010
ENVIRONMENT
1111
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
12+
"UR_L0_USE_QUEUE_V2=1"
1213
SOURCES
1314
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp
1415
${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp

test/adapters/level_zero/v2/command_list_cache_test.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55

66
#include "command_list_cache.hpp"
77
#include "common.hpp"
8+
#include "v2/context.hpp"
89

910
#include "context.hpp"
1011
#include "device.hpp"
1112

1213
#include "uur/fixtures.h"
14+
#include "uur/raii.h"
1315

1416
#include <gtest/gtest.h>
1517
#include <map>
@@ -160,3 +162,97 @@ TEST_P(CommandListCacheTest, ImmediateCommandListsHaveProperAttributes) {
160162
}
161163
}
162164
}
165+
166+
// Creates one queue per (priority, optional compute index) combination, lets
// the queues go out of scope, and checks that their command lists end up in
// the context's command list cache. The round is repeated to confirm the
// cache is empty while the queues are alive and holds the same number of
// immediate command lists after every round.
TEST_P(CommandListCacheTest, CommandListsAreReusedByQueues) {
    static constexpr int NumQueuesPerType = 5;
    size_t NumUniqueQueueTypes = 0;

    const std::vector<uint32_t> Priorities{UR_QUEUE_FLAG_PRIORITY_LOW,
                                           UR_QUEUE_FLAG_PRIORITY_HIGH, 0};
    const std::vector<std::optional<int32_t>> Indices{std::nullopt, 0};

    for (int Round = 0; Round < NumQueuesPerType; ++Round) {
        NumUniqueQueueTypes = 0;

        { // Queues scope
            std::vector<uur::raii::Queue> Queues;
            for (const uint32_t Priority : Priorities) {
                for (const std::optional<int32_t> &Index : Indices) {
                    ++NumUniqueQueueTypes;

                    ur_queue_properties_t QueueProps{
                        UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0};
                    QueueProps.flags |= UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE;
                    // OR-ing in a zero priority leaves the flags untouched.
                    QueueProps.flags |= Priority;

                    ur_queue_index_properties_t IndexProps{
                        UR_STRUCTURE_TYPE_QUEUE_INDEX_PROPERTIES, nullptr, 0};
                    if (Index.has_value()) {
                        IndexProps.computeIndex = Index.value();
                        QueueProps.pNext = &IndexProps;
                    }

                    ur_queue_handle_t Queue;
                    ASSERT_EQ(
                        urQueueCreate(context, device, &QueueProps, &Queue),
                        UR_RESULT_SUCCESS);

                    Queues.emplace_back(Queue);
                }
            }

            // While the queues are alive the cache must hold nothing.
            ASSERT_EQ(static_cast<v2::ur_context_handle_t>(context)
                          ->commandListCache.getNumImmediateCommandLists(),
                      0);
            ASSERT_EQ(static_cast<v2::ur_context_handle_t>(context)
                          ->commandListCache.getNumRegularCommandLists(),
                      0);
        } // Queues scope

        ASSERT_EQ(static_cast<v2::ur_context_handle_t>(context)
                      ->commandListCache.getNumImmediateCommandLists(),
                  NumUniqueQueueTypes * 2); // * 2 for compute and copy
        ASSERT_EQ(static_cast<v2::ur_context_handle_t>(context)
                      ->commandListCache.getNumRegularCommandLists(),
                  0);
    }
}
221+
222+
// Spawns several threads that repeatedly create and destroy immediate queues
// (the first half of the threads with low priority, the rest with high
// priority) and checks that the number of cached immediate command lists
// never exceeds two per thread.
TEST_P(CommandListCacheTest, CommandListsCacheIsThreadSafe) {
    static constexpr int NumThreads = 10;
    static constexpr int NumIters = 10;

    std::vector<std::thread> Threads;
    for (int ThreadId = 0; ThreadId < NumThreads; ++ThreadId) {
        Threads.emplace_back([ThreadId, this]() {
            // The priority depends only on the thread, not on the iteration.
            const bool UseLowPriority = ThreadId < NumThreads / 2;

            for (int Iter = 0; Iter < NumIters; ++Iter) {
                ur_queue_properties_t QueueProps{
                    UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0};
                QueueProps.flags |= UR_QUEUE_FLAG_SUBMISSION_IMMEDIATE;
                QueueProps.flags |= UseLowPriority
                                        ? UR_QUEUE_FLAG_PRIORITY_LOW
                                        : UR_QUEUE_FLAG_PRIORITY_HIGH;

                uur::raii::Queue Queue;
                ASSERT_EQ(
                    urQueueCreate(context, device, &QueueProps, Queue.ptr()),
                    UR_RESULT_SUCCESS);

                ASSERT_LE(static_cast<v2::ur_context_handle_t>(context)
                              ->commandListCache.getNumImmediateCommandLists(),
                          NumThreads * 2); // * 2 for compute and copy
            }
        });
    }

    for (std::thread &Worker : Threads) {
        Worker.join();
    }

    ASSERT_LE(static_cast<v2::ur_context_handle_t>(context)
                  ->commandListCache.getNumImmediateCommandLists(),
              NumThreads * 2);
}

0 commit comments

Comments
 (0)