Skip to content

Commit c5d8106

Browse files
committed
[L0 v2] Make L0 v2 implementation a separate adapter
Initially, L0 v2 adapter was supposed to reside in a separate namespace but be a part of legacy L0 adapter (with runtime option to switch between executing on legacy or v2). However, this turns out to require a lot of changes in the legacy code to allow for function dispatching to legacy/v2 implementations of queue, event, etc. This approach allows us to keep the implementations separate while still reusing files when appropriate (e.g. for adapter.cpp or platform.cpp).
1 parent 84f5e70 commit c5d8106

File tree

48 files changed

+2035
-146
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+2035
-146
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ option(UR_BUILD_ADAPTER_CUDA "Build the CUDA adapter" OFF)
5151
option(UR_BUILD_ADAPTER_HIP "Build the HIP adapter" OFF)
5252
option(UR_BUILD_ADAPTER_NATIVE_CPU "Build the Native-CPU adapter" OFF)
5353
option(UR_BUILD_ADAPTER_ALL "Build all currently supported adapters" OFF)
54+
option(UR_BUILD_ADAPTER_L0_V2 "Build the (experimental) Level-Zero v2 adapter" OFF)
5455
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
5556
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
5657
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)

scripts/generate_code.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ def generate_level_zero_queue_api(path, section, namespace, tags, version, specs
465465

466466
name = "queue_api"
467467
filename = "queue_api.cpp"
468-
layer_dstpath = os.path.join(path, "adapters/level_zero")
468+
layer_dstpath = os.path.join(path, "adapters", "level_zero", "v2")
469469
os.makedirs(layer_dstpath, exist_ok=True)
470470
fout = os.path.join(layer_dstpath, filename)
471471

source/adapters/level_zero/CMakeLists.txt

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,6 @@ add_ur_adapter(${TARGET_NAME}
113113
${CMAKE_CURRENT_SOURCE_DIR}/queue_api.hpp
114114
${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp
115115
${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp
116-
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
117-
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_factory.hpp
118-
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
119-
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
120116
${CMAKE_CURRENT_SOURCE_DIR}/ur_level_zero.cpp
121117
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
122118
${CMAKE_CURRENT_SOURCE_DIR}/context.cpp
@@ -136,9 +132,6 @@ add_ur_adapter(${TARGET_NAME}
136132
${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp
137133
${CMAKE_CURRENT_SOURCE_DIR}/image.cpp
138134
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
139-
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
140-
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
141-
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
142135
)
143136

144137
if(NOT WIN32)
@@ -175,3 +168,81 @@ target_include_directories(${TARGET_NAME} PRIVATE
175168
"${CMAKE_CURRENT_SOURCE_DIR}/../../"
176169
LevelZeroLoader-Headers
177170
)
171+
172+
if(UR_BUILD_ADAPTER_L0_V2)
173+
add_ur_adapter(ur_adapter_level_zero_v2
174+
SHARED
175+
# sources shared with legacy adapter
176+
${CMAKE_CURRENT_SOURCE_DIR}/adapter.hpp
177+
${CMAKE_CURRENT_SOURCE_DIR}/common.hpp
178+
${CMAKE_CURRENT_SOURCE_DIR}/device.hpp
179+
${CMAKE_CURRENT_SOURCE_DIR}/platform.hpp
180+
${CMAKE_CURRENT_SOURCE_DIR}/adapter.cpp
181+
${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
182+
${CMAKE_CURRENT_SOURCE_DIR}/device.cpp
183+
${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp
184+
${CMAKE_CURRENT_SOURCE_DIR}/platform.cpp
185+
${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp
186+
# v2-only sources
187+
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp
188+
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp
189+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.hpp
190+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.hpp
191+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.hpp
192+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.hpp
193+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider.hpp
194+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.hpp
195+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.hpp
196+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp
197+
${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
198+
${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp
199+
${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp
200+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.cpp
201+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.cpp
202+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_counter.cpp
203+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event_provider_normal.cpp
204+
${CMAKE_CURRENT_SOURCE_DIR}/v2/event.cpp
205+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_api.cpp
206+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_create.cpp
207+
${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.cpp
208+
)
209+
210+
# api.cpp contains NOT_SUPPORTED functions-only
211+
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp
212+
PROPERTIES APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-unused-parameter")
213+
214+
if(NOT WIN32)
215+
target_sources(ur_adapter_level_zero_v2
216+
PRIVATE
217+
${CMAKE_CURRENT_SOURCE_DIR}/adapter_lib_init_linux.cpp
218+
)
219+
endif()
220+
221+
# TODO: fix level_zero adapter conversion warnings
222+
target_compile_options(ur_adapter_level_zero_v2 PRIVATE
223+
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
224+
)
225+
226+
set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
227+
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
228+
SOVERSION "${PROJECT_VERSION_MAJOR}"
229+
)
230+
231+
if (WIN32)
232+
# 0x800: Search for the DLL only in the System32 folder
233+
target_link_options(ur_adapter_level_zero_v2 PUBLIC /DEPENDENTLOADFLAG:0x800)
234+
endif()
235+
236+
target_link_libraries(ur_adapter_level_zero_v2 PRIVATE
237+
${PROJECT_NAME}::headers
238+
${PROJECT_NAME}::common
239+
${PROJECT_NAME}::umf
240+
LevelZeroLoader
241+
LevelZeroLoader-Headers
242+
)
243+
244+
target_include_directories(ur_adapter_level_zero_v2 PRIVATE
245+
"${CMAKE_CURRENT_SOURCE_DIR}/../.."
246+
LevelZeroLoader-Headers
247+
)
248+
endif()

source/adapters/level_zero/context.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
#include "queue.hpp"
1919
#include "ur_level_zero.hpp"
2020

21-
#include "v2/context.hpp"
22-
2321
UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
2422
uint32_t DeviceCount, ///< [in] the number of devices given in phDevices
2523
const ur_device_handle_t
@@ -38,7 +36,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate(
3836
ZE2UR_CALL(zeContextCreate, (Platform->ZeDriver, &ContextDesc, &ZeContext));
3937
try {
4038
ur_context_handle_t_ *Context =
41-
new v2::ur_context_handle_t_(ZeContext, DeviceCount, Devices, true);
39+
new ur_context_handle_t_(ZeContext, DeviceCount, Devices, true);
4240

4341
Context->initialize();
4442
*RetContext = reinterpret_cast<ur_context_handle_t>(Context);

source/adapters/level_zero/queue.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
#include "ur_util.hpp"
2525
#include "ze_api.h"
2626

27-
#include "v2/queue_factory.hpp"
28-
2927
// Hard limit for the event completion batches.
3028
static const uint64_t CompletionBatchesMax = [] {
3129
// Default value chosen empirically to maximize the number of asynchronous
@@ -501,12 +499,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(
501499

502500
UR_ASSERT(Context->isValidDevice(Device), UR_RESULT_ERROR_INVALID_DEVICE);
503501

504-
// optimized path for immediate, in-order command lists
505-
if (v2::shouldUseQueueV2(Device, Flags)) {
506-
*Queue = v2::createQueue(Context, Device, Props);
507-
return UR_RESULT_SUCCESS;
508-
}
509-
510502
// Create placeholder queues in the compute queue group.
511503
// Actual L0 queues will be created at first use.
512504
std::vector<ze_command_queue_handle_t> ZeComputeCommandQueues(

source/adapters/level_zero/v2/README.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22

33
This is the home directory for L0 v2 adapter sources. This is a redesigned version of the L0 adapter that focuses on maximizing the performance of each queue mode individually (immediate/batched, in-order/out-of-order).
44

5-
L0 v2 adapter can be enabled by setting `UR_L0_USE_QUEUE_V2=1` env variable. If the variable is not set, legacy path will be used.
5+
L0 v2 adapter can be enabled by passing the `UR_BUILD_ADAPTER_L0_V2=1` option to cmake. When enabled, `libur_adapter_level_zero_v2.[so|dll]` will be created.
66

77
# Code structure
88

9-
v2 adapter only rewrites certain functions (mostly urEnqueue* functions) while reusing the rest. `ur_queue_handle_t` has become an abstract class and each enqueue function a virtual function.
9+
The v2 adapter is a standalone adapter but reuses some logic from the legacy L0 adapter implementation - most notably: adapter.cpp, platform.cpp, device.cpp
1010

11-
Legacy enqeue path is implemented in `ur_queue_handle_legacy_t` which inherits from `ur_queue_handle_t`. For new, optimized path, each queue mode will be implemented as a separate queue class (e.g. `v2::ur_queue_immediate_in_order_t`) inheriting from `ur_queue_handle_t`.
11+
Each queue mode will be implemented as a separate queue class (e.g. `v2::ur_queue_immediate_in_order_t`) inheriting from `ur_queue_handle_t` which is an abstract class
12+
in v2 adapter.
1213

1314
`ur_queue_handle_t` is auto-generated by `make generate-code` - for every API function that accepts `ur_queue_handle_t` as a first parameter, new pure virtual method is created. The API function is then
14-
auto-implemented (see ../queue_api.cpp) by dispatching to that virtual method. Developer is only responsbile for implementing that virtual function for every queue base class.
15+
auto-implemented (see ./queue_api.cpp) by dispatching to that virtual method. Developer is only responsible for implementing that virtual function for every queue base class.

0 commit comments

Comments
 (0)