From c099481ae1ac39ac979ed2e140759e315aaeca9e Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Mon, 7 Jul 2025 19:13:02 +0000 Subject: [PATCH 1/2] [SYCL][UR] Unify logging and leak checking for L0 v1 and v2 --- .../ur_win_proxy_loader.cpp | 24 +--- .../source/adapters/level_zero/adapter.cpp | 107 ------------------ .../source/adapters/level_zero/common.cpp | 23 ---- .../source/adapters/level_zero/common.hpp | 3 - 4 files changed, 1 insertion(+), 156 deletions(-) diff --git a/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp b/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp index 38868e43465c3..aeff4cafa5a04 100644 --- a/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp +++ b/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp @@ -107,27 +107,6 @@ void *&getDllHandle() { return dllHandle; } -static bool shouldLoadL0V2adapter() { - auto SyclEnv = std::getenv("SYCL_UR_USE_LEVEL_ZERO_V2"); - auto UREnv = std::getenv("UR_LOADER_USE_LEVEL_ZERO_V2"); - - try { - if (SyclEnv && std::stoi(SyclEnv) == 1) { - return true; - } - } catch (...) { - } - - try { - if (UREnv && std::stoi(UREnv) == 1) { - return true; - } - } catch (...) { - } - - return false; -} - /// Load the adapter libraries void preloadLibraries() { // Suppress system errors. 
@@ -163,8 +142,7 @@ void preloadLibraries() { getDllHandle() = loadAdapter(UR_LIBRARY_NAME(loader)); loadAdapter(UR_LIBRARY_NAME(adapter_opencl)); loadAdapter(UR_LIBRARY_NAME(adapter_level_zero)); - if (shouldLoadL0V2adapter()) - loadAdapter(UR_LIBRARY_NAME(adapter_level_zero_v2)); + loadAdapter(UR_LIBRARY_NAME(adapter_level_zero_v2)); loadAdapter(UR_LIBRARY_NAME(adapter_cuda)); loadAdapter(UR_LIBRARY_NAME(adapter_hip)); loadAdapter(UR_LIBRARY_NAME(adapter_native_cpu)); diff --git a/unified-runtime/source/adapters/level_zero/adapter.cpp b/unified-runtime/source/adapters/level_zero/adapter.cpp index 6b23d0161a4f5..429dc860ab312 100644 --- a/unified-runtime/source/adapters/level_zero/adapter.cpp +++ b/unified-runtime/source/adapters/level_zero/adapter.cpp @@ -309,12 +309,10 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() if (UrL0Debug & UR_L0_DEBUG_BASIC) { logger.setLegacySink(std::make_unique()); -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 setEnvVar("ZEL_ENABLE_LOADER_LOGGING", "1"); setEnvVar("ZEL_LOADER_LOGGING_LEVEL", "trace"); setEnvVar("ZEL_LOADER_LOG_CONSOLE", "1"); setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1"); -#endif }; if (UrL0Debug & UR_L0_DEBUG_VALIDATION) { @@ -323,18 +321,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() } PlatformCache.Compute = [](Result &result) { - static std::once_flag ZeCallCountInitialized; - try { - std::call_once(ZeCallCountInitialized, []() { - if (UrL0LeaksDebug) { - ZeCallCount = new std::map; - } - }); - } catch (...) { - result = exceptionToResult(std::current_exception()); - return; - } - uint32_t UserForcedSysManInit = 0; // Check if the user has disabled the default L0 Env initialization. 
const int UrSysManEnvInitEnabled = [&UserForcedSysManInit] { @@ -426,7 +412,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() useInitDrivers = true; } -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 if ((loader_version.major == 1 && loader_version.minor < 21) || (loader_version.major == 1 && loader_version.minor == 21 && loader_version.patch < 2)) { @@ -435,7 +420,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() "WARNING: Level Zero Loader version is older than 1.21.2. " "Please update to the latest version for API logging support.\n"); } -#endif } if (useInitDrivers) { @@ -552,97 +536,6 @@ void globalAdapterOnDemandCleanup() { } ur_result_t adapterStateTeardown() { - // Print the balance of various create/destroy native calls. - // The idea is to verify if the number of create(+) and destroy(-) calls are - // matched. - if (ZeCallCount && (UrL0LeaksDebug) != 0) { - bool LeakFound = false; - // clang-format off - // - // The format of this table is such that each row accounts for a - // specific type of objects, and all elements in the raw except the last - // one are allocating objects of that type, while the last element is known - // to deallocate objects of that type. 
- // - std::vector> CreateDestroySet = { - {"zeContextCreate", "zeContextDestroy"}, - {"zeCommandQueueCreate", "zeCommandQueueDestroy"}, - {"zeModuleCreate", "zeModuleDestroy"}, - {"zeKernelCreate", "zeKernelDestroy"}, - {"zeEventPoolCreate", "zeEventPoolDestroy"}, - {"zeCommandListCreateImmediate", "zeCommandListCreate", "zeCommandListDestroy"}, - {"zeEventCreate", "zeEventDestroy"}, - {"zeFenceCreate", "zeFenceDestroy"}, - {"zeImageCreate","zeImageViewCreateExt", "zeImageDestroy"}, - {"zeSamplerCreate", "zeSamplerDestroy"}, - {"zeMemAllocDevice", "zeMemAllocHost", "zeMemAllocShared", "zeMemFree"}, - }; - - // A sample output aimed below is this: - // ------------------------------------------------------------------------ - // zeContextCreate = 1 \---> zeContextDestroy = 1 - // zeCommandQueueCreate = 1 \---> zeCommandQueueDestroy = 1 - // zeModuleCreate = 1 \---> zeModuleDestroy = 1 - // zeKernelCreate = 1 \---> zeKernelDestroy = 1 - // zeEventPoolCreate = 1 \---> zeEventPoolDestroy = 1 - // zeCommandListCreateImmediate = 1 | - // zeCommandListCreate = 1 \---> zeCommandListDestroy = 1 ---> LEAK = 1 - // zeEventCreate = 2 \---> zeEventDestroy = 2 - // zeFenceCreate = 1 \---> zeFenceDestroy = 1 - // zeImageCreate = 0 \---> zeImageDestroy = 0 - // zeSamplerCreate = 0 \---> zeSamplerDestroy = 0 - // zeMemAllocDevice = 0 | - // zeMemAllocHost = 1 | - // zeMemAllocShared = 0 \---> zeMemFree = 1 - // - // clang-format on - // TODO: use logger to print this messages - std::cerr << "Check balance of create/destroy calls\n"; - std::cerr << "----------------------------------------------------------\n"; - std::stringstream ss; - for (const auto &Row : CreateDestroySet) { - int diff = 0; - for (auto I = Row.begin(); I != Row.end();) { - const char *ZeName = (*I).c_str(); - const auto &ZeCount = (*ZeCallCount)[*I]; - - bool First = (I == Row.begin()); - bool Last = (++I == Row.end()); - - if (Last) { - ss << " \\--->"; - diff -= ZeCount; - } else { - diff += ZeCount; - if 
(!First) { - ss << " | "; - std::cerr << ss.str() << "\n"; - ss.str(""); - ss.clear(); - } - } - ss << std::setw(30) << std::right << ZeName; - ss << " = "; - ss << std::setw(5) << std::left << ZeCount; - } - - if (diff) { - LeakFound = true; - ss << " ---> LEAK = " << diff; - } - - std::cerr << ss.str() << '\n'; - ss.str(""); - ss.clear(); - } - - ZeCallCount->clear(); - delete ZeCallCount; - ZeCallCount = nullptr; - if (LeakFound) - return UR_RESULT_ERROR_INVALID_MEM_OBJECT; - } - // Due to multiple DLLMain definitions with SYCL, register to cleanup the // Global Adapter after refcnt is 0 #if defined(_WIN32) diff --git a/unified-runtime/source/adapters/level_zero/common.cpp b/unified-runtime/source/adapters/level_zero/common.cpp index c41264fe3e861..8ed6d7e579590 100644 --- a/unified-runtime/source/adapters/level_zero/common.cpp +++ b/unified-runtime/source/adapters/level_zero/common.cpp @@ -86,8 +86,6 @@ bool setEnvVar(const char *name, const char *value) { ZeUSMImportExtension ZeUSMImport; -std::map *ZeCallCount = nullptr; - void zeParseError(ze_result_t ZeError, const char *&ErrorString) { switch (ZeError) { #define ZE_ERRCASE(ERR) \ @@ -137,31 +135,10 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString) { } // switch } -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *, const char *, bool) { return ZeResult; } -#else -ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName, - const char *ZeArgs, bool TraceError) { - UR_LOG(DEBUG, "ZE ---> {}{}", ZeName, ZeArgs); - - if (ZeResult == ZE_RESULT_SUCCESS) { - if (UrL0LeaksDebug) { - ++(*ZeCallCount)[ZeName]; - } - return ZE_RESULT_SUCCESS; - } - - if (TraceError) { - const char *ErrorString = "Unknown"; - zeParseError(ZeResult, ErrorString); - UR_LOG(ERR, "Error ({}) in {}", ErrorString, ZeName); - } - return ZeResult; -} -#endif // Specializations for various L0 structures template <> ze_structure_type_t getZeStructureType() { diff --git 
a/unified-runtime/source/adapters/level_zero/common.hpp b/unified-runtime/source/adapters/level_zero/common.hpp index 19e22de14605d..6b7e7ea4f60ab 100644 --- a/unified-runtime/source/adapters/level_zero/common.hpp +++ b/unified-runtime/source/adapters/level_zero/common.hpp @@ -371,9 +371,6 @@ class ZeUSMImportExtension { // Helper wrapper for working with USM import extension in Level Zero. extern ZeUSMImportExtension ZeUSMImport; -// This will count the calls to Level-Zero -extern std::map *ZeCallCount; - // Some opencl extensions we know are supported by all Level Zero devices. constexpr char ZE_SUPPORTED_EXTENSIONS[] = "cl_khr_il_program cl_khr_subgroups cl_intel_subgroups " From b1877aeda637f837caa3a79aa9f0ba559ea40026 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Mon, 7 Jul 2025 19:55:36 +0000 Subject: [PATCH 2/2] [SYCL][UR] Make v2 adapter default for BMG and newer Whenever we detect ANY device on the platform that is BMG or newer we will use V2, otherwise we will use V1. The default behavior can still be overridden by setting SYCL_UR_USE_LEVEL_ZERO_V2=1 to use V2 adapter or SYCL_UR_USE_LEVEL_ZERO_V2=0 to use V1 adapter. 
--- .../source/adapters/level_zero/adapter.cpp | 61 +++++++++++++++++++ .../source/loader/ur_adapter_registry.hpp | 16 ----- 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/adapter.cpp b/unified-runtime/source/adapters/level_zero/adapter.cpp index 429dc860ab312..af808e4edd5b8 100644 --- a/unified-runtime/source/adapters/level_zero/adapter.cpp +++ b/unified-runtime/source/adapters/level_zero/adapter.cpp @@ -258,6 +258,51 @@ ur_result_t adapterStateInit() { return UR_RESULT_SUCCESS; } +static bool isBMGorNewer() { + auto urResult = checkDeviceIntelGPUIpVersionOrNewer(0x05004000); + if (urResult != UR_RESULT_SUCCESS && + urResult != UR_RESULT_ERROR_UNSUPPORTED_VERSION) { + UR_LOG(ERR, "Intel GPU IP Version check failed: {}\n", urResult); + throw urResult; + } + + return urResult == UR_RESULT_SUCCESS; +} + +// returns a pair indicating whether to use the V1 adapter and a string +// indicating the reason for the decision. +static std::pair shouldUseV1Adapter() { + auto specificAdapterVersionRequested = + ur_getenv("UR_LOADER_USE_LEVEL_ZERO_V2").has_value() || + ur_getenv("SYCL_UR_USE_LEVEL_ZERO_V2").has_value(); + + auto v2Requested = getenv_tobool("UR_LOADER_USE_LEVEL_ZERO_V2", false); + v2Requested |= getenv_tobool("SYCL_UR_USE_LEVEL_ZERO_V2", false); + + std::string reason = + specificAdapterVersionRequested + ? 
"Specific adapter version requested by UR_LOADER_USE_LEVEL_ZERO_V2 " + "or SYCL_UR_USE_LEVEL_ZERO_V2" + : "Using default adapter version based on device IP version"; + + if (v2Requested) { + return {false, reason}; + } + + if (!v2Requested && specificAdapterVersionRequested) { + // v1 specifically requested + return {true, reason}; + } + + // default: only enable for devices older than BMG + return {!isBMGorNewer(), reason}; +} + +static std::pair shouldUseV2Adapter() { + auto [useV1, reason] = shouldUseV1Adapter(); + return {!useV1, reason}; +} + /* This constructor initializes the `ur_adapter_handle_t_` object and sets up the environment for Level Zero (L0) initialization. @@ -471,6 +516,22 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() return; } +#ifdef UR_ADAPTER_LEVEL_ZERO_V2 + auto [useV2, reason] = shouldUseV2Adapter(); + if (!useV2) { + UR_LOG(INFO, "Skipping L0 V2 adapter: {}", reason); + result = std::move(platforms); + return; + } +#else + auto [useV1, reason] = shouldUseV1Adapter(); + if (!useV1) { + UR_LOG(INFO, "Skipping L0 V1 adapter: {}", reason); + result = std::move(platforms); + return; + } +#endif + // Check if the user has enabled the default L0 SysMan initialization. const int UrSysmanZesinitEnable = [&UserForcedSysManInit] { const char *UrRet = std::getenv("UR_L0_ENABLE_ZESINIT_DEFAULT"); diff --git a/unified-runtime/source/loader/ur_adapter_registry.hpp b/unified-runtime/source/loader/ur_adapter_registry.hpp index 05c3c3e9ceeca..cabed246a6da2 100644 --- a/unified-runtime/source/loader/ur_adapter_registry.hpp +++ b/unified-runtime/source/loader/ur_adapter_registry.hpp @@ -355,22 +355,6 @@ class AdapterRegistry { } for (const auto &adapterName : adapterNames) { - // Skip legacy L0 adapter if the v2 adapter is requested, and vice versa. 
- if (std::string(adapterName).find("level_zero") != std::string::npos) { - auto v2Requested = getenv_tobool("UR_LOADER_USE_LEVEL_ZERO_V2", false); - v2Requested |= getenv_tobool("SYCL_UR_USE_LEVEL_ZERO_V2", false); - auto v2Adapter = - std::string(adapterName).find("v2") != std::string::npos; - - if (v2Requested != v2Adapter) { - UR_LOG(INFO, "The adapter '{}' is skipped because {} {}.", - adapterName, - "UR_LOADER_USE_LEVEL_ZERO_V2 or SYCL_UR_USE_LEVEL_ZERO_V2", - v2Requested ? "is set" : "is not set"); - continue; - } - } - std::vector loadPaths; // Adapter search order: