From 9da95681cb411bcb10f21566a8a341e10f0e8a4e Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 25 Mar 2024 10:03:01 +0000 Subject: [PATCH 01/21] Merge pull request #1461 from Bensuo/coverity_L0_update [Cmd-Buf][L0] Fix Coverity unsigned comparison report --- source/adapters/level_zero/command_buffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 36cf76d111..d38bac92f6 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1030,7 +1030,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( UR_ASSERT(Command, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(Command->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(CommandDesc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(CommandDesc->newWorkDim >= 0 && CommandDesc->newWorkDim <= 3, + UR_ASSERT(CommandDesc->newWorkDim <= 3, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); // Lock command, kernel and command buffer for update. From f67c6e43969ee90abd3ff41af13c9d90a2fba679 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 25 Mar 2024 14:45:32 +0000 Subject: [PATCH 02/21] Merge pull request #1468 from pbalcer/l0-recursive-event-deadlock [L0] fix a deadlock on a recursive event rwlock --- source/adapters/level_zero/event.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index c9d1c7d6b4..7f611208ff 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -1307,6 +1307,15 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( } } + ur_command_list_ptr_t CommandList; + if (Queue && Queue->Device != CurQueue->Device) { + // Get a command list prior to acquiring an event lock. + // This prevents a potential deadlock with recursive + // event locks. 
+ UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList, + false, true)); + } + std::shared_lock Lock(EventList[I]->Mutex); if (Queue && Queue->Device != CurQueue->Device && @@ -1316,10 +1325,6 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( bool IsInternal = true; bool IsMultiDevice = true; - ur_command_list_ptr_t CommandList{}; - UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList, - false, true)); - UR_CALL(createEventAndAssociateQueue( Queue, &MultiDeviceEvent, EventList[I]->CommandType, CommandList, IsInternal, IsMultiDevice)); From a75f7d099b6a2c5df3e03ec4d9dd503c320d5aa5 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Wed, 27 Mar 2024 11:28:34 +0000 Subject: [PATCH 03/21] Merge pull request #1363 from hdelan/refactor-device-initialization [CUDA] Refactor device initialization --- source/adapters/cuda/device.hpp | 32 +++++++++++-------------------- source/adapters/cuda/enqueue.cpp | 16 ++++++---------- source/adapters/cuda/kernel.cpp | 15 ++++----------- source/adapters/cuda/platform.cpp | 16 ---------------- 4 files changed, 21 insertions(+), 58 deletions(-) diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp index 08a1b5852a..4edb4d9c7a 100644 --- a/source/adapters/cuda/device.hpp +++ b/source/adapters/cuda/device.hpp @@ -27,8 +27,6 @@ struct ur_device_handle_t_ { size_t MaxWorkItemSizes[MaxWorkItemDimensions]; size_t MaxWorkGroupSize{0}; size_t MaxAllocSize{0}; - int MaxBlockDimY{0}; - int MaxBlockDimZ{0}; int MaxRegsPerBlock{0}; int MaxCapacityLocalMem{0}; int MaxChosenLocalMem{0}; @@ -40,10 +38,6 @@ struct ur_device_handle_t_ { : CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), RefCount{1}, Platform(platform) { - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, cuDevice)); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, cuDevice)); UR_CHECK_ERROR(cuDeviceGetAttribute( &MaxRegsPerBlock, 
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, cuDevice)); @@ -51,6 +45,14 @@ struct ur_device_handle_t_ { &MaxCapacityLocalMem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN, cuDevice)); + UR_CHECK_ERROR(urDeviceGetInfo(this, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, + sizeof(MaxWorkItemSizes), MaxWorkItemSizes, + nullptr)); + + UR_CHECK_ERROR(urDeviceGetInfo(this, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, + sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, + nullptr)); + // Set local mem max size if env var is present static const char *LocalMemSizePtrUR = std::getenv("UR_CUDA_MAX_LOCAL_MEM_SIZE"); @@ -91,24 +93,12 @@ struct ur_device_handle_t_ { uint64_t getElapsedTime(CUevent) const; - void saveMaxWorkItemSizes(size_t Size, - size_t *SaveMaxWorkItemSizes) noexcept { - memcpy(MaxWorkItemSizes, SaveMaxWorkItemSizes, Size); - }; - - void saveMaxWorkGroupSize(int Value) noexcept { MaxWorkGroupSize = Value; }; - - void getMaxWorkItemSizes(size_t RetSize, - size_t *RetMaxWorkItemSizes) const noexcept { - memcpy(RetMaxWorkItemSizes, MaxWorkItemSizes, RetSize); - }; + size_t getMaxWorkItemSizes(int index) const noexcept { + return MaxWorkItemSizes[index]; + } size_t getMaxWorkGroupSize() const noexcept { return MaxWorkGroupSize; }; - size_t getMaxBlockDimY() const noexcept { return MaxBlockDimY; }; - - size_t getMaxBlockDimZ() const noexcept { return MaxBlockDimZ; }; - size_t getMaxRegsPerBlock() const noexcept { return MaxRegsPerBlock; }; size_t getMaxAllocSize() const noexcept { return MaxAllocSize; }; diff --git a/source/adapters/cuda/enqueue.cpp b/source/adapters/cuda/enqueue.cpp index d44cb15709..7c1de98837 100644 --- a/source/adapters/cuda/enqueue.cpp +++ b/source/adapters/cuda/enqueue.cpp @@ -140,7 +140,6 @@ ur_result_t setCuMemAdvise(CUdeviceptr DevPtr, size_t Size, // dimension. 
void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock, const size_t *GlobalWorkSize, const uint32_t WorkDim, - const size_t MaxThreadsPerBlock[3], ur_kernel_handle_t Kernel) { assert(ThreadsPerBlock != nullptr); assert(GlobalWorkSize != nullptr); @@ -154,14 +153,14 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock, } size_t MaxBlockDim[3]; - MaxBlockDim[0] = MaxThreadsPerBlock[0]; - MaxBlockDim[1] = Device->getMaxBlockDimY(); - MaxBlockDim[2] = Device->getMaxBlockDimZ(); + MaxBlockDim[0] = Device->getMaxWorkItemSizes(0); + MaxBlockDim[1] = Device->getMaxWorkItemSizes(1); + MaxBlockDim[2] = Device->getMaxWorkItemSizes(2); int MinGrid, MaxBlockSize; UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize( &MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(), - MaxThreadsPerBlock[0])); + MaxBlockDim[0])); roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized, MaxBlockDim, MaxBlockSize); @@ -197,7 +196,6 @@ setKernelParams(const ur_context_handle_t Context, size_t (&BlocksPerGrid)[3]) { ur_result_t Result = UR_RESULT_SUCCESS; size_t MaxWorkGroupSize = 0u; - size_t MaxThreadsPerBlock[3] = {}; bool ProvidedLocalWorkGroupSize = LocalWorkSize != nullptr; uint32_t LocalSize = Kernel->getLocalSize(); @@ -207,8 +205,6 @@ setKernelParams(const ur_context_handle_t Context, { size_t *ReqdThreadsPerBlock = Kernel->ReqdThreadsPerBlock; MaxWorkGroupSize = Device->getMaxWorkGroupSize(); - Device->getMaxWorkItemSizes(sizeof(MaxThreadsPerBlock), - MaxThreadsPerBlock); if (ProvidedLocalWorkGroupSize) { auto IsValid = [&](int Dim) { @@ -216,7 +212,7 @@ setKernelParams(const ur_context_handle_t Context, LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim]) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; - if (LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim]) + if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim)) return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE; // Checks that local work sizes are a divisor of the 
global work sizes // which includes that the local work sizes are neither larger than @@ -245,7 +241,7 @@ setKernelParams(const ur_context_handle_t Context, } } else { guessLocalWorkSize(Device, ThreadsPerBlock, GlobalWorkSize, WorkDim, - MaxThreadsPerBlock, Kernel); + Kernel); } } diff --git a/source/adapters/cuda/kernel.cpp b/source/adapters/cuda/kernel.cpp index 7eb9f7ed8c..c9334add15 100644 --- a/source/adapters/cuda/kernel.cpp +++ b/source/adapters/cuda/kernel.cpp @@ -68,14 +68,6 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, case UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE: { size_t GlobalWorkSize[3] = {0, 0, 0}; - int MaxBlockDimX{0}, MaxBlockDimY{0}, MaxBlockDimZ{0}; - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimX, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimY, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, hDevice->get())); - UR_CHECK_ERROR(cuDeviceGetAttribute( - &MaxBlockDimZ, CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, hDevice->get())); - int MaxGridDimX{0}, MaxGridDimY{0}, MaxGridDimZ{0}; UR_CHECK_ERROR(cuDeviceGetAttribute( &MaxGridDimX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, hDevice->get())); @@ -84,9 +76,10 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, UR_CHECK_ERROR(cuDeviceGetAttribute( &MaxGridDimZ, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, hDevice->get())); - GlobalWorkSize[0] = MaxBlockDimX * MaxGridDimX; - GlobalWorkSize[1] = MaxBlockDimY * MaxGridDimY; - GlobalWorkSize[2] = MaxBlockDimZ * MaxGridDimZ; + GlobalWorkSize[0] = hDevice->getMaxWorkItemSizes(0) * MaxGridDimX; + GlobalWorkSize[1] = hDevice->getMaxWorkItemSizes(1) * MaxGridDimY; + GlobalWorkSize[2] = hDevice->getMaxWorkItemSizes(2) * MaxGridDimZ; + return ReturnValue(GlobalWorkSize, 3); } case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { diff --git a/source/adapters/cuda/platform.cpp b/source/adapters/cuda/platform.cpp index f37af1149b..4c730f997a 100644 --- 
a/source/adapters/cuda/platform.cpp +++ b/source/adapters/cuda/platform.cpp @@ -95,22 +95,6 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, Platforms[i].Devices.emplace_back(new ur_device_handle_t_{ Device, Context, EvBase, &Platforms[i]}); - { - const auto &Dev = Platforms[i].Devices.back().get(); - size_t MaxWorkGroupSize = 0u; - size_t MaxThreadsPerBlock[3] = {}; - UR_CHECK_ERROR(urDeviceGetInfo( - Dev, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, - sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr)); - - UR_CHECK_ERROR(urDeviceGetInfo( - Dev, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(MaxWorkGroupSize), &MaxWorkGroupSize, nullptr)); - - Dev->saveMaxWorkItemSizes(sizeof(MaxThreadsPerBlock), - MaxThreadsPerBlock); - Dev->saveMaxWorkGroupSize(MaxWorkGroupSize); - } } } catch (const std::bad_alloc &) { // Signal out-of-memory situation From 20102fdfa0b1a1042eea2b0d7ee9a2bcc8fda84d Mon Sep 17 00:00:00 2001 From: aarongreig Date: Thu, 28 Mar 2024 10:25:01 +0000 Subject: [PATCH 04/21] Merge pull request #1467 from aarongreig/aaron/eventExecStatusError Add new ERROR status for events, and corresponding error code. --- include/ur_api.h | 52 +++++++++++++++++++++++++++++++ include/ur_print.hpp | 6 ++++ scripts/core/common.yml | 2 ++ scripts/core/enqueue.yml | 52 +++++++++++++++++++++++++++++++ scripts/core/event.yml | 4 +++ source/adapters/opencl/common.cpp | 2 ++ source/adapters/opencl/event.cpp | 4 +++ source/loader/ur_libapi.cpp | 50 +++++++++++++++++++++++++++++ source/ur_api.cpp | 50 +++++++++++++++++++++++++++++ 9 files changed, 222 insertions(+) diff --git a/include/ur_api.h b/include/ur_api.h index 19ba599c7d..ce47d528aa 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -492,6 +492,7 @@ typedef enum ur_result_t { UR_RESULT_ERROR_ADAPTER_SPECIFIC = 67, ///< An adapter specific warning/error has been reported and can be ///< retrieved via the urPlatformGetLastError entry point. 
UR_RESULT_ERROR_LAYER_NOT_PRESENT = 68, ///< A requested layer was not found by the loader. + UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS = 69, ///< An event in the provided wait list has ::UR_EVENT_STATUS_ERROR. UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid @@ -5603,6 +5604,7 @@ typedef enum ur_event_status_t { UR_EVENT_STATUS_RUNNING = 1, ///< Command is running UR_EVENT_STATUS_SUBMITTED = 2, ///< Command is submitted UR_EVENT_STATUS_QUEUED = 3, ///< Command is queued + UR_EVENT_STATUS_ERROR = 4, ///< Command was abnormally terminated /// @cond UR_EVENT_STATUS_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -5734,6 +5736,8 @@ urEventGetProfilingInfo( /// + `NULL == phEventWaitList` /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `numEvents == 0` +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_EVENT /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY @@ -5949,6 +5953,8 @@ urEventSetCallback( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE @@ -6005,6 +6011,8 @@ urEnqueueKernelLaunch( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6049,6 +6057,8 @@ urEnqueueEventsWait( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6092,6 +6102,8 @@ urEnqueueEventsWaitWithBarrier( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -6141,6 +6153,8 @@ urEnqueueMemBufferRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -6193,6 +6207,8 @@ urEnqueueMemBufferWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.width == 0` @@ -6259,6 +6275,8 @@ urEnqueueMemBufferReadRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.width == 0` @@ -6318,6 +6336,8 @@ urEnqueueMemBufferWriteRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `srcOffset + size` results in an out-of-bounds access. @@ -6364,6 +6384,8 @@ urEnqueueMemBufferCopy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -6422,6 +6444,8 @@ urEnqueueMemBufferCopyRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `patternSize == 0 || size == 0` @@ -6477,6 +6501,8 @@ urEnqueueMemBufferFill( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -6530,6 +6556,8 @@ urEnqueueMemImageRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -6577,6 +6605,8 @@ urEnqueueMemImageWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -6663,6 +6693,8 @@ typedef enum ur_usm_migration_flag_t { /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -6711,6 +6743,8 @@ urEnqueueMemBufferMap( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6753,6 +6787,8 @@ urEnqueueMemUnmap( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6795,6 +6831,8 @@ urEnqueueUSMFill( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6842,6 +6880,8 @@ urEnqueueUSMMemcpy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6924,6 +6964,8 @@ urEnqueueUSMAdvise( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6973,6 +7015,8 @@ urEnqueueUSMFill2D( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -7016,6 +7060,8 @@ urEnqueueUSMMemcpy2D( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. @@ -7053,6 +7099,8 @@ urEnqueueDeviceGlobalVariableWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. @@ -7089,6 +7137,8 @@ urEnqueueDeviceGlobalVariableRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( ur_queue_handle_t hQueue, ///< [in] a valid host command-queue in which the read command @@ -7127,6 +7177,8 @@ urEnqueueReadHostPipe( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( ur_queue_handle_t hQueue, ///< [in] a valid host command-queue in which the write command diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 649f9f63cb..b2ba6ddcab 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -1539,6 +1539,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_result_t value) { case UR_RESULT_ERROR_LAYER_NOT_PRESENT: os << "UR_RESULT_ERROR_LAYER_NOT_PRESENT"; break; + case UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + os << "UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS"; + break; case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP"; break; @@ -8673,6 +8676,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_event_status_t value) case UR_EVENT_STATUS_QUEUED: os << "UR_EVENT_STATUS_QUEUED"; break; + case UR_EVENT_STATUS_ERROR: + os << "UR_EVENT_STATUS_ERROR"; + break; default: os << "unknown enumerator"; break; diff --git a/scripts/core/common.yml b/scripts/core/common.yml index 1825461b78..be95b9aa45 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -274,6 +274,8 @@ etors: via the urPlatformGetLastError entry point." - name: ERROR_LAYER_NOT_PRESENT desc: "A requested layer was not found by the loader." + - name: ERROR_IN_EVENT_LIST_EXEC_STATUS + desc: "An event in the provided wait list has $X_EVENT_STATUS_ERROR." - name: ERROR_UNKNOWN value: "0x7ffffffe" desc: "Unknown or internal error" diff --git a/scripts/core/enqueue.yml b/scripts/core/enqueue.yml index 7af03074c9..e91330ca77 100644 --- a/scripts/core/enqueue.yml +++ b/scripts/core/enqueue.yml @@ -60,6 +60,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." 
- $X_RESULT_ERROR_INVALID_WORK_DIMENSION - $X_RESULT_ERROR_INVALID_WORK_GROUP_SIZE - $X_RESULT_ERROR_INVALID_VALUE @@ -100,6 +102,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_VALUE - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -139,6 +143,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_VALUE - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -190,6 +196,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "If `offset + size` results in an out-of-bounds access." @@ -243,6 +251,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "If `offset + size` results in an out-of-bounds access." 
@@ -312,6 +322,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`region.width == 0 || region.height == 0 || region.width == 0`" @@ -388,6 +400,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`region.width == 0 || region.height == 0 || region.width == 0`" @@ -446,6 +460,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "If `srcOffset + size` results in an out-of-bounds access." @@ -510,6 +526,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`region.width == 0 || region.height == 0 || region.depth == 0`" @@ -569,6 +587,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`patternSize == 0 || size == 0`" @@ -633,6 +653,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`region.width == 0 || region.height == 0 || region.depth == 0`" @@ -692,6 +714,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`region.width == 0 || region.height == 0 || region.depth == 0`" @@ -743,6 +767,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "`region.width == 0 || region.height == 0 || region.depth == 0`" @@ -797,6 +823,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -877,6 +905,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_INVALID_SIZE: - "If `offset + size` results in an out-of-bounds access." @@ -942,6 +972,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -982,6 +1014,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -1032,6 +1066,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -1079,6 +1115,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -1126,6 +1164,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -1219,6 +1259,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." 
- $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -1279,6 +1321,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_MEM_OBJECT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES @@ -1328,6 +1372,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." --- #-------------------------------------------------------------------------- type: function desc: "Enqueue a command to read data from a device global variable to the host." @@ -1373,6 +1419,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." --- #-------------------------------------------------------------------------- type: function desc: "Enqueue a command to read from a pipe to the host." @@ -1419,6 +1467,8 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." --- #-------------------------------------------------------------------------- type: function desc: "Enqueue a command to write data from the host to a pipe." 
@@ -1465,3 +1515,5 @@ returns: - "`phEventWaitList == NULL && numEventsInWaitList > 0`" - "`phEventWaitList != NULL && numEventsInWaitList == 0`" - "If event objects in phEventWaitList are not valid events." + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." diff --git a/scripts/core/event.yml b/scripts/core/event.yml index ba0ae968c8..4e8be75cf4 100644 --- a/scripts/core/event.yml +++ b/scripts/core/event.yml @@ -87,6 +87,8 @@ etors: desc: Command is submitted - name: QUEUED desc: Command is queued + - name: ERROR + desc: Command was abnormally terminated --- #-------------------------------------------------------------------------- type: enum desc: "Event query information type" @@ -211,6 +213,8 @@ params: returns: - $X_RESULT_ERROR_INVALID_VALUE: - "`numEvents == 0`" + - $X_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: + - "An event in `phEventWaitList` has $X_EVENT_STATUS_ERROR." - $X_RESULT_ERROR_INVALID_EVENT - $X_RESULT_ERROR_INVALID_CONTEXT - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 4fe8bed408..acce51939b 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -81,6 +81,8 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP; case CL_INVALID_SYNC_POINT_WAIT_LIST_KHR: return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: + return UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index d180cfb097..6eca3b052b 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -181,6 +181,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, const auto param_value_int = static_cast(pPropValue); if (*param_value_int == 
UR_EVENT_STATUS_QUEUED) { *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } else if (*param_value_int < 0) { + // This can contain a negative return code to signify that the command + // terminated in an unexpected way. + *param_value_int = UR_EVENT_STATUS_ERROR; } } } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 2165065097..d5092ffe1c 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -4521,6 +4521,8 @@ ur_result_t UR_APICALL urEventGetProfilingInfo( /// + `NULL == phEventWaitList` /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `numEvents == 0` +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_EVENT /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY @@ -4752,6 +4754,8 @@ ur_result_t UR_APICALL urEventSetCallback( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE @@ -4824,6 +4828,8 @@ ur_result_t UR_APICALL urEnqueueKernelLaunch( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4878,6 +4884,8 @@ ur_result_t UR_APICALL urEnqueueEventsWait( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4933,6 +4941,8 @@ ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -4995,6 +5005,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -5062,6 +5074,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.width == 0` @@ -5148,6 +5162,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.width == 0` @@ -5229,6 +5245,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `srcOffset + size` results in an out-of-bounds access. @@ -5290,6 +5308,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -5369,6 +5389,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `patternSize == 0 || size == 0` @@ -5438,6 +5460,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -5506,6 +5530,8 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -5570,6 +5596,8 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -5645,6 +5673,8 @@ ur_result_t UR_APICALL urEnqueueMemImageCopy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -5706,6 +5736,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferMap( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5760,6 +5792,8 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5816,6 +5850,8 @@ ur_result_t UR_APICALL urEnqueueUSMFill( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5876,6 +5912,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5980,6 +6018,8 @@ ur_result_t UR_APICALL urEnqueueUSMAdvise( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6045,6 +6085,8 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -6104,6 +6146,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. ur_program_handle_t @@ -6157,6 +6201,8 @@ ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. ur_program_handle_t @@ -6209,6 +6255,8 @@ ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueReadHostPipe( ur_queue_handle_t hQueue, ///< [in] a valid host command-queue in which the read command @@ -6263,6 +6311,8 @@ ur_result_t UR_APICALL urEnqueueReadHostPipe( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueWriteHostPipe( ur_queue_handle_t hQueue, ///< [in] a valid host command-queue in which the write command diff --git a/source/ur_api.cpp b/source/ur_api.cpp index cbc51a437e..b65a2627fe 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -3832,6 +3832,8 @@ ur_result_t UR_APICALL urEventGetProfilingInfo( /// + `NULL == phEventWaitList` /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `numEvents == 0` +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_EVENT /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY @@ -4024,6 +4026,8 @@ ur_result_t UR_APICALL urEventSetCallback( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_WORK_DIMENSION /// - ::UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE /// - ::UR_RESULT_ERROR_INVALID_VALUE @@ -4088,6 +4092,8 @@ ur_result_t UR_APICALL urEnqueueKernelLaunch( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4136,6 +4142,8 @@ ur_result_t UR_APICALL urEnqueueEventsWait( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4183,6 +4191,8 @@ ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -4237,6 +4247,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -4296,6 +4308,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.width == 0` @@ -4372,6 +4386,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferReadRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.width == 0` @@ -4443,6 +4459,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `srcOffset + size` results in an out-of-bounds access. @@ -4495,6 +4513,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -4564,6 +4584,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `patternSize == 0 || size == 0` @@ -4624,6 +4646,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferFill( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -4684,6 +4708,8 @@ ur_result_t UR_APICALL urEnqueueMemImageRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -4739,6 +4765,8 @@ ur_result_t UR_APICALL urEnqueueMemImageWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + `region.width == 0 || region.height == 0 || region.depth == 0` @@ -4806,6 +4834,8 @@ ur_result_t UR_APICALL urEnqueueMemImageCopy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_INVALID_SIZE /// + If `offset + size` results in an out-of-bounds access. @@ -4859,6 +4889,8 @@ ur_result_t UR_APICALL urEnqueueMemBufferMap( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4906,6 +4938,8 @@ ur_result_t UR_APICALL urEnqueueMemUnmap( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -4955,6 +4989,8 @@ ur_result_t UR_APICALL urEnqueueUSMFill( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5008,6 +5044,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5099,6 +5137,8 @@ ur_result_t UR_APICALL urEnqueueUSMAdvise( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. /// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5157,6 +5197,8 @@ ur_result_t UR_APICALL urEnqueueUSMFill2D( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. 
/// - ::UR_RESULT_ERROR_INVALID_MEM_OBJECT /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -5208,6 +5250,8 @@ ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. ur_program_handle_t @@ -5252,6 +5296,8 @@ ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( ur_queue_handle_t hQueue, ///< [in] handle of the queue to submit to. ur_program_handle_t @@ -5295,6 +5341,8 @@ ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. +/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueReadHostPipe( ur_queue_handle_t hQueue, ///< [in] a valid host command-queue in which the read command @@ -5342,6 +5390,8 @@ ur_result_t UR_APICALL urEnqueueReadHostPipe( /// + `phEventWaitList == NULL && numEventsInWaitList > 0` /// + `phEventWaitList != NULL && numEventsInWaitList == 0` /// + If event objects in phEventWaitList are not valid events. 
+/// - ::UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS +/// + An event in `phEventWaitList` has ::UR_EVENT_STATUS_ERROR. ur_result_t UR_APICALL urEnqueueWriteHostPipe( ur_queue_handle_t hQueue, ///< [in] a valid host command-queue in which the write command From 40b21ccd5fa9b636f56bbaed8d66cacc5dd32454 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 1 Apr 2024 10:22:25 +0100 Subject: [PATCH 05/21] Merge pull request #1460 from hdelan/remove-unused-prototypes [HIP][CUDA] Remove function prototypes from enqueue.hpp --- source/adapters/cuda/enqueue.hpp | 5 ----- source/adapters/hip/enqueue.hpp | 5 ----- 2 files changed, 10 deletions(-) diff --git a/source/adapters/cuda/enqueue.hpp b/source/adapters/cuda/enqueue.hpp index 64c590f742..c925a27295 100644 --- a/source/adapters/cuda/enqueue.hpp +++ b/source/adapters/cuda/enqueue.hpp @@ -17,11 +17,6 @@ ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, CUstream Stream, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList); -void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock, - const size_t *GlobalWorkSize, const uint32_t WorkDim, - const size_t MaxThreadsPerBlock[3], - ur_kernel_handle_t Kernel, uint32_t LocalSize); - bool hasExceededMaxRegistersPerBlock(ur_device_handle_t Device, ur_kernel_handle_t Kernel, size_t BlockSize); diff --git a/source/adapters/hip/enqueue.hpp b/source/adapters/hip/enqueue.hpp index c84b47d479..a1f86b3678 100644 --- a/source/adapters/hip/enqueue.hpp +++ b/source/adapters/hip/enqueue.hpp @@ -17,11 +17,6 @@ ur_result_t enqueueEventsWait(ur_queue_handle_t CommandQueue, hipStream_t Stream, uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList); -void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock, - const size_t *GlobalWorkSize, const uint32_t WorkDim, - const size_t MaxThreadsPerBlock[3], - ur_kernel_handle_t Kernel, uint32_t LocalSize); - ur_result_t setKernelParams(const ur_device_handle_t Device, const 
uint32_t WorkDim, const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize, From 74a8f047563d3528fccbf87cd729ceb3dcad6e1d Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 1 Apr 2024 15:16:30 +0100 Subject: [PATCH 06/21] Merge pull request #1485 from aarongreig/aaron/addDeviceNotAvailableErrC Add UR_ERROR_DEVICE_NOT_AVAILABLE and appropriate translation for CL. --- include/ur_api.h | 1 + include/ur_print.hpp | 3 +++ scripts/core/common.yml | 2 ++ source/adapters/opencl/common.cpp | 2 ++ 4 files changed, 8 insertions(+) diff --git a/include/ur_api.h b/include/ur_api.h index ce47d528aa..15ffaf9f1e 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -493,6 +493,7 @@ typedef enum ur_result_t { ///< retrieved via the urPlatformGetLastError entry point. UR_RESULT_ERROR_LAYER_NOT_PRESENT = 68, ///< A requested layer was not found by the loader. UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS = 69, ///< An event in the provided wait list has ::UR_EVENT_STATUS_ERROR. + UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE = 70, ///< Device in question has `::UR_DEVICE_INFO_AVAILABLE == false` UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP = 0x1000, ///< Invalid Command-Buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP = 0x1001, ///< Sync point is not valid for the command-buffer UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP = 0x1002, ///< Sync point wait list is invalid diff --git a/include/ur_print.hpp b/include/ur_print.hpp index b2ba6ddcab..a773af3166 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -1542,6 +1542,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_result_t value) { case UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS: os << "UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS"; break; + case UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE: + os << "UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE"; + break; case UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP: os << "UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP"; break; diff --git a/scripts/core/common.yml 
b/scripts/core/common.yml index be95b9aa45..0dad27d028 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -276,6 +276,8 @@ etors: desc: "A requested layer was not found by the loader." - name: ERROR_IN_EVENT_LIST_EXEC_STATUS desc: "An event in the provided wait list has $X_EVENT_STATUS_ERROR." + - name: ERROR_DEVICE_NOT_AVAILABLE + desc: "Device in question has `$X_DEVICE_INFO_AVAILABLE == false`" - name: ERROR_UNKNOWN value: "0x7ffffffe" desc: "Unknown or internal error" diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index acce51939b..5e03c0f4cb 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -83,6 +83,8 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_WAIT_LIST_EXP; case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return UR_RESULT_ERROR_IN_EVENT_LIST_EXEC_STATUS; + case CL_DEVICE_NOT_AVAILABLE: + return UR_RESULT_ERROR_DEVICE_NOT_AVAILABLE; default: return UR_RESULT_ERROR_UNKNOWN; } From 1ee81f480fc9d6645eb8cb61acfc53f809ce8d69 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Tue, 2 Apr 2024 10:14:23 +0100 Subject: [PATCH 07/21] Merge pull request #1450 from kbenzie/benie/cl-fpga-atomic-memory-order-caps [CL] Atomic fence scope capability for Intel FPGA driver --- source/adapters/opencl/device.cpp | 64 ++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 115b9b2e09..229c2429a3 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -569,6 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue( static_cast(URCapabilities)); } + case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: { /* Initialize result to minimum mandated capabilities according to * SYCL2020 4.6.3.2. 
Because scopes are hierarchical, wider scopes support @@ -624,6 +625,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue( static_cast(URCapabilities)); } + case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { /* Initialize result to minimum mandated capabilities according to * SYCL2020 4.6.3.2 */ @@ -671,6 +673,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue( static_cast(URCapabilities)); } + case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { /* Initialize result to minimum mandated capabilities according to * SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support @@ -686,38 +689,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( cl_adapter::cast(hDevice), DevVer)); - cl_device_atomic_capabilities CLCapabilities; + auto convertCapabilities = + [](cl_device_atomic_capabilities CLCapabilities) { + ur_memory_scope_capability_flags_t URCapabilities = 0; + /* Because scopes are hierarchical, wider scopes support all narrower + * scopes. At a minimum, each device must support WORK_ITEM, + * SUB_GROUP and WORK_GROUP. + * (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We already + * initialized to these minimum mandated capabilities. Just check + * wider scopes. 
*/ + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; + } + + if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + } + return URCapabilities; + }; + if (DevVer >= oclv::V3_0) { + cl_device_atomic_capabilities CLCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( cl_adapter::cast(hDevice), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)); - assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) && "Violates minimum mandated guarantee"); + URCapabilities |= convertCapabilities(CLCapabilities); + } else if (DevVer >= oclv::V2_0) { + /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | + ALL_DEVICES */ + URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | + UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; - /* Because scopes are hierarchical, wider scopes support all narrower - * scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and - * WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We - * already initialized to these minimum mandated capabilities. Just check - * wider scopes. */ - if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) { - URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE; - } - - if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) { - URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; - } } else { - /* This info is only available in OpenCL version >= 3.0. Just return - * minimum mandated capabilities for older versions. OpenCL 1.x minimum - * mandated capabilities are WORK_GROUP, we already initialized using it. 
- */ - if (DevVer >= oclv::V2_0) { - /* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE | - * ALL_DEVICES */ - URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE | - UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM; + // FIXME: Special case for Intel FPGA driver which is currently an + // OpenCL 1.2 device but is more capable than the default. This is a + // temporary work around until the Intel FPGA driver is updated to + // OpenCL 3.0. If the query is successful, then use the result but do + // not return an error if the query is unsuccessful as this is expected + // of an OpenCL 1.2 driver. + cl_device_atomic_capabilities CLCapabilities; + if (CL_SUCCESS == clGetDeviceInfo(cl_adapter::cast(hDevice), + CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, + sizeof(cl_device_atomic_capabilities), + &CLCapabilities, nullptr)) { + URCapabilities |= convertCapabilities(CLCapabilities); } } From 65af257bb4adeb59c7cdfec9246e6b4fc4312343 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Fri, 5 Apr 2024 14:26:59 +0100 Subject: [PATCH 08/21] Merge pull request #1486 from nrspruit/fix_memfree_report [L0] Fix DeviceInfo global mem free to report unsupported given MemCount==0 --- source/adapters/level_zero/device.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index f4d7f95e0b..b1eb1a7b1b 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -718,7 +718,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } } } - return ReturnValue(std::min(GlobalMemSize, FreeMemory)); + if (MemCount > 0) { + return ReturnValue(std::min(GlobalMemSize, FreeMemory)); + } else { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { // If there are not any memory modules then return 0. 
From 410c02365eb8505ace69d3cb6b8192dbe0077161 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Thu, 4 Apr 2024 10:23:33 +0200 Subject: [PATCH 09/21] Merge pull request #1448 from steffenlarsen/steffen/make_ext_func_fail_unsupported [OpenCL] Make extension function lookup return unsupported error --- source/adapters/opencl/command_buffer.cpp | 80 ++++++++--------------- source/adapters/opencl/common.hpp | 6 +- source/adapters/opencl/enqueue.cpp | 30 ++++----- source/adapters/opencl/memory.cpp | 4 +- 4 files changed, 44 insertions(+), 76 deletions(-) diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 88c661b4ae..ac5650b1a1 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -21,14 +21,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( cl_context CLContext = cl_adapter::cast(hContext); cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, - cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR); - - if (!clCreateCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR)); + cl_int Res = CL_SUCCESS; auto CLCommandBuffer = clCreateCommandBufferKHR( 1, cl_adapter::cast(&Queue), nullptr, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); @@ -51,12 +49,10 @@ urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clRetainCommandBufferKHR_fn clRetainCommandBuffer = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, - cl_ext::RetainCommandBufferName, &clRetainCommandBuffer); - - if (!clRetainCommandBuffer || Res != 
CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clRetainCommandBufferKHRCache, + cl_ext::RetainCommandBufferName, &clRetainCommandBuffer)); CL_RETURN_ON_FAILURE(clRetainCommandBuffer(hCommandBuffer->CLCommandBuffer)); return UR_RESULT_SUCCESS; @@ -68,13 +64,10 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, - cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); - - if (!clReleaseCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR)); CL_RETURN_ON_FAILURE( clReleaseCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); @@ -85,13 +78,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, - cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR); - - if (!clFinalizeCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR)); CL_RETURN_ON_FAILURE( clFinalizeCommandBufferKHR(hCommandBuffer->CLCommandBuffer)); @@ -109,13 +99,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = 
nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, - cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR); - - if (!clCommandNDRangeKernelKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR)); CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( hCommandBuffer->CLCommandBuffer, nullptr, nullptr, @@ -157,12 +144,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, - cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR); - - if (!clCommandCopyBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, + cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( hCommandBuffer->CLCommandBuffer, nullptr, @@ -193,13 +178,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, - cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR); - - if (!clCommandCopyBufferRectKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( hCommandBuffer->CLCommandBuffer, nullptr, @@ -283,12 +265,10 @@ UR_APIEXPORT 
ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; - cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, - cl_ext::CommandFillBufferName, &clCommandFillBufferKHR); - - if (!clCommandFillBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, + cl_ext::CommandFillBufferName, &clCommandFillBufferKHR)); CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( hCommandBuffer->CLCommandBuffer, nullptr, @@ -339,13 +319,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, - cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR); - - if (!clEnqueueCommandBufferKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR)); const uint32_t NumberOfQueues = 1; @@ -382,13 +359,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferGetInfoExp( cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); cl_ext::clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; - cl_int Res = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetCommandBufferInfoKHRCache, - cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR); - - if (!clGetCommandBufferInfoKHR || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::GetCommandBufferInfoName, &clGetCommandBufferInfoKHR)); if (propName != 
UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT) { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index d99a0bd417..256fce0c22 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -373,9 +373,9 @@ static ur_result_t getExtFuncFromContext(cl_context Context, if (It != FPtrMap.end()) { auto F = It->second; // if cached that extension is not available return nullptr and - // UR_RESULT_ERROR_INVALID_VALUE + // UR_RESULT_ERROR_UNSUPPORTED_FEATURE *Fptr = F; - return F ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_INVALID_VALUE; + return F ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } cl_uint DeviceCount; @@ -409,7 +409,7 @@ static ur_result_t getExtFuncFromContext(cl_context Context, if (!FuncPtr) { // Cache that the extension is not available FPtrMap[Context] = nullptr; - return UR_RESULT_ERROR_INVALID_VALUE; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } *Fptr = FuncPtr; diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 506796a07b..9fb4239c38 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -347,12 +347,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( return mapCLErrorToUR(Res); cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; - Res = cl_ext::getExtFuncFromContext( + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, - cl_ext::EnqueueWriteGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::EnqueueWriteGlobalVariableName, &F)); Res = F(cl_adapter::cast(hQueue), cl_adapter::cast(hProgram), name, blockingWrite, count, @@ -378,12 +375,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( return mapCLErrorToUR(Res); cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; - Res = 
cl_ext::getExtFuncFromContext( + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, - cl_ext::EnqueueReadGlobalVariableName, &F); - - if (!F || Res != CL_SUCCESS) - return UR_RESULT_ERROR_INVALID_OPERATION; + cl_ext::EnqueueReadGlobalVariableName, &F)); Res = F(cl_adapter::cast(hQueue), cl_adapter::cast(hProgram), name, blockingRead, count, @@ -409,13 +403,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( } cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, - cl_ext::EnqueueReadHostPipeName, &FuncPtr); + cl_ext::EnqueueReadHostPipeName, &FuncPtr)); if (FuncPtr) { - RetVal = mapCLErrorToUR( + CL_RETURN_ON_FAILURE( FuncPtr(cl_adapter::cast(hQueue), cl_adapter::cast(hProgram), pipe_symbol, blocking, pDst, size, numEventsInWaitList, @@ -423,7 +417,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( cl_adapter::cast(phEvent))); } - return RetVal; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( @@ -441,13 +435,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( } cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, - cl_ext::EnqueueWriteHostPipeName, &FuncPtr); + cl_ext::EnqueueWriteHostPipeName, &FuncPtr)); if (FuncPtr) { - RetVal = mapCLErrorToUR( + CL_RETURN_ON_FAILURE( FuncPtr(cl_adapter::cast(hQueue), cl_adapter::cast(hProgram), pipe_symbol, blocking, pSrc, size, numEventsInWaitList, @@ -455,5 +449,5 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( cl_adapter::cast(phEvent))); } - return RetVal; + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp 
index 2397e2b5f9..5de850f1e2 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -232,11 +232,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( clCreateBufferWithPropertiesINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); // First we need to look up the function pointer - RetErr = + UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, - cl_ext::CreateBufferWithPropertiesName, &FuncPtr); + cl_ext::CreateBufferWithPropertiesName, &FuncPtr)); if (FuncPtr) { std::vector PropertiesIntel; auto Prop = static_cast(pProperties->pNext); From 26cc04e258b82696bfd7738d08d95e34db4aecf6 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 8 Apr 2024 09:52:16 +0100 Subject: [PATCH 10/21] Merge pull request #1433 from Seanst98/sean/cubemaps-UR [Bindless][CUDA] Add support for cubemaps --- include/ur_api.h | 48 +++++++++-- include/ur_print.h | 16 ++++ include/ur_print.hpp | 86 +++++++++++++++++++ scripts/core/EXP-BINDLESS-IMAGES.rst | 15 ++++ scripts/core/exp-bindless-images.yml | 41 +++++++++ source/adapters/cuda/device.cpp | 8 ++ source/adapters/cuda/image.cpp | 21 ++++- source/adapters/cuda/sampler.cpp | 5 ++ source/adapters/cuda/sampler.hpp | 8 +- source/loader/layers/validation/ur_valddi.cpp | 2 +- source/loader/ur_libapi.cpp | 2 +- source/loader/ur_print.cpp | 16 ++++ source/ur_api.cpp | 2 +- tools/urinfo/urinfo.hpp | 5 ++ 14 files changed, 260 insertions(+), 15 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 15ffaf9f1e..41efee4583 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -278,6 +278,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR = 0x2003, ///< ::ur_exp_file_descriptor_t UR_STRUCTURE_TYPE_EXP_WIN32_HANDLE = 0x2004, ///< ::ur_exp_win32_handle_t UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES = 0x2005, ///< ::ur_exp_sampler_addr_modes_t + 
UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES = 0x2006, ///< ::ur_exp_sampler_cubemap_properties_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1625,6 +1626,10 @@ typedef enum ur_device_info_t { ///< semaphore resources UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP = 0x200F, ///< [::ur_bool_t] returns true if the device supports exporting internal ///< event resources + UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP = 0x2010, ///< [::ur_bool_t] returns true if the device supports allocating and + ///< accessing cubemap resources + UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP = 0x2011, ///< [::ur_bool_t] returns true if the device supports sampling cubemapped + ///< images across face boundaries /// @cond UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -1650,7 +1655,7 @@ typedef enum ur_device_info_t { /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE @@ -2421,13 +2426,14 @@ typedef enum ur_mem_flag_t { /////////////////////////////////////////////////////////////////////////////// /// @brief Memory types typedef enum ur_mem_type_t { - UR_MEM_TYPE_BUFFER = 0, ///< Buffer object - UR_MEM_TYPE_IMAGE2D = 1, ///< 2D image object - UR_MEM_TYPE_IMAGE3D = 2, ///< 3D image object - UR_MEM_TYPE_IMAGE2D_ARRAY = 3, ///< 2D image array object - UR_MEM_TYPE_IMAGE1D = 4, ///< 1D image object - UR_MEM_TYPE_IMAGE1D_ARRAY = 5, ///< 1D image array object - UR_MEM_TYPE_IMAGE1D_BUFFER = 6, ///< 1D image buffer object + UR_MEM_TYPE_BUFFER = 0, ///< Buffer object + UR_MEM_TYPE_IMAGE2D = 1, ///< 2D image object + UR_MEM_TYPE_IMAGE3D = 2, ///< 3D image object + UR_MEM_TYPE_IMAGE2D_ARRAY = 3, ///< 2D image array object + UR_MEM_TYPE_IMAGE1D = 4, ///< 1D image object + UR_MEM_TYPE_IMAGE1D_ARRAY = 5, ///< 1D image array object + UR_MEM_TYPE_IMAGE1D_BUFFER = 6, ///< 1D image buffer object + UR_MEM_TYPE_IMAGE_CUBEMAP_EXP = 0x2000, ///< Experimental cubemap image object /// @cond UR_MEM_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -7238,6 +7244,17 @@ typedef enum ur_exp_image_copy_flag_t { /// @brief Bit Mask for validating ur_exp_image_copy_flags_t #define UR_EXP_IMAGE_COPY_FLAGS_MASK 0xfffffff8 +/////////////////////////////////////////////////////////////////////////////// +/// @brief Sampler cubemap seamless filtering mode. 
+typedef enum ur_exp_sampler_cubemap_filter_mode_t { + UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED = 0, ///< Disable seamless filtering + UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS = 1, ///< Enable Seamless filtering + /// @cond + UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_exp_sampler_cubemap_filter_mode_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief File descriptor typedef struct ur_exp_file_descriptor_t { @@ -7292,6 +7309,21 @@ typedef struct ur_exp_sampler_addr_modes_t { } ur_exp_sampler_addr_modes_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Describes cubemap sampler properties +/// +/// @details +/// - Specify these properties in ::urSamplerCreate via ::ur_sampler_desc_t +/// as part of a `pNext` chain. +typedef struct ur_exp_sampler_cubemap_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + ur_exp_sampler_cubemap_filter_mode_t cubemapFilterMode; ///< [in] enables or disables seamless cubemap filtering between cubemap + ///< faces + +} ur_exp_sampler_cubemap_properties_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Describes an interop memory resource descriptor typedef struct ur_exp_interop_mem_desc_t { diff --git a/include/ur_print.h b/include/ur_print.h index ae80817092..ecb85cb519 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -882,6 +882,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migrati /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// 
+/// @brief Print ur_exp_sampler_cubemap_filter_mode_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpSamplerCubemapFilterMode(enum ur_exp_sampler_cubemap_filter_mode_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_file_descriptor_t struct /// @returns @@ -914,6 +922,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpSamplerMipProperties(const struct /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpSamplerAddrModes(const struct ur_exp_sampler_addr_modes_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_exp_sampler_cubemap_properties_t struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintExpSamplerCubemapProperties(const struct ur_exp_sampler_cubemap_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_interop_mem_desc_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index a773af3166..07df5a1874 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -320,10 +320,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value); +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value); inline std::ostream 
&operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_file_descriptor_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_win32_handle_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_mip_properties_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_addr_modes_t params); +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_sampler_cubemap_properties_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_interop_semaphore_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value); @@ -1068,6 +1070,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_structure_type_t value case UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES: os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES"; break; + case UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES: + os << "UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES"; + break; default: os << "unknown enumerator"; break; @@ -1319,6 +1324,11 @@ inline ur_result_t printStruct(std::ostream &os, const void *ptr) { const ur_exp_sampler_addr_modes_t *pstruct = (const ur_exp_sampler_addr_modes_t *)ptr; printPtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES: { + const ur_exp_sampler_cubemap_properties_t *pstruct = (const ur_exp_sampler_cubemap_properties_t *)ptr; + printPtr(os, pstruct); + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -2546,6 +2556,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP: os << "UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP"; break; + 
case UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP"; + break; + case UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP: + os << "UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP"; + break; default: os << "unknown enumerator"; break; @@ -4150,6 +4166,30 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; + case UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; + case UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP: { + const ur_bool_t *tptr = (const ur_bool_t *)ptr; + if (sizeof(ur_bool_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + return UR_RESULT_ERROR_INVALID_SIZE; + } + os << (const void *)(tptr) << " ("; + + os << *tptr; + + os << ")"; + } break; default: os << "unknown enumerator"; return UR_RESULT_ERROR_INVALID_ENUMERATION; @@ -5326,6 +5366,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_mem_type_t value) { case UR_MEM_TYPE_IMAGE1D_BUFFER: os << "UR_MEM_TYPE_IMAGE1D_BUFFER"; break; + case UR_MEM_TYPE_IMAGE_CUBEMAP_EXP: + os << "UR_MEM_TYPE_IMAGE_CUBEMAP_EXP"; + break; default: os << "unknown enumerator"; break; @@ -9137,6 +9180,24 @@ inline ur_result_t printFlag(std::ostream &os, uint32_ } } // namespace ur::details /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_sampler_cubemap_filter_mode_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value) { + switch (value) { + case UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED: + os << 
"UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED"; + break; + case UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS: + os << "UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_file_descriptor_t type /// @returns /// std::ostream & @@ -9259,6 +9320,31 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_sampler_ad return os; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_exp_sampler_cubemap_properties_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_sampler_cubemap_properties_t params) { + os << "(struct ur_exp_sampler_cubemap_properties_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur::details::printStruct(os, + (params.pNext)); + + os << ", "; + os << ".cubemapFilterMode = "; + + os << (params.cubemapFilterMode); + + os << "}"; + return os; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_interop_mem_desc_t type /// @returns /// std::ostream & diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index fe6a1ac32b..af90c1ea0f 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -50,6 +50,7 @@ Runtime: * Sampled images * Unsampled images * Mipmaps + * Cubemaps * USM backed images * Interoperability support @@ -69,6 +70,7 @@ Enums ${X}_STRUCTURE_TYPE_EXP_FILE_DESCRIPTOR ${X}_STRUCTURE_TYPE_EXP_WIN32_HANDLE ${X}_STRUCTURE_TYPE_EXP_SAMPLER_ADDR_MODES + ${X}_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES * ${x}_device_info_t * ${X}_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP @@ -87,6 +89,8 @@ Enums * 
${X}_DEVICE_INFO_INTEROP_MEMORY_EXPORT_SUPPORT_EXP * ${X}_DEVICE_INFO_INTEROP_SEMAPHORE_IMPORT_SUPPORT_EXP * ${X}_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP + * ${X}_DEVICE_INFO_CUBEMAP_SUPPORT_EXP + * ${X}_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP * ${x}_command_t * ${X}_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP @@ -97,6 +101,10 @@ Enums * ${X}_EXP_IMAGE_COPY_FLAG_DEVICE_TO_HOST * ${X}_EXP_IMAGE_COPY_FLAG_DEVICE_TO_DEVICE +* ${x}_exp_sampler_cubemap_filter_mode_t + * ${X}_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS + * ${X}_EXP_SAMPLER_CUBEMAP_FILTER_MODE_DISJOINTED + * ${x}_function_t * ${X}_FUNCTION_USM_PITCHED_ALLOC_EXP * ${X}_FUNCTION_BINDLESS_IMAGES_UNSAMPLED_IMAGE_HANDLE_DESTROY_EXP @@ -117,6 +125,9 @@ Enums * ${X}_FUNCTION_BINDLESS_IMAGES_WAIT_EXTERNAL_SEMAPHORE_EXP * ${X}_FUNCTION_BINDLESS_IMAGES_SIGNAL_EXTERNAL_SEMAPHORE_EXP +* ${x}_mem_type_t + * ${X}_MEM_TYPE_IMAGE_CUBEMAP_EXP + Types ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_exp_sampler_mip_properties_t @@ -129,6 +140,7 @@ Types * ${x}_exp_file_descriptor_t * ${x}_exp_win32_handle_t * ${x}_exp_sampler_addr_modes_t +* ${x}_exp_sampler_cubemap_properties_t Functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -184,6 +196,9 @@ Changelog +------------------------------------------------------------------------+ | 9.0 | Remove layered image properties struct. | +------------------------------------------------------------------------+ +| 10.0 | Added cubemap image type, sampling properties, and device | +| | queries. 
| ++----------+-------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index d2e508c4a7..a6f17b1a74 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -86,6 +86,12 @@ etors: - name: INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP value: "0x200F" desc: "[$x_bool_t] returns true if the device supports exporting internal event resources" + - name: CUBEMAP_SUPPORT_EXP + value: "0x2010" + desc: "[$x_bool_t] returns true if the device supports allocating and accessing cubemap resources" + - name: CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP + value: "0x2011" + desc: "[$x_bool_t] returns true if the device supports sampling cubemapped images across face boundaries" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -110,6 +116,9 @@ etors: - name: EXP_SAMPLER_ADDR_MODES desc: $x_exp_sampler_addr_modes_t value: "0x2005" + - name: EXP_SAMPLER_CUBEMAP_PROPERTIES + desc: $x_exp_sampler_cubemap_properties_t + value: "0x2006" --- #-------------------------------------------------------------------------- type: enum extend: true @@ -135,6 +144,25 @@ etors: - name: DEVICE_TO_DEVICE desc: "Device to device" --- #-------------------------------------------------------------------------- +type: enum +extend: True +desc: "Memory types" +name: $x_mem_type_t +etors: + - name: IMAGE_CUBEMAP_EXP + value: "0x2000" + desc: "Experimental cubemap image object" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Sampler cubemap seamless filtering mode." 
+class: $xBindlessImages +name: $x_exp_sampler_cubemap_filter_mode_t +etors: + - name: DISJOINTED + desc: "Disable seamless filtering" + - name: SEAMLESS + desc: "Enable Seamless filtering" +--- #-------------------------------------------------------------------------- type: struct desc: "File descriptor" name: $x_exp_file_descriptor_t @@ -189,6 +217,19 @@ members: desc: "[in] Specify the address mode of the sampler per dimension" --- #-------------------------------------------------------------------------- type: struct +desc: "Describes cubemap sampler properties" +details: + - Specify these properties in $xSamplerCreate via $x_sampler_desc_t as part + of a `pNext` chain. +class: $xBindlessImages +name: $x_exp_sampler_cubemap_properties_t +base: $x_base_properties_t +members: + - type: $x_exp_sampler_cubemap_filter_mode_t + name: cubemapFilterMode + desc: "[in] enables or disables seamless cubemap filtering between cubemap faces" +--- #-------------------------------------------------------------------------- +type: struct desc: "Describes an interop memory resource descriptor" class: $xBindlessImages name: $x_exp_interop_mem_desc_t diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 56addc4eef..69665fb456 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -917,6 +917,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // CUDA does not support exporting semaphores or events. return ReturnValue(false); } + case UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP: { + // CUDA supports cubemaps. + return ReturnValue(true); + } + case UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP: { + // CUDA supports cubemap seamless filtering. 
+ return ReturnValue(true); + } case UR_DEVICE_INFO_DEVICE_ID: { int Value = 0; UR_CHECK_ERROR(cuDeviceGetAttribute( diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index 8d2610626e..b9cc832a02 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -244,7 +244,8 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, /// Sampler property layout: /// | | /// ----------------------------------- - /// | 31 30 ... 12 | N/A + /// | 31 30 ... 13 | N/A + /// | 12 | cubemap filter mode /// | 11 | mip filter mode /// | 10 9 8 | addressing mode 3 /// | 7 6 5 | addressing mode 2 @@ -306,6 +307,13 @@ ur_result_t urTextureCreate(ur_sampler_handle_t hSampler, // This flag prevents this behaviour. ImageTexDesc.flags |= CU_TRSF_READ_AS_INTEGER; + // Cubemap attributes + ur_exp_sampler_cubemap_filter_mode_t CubemapFilterModeProp = + hSampler->getCubemapFilterMode(); + if (CubemapFilterModeProp == UR_EXP_SAMPLER_CUBEMAP_FILTER_MODE_SEAMLESS) { + ImageTexDesc.flags |= CU_TRSF_SEAMLESS_CUBEMAP; + } + CUtexObject Texture; UR_CHECK_ERROR( cuTexObjectCreate(&Texture, &ResourceDesc, &ImageTexDesc, nullptr)); @@ -418,6 +426,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp( array_desc.Depth = pImageDesc->arraySize; array_desc.Flags |= CUDA_ARRAY3D_LAYERED; break; + case UR_MEM_TYPE_IMAGE_CUBEMAP_EXP: + array_desc.Height = pImageDesc->height; + array_desc.Depth = pImageDesc->arraySize; // Should be 6 ONLY + array_desc.Flags |= CUDA_ARRAY3D_CUBEMAP; + break; default: return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -715,7 +728,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.Depth = copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY || - pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) { + pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || + pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { 
CUDA_MEMCPY3D cpy_desc = {}; cpy_desc.srcXInBytes = srcOffset.x * PixelSizeBytes; cpy_desc.srcY = srcOffset.y; @@ -798,7 +812,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageCopyExp( cpy_desc.Depth = copyExtent.depth; UR_CHECK_ERROR(cuMemcpy3DAsync(&cpy_desc, Stream)); } else if (pImageDesc->type == UR_MEM_TYPE_IMAGE1D_ARRAY || - pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY) { + pImageDesc->type == UR_MEM_TYPE_IMAGE2D_ARRAY || + pImageDesc->type == UR_MEM_TYPE_IMAGE_CUBEMAP_EXP) { CUDA_MEMCPY3D cpy_desc = {}; cpy_desc.srcXInBytes = srcOffset.x; cpy_desc.srcY = srcOffset.y; diff --git a/source/adapters/cuda/sampler.cpp b/source/adapters/cuda/sampler.cpp index bbbf9d86f0..904c529fd2 100644 --- a/source/adapters/cuda/sampler.cpp +++ b/source/adapters/cuda/sampler.cpp @@ -44,6 +44,11 @@ urSamplerCreate(ur_context_handle_t hContext, const ur_sampler_desc_t *pDesc, Sampler->Props |= SamplerAddrModes->addrModes[0] << 2; Sampler->Props |= SamplerAddrModes->addrModes[1] << 5; Sampler->Props |= SamplerAddrModes->addrModes[2] << 8; + } else if (BaseDesc->stype == + UR_STRUCTURE_TYPE_EXP_SAMPLER_CUBEMAP_PROPERTIES) { + const ur_exp_sampler_cubemap_properties_t *SamplerCubemapProperties = + reinterpret_cast(pNext); + Sampler->Props |= SamplerCubemapProperties->cubemapFilterMode << 12; } pNext = const_cast(BaseDesc->pNext); } diff --git a/source/adapters/cuda/sampler.hpp b/source/adapters/cuda/sampler.hpp index 11121e3ecf..4823541c73 100644 --- a/source/adapters/cuda/sampler.hpp +++ b/source/adapters/cuda/sampler.hpp @@ -15,7 +15,8 @@ /// Sampler property layout: /// | | /// ----------------------------------- -/// | 31 30 ... 12 | N/A +/// | 31 30 ... 
13 | N/A +/// | 12 | cubemap filter mode /// | 11 | mip filter mode /// | 10 9 8 | addressing mode 3 /// | 7 6 5 | addressing mode 2 @@ -60,4 +61,9 @@ struct ur_sampler_handle_t_ { ur_sampler_filter_mode_t getMipFilterMode() const noexcept { return static_cast((Props >> 11) & 0b1); } + + ur_exp_sampler_cubemap_filter_mode_t getCubemapFilterMode() const noexcept { + return static_cast((Props >> 12) & + 0b1); + } }; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index fbdbbe45cf..0fd99fb563 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -496,7 +496,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - if (UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName) { + if (UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP < propName) { return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index d5092ffe1c..7c1a02c38d 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -842,7 +842,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 7facba104f..9c3b6f1277 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -887,6 +887,14 @@ ur_result_t urPrintExpImageCopyFlags(enum ur_exp_image_copy_flag_t value, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpSamplerCubemapFilterMode( + enum ur_exp_sampler_cubemap_filter_mode_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpFileDescriptor(const struct ur_exp_file_descriptor_t params, char *buffer, const size_t buff_size, @@ -921,6 +929,14 @@ urPrintExpSamplerAddrModes(const struct ur_exp_sampler_addr_modes_t params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintExpSamplerCubemapProperties( + const struct ur_exp_sampler_cubemap_properties_t params, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpInteropMemDesc(const struct ur_exp_interop_mem_desc_t params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index b65a2627fe..139281bd8b 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -736,7 +736,7 @@ ur_result_t UR_APICALL urDeviceGetSelected( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP < propName` +/// + `::UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP < propName` /// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION /// + If `propName` is not supported by the adapter. 
/// - ::UR_RESULT_ERROR_INVALID_SIZE diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index 15894cafb8..9745eed6cb 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -378,5 +378,10 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, std::cout << prefix; printDeviceInfo( hDevice, UR_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo(hDevice, UR_DEVICE_INFO_CUBEMAP_SUPPORT_EXP); + std::cout << prefix; + printDeviceInfo( + hDevice, UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP); } } // namespace urinfo From 532dac51118881e91831287538d634e5e6d54f5c Mon Sep 17 00:00:00 2001 From: aarongreig Date: Wed, 10 Apr 2024 10:21:22 +0100 Subject: [PATCH 11/21] Merge pull request #1455 from GeorgeWeb/georgi/fix-hip-usm-copy2d [HIP] Fix memory type detection in allocation info queries and USM copy2D --- source/adapters/hip/enqueue.cpp | 50 +++++++++++++++++++++++++++------ source/adapters/hip/usm.cpp | 21 +++++++++----- 2 files changed, 55 insertions(+), 16 deletions(-) diff --git a/source/adapters/hip/enqueue.cpp b/source/adapters/hip/enqueue.cpp index 33691ec112..101f664901 100644 --- a/source/adapters/hip/enqueue.cpp +++ b/source/adapters/hip/enqueue.cpp @@ -1618,25 +1618,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( hipPointerAttribute_t srcAttribs{}; hipPointerAttribute_t dstAttribs{}; + // Determine if pSrc and/or pDst are system allocated pageable host memory. bool srcIsSystemAlloc{false}; bool dstIsSystemAlloc{false}; hipError_t hipRes{}; - // hipErrorInvalidValue returned from hipPointerGetAttributes for a non-null - // pointer refers to an OS-allocation, hence pageable host memory. However, - // this means we cannot rely on the attributes result, hence we mark system - // pageable memory allocation manually as host memory. 
The HIP runtime can - // handle the registering/unregistering of the memory as long as the right - // copy-kind (direction) is provided to hipMemcpy2DAsync for this case. - hipRes = hipPointerGetAttributes(&srcAttribs, (const void *)pSrc); + // Error code hipErrorInvalidValue returned from hipPointerGetAttributes + // for a non-null pointer refers to an OS-allocation, hence we can work + // with the assumption that this is a pointer to a pageable host memory. + // Since ROCm version 6.0.0, the enum hipMemoryType can also be marked as + // hipMemoryTypeUnregistered explicitly to relay that information better. + // This means we cannot rely on any attribute result, hence we just mark + // the pointer handle as system allocated pageable host memory. + // The HIP runtime can handle the registering/unregistering of the memory + // as long as the right copy-kind (direction) is provided to hipMemcpy2D*. + hipRes = hipPointerGetAttributes(&srcAttribs, pSrc); if (hipRes == hipErrorInvalidValue && pSrc) srcIsSystemAlloc = true; hipRes = hipPointerGetAttributes(&dstAttribs, (const void *)pDst); if (hipRes == hipErrorInvalidValue && pDst) dstIsSystemAlloc = true; +#if HIP_VERSION_MAJOR >= 6 + srcIsSystemAlloc |= srcAttribs.type == hipMemoryTypeUnregistered; + dstIsSystemAlloc |= dstAttribs.type == hipMemoryTypeUnregistered; +#endif - const unsigned int srcMemType{srcAttribs.type}; - const unsigned int dstMemType{dstAttribs.type}; + unsigned int srcMemType{srcAttribs.type}; + unsigned int dstMemType{dstAttribs.type}; + + // ROCm 5.7.1 finally started updating the type attribute member to + // hipMemoryTypeManaged for shared memory allocations(hipMallocManaged). + // Hence, we use a separate query that verifies the pointer use via flags. +#if HIP_VERSION >= 50700001 + // Determine the source/destination memory type for shared allocations. 
+ // + // NOTE: The hipPointerGetAttribute API is marked as [BETA] and fails with + // exit code -11 when passing a system allocated pointer to it. + if (!srcIsSystemAlloc && srcAttribs.isManaged) { + UR_ASSERT(srcAttribs.hostPointer && srcAttribs.devicePointer, + UR_RESULT_ERROR_INVALID_VALUE); + UR_CHECK_ERROR(hipPointerGetAttribute( + &srcMemType, HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, + reinterpret_cast(const_cast(pSrc)))); + } + if (!dstIsSystemAlloc && dstAttribs.isManaged) { + UR_ASSERT(dstAttribs.hostPointer && dstAttribs.devicePointer, + UR_RESULT_ERROR_INVALID_VALUE); + UR_CHECK_ERROR( + hipPointerGetAttribute(&dstMemType, HIP_POINTER_ATTRIBUTE_MEMORY_TYPE, + reinterpret_cast(pDst))); + } +#endif const bool srcIsHost{(srcMemType == hipMemoryTypeHost) || srcIsSystemAlloc}; const bool srcIsDevice{srcMemType == hipMemoryTypeDevice}; diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 4e140ce5c1..f29fab7b92 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -160,7 +160,6 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, try { switch (propName) { case UR_USM_ALLOC_INFO_TYPE: { - unsigned int Value; // do not throw if hipPointerGetAttribute returns hipErrorInvalidValue hipError_t Ret = hipPointerGetAttributes(&hipPointerAttributeType, pMem); if (Ret == hipErrorInvalidValue) { @@ -170,19 +169,27 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, // Direct usage of the function, instead of UR_CHECK_ERROR, so we can get // the line offset. checkErrorUR(Ret, __func__, __LINE__ - 5, __FILE__); - Value = hipPointerAttributeType.isManaged; - if (Value) { - // pointer to managed memory - return ReturnValue(UR_USM_TYPE_SHARED); + // ROCm 6.0.0 introduces hipMemoryTypeUnregistered in the hipMemoryType + // enum to mark unregistered allocations (i.e., via system allocators). 
+#if HIP_VERSION_MAJOR >= 6 + if (hipPointerAttributeType.type == hipMemoryTypeUnregistered) { + // pointer not known to the HIP subsystem + return ReturnValue(UR_USM_TYPE_UNKNOWN); } - UR_CHECK_ERROR(hipPointerGetAttributes(&hipPointerAttributeType, pMem)); +#endif + unsigned int Value; #if HIP_VERSION >= 50600000 Value = hipPointerAttributeType.type; #else Value = hipPointerAttributeType.memoryType; #endif - UR_ASSERT(Value == hipMemoryTypeDevice || Value == hipMemoryTypeHost, + UR_ASSERT(Value == hipMemoryTypeDevice || Value == hipMemoryTypeHost || + Value == hipMemoryTypeManaged, UR_RESULT_ERROR_INVALID_MEM_OBJECT); + if (hipPointerAttributeType.isManaged || Value == hipMemoryTypeManaged) { + // pointer to managed memory + return ReturnValue(UR_USM_TYPE_SHARED); + } if (Value == hipMemoryTypeDevice) { // pointer to device memory return ReturnValue(UR_USM_TYPE_DEVICE); From bcbe255536e9a0332b5fa572021909a70da955e0 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Wed, 10 Apr 2024 16:15:45 +0100 Subject: [PATCH 12/21] Merge pull request #1483 from nrspruit/fix_inorder_lists_reuse [L0] Fix regular in order command list reuse given inorder queue --- source/adapters/level_zero/context.cpp | 11 +++++++++++ source/adapters/level_zero/context.hpp | 9 +++++++-- source/adapters/level_zero/queue.cpp | 18 +++++++++++++----- source/adapters/level_zero/queue.hpp | 2 ++ 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index c4b5423adb..cec1dc1947 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -712,6 +712,11 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( for (auto ZeCommandListIt = ZeCommandListCache.begin(); ZeCommandListIt != ZeCommandListCache.end(); ++ZeCommandListIt) { + // If this is an InOrder Queue, then only allow lists which are in order. 
+ if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue() && + !(ZeCommandListIt->second.InOrderList)) { + continue; + } auto &ZeCommandList = ZeCommandListIt->first; auto it = Queue->CommandListMap.find(ZeCommandList); if (it != Queue->CommandListMap.end()) { @@ -766,6 +771,12 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( if (UseCopyEngine != it->second.isCopy(Queue)) continue; + // If this is an InOrder Queue, then only allow lists which are in order. + if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue() && + !(it->second.IsInOrderList)) { + continue; + } + ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus, (it->second.ZeFence)); if (ZeResult == ZE_RESULT_SUCCESS) { diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 8cb1d5369f..6e4244eea0 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -27,6 +27,11 @@ #include +struct l0_command_list_cache_info { + ZeStruct ZeQueueDesc; + bool InOrderList = false; +}; + struct ur_context_handle_t_ : _ur_object { ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices, const ur_device_handle_t *Devs, bool OwnZeContext) @@ -87,11 +92,11 @@ struct ur_context_handle_t_ : _ur_object { // std::unordered_map>>> + l0_command_list_cache_info>>> ZeComputeCommandListCache; std::unordered_map>>> + l0_command_list_cache_info>>> ZeCopyCommandListCache; // Store USM pool for USM shared and device allocations. 
There is 1 memory diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 187f4f75f9..cfa703eea0 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -449,7 +449,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( ->ZeCopyCommandListCache[Queue->Device->ZeDevice] : Queue->Context ->ZeComputeCommandListCache[Queue->Device->ZeDevice]; - ZeCommandListCache.push_back({it->first, it->second.ZeQueueDesc}); + struct l0_command_list_cache_info ListInfo; + ListInfo.ZeQueueDesc = it->second.ZeQueueDesc; + ListInfo.InOrderList = it->second.IsInOrderList; + ZeCommandListCache.push_back({it->first, ListInfo}); } else { // A non-reusable comamnd list that came from a make_queue call is // destroyed since it cannot be recycled. @@ -1708,8 +1711,10 @@ ur_result_t ur_queue_handle_t_::resetCommandList( UseCopyEngine ? this->Context->ZeCopyCommandListCache[this->Device->ZeDevice] : this->Context->ZeComputeCommandListCache[this->Device->ZeDevice]; - ZeCommandListCache.push_back( - {CommandList->first, CommandList->second.ZeQueueDesc}); + struct l0_command_list_cache_info ListInfo; + ListInfo.ZeQueueDesc = CommandList->second.ZeQueueDesc; + ListInfo.InOrderList = CommandList->second.IsInOrderList; + ZeCommandListCache.push_back({CommandList->first, ListInfo}); } return UR_RESULT_SUCCESS; @@ -1870,8 +1875,10 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeStruct ZeCommandListDesc; ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal; + bool IsInOrderList = false; if (Device->useDriverInOrderLists() && isInOrderQueue()) { ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER; + IsInOrderList = true; } ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice, @@ -1882,7 +1889,8 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZeQueueDesc.ordinal = QueueGroupOrdinal; std::tie(CommandList, std::ignore) = CommandListMap.insert( std::pair( - ZeCommandList, {ZeFence, 
false, false, ZeCommandQueue, ZeQueueDesc})); + ZeCommandList, + {ZeFence, false, false, ZeCommandQueue, ZeQueueDesc, IsInOrderList})); UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList)); UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine)); @@ -2011,7 +2019,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { ->ZeComputeCommandListCache[Queue->Device->ZeDevice]; for (auto ZeCommandListIt = ZeCommandListCache.begin(); ZeCommandListIt != ZeCommandListCache.end(); ++ZeCommandListIt) { - const auto &Desc = (*ZeCommandListIt).second; + const auto &Desc = (*ZeCommandListIt).second.ZeQueueDesc; if (Desc.index == ZeCommandQueueDesc.index && Desc.flags == ZeCommandQueueDesc.flags && Desc.mode == ZeCommandQueueDesc.mode && diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 06751e03c1..978ea70c0e 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -65,6 +65,8 @@ struct ur_command_list_info_t { // the make_queue API the descriptor is unavailable so a dummy descriptor is // used and then this entry is marked as not eligible for recycling. ZeStruct ZeQueueDesc; + // Indicates if this is an inorder list + bool IsInOrderList{false}; bool CanReuse{true}; // Helper functions to tell if this is a copy command-list. 
From 6465c983ab623e10cdfa33f5a55f16527c6e0cb8 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Fri, 12 Apr 2024 10:22:00 +0100 Subject: [PATCH 13/21] Merge pull request #1498 from wenju-he/ZeImageDesc-urMemImageCreateWithNativeHandle [L0][Image] Set ZeImageDesc member of _ur_image in release build for legacy image --- source/adapters/level_zero/memory.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index 76d53c1d81..94e808eb50 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -1552,16 +1552,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( ze_image_handle_t ZeHImage = ur_cast(NativeMem); ZeStruct ZeImageDesc; -#ifndef NDEBUG ur_result_t Res = ur2zeImageDesc(ImageFormat, ImageDesc, ZeImageDesc); if (Res != UR_RESULT_SUCCESS) { *Mem = nullptr; return Res; } -#else - std::ignore = ImageFormat; - std::ignore = ImageDesc; -#endif // !NDEBUG UR_CALL(createUrMemFromZeImage( Context, ZeHImage, Properties->isNativeHandleOwned, ZeImageDesc, Mem)); From 4f6997ee022260c74fc236ba923f21c95ed0d1d3 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 15 Apr 2024 10:12:33 +0100 Subject: [PATCH 14/21] Merge pull request #1492 from nrspruit/l0_queue_sync_unblocking [L0] Enable Disabling of Queue lock during L0 Sync calls --- source/adapters/level_zero/common.hpp | 10 ++++++++++ source/adapters/level_zero/queue.cpp | 27 +++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index 5425d4eb05..7a56f6e149 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -231,6 +231,16 @@ static const uint32_t UrL0Serialize = [] { return SerializeModeValue; }(); +static const uint32_t UrL0QueueSyncNonBlocking = [] { + const char *UrL0QueueSyncNonBlocking = + 
std::getenv("UR_L0_QUEUE_SYNCHRONIZE_NON_BLOCKING"); + uint32_t L0QueueSyncLockingModeValue = 1; + if (UrL0QueueSyncNonBlocking) { + L0QueueSyncLockingModeValue = std::atoi(UrL0QueueSyncNonBlocking); + } + return L0QueueSyncLockingModeValue; +}(); + // This class encapsulates actions taken along with a call to Level Zero API. class ZeCall { private: diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index cfa703eea0..c57892c80b 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1403,7 +1403,13 @@ ur_result_t ur_queue_handle_t_::synchronize() { return UR_RESULT_SUCCESS; // wait for all commands previously submitted to this immediate command list - ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX)); + if (UrL0QueueSyncNonBlocking) { + Queue->Mutex.unlock(); + ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX)); + Queue->Mutex.lock(); + } else { + ZE2UR_CALL(zeCommandListHostSynchronize, (ImmCmdList->first, UINT64_MAX)); + } // Cleanup all events from the synced command list. CleanupEventListFromResetCmdList(ImmCmdList->second.EventList, true); @@ -1417,7 +1423,13 @@ ur_result_t ur_queue_handle_t_::synchronize() { // zero handle can have device scope, so we can't synchronize the last // event. 
if (isInOrderQueue() && !LastCommandEvent->IsDiscarded) { - ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent)); + if (UrL0QueueSyncNonBlocking) { + this->Mutex.unlock(); + ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent)); + this->Mutex.lock(); + } else { + ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent)); + } // clean up all events known to have been completed as well, // so they can be reused later @@ -1444,8 +1456,15 @@ ur_result_t ur_queue_handle_t_::synchronize() { UR_CALL(syncImmCmdList(this, ImmCmdList)); } else { for (auto &ZeQueue : QueueGroup.second.ZeQueues) - if (ZeQueue) - ZE2UR_CALL(zeHostSynchronize, (ZeQueue)); + if (ZeQueue) { + if (UrL0QueueSyncNonBlocking) { + this->Mutex.unlock(); + ZE2UR_CALL(zeHostSynchronize, (ZeQueue)); + this->Mutex.lock(); + } else { + ZE2UR_CALL(zeHostSynchronize, (ZeQueue)); + } + } } } } From 151d5a6f4dcb05cdfda9aafaa9d319a3ffccdd77 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Mon, 15 Apr 2024 10:12:46 +0100 Subject: [PATCH 15/21] Merge pull request #1494 from nrspruit/usm_exp_table_enable [L0] Enable USM Import/Release Exp Functions --- source/adapters/level_zero/ur_interface_loader.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index f85fcaef0a..051db73145 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -404,6 +404,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( return result; } pDdiTable->pfnPitchedAllocExp = urUSMPitchedAllocExp; + pDdiTable->pfnImportExp = urUSMImportExp; + pDdiTable->pfnReleaseExp = urUSMReleaseExp; return UR_RESULT_SUCCESS; } From e9f363d5eed413e53f9f96b0f4550921d5fb86b3 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Tue, 16 Apr 2024 10:10:10 +0100 Subject: [PATCH 16/21] Merge pull request #1507 from nrspruit/fix_p2p_properties_init [L0] Fix to p2p properties 
init for pNext and stype --- source/adapters/level_zero/common.cpp | 4 ++++ source/adapters/level_zero/usm_p2p.cpp | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index a927c8b444..af79adeb5d 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -254,6 +254,10 @@ template <> ze_structure_type_t getZeStructureType() { return ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; } template <> +ze_structure_type_t getZeStructureType() { + return ZE_STRUCTURE_TYPE_DEVICE_P2P_PROPERTIES; +} +template <> ze_structure_type_t getZeStructureType() { return ZE_STRUCTURE_TYPE_DEVICE_COMPUTE_PROPERTIES; } diff --git a/source/adapters/level_zero/usm_p2p.cpp b/source/adapters/level_zero/usm_p2p.cpp index 7a9e4e3b9b..f122b8cbdb 100644 --- a/source/adapters/level_zero/usm_p2p.cpp +++ b/source/adapters/level_zero/usm_p2p.cpp @@ -41,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( switch (propName) { case UR_EXP_PEER_INFO_UR_PEER_ACCESS_SUPPORTED: { bool p2pAccessSupported = false; - ze_device_p2p_properties_t p2pProperties; + ZeStruct p2pProperties; ZE2UR_CALL(zeDeviceGetP2PProperties, (commandDevice->ZeDevice, peerDevice->ZeDevice, &p2pProperties)); if (p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS) { @@ -55,7 +55,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( break; } case UR_EXP_PEER_INFO_UR_PEER_ATOMICS_SUPPORTED: { - ze_device_p2p_properties_t p2pProperties; + ZeStruct p2pProperties; ZE2UR_CALL(zeDeviceGetP2PProperties, (commandDevice->ZeDevice, peerDevice->ZeDevice, &p2pProperties)); propertyValue = p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS; From 5c2017fce9ff8dbf0b0a6b63350a4f63c407e4bd Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 25 Mar 2024 11:22:45 +0100 Subject: [PATCH 17/21] Merge pull request #1471 from pbalcer/remove-coverage [CI] remove coverage workflow 
--- .github/workflows/coverage.yml | 83 ---------------------------------- 1 file changed, 83 deletions(-) delete mode 100644 .github/workflows/coverage.yml diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml deleted file mode 100644 index f8ec9555cb..0000000000 --- a/.github/workflows/coverage.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: Coverage - -# Don't run this workflow on push trigger, because it requires -# a token, which we don't have and can't generate. -# Long term solution is to switch to a different tool, ref. #1465 -on: [pull_request] - -permissions: - contents: read - -jobs: - ubuntu-build: - name: Build - Ubuntu - strategy: - matrix: - os: ['ubuntu-22.04'] - build_type: [Debug] - compiler: [{c: gcc, cxx: g++}] - libbacktrace: ['-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON'] - pool_tracking: ['-DUMF_ENABLE_POOL_TRACKING=ON'] - - runs-on: ${{matrix.os}} - - steps: - - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Install apt packages - run: | - sudo apt-get update - sudo apt-get install -y doxygen ${{matrix.compiler.c}} - - - name: Install pip packages - run: pip install -r third_party/requirements.txt - - - name: Install libbacktrace - if: matrix.libbacktrace == '-DVAL_USE_LIBBACKTRACE_BACKTRACE=ON' - run: | - git clone https://github.com/ianlancetaylor/libbacktrace.git - cd libbacktrace - ./configure - make - sudo make install - cd .. 
- - - name: Download DPC++ - run: | - sudo apt install libncurses5 - wget -O ${{github.workspace}}/dpcpp_compiler.tar.gz https://github.com/intel/llvm/releases/download/sycl-nightly%2F20230626/dpcpp-compiler.tar.gz - tar -xvf ${{github.workspace}}/dpcpp_compiler.tar.gz - - - name: Configure CMake - run: > - cmake - -B${{github.workspace}}/build - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -DUR_ENABLE_TRACING=ON - -DUR_DEVELOPER_MODE=ON - -DCMAKE_BUILD_TYPE=${{matrix.build_type}} - -DUR_BUILD_TESTS=ON - -DUR_FORMAT_CPP_STYLE=ON - -DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++ - -DCMAKE_CXX_FLAGS="--coverage -fkeep-inline-functions -fkeep-static-functions" - -DCMAKE_EXE_LINKER_FLAGS="--coverage" - -DCMAKE_SHARED_LINKER_FLAGS="--coverage" - ${{matrix.libbacktrace}} - ${{matrix.pool_tracking}} - - - name: Build - run: cmake --build ${{github.workspace}}/build -j $(nproc) - - - name: Test - working-directory: ${{github.workspace}}/build - run: ctest -C ${{matrix.build_type}} --output-on-failure -L "umf|loader|validation|tracing|unit|urtrace" - - - name: Quick Coverage Info - working-directory: ${{github.workspace}}/build - run: ctest -T Coverage - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@e0b68c6749509c5f83f984dd99a76a1c1a231044 # v4.0.1 - with: - fail_ci_if_error: true From db80d60914f962812625c71c79ca27917c174da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 17 Apr 2024 13:37:37 +0200 Subject: [PATCH 18/21] Merge pull request #1516 from pbalcer/disable-clang-l0 disable clang build for L0 adapter tests --- .github/workflows/cmake.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 8ce93b3867..a44981fce5 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -175,6 +175,10 @@ jobs: ] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}, {c: clang, cxx: 
clang++}] + # TODO: The latest L0 loader segfaults when built with clang. + exclude: + - adapter: {name: L0, platform: ""} + compiler: {c: clang, cxx: clang++} runs-on: ${{matrix.adapter.name}} From ce58dfde9f18a13d6043a2828513a387ee7775b3 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Fri, 19 Apr 2024 10:34:52 +0100 Subject: [PATCH 19/21] Merge pull request #1298 from pbalcer/immcmd-out-of-order-completion-batching [L0] optimize # of event status queries through batching --- source/adapters/level_zero/context.cpp | 34 +-- source/adapters/level_zero/event.cpp | 2 + source/adapters/level_zero/event.hpp | 4 + source/adapters/level_zero/queue.cpp | 277 ++++++++++++++++++++++++- source/adapters/level_zero/queue.hpp | 160 +++++++++++++- source/common/ur_util.hpp | 19 +- 6 files changed, 452 insertions(+), 44 deletions(-) diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index cec1dc1947..6ae028a535 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -13,6 +13,7 @@ #include #include +#include "adapters/level_zero/queue.hpp" #include "context.hpp" #include "ur_level_zero.hpp" @@ -596,29 +597,6 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { return UR_RESULT_SUCCESS; } -// Get value of the threshold for number of events in immediate command lists. -// If number of events in the immediate command list exceeds this threshold then -// cleanup process for those events is executed. -static const size_t ImmCmdListsEventCleanupThreshold = [] { - const char *UrRet = - std::getenv("UR_L0_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD"); - const char *PiRet = std::getenv( - "SYCL_PI_LEVEL_ZERO_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD"); - const char *ImmCmdListsEventCleanupThresholdStr = - UrRet ? UrRet : (PiRet ? 
PiRet : nullptr); - static constexpr int Default = 1000; - if (!ImmCmdListsEventCleanupThresholdStr) - return Default; - - int Threshold = std::atoi(ImmCmdListsEventCleanupThresholdStr); - - // Basically disable threshold if negative value is provided. - if (Threshold < 0) - return INT_MAX; - - return Threshold; -}(); - // Get value of the threshold for number of active command lists allowed before // we start heuristically cleaning them up. static const size_t CmdListsCleanupThreshold = [] { @@ -648,8 +626,8 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( // Immediate commandlists have been pre-allocated and are always available. if (Queue->UsingImmCmdLists) { CommandList = Queue->getQueueGroup(UseCopyEngine).getImmCmdList(); - if (CommandList->second.EventList.size() > - ImmCmdListsEventCleanupThreshold) { + if (CommandList->second.EventList.size() >= + Queue->getImmdCmmdListsEventCleanupThreshold()) { std::vector EventListToCleanup; Queue->resetCommandList(CommandList, false, EventListToCleanup); CleanupEventListFromResetCmdList(EventListToCleanup, true); @@ -743,11 +721,13 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence)); ZeStruct ZeQueueDesc; ZeQueueDesc.ordinal = QueueGroupOrdinal; + CommandList = Queue->CommandListMap .emplace(ZeCommandList, - ur_command_list_info_t{ZeFence, true, false, - ZeCommandQueue, ZeQueueDesc}) + ur_command_list_info_t(ZeFence, true, false, + ZeCommandQueue, ZeQueueDesc, + Queue->useCompletionBatching())) .first; } ZeCommandListCache.erase(ZeCommandListIt); diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 7f611208ff..32c0951793 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "command_buffer.hpp" @@ -1129,6 +1130,7 @@ ur_result_t ur_event_handle_t_::reset() { RefCountExternal = 0; 
RefCount.reset(); CommandList = std::nullopt; + completionBatch = std::nullopt; if (!isHostVisible()) HostVisibleEvent = nullptr; diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index c266de8c0d..a566c77825 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -222,6 +222,10 @@ struct ur_event_handle_t_ : _ur_object { // Get the host-visible event or create one and enqueue its signal. ur_result_t getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent); + + // completion batch for this event. Only used for out-of-order immediate + // command lists. + std::optional completionBatch; }; // Helper function to implement zeHostSynchronize. diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index c57892c80b..03c6c46651 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -10,13 +10,214 @@ #include #include +#include #include #include +#include #include "adapter.hpp" +#include "adapters/level_zero/event.hpp" #include "common.hpp" #include "queue.hpp" +#include "ur_api.h" #include "ur_level_zero.hpp" +#include "ur_util.hpp" +#include "ze_api.h" + +// Hard limit for the event completion batches. +static const uint64_t CompletionBatchesMax = [] { + // Default value chosen empirically to maximize the number of asynchronous + // in-flight operations and avoid excessive synchronous waits. + + return getenv_to_unsigned("UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_MAX") + .value_or(10); +}(); + +static const uint64_t CompletionEventsPerBatch = [] { + // The number of events to accumulate in each batch prior to waiting for + // completion. 
+ return getenv_to_unsigned("UR_L0_IMMEDIATE_COMMANDLISTS_EVENTS_PER_BATCH") + .value_or(256); +}(); + +ur_completion_batch::ur_completion_batch() + : barrierEvent(nullptr), st(EMPTY), numEvents(0) {} + +ur_completion_batch::~ur_completion_batch() { + if (barrierEvent) + urEventReleaseInternal(barrierEvent); +} + +bool ur_completion_batch::isFull() { + assert(st == ACCUMULATING); + + return numEvents >= CompletionEventsPerBatch; +} + +void ur_completion_batch::append() { + assert(st == ACCUMULATING); + numEvents++; +} + +ur_result_t ur_completion_batch::reset() { + st = EMPTY; + numEvents = 0; + + // we reuse the UR event handle but reset the internal level-zero one + if (barrierEvent) + ZE2UR_CALL(zeEventHostReset, (barrierEvent->ZeEvent)); + + return UR_RESULT_SUCCESS; +} + +void ur_completion_batch::use() { + assert(st == EMPTY); + st = ACCUMULATING; +} + +ur_completion_batch::state ur_completion_batch::getState() { return st; } + +ur_completion_batch::state ur_completion_batch::queryState() { + if (st == SEALED) { + checkComplete(); + } + + return st; +} + +bool ur_completion_batch::checkComplete() { + assert(st == COMPLETED || st == SEALED); + + if (st == COMPLETED) + return true; + + auto zeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, (barrierEvent->ZeEvent)); + if (zeResult == ZE_RESULT_SUCCESS) { + st = COMPLETED; + } + + return st == COMPLETED; +} + +ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, + ze_command_list_handle_t cmdlist) { + assert(st == ACCUMULATING); + + if (!barrierEvent) { + UR_CALL(EventCreate(queue->Context, queue, false, true, &barrierEvent)); + } + + // Instead of collecting all the batched events, we simply issue a global + // barrier for all prior events on the command list. This is simpler and + // showed to be faster in practice. 
+ ZE2UR_CALL(zeCommandListAppendBarrier, + (cmdlist, barrierEvent->ZeEvent, 0, nullptr)); + + st = SEALED; + + return UR_RESULT_SUCCESS; +} + +void ur_completion_batches::append(ur_event_handle_t event) { + active->append(); + event->completionBatch = active; +} + +void ur_completion_batches::moveCompletedEvents( + ur_completion_batch_it it, std::vector &events, + std::vector &EventListToCleanup) { + // This works by tagging all events belonging to a batch, and then removing + // all events in a vector with the tag (iterator) of the active batch. + // This could be optimized to remove a specific range of entries if we had a + // guarantee that all the appended events in the vector remain there in the + // same order. Unfortunately that is not simple to enforce. + // TODO: An even better approach would be to split the EventList vector into + // smaller batch-sized ones, but that would require a significant refactor. + + auto end = std::remove_if(events.begin(), events.end(), [&](auto &event) { + if (event->completionBatch == it) { + EventListToCleanup.push_back(event); + return true; + } else { + return false; + } + }); + events.erase(end, events.end()); +} + +ur_result_t ur_completion_batches::cleanup( + std::vector &events, + std::vector &EventListToCleanup) { + bool cleaned = false; + while (!sealed.empty()) { + auto oldest_sealed = sealed.front(); + if (oldest_sealed->queryState() == ur_completion_batch::COMPLETED) { + sealed.pop(); + moveCompletedEvents(oldest_sealed, events, EventListToCleanup); + UR_CALL(oldest_sealed->reset()); + cleaned = true; + } else { + break; + } + } + + return cleaned ? UR_RESULT_SUCCESS : UR_RESULT_ERROR_OUT_OF_RESOURCES; +} + +std::optional +ur_completion_batches::findFirstEmptyBatchOrCreate() { + for (auto it = batches.begin(); it != batches.end(); ++it) { + if (it->getState() == ur_completion_batch::EMPTY) { + return it; + } + } + + // try creating a new batch if allowed by the limit. 
+ if (batches.size() < CompletionBatchesMax) { + return batches.emplace(batches.end()); + } + + return std::nullopt; +} + +ur_completion_batches::ur_completion_batches() { + // Batches are created lazily on-demand. Start with just one. + active = batches.emplace(batches.begin()); + active->use(); +} + +ur_result_t ur_completion_batches::tryCleanup( + ur_queue_handle_t queue, ze_command_list_handle_t cmdlist, + std::vector &events, + std::vector &EventListToCleanup) { + cleanup(events, EventListToCleanup); + + if (active->isFull()) { + auto next_batch = findFirstEmptyBatchOrCreate(); + if (!next_batch) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; // EWOULDBLOCK + } + + UR_CALL(active->seal(queue, cmdlist)); + sealed.push(active); + active = *next_batch; + active->use(); + } + + return UR_RESULT_SUCCESS; +} + +void ur_completion_batches::forceReset() { + for (auto &b : batches) { + b.reset(); + } + while (!sealed.empty()) { + sealed.pop(); + } + + active = batches.begin(); + active->use(); +} /// @brief Cleanup events in the immediate lists of the queue. /// @param Queue Queue where events need to be cleaned up. @@ -424,6 +625,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease( // If the fence is a nullptr we are using immediate commandlists, // otherwise regular commandlists which use a fence. if (it->second.ZeFence == nullptr || it->second.ZeFenceInUse) { + // Destroy completions batches if they are being used. This needs + // to happen prior to resetCommandList so that all events are + // checked. 
+ it->second.completions.reset(); Queue->resetCommandList(it, true, EventListToCleanup); } // TODO: remove "if" when the problem is fixed in the level zero @@ -520,16 +725,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle( } void ur_queue_handle_t_::ur_queue_group_t::setImmCmdList( - ze_command_list_handle_t ZeCommandList) { + ur_queue_handle_t queue, ze_command_list_handle_t ZeCommandList) { // An immediate command list was given to us but we don't have the queue // descriptor information. Create a dummy and note that it is not recycleable. ZeStruct ZeQueueDesc; + ImmCmdLists = std::vector( 1, Queue->CommandListMap .insert(std::pair{ ZeCommandList, - {nullptr, true, false, nullptr, ZeQueueDesc, false}}) + ur_command_list_info_t(nullptr, true, false, nullptr, ZeQueueDesc, + queue->useCompletionBatching(), false)}) .first); } @@ -597,7 +804,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle( return UR_RESULT_ERROR_UNKNOWN; } auto &InitialGroup = (*RetQueue)->ComputeQueueGroupsByTID.begin()->second; - InitialGroup.setImmCmdList(ur_cast(NativeQueue)); + InitialGroup.setImmCmdList(*RetQueue, + ur_cast(NativeQueue)); } else { auto ZeQueue = ur_cast(NativeQueue); // Assume this is the "0" index queue in the compute command-group. @@ -1414,6 +1622,9 @@ ur_result_t ur_queue_handle_t_::synchronize() { // Cleanup all events from the synced command list. 
CleanupEventListFromResetCmdList(ImmCmdList->second.EventList, true); ImmCmdList->second.EventList.clear(); + if (auto &completions = ImmCmdList->second.completions; completions) { + completions->forceReset(); + } return UR_RESULT_SUCCESS; }; @@ -1706,6 +1917,14 @@ ur_result_t ur_queue_handle_t_::resetCommandList( } return UR_RESULT_SUCCESS; } + + if (auto &completions = CommandList->second.completions; completions) { + if (completions->tryCleanup(this, CommandList->first, EventList, + EventListToCleanup) == UR_RESULT_SUCCESS) { + return UR_RESULT_SUCCESS; + } + } + // For immediate commandlist reset only those events that have signalled. for (auto it = EventList.begin(); it != EventList.end();) { // Break early as soon as we found first incomplete event because next @@ -1747,6 +1966,13 @@ bool ur_command_list_info_t::isCopy(ur_queue_handle_t Queue) const { .ZeOrdinal; } +void ur_command_list_info_t::append(ur_event_handle_t Event) { + if (completions) { + completions->append(Event); + } + EventList.push_back(Event); +} + ur_command_list_ptr_t ur_queue_handle_t_::eventOpenCommandList(ur_event_handle_t Event) { using IsCopy = bool; @@ -1872,6 +2098,12 @@ int32_t ur_queue_handle_t_::ur_queue_group_t::getCmdQueueOrdinal( return Queue->Device->QueueGroup[QueueType].ZeOrdinal; } +bool ur_queue_handle_t_::useCompletionBatching() { + static bool enabled = getenv_tobool( + "UR_L0_IMMEDIATE_COMMANDLISTS_BATCH_EVENT_COMPLETIONS", false); + return enabled && !isInOrderQueue() && UsingImmCmdLists; +} + // Helper function to create a new command-list to this queue and associated // fence tracking its completion. This command list & fence are added to the // map of command lists in this queue with ZeFenceInUse = false. 
@@ -1906,10 +2138,12 @@ ur_result_t ur_queue_handle_t_::createCommandList( ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence)); ZeStruct ZeQueueDesc; ZeQueueDesc.ordinal = QueueGroupOrdinal; + std::tie(CommandList, std::ignore) = CommandListMap.insert( std::pair( - ZeCommandList, - {ZeFence, false, false, ZeCommandQueue, ZeQueueDesc, IsInOrderList})); + ZeCommandList, ur_command_list_info_t( + ZeFence, false, false, ZeCommandQueue, ZeQueueDesc, + useCompletionBatching(), true, IsInOrderList))); UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList)); UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine)); @@ -2061,12 +2295,43 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { (Queue->Context->ZeContext, Queue->Device->ZeDevice, &ZeCommandQueueDesc, &ZeCommandList)); } + ImmCmdLists[Index] = Queue->CommandListMap .insert(std::pair{ ZeCommandList, - {nullptr, true, false, nullptr, ZeCommandQueueDesc}}) + ur_command_list_info_t(nullptr, true, false, nullptr, + ZeCommandQueueDesc, + Queue->useCompletionBatching())}) .first; return ImmCmdLists[Index]; } + +// Get value of the threshold for number of events in immediate command lists. +// If number of events in the immediate command list exceeds this threshold then +// cleanup process for those events is executed. +static const size_t ImmCmdListsEventCleanupThreshold = [] { + const char *UrRet = + std::getenv("UR_L0_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD"); + const char *PiRet = std::getenv( + "SYCL_PI_LEVEL_ZERO_IMMEDIATE_COMMANDLISTS_EVENT_CLEANUP_THRESHOLD"); + const char *ImmCmdListsEventCleanupThresholdStr = + UrRet ? UrRet : (PiRet ? PiRet : nullptr); + static constexpr int Default = 1000; + if (!ImmCmdListsEventCleanupThresholdStr) + return Default; + + int Threshold = std::atoi(ImmCmdListsEventCleanupThresholdStr); + + // Basically disable threshold if negative value is provided. 
+ if (Threshold < 0) + return INT_MAX; + + return Threshold; +}(); + +size_t ur_queue_handle_t_::getImmdCmmdListsEventCleanupThreshold() { + return useCompletionBatching() ? CompletionEventsPerBatch + : ImmCmdListsEventCleanupThreshold; +} diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 978ea70c0e..03922bd2dc 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,124 @@ extern "C" { ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue); } // extern "C" +struct ur_completion_batch; +using ur_completion_batch_list = std::list; +using ur_completion_batch_it = ur_completion_batch_list::iterator; + +// Event completion batch for aggregating status checks of many events into +// a single one through a barrier. Batches can be continuously reused. +// Batches start empty and accumulate events, get sealed (which issues +// an asynchronous barrier on the command list), and then must be waited on +// for all the events to complete. +struct ur_completion_batch { + ur_completion_batch(); + ~ur_completion_batch(); + + enum state { + EMPTY, // use() -> ACCUMULATING + ACCUMULATING, // append() -> full -> seal() -> SEALED + SEALED, // checkComplete() -> COMPLETED + COMPLETED, // reset() -> EMPTY + }; + + // Returns the state of the batch. Might be stale. + state getState(); + + // Return the most up-to-date state of the batch. Might query the state of the + // underlying barrier event. + state queryState(); + + // Must be called on any completion batch prior to being used for events. + void use(); + + // Checks whether the batch is at capacity. This is a soft limit and can be + // exceeded if necessary. + bool isFull(); + + // Appends an event to the batch. + void append(); + + // Seals the event batch and appends a barrier to the command list. 
+ // Adding any further events after this, but before reset, is undefined. + ur_result_t seal(ur_queue_handle_t queue, ze_command_list_handle_t cmdlist); + + // Resets a complete batch back to an empty state. Cleans up internal state + // but keeps allocated resources for reuse. + ur_result_t reset(); + +private: + // Checks whether all the events in the batch have completed. Might query + // the underlying event status. Can only be called on a sealed batch. + bool checkComplete(); + + // Internal barrier event that is signaled on completion of the batched + // events. + ur_event_handle_t barrierEvent; + + // Current batch state. Don't use directly. + state st; + + // Number of accumulated events. + size_t numEvents; +}; + +// A collection of event completion batches. Manages querying event status +// in batches of events instead of individually, reducing the number of total +// queries necessary to determine whether a set of events have signaled. +struct ur_completion_batches { + // This structure should never be copied because it contains a stable iterator + // into a list. Copying it would likely result in unexpected behavior. + ur_completion_batches(const ur_completion_batches &) = delete; + ur_completion_batches &operator=(const ur_completion_batches &) = delete; + ur_completion_batches(ur_completion_batches &&) = default; + ur_completion_batches &operator=(ur_completion_batches &&) = default; + + ur_completion_batches(); + + // Cleans up completed batches, and, if the currently active batch + // is full, attempts to find an empty batch to be used as active. + // If one is found, the current one is sealed, and the new one is + // set as active. Otherwise, UR_RESULT_ERROR_OUT_OF_RESOURCES is + // returned to indicate that there are no batches available. + // This is safe, but will increase how many events are associated + // with the active batch. 
+ ur_result_t tryCleanup(ur_queue_handle_t queue, + ze_command_list_handle_t cmdlist, + std::vector &EventList, + std::vector &EventListToCleanup); + + // Adds an event to the active batch. + // Ideally, all events that are appended here are then provided in the + // vector for cleanup. Otherwise the event batch will simply ignore + // missing events when it comes time for cleanup. + void append(ur_event_handle_t event); + + // Resets all the batches without waiting for event completion. + // Only safe when the command list was fully synchronized through + // other means. + void forceReset(); + +private: + // Checks the state of all previously sealed batches. If any are complete, + // moves the associated events from the EventList to EventListToCleanup, + // and then resets the batch for reuse. + ur_result_t cleanup(std::vector &EventList, + std::vector &EventListToCleanup); + + // Moves the completed events from EventList to EventListToCleanup. + void moveCompletedEvents(ur_completion_batch_it it, + std::vector &EventList, + std::vector &EventListToCleanup); + + // Finds or creates an empty batch. This might fail if there are no empty + // batches and a batch limit has been reached. + std::optional findFirstEmptyBatchOrCreate(); + + ur_completion_batch_list batches; + std::queue sealed; + ur_completion_batch_it active; +}; + ur_result_t resetCommandLists(ur_queue_handle_t Queue); ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false, @@ -40,23 +159,35 @@ CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false, // This is because command-lists are re-used across multiple queues // in the same context. 
struct ur_command_list_info_t { + ur_command_list_info_t(ze_fence_handle_t ZeFence, bool ZeFenceInUse, + bool IsClosed, ze_command_queue_handle_t ZeQueue, + ZeStruct ZeQueueDesc, + bool UseCompletionBatching, bool CanReuse = true, + bool IsInOrderList = false) + : ZeFence(ZeFence), ZeFenceInUse(ZeFenceInUse), IsClosed(IsClosed), + ZeQueue(ZeQueue), ZeQueueDesc(ZeQueueDesc), + IsInOrderList(IsInOrderList), CanReuse(CanReuse) { + if (UseCompletionBatching) { + completions = ur_completion_batches(); + } + } // The Level-Zero fence that will be signalled at completion. // Immediate commandlists do not have an associated fence. // A nullptr for the fence indicates that this is an immediate commandlist. - ze_fence_handle_t ZeFence{nullptr}; + ze_fence_handle_t ZeFence; // Record if the fence is in use. // This is needed to avoid leak of the tracked command-list if the fence // was not yet signaled at the time all events in that list were already // completed (we are polling the fence at events completion). The fence // may be still "in-use" due to sporadic delay in HW. - bool ZeFenceInUse{false}; + bool ZeFenceInUse; // Indicates if command list is in closed state. This is needed to avoid // appending commands to the closed command list. - bool IsClosed{false}; + bool IsClosed; // Record the queue to which the command list will be submitted. - ze_command_queue_handle_t ZeQueue{nullptr}; + ze_command_queue_handle_t ZeQueue; // Record the queue descriptor fields used when creating the command list // because we cannot recover these fields from the command list. Immediate @@ -66,20 +197,24 @@ struct ur_command_list_info_t { // used and then this entry is marked as not eligible for recycling. ZeStruct ZeQueueDesc; // Indicates if this is an inorder list - bool IsInOrderList{false}; - bool CanReuse{true}; + bool IsInOrderList; + bool CanReuse; // Helper functions to tell if this is a copy command-list. 
bool isCopy(ur_queue_handle_t Queue) const; + // An optional event completion batching mechanism for out-of-order immediate + // command lists. + std::optional completions; + // Keeps events created by commands submitted into this command-list. // TODO: use this for explicit wait/cleanup of events at command-list // completion. // TODO: use this for optimizing events in the same command-list, e.g. // only have last one visible to the host. - std::vector EventList{}; + std::vector EventList; size_t size() const { return EventList.size(); } - void append(ur_event_handle_t Event) { EventList.push_back(Event); } + void append(ur_event_handle_t Event); }; // The map type that would track all command-lists in a queue. @@ -134,7 +269,7 @@ struct ur_queue_handle_t_ : _ur_object { ze_command_queue_handle_t &getZeQueue(uint32_t *QueueGroupOrdinal); // This function sets an immediate commandlist from the interop interface. - void setImmCmdList(ze_command_list_handle_t); + void setImmCmdList(ur_queue_handle_t queue, ze_command_list_handle_t); // This function returns the next immediate commandlist to use. ur_command_list_ptr_t &getImmCmdList(); @@ -526,6 +661,13 @@ struct ur_queue_handle_t_ : _ur_object { bool isProfilingEnabled() { return ((this->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) != 0); } + + // Checks whether this queue supports and uses event completion batching. + // Can be true only when using out-of-order immediate command lists. + bool useCompletionBatching(); + + // Threshold for cleaning up the EventList for immediate command lists. 
+ size_t getImmdCmmdListsEventCleanupThreshold(); }; // This helper function creates a ur_event_handle_t and associate a diff --git a/source/common/ur_util.hpp b/source/common/ur_util.hpp index 3bd7214b8c..94e306f48f 100644 --- a/source/common/ur_util.hpp +++ b/source/common/ur_util.hpp @@ -8,6 +8,7 @@ * */ +#include #ifndef UR_UTIL_H #define UR_UTIL_H 1 @@ -87,9 +88,23 @@ inline std::string create_library_path(const char *name, const char *path) { /////////////////////////////////////////////////////////////////////////////// std::optional ur_getenv(const char *name); -inline bool getenv_tobool(const char *name) { +inline bool getenv_tobool(const char *name, bool def = false) { + if (auto env = ur_getenv(name); env) { + std::transform(env->begin(), env->end(), env->begin(), + [](unsigned char c) { return std::tolower(c); }); + auto true_str = {"y", "yes", "t", "true", "1"}; + return std::find(true_str.begin(), true_str.end(), *env) != + true_str.end(); + } + + return def; +} + +inline std::optional getenv_to_unsigned(const char *name) try { auto env = ur_getenv(name); - return env.has_value(); + return env ? std::optional(std::stoi(*env)) : std::nullopt; +} catch (...) 
{ + return std::nullopt; } static void throw_wrong_format_vec(const char *env_var_name, From 02e7f285d8b1f7151c45f522a6478eadad6cb0d4 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Fri, 19 Apr 2024 10:35:04 +0100 Subject: [PATCH 20/21] Merge pull request #1495 from nrspruit/kernel_work_group_size [L0] Add support for reading maxGroupSize from kernel prop extension --- source/adapters/level_zero/common.cpp | 5 +++++ source/adapters/level_zero/kernel.cpp | 18 ++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index af79adeb5d..7ec5f9ef6b 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -215,6 +215,11 @@ ze_structure_type_t getZeStructureType() { return ZE_STRUCTURE_TYPE_RELAXED_ALLOCATION_LIMITS_EXP_DESC; } +template <> +ze_structure_type_t +getZeStructureType() { + return ZE_STRUCTURE_TYPE_KERNEL_MAX_GROUP_SIZE_EXT_PROPERTIES; +} template <> ze_structure_type_t getZeStructureType() { return ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; } diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index c40e4ef0e3..2cdf4b2cd2 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -574,10 +574,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo( return ReturnValue(GlobalWorkSize); } case UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE: { - // As of right now, L0 is missing API to query kernel and device specific - // max work group size. 
- return ReturnValue( - uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize}); + ZeStruct workGroupProperties; + workGroupProperties.maxGroupSize = 0; + + ZeStruct kernelProperties; + kernelProperties.pNext = &workGroupProperties; + + auto ZeResult = ZE_CALL_NOCHECK( + zeKernelGetProperties, + (Kernel->ZeKernelMap[Device->ZeDevice], &kernelProperties)); + if (ZeResult || workGroupProperties.maxGroupSize == 0) { + return ReturnValue( + uint64_t{Device->ZeDeviceComputeProperties->maxTotalGroupSize}); + } + return ReturnValue(workGroupProperties.maxGroupSize); } case UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE: { struct { From 84d7a4970a6e094a0648a0d1d60d10389b71fad3 Mon Sep 17 00:00:00 2001 From: aarongreig Date: Fri, 19 Apr 2024 10:35:14 +0100 Subject: [PATCH 21/21] Merge pull request #1517 from nrspruit/fix_l0_coverity_sync [L0] Store LastCommandEvent before unlock during queue sync --- source/adapters/level_zero/queue.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 03c6c46651..e21e4e1b0a 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1635,8 +1635,9 @@ ur_result_t ur_queue_handle_t_::synchronize() { // event. if (isInOrderQueue() && !LastCommandEvent->IsDiscarded) { if (UrL0QueueSyncNonBlocking) { + auto SyncZeEvent = LastCommandEvent->ZeEvent; this->Mutex.unlock(); - ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent)); + ZE2UR_CALL(zeHostSynchronize, (SyncZeEvent)); this->Mutex.lock(); } else { ZE2UR_CALL(zeHostSynchronize, (LastCommandEvent->ZeEvent));