From aea0591a260b3cab34c04fc0787d40d33e0a6572 Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Thu, 20 Feb 2025 15:41:00 +0000 Subject: [PATCH 1/2] Add acceleration structure support --- .../source/layer_device_functions.hpp | 20 +- layer_gpu_timeline/README_LAYER.md | 19 +- layer_gpu_timeline/docs/developer-docs.md | 46 ++++ .../source/layer_device_functions.hpp | 40 +++- .../layer_device_functions_trace_rays.cpp | 79 +++++++ .../layer_device_functions_transfer.cpp | 95 ++++++++ .../source/timeline_protobuf_encoder.cpp | 213 +++++++++++++++--- .../source/timeline_protobuf_encoder.hpp | 4 + layer_gpu_timeline/timeline.proto | 51 ++++- .../protos/layer_driver/timeline_pb2.py | 34 +-- source_common/trackers/command_buffer.cpp | 38 +++- source_common/trackers/command_buffer.hpp | 22 +- .../trackers/layer_command_stream.cpp | 54 +++++ .../trackers/layer_command_stream.hpp | 113 +++++++++- source_common/trackers/queue.cpp | 6 +- source_common/trackers/queue.hpp | 18 ++ 16 files changed, 763 insertions(+), 89 deletions(-) create mode 100644 layer_gpu_timeline/docs/developer-docs.md diff --git a/layer_gpu_support/source/layer_device_functions.hpp b/layer_gpu_support/source/layer_device_functions.hpp index faa98f8..0343ff7 100644 --- a/layer_gpu_support/source/layer_device_functions.hpp +++ b/layer_gpu_support/source/layer_device_functions.hpp @@ -262,32 +262,32 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer(VkCommandBuffe /* See Vulkan API for documentation. */ template<> VKAPI_ATTR void VKAPI_CALL - layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, - const VkCopyAccelerationStructureInfoKHR* pInfo); + layer_vkCmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); /* See Vulkan API for documentation. 
*/ template<> VKAPI_ATTR void VKAPI_CALL - layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, - const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); + layer_vkCmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); /* See Vulkan API for documentation. */ template<> VKAPI_ATTR void VKAPI_CALL - layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, - const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); + layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureInfoKHR* pInfo); /* See Vulkan API for documentation. */ template<> VKAPI_ATTR void VKAPI_CALL - layer_vkCmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); /* See Vulkan API for documentation. */ template<> VKAPI_ATTR void VKAPI_CALL - layer_vkCmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); + layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); // Functions for queues diff --git a/layer_gpu_timeline/README_LAYER.md b/layer_gpu_timeline/README_LAYER.md index ea2ba06..f30d0df 100644 --- a/layer_gpu_timeline/README_LAYER.md +++ b/layer_gpu_timeline/README_LAYER.md @@ -189,20 +189,11 @@ render pass. When the layer detects suspended render pass in a multi-submit command buffer, it will still capture and report the workload, but with an unknown draw call count. -## Command stream modelling - -Most properties we track are a property of the command buffer recording in -isolation. However, the user debug label stack is a property of the queue and -persists across submits. 
We can therefore only determine the debug label -associated with a workload in the command stream at submit time, and must -resolve it per workload inside the command buffer. - -To support this we implement a software command stream that contains simple -bytecode actions that represent the sequence of debug label and workload -commands inside each command buffer. This "command stream" can be played to -update the the queue state at submit time, triggering metadata submission -for each workload that can snapshot the current state of the user debug label -stack at that point in the command stream. +## Developer documentation + +This page covers using the layer as a tool for application development. For +documentation about developing the layer itself, please refer to the +[developer documentation](./docs/developer-docs.md). - - - diff --git a/layer_gpu_timeline/docs/developer-docs.md b/layer_gpu_timeline/docs/developer-docs.md new file mode 100644 index 0000000..229a623 --- /dev/null +++ b/layer_gpu_timeline/docs/developer-docs.md @@ -0,0 +1,46 @@ +# Layer: GPU Timeline - Developer Documentation + +This layer is used with Arm GPU tooling that can show the scheduling of +workloads on to the GPU hardware. The layer provides additional semantic +annotation, extending the scheduling data from the Android Perfetto render +stages telemetry with useful API-aware context. + +## Command stream modelling + +Most properties we track are a property of the command buffer recording in +isolation. However, the user debug label stack is a property of the queue and +persists across submits. We can therefore only determine the debug label +associated with a workload in the command stream at submit time, and must +resolve it per workload inside the command buffer. + +To support this we implement a software command stream that contains simple +bytecode actions that represent the sequence of debug label and workload +commands inside each command buffer. 
This "command stream" can be played to +update the queue state at submit time, triggering metadata submission +for each workload that can snapshot the current state of the user debug label +stack at that point in the command stream. + +## Updating protobuf + +The protocol between the layer and the host tools uses Google Protocol +Buffers to implement the message encoding. + +The layer implementation uses Protopuf, a light-weight implementation which +can be trivially integrated into the layer. Protopuf message definitions are +defined directly in the C++ code (see `timeline_protobuf_encoder.cpp`) and do +not use the `timeline.proto` definitions. + +The host implementation uses the Google `protoc` compiler to generate native +bindings from the `timeline.proto` definition. When updating the protocol +buffers you must ensure that the C++ and `proto` definitions match. + +To regenerate the Python bindings, run the following command from the +`layer_gpu_timeline` directory: + +```sh +protoc ./timeline.proto --python_out=../lglpy/timeline/protos/layer_driver/ +``` + +- - - + +_Copyright © 2024-2025, Arm Limited and contributors._ diff --git a/layer_gpu_timeline/source/layer_device_functions.hpp b/layer_gpu_timeline/source/layer_device_functions.hpp index 2fc8460..7b3ba0f 100644 --- a/layer_gpu_timeline/source/layer_device_functions.hpp +++ b/layer_gpu_timeline/source/layer_device_functions.hpp @@ -1,7 +1,7 @@ /* * SPDX-License-Identifier: MIT * ---------------------------------------------------------------------------- - * Copyright (c) 2024 Arm Limited + * Copyright (c) 2024-2025 Arm Limited * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to @@ -298,6 +298,26 @@ VKAPI_ATTR void VKAPI_CALL uint32_t height, uint32_t depth); +// Commands for acceleration structure builds + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkDeviceAddress* pIndirectDeviceAddresses, + const uint32_t* pIndirectStrides, + const uint32_t* const* ppMaxPrimitiveCounts); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos); + // Commands for transfers /* See Vulkan API for documentation. */ @@ -406,6 +426,24 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo); +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureInfoKHR* pInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo); + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo); + // Functions for debug /* See Vulkan API for documentation. 
*/ diff --git a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp index b37e4de..927d1e1 100644 --- a/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_trace_rays.cpp @@ -33,6 +33,26 @@ extern std::mutex g_vulkanLock; +/** + * @brief Register an acceleration structure build with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param buildType The build type. + * @param primitiveCount The number of primitives in the build. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerAccelerationStructureBuild(Device* layer, + VkCommandBuffer commandBuffer, + Tracker::LCSAccelerationStructureBuild::Type buildType, + int64_t primitiveCount) +{ + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.accelerationStructureBuild(buildType, primitiveCount); +} + /** * @brief Register a trace rays dispatch with the tracker. * @@ -55,6 +75,65 @@ static uint64_t registerTraceRays(Device* layer, return cb.traceRays(itemsX, itemsY, itemsZ); } +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresIndirectKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkDeviceAddress* pIndirectDeviceAddresses, + const uint32_t* pIndirectStrides, + const uint32_t* const* ppMaxPrimitiveCounts) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerAccelerationStructureBuild(layer, + commandBuffer, + Tracker::LCSAccelerationStructureBuild::Type::unknown, + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBuildAccelerationStructuresIndirectKHR(commandBuffer, + infoCount, + pInfos, + pIndirectDeviceAddresses, + pIndirectStrides, + ppMaxPrimitiveCounts); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL layer_vkCmdBuildAccelerationStructuresKHR( + VkCommandBuffer commandBuffer, + uint32_t infoCount, + const VkAccelerationStructureBuildGeometryInfoKHR* pInfos, + const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = registerAccelerationStructureBuild(layer, + commandBuffer, + Tracker::LCSAccelerationStructureBuild::Type::unknown, + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdBuildAccelerationStructuresKHR(commandBuffer, infoCount, pInfos, ppBuildRangeInfos); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + /* See Vulkan API for documentation. 
*/ template<> VKAPI_ATTR void VKAPI_CALL layer_vkCmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp index 253c970..fe04f84 100644 --- a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp @@ -74,6 +74,26 @@ static uint64_t registerImageTransfer(Device* layer, return cb.imageTransfer(transferType, pixelCount); } +/** + * @brief Register an acceleration structure transfer with the tracker. + * + * @param layer The layer context for the device. + * @param commandBuffer The command buffer we are recording. + * @param transferType The type of transfer being performed. + * @param byteCount The number of bytes transferred. + * + * @return The assigned tagID for the workload. + */ +static uint64_t registerAccelerationStructureTransfer(Device* layer, + VkCommandBuffer commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type transferType, + int64_t byteCount) +{ + auto& tracker = layer->getStateTracker(); + auto& cb = tracker.getCommandBuffer(commandBuffer); + return cb.accelerationStructureTransfer(transferType, byteCount); +} + // Commands for transfers /* See Vulkan API for documentation. */ @@ -561,3 +581,78 @@ VKAPI_ATTR void VKAPI_CALL layer->driver.vkCmdCopyImageToBuffer2KHR(commandBuffer, pCopyImageToBufferInfo); layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); } + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureInfoKHR* pInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = + registerAccelerationStructureTransfer(layer, + commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type::struct_to_struct, + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyAccelerationStructureKHR(commandBuffer, pInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. */ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, + const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = + registerAccelerationStructureTransfer(layer, + commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type::struct_to_mem, + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyAccelerationStructureToMemoryKHR(commandBuffer, pInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} + +/* See Vulkan API for documentation. 
*/ +template<> +VKAPI_ATTR void VKAPI_CALL + layer_vkCmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, + const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo) +{ + LAYER_TRACE(__func__); + + // Hold the lock to access layer-wide global store + std::unique_lock lock {g_vulkanLock}; + auto* layer = Device::retrieve(commandBuffer); + + uint64_t tagID = + registerAccelerationStructureTransfer(layer, + commandBuffer, + Tracker::LCSAccelerationStructureTransfer::Type::mem_to_struct, + -1); + + // Release the lock to call into the driver + lock.unlock(); + emitStartTag(layer, commandBuffer, tagID); + layer->driver.vkCmdCopyMemoryToAccelerationStructureKHR(commandBuffer, pInfo); + layer->driver.vkCmdEndDebugUtilsLabelEXT(commandBuffer); +} diff --git a/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp index c69c58e..2d5bdfd 100644 --- a/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp +++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp @@ -173,7 +173,7 @@ enum class ImageTransferType using ImageTransfer = pp::message< /* The unique identifier for this operation */ pp::uint64_field<"tag_id", 1>, - /* The number of pixels being transfered */ + /* The number of pixels being transferred */ pp::int64_field<"pixel_count", 2>, /* The image type */ pp::enum_field<"transfer_type", 3, ImageTransferType>, @@ -192,24 +192,66 @@ enum class BufferTransferType using BufferTransfer = pp::message< /* The unique identifier for this operation */ pp::uint64_field<"tag_id", 1>, - /* The number of bytes being transfered */ + /* The number of bytes being transferred */ pp::int64_field<"byte_count", 2>, /* The buffer type */ pp::enum_field<"transfer_type", 3, BufferTransferType>, /* Any user defined debug labels associated with the dispatch */ pp::string_field<"debug_label", 4, pp::repeated>>; +/* Enumerates possible acceleration structure build types */ +enum class 
AccelerationStructureBuildType +{ + unknown_as_build = 0, + fast_build = 1, + fast_trace = 2, +}; + +/* An acceleration structure build submission */ +using AccelerationStructureBuild = pp::message< + /* The unique identifier for this operation */ + pp::uint64_field<"tag_id", 1>, + /* The number of primitives in the build */ + pp::int64_field<"primitive_count", 2>, + /* The build type */ + pp::enum_field<"build_type", 3, AccelerationStructureBuildType>, + /* Any user defined debug labels associated with the build */ + pp::string_field<"debug_label", 4, pp::repeated>>; + +/* Enumerates possible acceleration structure transfer types */ +enum class AccelerationStructureTransferType +{ + unknown_as_transfer = 0, + struct_to_struct = 1, + struct_to_mem = 2, + mem_to_struct = 3, +}; + +/* An acceleration structure transfer submission */ +using AccelerationStructureTransfer = pp::message< + /* The unique identifier for this operation */ + pp::uint64_field<"tag_id", 1>, + /* The number of bytes being transferred */ + pp::int64_field<"byte_count", 2>, + /* The transfer type */ + pp::enum_field<"transfer_type", 3, AccelerationStructureTransferType>, + /* Any user defined debug labels associated with the transfer */ + pp::string_field<"debug_label", 4, pp::repeated>>; + /* The data payload message that wraps all other messages */ -using TimelineRecord = pp::message, - pp::message_field<"metadata", 2, DeviceMetadata>, - pp::message_field<"frame", 3, Frame>, - pp::message_field<"submit", 4, Submit>, - pp::message_field<"renderpass", 5, BeginRenderpass>, - pp::message_field<"continue_renderpass", 6, ContinueRenderpass>, - pp::message_field<"dispatch", 7, Dispatch>, - pp::message_field<"trace_rays", 8, TraceRays>, - pp::message_field<"image_transfer", 9, ImageTransfer>, - pp::message_field<"buffer_transfer", 10, BufferTransfer>>; +using TimelineRecord = + pp::message, + pp::message_field<"metadata", 2, DeviceMetadata>, + pp::message_field<"frame", 3, Frame>, + 
pp::message_field<"submit", 4, Submit>, + pp::message_field<"renderpass", 5, BeginRenderpass>, + pp::message_field<"continue_renderpass", 6, ContinueRenderpass>, + pp::message_field<"dispatch", 7, Dispatch>, + pp::message_field<"trace_rays", 8, TraceRays>, + pp::message_field<"image_transfer", 9, ImageTransfer>, + pp::message_field<"buffer_transfer", 10, BufferTransfer>, + pp::message_field<"acceleration_structure_build", 11, AccelerationStructureBuild>, + pp::message_field<"acceleration_structure_transfer", 12, AccelerationStructureTransfer>>; namespace { @@ -245,10 +287,11 @@ Comms::MessageData packBuffer(pp::constant c, T&& f) } /** - * @brief Map the state-tracker enum value that describes the renderpass attachment name to some pair of + * @brief Map the state-tracker enum value that describes the render pass attachment name to some pair of * protocol values, being the attachment type, and optional attachment index. * - * @param name The name value to map to the pair of type and index + * @param name The name value to map to the pair of type and index + * * @return A pair, where the first value is the corresponding attachment type, and the second value is * the corresponding attachment index (or nullopt in the case the index is not relevant). */ @@ -283,6 +326,33 @@ constexpr std::pair> mapRender } } +/** + * @brief Map the enum value that describes the acceleration structure build into the protocol encoded value + * + * NB: Whilst we are currently just replicating one enum value into another (which the compiler should be smart enough + * to fix), we do it this way to ensure we decouple the state-tracker from the protobuf encoding, since we don't want to + * accidentally change some enum wire-value in the future. 
+ * + * @param type The type enum to convert + * + * @return The wire value enum to store in the protobuf message + */ +constexpr AccelerationStructureBuildType mapASBuildType(Tracker::LCSAccelerationStructureBuild::Type type) +{ + switch (type) + { + case Tracker::LCSAccelerationStructureBuild::Type::unknown: + return AccelerationStructureBuildType::unknown_as_build; + case Tracker::LCSAccelerationStructureBuild::Type::fast_build: + return AccelerationStructureBuildType::fast_build; + case Tracker::LCSAccelerationStructureBuild::Type::fast_trace: + return AccelerationStructureBuildType::fast_trace; + default: + assert(false && "Unexpected LCSAccelerationStructureBuild::Type"); + return AccelerationStructureBuildType::unknown_as_build; + } +} + /** * @brief Map the state-tracker enum value that describes the buffer transfer type into the protocol encoded value * @@ -290,7 +360,8 @@ constexpr std::pair> mapRender * to fix), we do it this way to ensure we decouple the state-tracker from the protobuf encoding, since we don't want to * accidentally change some enum wire-value in the future. * - * @param type The type enum to convert + * @param type The type enum to convert + * * @return The wire value enum to store in the protobuf message */ constexpr BufferTransferType mapBufferTransferType(Tracker::LCSBufferTransfer::Type type) @@ -316,7 +387,8 @@ constexpr BufferTransferType mapBufferTransferType(Tracker::LCSBufferTransfer::T * to fix), we do it this way to ensure we decouple the state-tracker from the protobuf encoding, since we don't want to * accidentally change some enum wire-value in the future. 
* * @param type The type enum to convert + * @param type The type enum to convert + * * @return The wire value enum to store in the protobuf message */ constexpr ImageTransferType mapImageTransferType(Tracker::LCSImageTransfer::Type type) @@ -339,11 +411,40 @@ constexpr ImageTransferType mapImageTransferType(Tracker::LCSImageTransfer::Type } } +/** + * @brief Map the state-tracker acceleration structure transfer type into the protocol encoded value + * + * NB: Whilst we are currently just replicating one enum value into another (which the compiler should be smart enough + * to fix), we do it this way to ensure we decouple the state-tracker from the protobuf encoding, since we don't want to + * accidentally change some enum wire-value in the future. + * + * @param type The type enum to convert + * + * @return The wire value enum to store in the protobuf message + */ +constexpr AccelerationStructureTransferType mapASTransferType(Tracker::LCSAccelerationStructureTransfer::Type type) +{ + switch (type) + { + case Tracker::LCSAccelerationStructureTransfer::Type::unknown: + return AccelerationStructureTransferType::unknown_as_transfer; + case Tracker::LCSAccelerationStructureTransfer::Type::struct_to_struct: + return AccelerationStructureTransferType::struct_to_struct; + case Tracker::LCSAccelerationStructureTransfer::Type::struct_to_mem: + return AccelerationStructureTransferType::struct_to_mem; + case Tracker::LCSAccelerationStructureTransfer::Type::mem_to_struct: + return AccelerationStructureTransferType::mem_to_struct; + default: + assert(false && "Unexpected LCSBufferTransfer::Type"); + return AccelerationStructureTransferType::unknown_as_transfer; + } +} + /** * @brief Serialize the metadata for this render pass workload. * - * @param renderpass The renderpass to serialize - * @param debugLabel The debug label stack of the VkQueue at submit time. 
+ * @param renderpass The render pass to serialize + * @param debugLabel The debug label stack of the VkQueue at submit time. */ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std::vector& debugLabel) { @@ -357,7 +458,7 @@ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std ? -1 : static_cast(renderpass.getDrawCallCount())); - // make the attachements array + // Make the attachments array const auto& attachments = renderpass.getAttachments(); std::vector attachmentsMsg {}; attachmentsMsg.reserve(attachments.size()); @@ -372,7 +473,7 @@ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std // "not_loaded" / "not_stored" since that is the uncommon case (attachment.isLoaded() ? std::nullopt : std::make_optional(false)), (attachment.isStored() ? std::nullopt : std::make_optional(false)), - // resolved is not inverted since that is the incommon case + // resolved is not inverted since that is the uncommon case (attachment.isResolved() ? std::make_optional(true) : std::nullopt)); } @@ -391,8 +492,8 @@ Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std /** * @brief Serialize the metadata for this render pass continuation workload. * - * @param continuation The renderpass continuation to serialize - * @param tagIDContinuation The ID of the workload if this is a continuation of it. + * @param continuation The renderpass continuation to serialize + * @param tagIDContinuation The ID of the workload if this is a continuation of it. */ Comms::MessageData serialize(const Tracker::LCSRenderPassContinuation& continuation, uint64_t tagIDContinuation) { @@ -409,8 +510,8 @@ Comms::MessageData serialize(const Tracker::LCSRenderPassContinuation& continuat /** * @brief Get the metadata for this workload * - * @param dispatch The dispatch to serialize - * @param debugLabel The debug label stack for the VkQueue at submit time. 
+ * @param dispatch The dispatch to serialize + * @param debugLabel The debug label stack for the VkQueue at submit time. */ Comms::MessageData serialize(const Tracker::LCSDispatch& dispatch, const std::vector& debugLabel) { @@ -429,8 +530,8 @@ Comms::MessageData serialize(const Tracker::LCSDispatch& dispatch, const std::ve /** * @brief Get the metadata for this workload * - * @param traceRays The trace rays to serialize - * @param debugLabel The debug label stack for the VkQueue at submit time. + * @param traceRays The trace rays to serialize + * @param debugLabel The debug label stack for the VkQueue at submit time. */ Comms::MessageData serialize(const Tracker::LCSTraceRays& traceRays, const std::vector& debugLabel) { @@ -449,8 +550,8 @@ Comms::MessageData serialize(const Tracker::LCSTraceRays& traceRays, const std:: /** * @brief Get the metadata for this workload * - * @param imageTransfer The image transfer to serialize - * @param debugLabel The debug label stack for the VkQueue at submit time. + * @param imageTransfer The image transfer to serialize + * @param debugLabel The debug label stack for the VkQueue at submit time. */ Comms::MessageData serialize(const Tracker::LCSImageTransfer& imageTransfer, const std::vector& debugLabel) { @@ -468,8 +569,8 @@ Comms::MessageData serialize(const Tracker::LCSImageTransfer& imageTransfer, con /** * @brief Get the metadata for this workload * - * @param bufferTransfer The buffer transfer to serialize - * @param debugLabel The debug label stack for the VkQueue at submit time. + * @param bufferTransfer The buffer transfer to serialize + * @param debugLabel The debug label stack for the VkQueue at submit time. 
*/ Comms::MessageData serialize(const Tracker::LCSBufferTransfer& bufferTransfer, const std::vector& debugLabel) @@ -484,6 +585,46 @@ Comms::MessageData serialize(const Tracker::LCSBufferTransfer& bufferTransfer, debugLabel, }); } + +/** + * @brief Get the metadata for this workload + * + * @param asBuild The acceleration structure build to serialize + * @param debugLabel The debug label stack for the VkQueue at submit time. + */ +Comms::MessageData serialize(const Tracker::LCSAccelerationStructureBuild& asBuild, + const std::vector& debugLabel) +{ + using namespace pp; + + return packBuffer("acceleration_structure_build"_f, + AccelerationStructureBuild { + asBuild.getTagID(), + asBuild.getPrimitiveCount(), + mapASBuildType(asBuild.getBuildType()), + debugLabel, + }); +} + +/** + * @brief Get the metadata for this workload + * + * @param asTransfer The acceleration structure transfer to serialize + * @param debugLabel The debug label stack for the VkQueue at submit time. + */ +Comms::MessageData serialize(const Tracker::LCSAccelerationStructureTransfer& asTransfer, + const std::vector& debugLabel) +{ + using namespace pp; + + return packBuffer("acceleration_structure_transfer"_f, + AccelerationStructureTransfer { + asTransfer.getTagID(), + asTransfer.getByteCount(), + mapASTransferType(asTransfer.getTransferType()), + debugLabel, + }); +} } void TimelineProtobufEncoder::emitHeaderMessage(TimelineComms& comms) @@ -578,3 +719,15 @@ void TimelineProtobufEncoder::operator()(const Tracker::LCSBufferTransfer& buffe { device.txMessage(serialize(bufferTransfer, debugStack)); } + +void TimelineProtobufEncoder::operator()(const Tracker::LCSAccelerationStructureBuild& asBuild, + const std::vector& debugStack) +{ + device.txMessage(serialize(asBuild, debugStack)); +} + +void TimelineProtobufEncoder::operator()(const Tracker::LCSAccelerationStructureTransfer& asTransfer, + const std::vector& debugStack) +{ + device.txMessage(serialize(asTransfer, debugStack)); +} diff --git 
a/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp index 721007f..34e6e4b 100644 --- a/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp +++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp @@ -117,6 +117,10 @@ class TimelineProtobufEncoder : public Tracker::SubmitCommandWorkloadVisitor const std::vector& debugStack) override; void operator()(const Tracker::LCSBufferTransfer& bufferTransfer, const std::vector& debugStack) override; + void operator()(const Tracker::LCSAccelerationStructureBuild& asBuild, + const std::vector& debugStack) override; + void operator()(const Tracker::LCSAccelerationStructureTransfer& asTransfer, + const std::vector& debugStack) override; /** * @brief Called at the start of the submit to emit a "Submit" record, delimiting the subsequent items from any diff --git a/layer_gpu_timeline/timeline.proto b/layer_gpu_timeline/timeline.proto index c9deaca..5cfb0b0 100644 --- a/layer_gpu_timeline/timeline.proto +++ b/layer_gpu_timeline/timeline.proto @@ -148,7 +148,7 @@ message Submit { uint64 queue = 3; } -/* Enumerates the possible attachment types a renderpass can have */ +/* Enumerates the possible attachment types a render pass can have */ enum RenderpassAttachmentType { undefined = 0; color = 1; @@ -156,7 +156,7 @@ enum RenderpassAttachmentType { stencil = 3; } -/* Describe an attachment to a renderpass */ +/* Describe an attachment to a render pass */ message RenderpassAttachment { /* The attachment type */ RenderpassAttachmentType type = 1; @@ -250,11 +250,11 @@ enum BufferTransferType { copy_buffer = 2; } -/* An buffer transfer submission */ +/* A buffer transfer submission */ message BufferTransfer { /* The unique identifier for this operation */ uint64 tag_id = 1; - /* The number of bytes being transfered */ + /* The number of bytes being transferred */ int64 byte_count = 2; /* The buffer type */ BufferTransferType transfer_type = 3; @@ -262,6 +262,47 @@ 
message BufferTransfer { repeated string debug_label = 4; } +/* Enumerates possible acceleration structure build types */ +enum AccelerationStructureBuildType +{ + unknown_as_build = 0; + fast_build = 1; + fast_trace = 2; +} + +/* An acceleration structure build submission */ +message AccelerationStructureBuild { + /* The unique identifier for this operation */ + uint64 tag_id = 1; + /* The number of primitives in the build */ + int64 primitive_count = 2; + /* The build type */ + AccelerationStructureBuildType build_type = 3; + /* Any user defined debug labels associated with the dispatch */ + repeated string debug_label = 4; +} + +/* Enumerates possible acceleration structure transfer types */ +enum AccelerationStructureTransferType +{ + unknown_as_transfer = 0; + struct_to_struct = 1; + struct_to_mem = 2; + mem_to_struct = 3; +} + +/* An acceleration structure transfer submission */ +message AccelerationStructureTransfer { + /* The unique identifier for this operation */ + uint64 tag_id = 1; + /* The number of bytes being transferred */ + int64 byte_count = 2; + /* The transfer type */ + AccelerationStructureTransferType transfer_type = 3; + /* Any user defined debug labels associated with the dispatch */ + repeated string debug_label = 4; +} + /* The data payload message that wraps all other messages */ message TimelineRecord { Header header = 1; @@ -274,4 +315,6 @@ message TimelineRecord { TraceRays trace_rays = 8; ImageTransfer image_transfer = 9; BufferTransfer buffer_transfer = 10; + AccelerationStructureBuild acceleration_structure_build = 11; + AccelerationStructureTransfer acceleration_structure_transfer = 12; } diff --git a/lglpy/timeline/protos/layer_driver/timeline_pb2.py b/lglpy/timeline/protos/layer_driver/timeline_pb2.py index 7ade6a9..84ecb59 100644 --- a/lglpy/timeline/protos/layer_driver/timeline_pb2.py +++ b/lglpy/timeline/protos/layer_driver/timeline_pb2.py @@ -2,7 +2,7 @@ # Generated by the protocol buffer compiler. DO NOT EDIT! 
# NO CHECKED-IN PROTOBUF GENCODE # source: timeline.proto -# Protobuf Python Version: 5.29.2 +# Protobuf Python Version: 5.29.3 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,7 +13,7 @@ _runtime_version.Domain.PUBLIC, 5, 29, - 2, + 3, '', 'timeline.proto' ) @@ -24,7 +24,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0etimeline.proto\x12\x12gpulayers.timeline\"A\n\x06Header\x12\x37\n\nversion_no\x18\x01 \x01(\x0e\x32#.gpulayers.timeline.HeaderVersionNo\"\x83\x01\n\x0e\x44\x65viceMetadata\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nprocess_id\x18\x02 \x01(\r\x12\x15\n\rmajor_version\x18\x03 \x01(\r\x12\x15\n\rminor_version\x18\x04 \x01(\r\x12\x15\n\rpatch_version\x18\x05 \x01(\r\x12\x0c\n\x04name\x18\x06 \x01(\t\"6\n\x05\x46rame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\x11\n\ttimestamp\x18\x03 \x01(\x04\":\n\x06Submit\x12\x11\n\ttimestamp\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\r\n\x05queue\x18\x03 \x01(\x04\"\x9b\x01\n\x14RenderpassAttachment\x12:\n\x04type\x18\x01 \x01(\x0e\x32,.gpulayers.timeline.RenderpassAttachmentType\x12\r\n\x05index\x18\x02 \x01(\r\x12\x12\n\nnot_loaded\x18\x03 \x01(\x08\x12\x12\n\nnot_stored\x18\x04 \x01(\x08\x12\x10\n\x08resolved\x18\x05 \x01(\x08\"\xc4\x01\n\x0f\x42\x65ginRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x17\n\x0f\x64raw_call_count\x18\x04 \x01(\r\x12\x15\n\rsubpass_count\x18\x05 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x06 \x03(\t\x12=\n\x0b\x61ttachments\x18\x07 \x03(\x0b\x32(.gpulayers.timeline.RenderpassAttachment\"R\n\x12\x43ontinueRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x17\n\x0f\x64raw_call_count\x18\x02 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x03 \x03(\t\"e\n\x08\x44ispatch\x12\x0e\n\x06tag_id\x18\x01 
\x01(\x04\x12\x10\n\x08x_groups\x18\x02 \x01(\x03\x12\x10\n\x08y_groups\x18\x03 \x01(\x03\x12\x10\n\x08z_groups\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"c\n\tTraceRays\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x0f\n\x07x_items\x18\x02 \x01(\x03\x12\x0f\n\x07y_items\x18\x03 \x01(\x03\x12\x0f\n\x07z_items\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"\x87\x01\n\rImageTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x13\n\x0bpixel_count\x18\x02 \x01(\x03\x12<\n\rtransfer_type\x18\x03 \x01(\x0e\x32%.gpulayers.timeline.ImageTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\x88\x01\n\x0e\x42ufferTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x12\n\nbyte_count\x18\x02 \x01(\x03\x12=\n\rtransfer_type\x18\x03 \x01(\x0e\x32&.gpulayers.timeline.BufferTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xa1\x04\n\x0eTimelineRecord\x12*\n\x06header\x18\x01 \x01(\x0b\x32\x1a.gpulayers.timeline.Header\x12\x34\n\x08metadata\x18\x02 \x01(\x0b\x32\".gpulayers.timeline.DeviceMetadata\x12(\n\x05\x66rame\x18\x03 \x01(\x0b\x32\x19.gpulayers.timeline.Frame\x12*\n\x06submit\x18\x04 \x01(\x0b\x32\x1a.gpulayers.timeline.Submit\x12\x37\n\nrenderpass\x18\x05 \x01(\x0b\x32#.gpulayers.timeline.BeginRenderpass\x12\x43\n\x13\x63ontinue_renderpass\x18\x06 \x01(\x0b\x32&.gpulayers.timeline.ContinueRenderpass\x12.\n\x08\x64ispatch\x18\x07 \x01(\x0b\x32\x1c.gpulayers.timeline.Dispatch\x12\x31\n\ntrace_rays\x18\x08 \x01(\x0b\x32\x1d.gpulayers.timeline.TraceRays\x12\x39\n\x0eimage_transfer\x18\t \x01(\x0b\x32!.gpulayers.timeline.ImageTransfer\x12;\n\x0f\x62uffer_transfer\x18\n \x01(\x0b\x32\".gpulayers.timeline.BufferTransfer* 
\n\x0fHeaderVersionNo\x12\r\n\tversion_1\x10\x00*L\n\x18RenderpassAttachmentType\x12\r\n\tundefined\x10\x00\x12\t\n\x05\x63olor\x10\x01\x12\t\n\x05\x64\x65pth\x10\x02\x12\x0b\n\x07stencil\x10\x03*\x84\x01\n\x11ImageTransferType\x12\x1a\n\x16unknown_image_transfer\x10\x00\x12\x0f\n\x0b\x63lear_image\x10\x01\x12\x0e\n\ncopy_image\x10\x02\x12\x18\n\x14\x63opy_buffer_to_image\x10\x03\x12\x18\n\x14\x63opy_image_to_buffer\x10\x04*S\n\x12\x42ufferTransferType\x12\x1b\n\x17unknown_buffer_transfer\x10\x00\x12\x0f\n\x0b\x66ill_buffer\x10\x01\x12\x0f\n\x0b\x63opy_buffer\x10\x02\x42\x02H\x03\x62\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0etimeline.proto\x12\x12gpulayers.timeline\"A\n\x06Header\x12\x37\n\nversion_no\x18\x01 \x01(\x0e\x32#.gpulayers.timeline.HeaderVersionNo\"\x83\x01\n\x0e\x44\x65viceMetadata\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nprocess_id\x18\x02 \x01(\r\x12\x15\n\rmajor_version\x18\x03 \x01(\r\x12\x15\n\rminor_version\x18\x04 \x01(\r\x12\x15\n\rpatch_version\x18\x05 \x01(\r\x12\x0c\n\x04name\x18\x06 \x01(\t\"6\n\x05\x46rame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\x11\n\ttimestamp\x18\x03 \x01(\x04\":\n\x06Submit\x12\x11\n\ttimestamp\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\r\n\x05queue\x18\x03 \x01(\x04\"\x9b\x01\n\x14RenderpassAttachment\x12:\n\x04type\x18\x01 \x01(\x0e\x32,.gpulayers.timeline.RenderpassAttachmentType\x12\r\n\x05index\x18\x02 \x01(\r\x12\x12\n\nnot_loaded\x18\x03 \x01(\x08\x12\x12\n\nnot_stored\x18\x04 \x01(\x08\x12\x10\n\x08resolved\x18\x05 \x01(\x08\"\xc4\x01\n\x0f\x42\x65ginRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x17\n\x0f\x64raw_call_count\x18\x04 \x01(\r\x12\x15\n\rsubpass_count\x18\x05 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x06 \x03(\t\x12=\n\x0b\x61ttachments\x18\x07 
\x03(\x0b\x32(.gpulayers.timeline.RenderpassAttachment\"R\n\x12\x43ontinueRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x17\n\x0f\x64raw_call_count\x18\x02 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x03 \x03(\t\"e\n\x08\x44ispatch\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x10\n\x08x_groups\x18\x02 \x01(\x03\x12\x10\n\x08y_groups\x18\x03 \x01(\x03\x12\x10\n\x08z_groups\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"c\n\tTraceRays\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x0f\n\x07x_items\x18\x02 \x01(\x03\x12\x0f\n\x07y_items\x18\x03 \x01(\x03\x12\x0f\n\x07z_items\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"\x87\x01\n\rImageTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x13\n\x0bpixel_count\x18\x02 \x01(\x03\x12<\n\rtransfer_type\x18\x03 \x01(\x0e\x32%.gpulayers.timeline.ImageTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\x88\x01\n\x0e\x42ufferTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x12\n\nbyte_count\x18\x02 \x01(\x03\x12=\n\rtransfer_type\x18\x03 \x01(\x0e\x32&.gpulayers.timeline.BufferTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xa2\x01\n\x1a\x41\x63\x63\x65lerationStructureBuild\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x17\n\x0fprimitive_count\x18\x02 \x01(\x03\x12\x46\n\nbuild_type\x18\x03 \x01(\x0e\x32\x32.gpulayers.timeline.AccelerationStructureBuildType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xa6\x01\n\x1d\x41\x63\x63\x65lerationStructureTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x12\n\nbyte_count\x18\x02 \x01(\x03\x12L\n\rtransfer_type\x18\x03 \x01(\x0e\x32\x35.gpulayers.timeline.AccelerationStructureTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xd3\x05\n\x0eTimelineRecord\x12*\n\x06header\x18\x01 \x01(\x0b\x32\x1a.gpulayers.timeline.Header\x12\x34\n\x08metadata\x18\x02 \x01(\x0b\x32\".gpulayers.timeline.DeviceMetadata\x12(\n\x05\x66rame\x18\x03 
\x01(\x0b\x32\x19.gpulayers.timeline.Frame\x12*\n\x06submit\x18\x04 \x01(\x0b\x32\x1a.gpulayers.timeline.Submit\x12\x37\n\nrenderpass\x18\x05 \x01(\x0b\x32#.gpulayers.timeline.BeginRenderpass\x12\x43\n\x13\x63ontinue_renderpass\x18\x06 \x01(\x0b\x32&.gpulayers.timeline.ContinueRenderpass\x12.\n\x08\x64ispatch\x18\x07 \x01(\x0b\x32\x1c.gpulayers.timeline.Dispatch\x12\x31\n\ntrace_rays\x18\x08 \x01(\x0b\x32\x1d.gpulayers.timeline.TraceRays\x12\x39\n\x0eimage_transfer\x18\t \x01(\x0b\x32!.gpulayers.timeline.ImageTransfer\x12;\n\x0f\x62uffer_transfer\x18\n \x01(\x0b\x32\".gpulayers.timeline.BufferTransfer\x12T\n\x1c\x61\x63\x63\x65leration_structure_build\x18\x0b \x01(\x0b\x32..gpulayers.timeline.AccelerationStructureBuild\x12Z\n\x1f\x61\x63\x63\x65leration_structure_transfer\x18\x0c \x01(\x0b\x32\x31.gpulayers.timeline.AccelerationStructureTransfer* \n\x0fHeaderVersionNo\x12\r\n\tversion_1\x10\x00*L\n\x18RenderpassAttachmentType\x12\r\n\tundefined\x10\x00\x12\t\n\x05\x63olor\x10\x01\x12\t\n\x05\x64\x65pth\x10\x02\x12\x0b\n\x07stencil\x10\x03*\x84\x01\n\x11ImageTransferType\x12\x1a\n\x16unknown_image_transfer\x10\x00\x12\x0f\n\x0b\x63lear_image\x10\x01\x12\x0e\n\ncopy_image\x10\x02\x12\x18\n\x14\x63opy_buffer_to_image\x10\x03\x12\x18\n\x14\x63opy_image_to_buffer\x10\x04*S\n\x12\x42ufferTransferType\x12\x1b\n\x17unknown_buffer_transfer\x10\x00\x12\x0f\n\x0b\x66ill_buffer\x10\x01\x12\x0f\n\x0b\x63opy_buffer\x10\x02*V\n\x1e\x41\x63\x63\x65lerationStructureBuildType\x12\x14\n\x10unknown_as_build\x10\x00\x12\x0e\n\nfast_build\x10\x01\x12\x0e\n\nfast_trace\x10\x02*x\n!AccelerationStructureTransferType\x12\x17\n\x13unknown_as_transfer\x10\x00\x12\x14\n\x10struct_to_struct\x10\x01\x12\x11\n\rstruct_to_mem\x10\x02\x12\x11\n\rmem_to_struct\x10\x03\x42\x02H\x03\x62\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -32,14 +32,18 @@ if not _descriptor._USE_C_DESCRIPTORS: _globals['DESCRIPTOR']._loaded_options = None 
_globals['DESCRIPTOR']._serialized_options = b'H\003' - _globals['_HEADERVERSIONNO']._serialized_start=1825 - _globals['_HEADERVERSIONNO']._serialized_end=1857 - _globals['_RENDERPASSATTACHMENTTYPE']._serialized_start=1859 - _globals['_RENDERPASSATTACHMENTTYPE']._serialized_end=1935 - _globals['_IMAGETRANSFERTYPE']._serialized_start=1938 - _globals['_IMAGETRANSFERTYPE']._serialized_end=2070 - _globals['_BUFFERTRANSFERTYPE']._serialized_start=2072 - _globals['_BUFFERTRANSFERTYPE']._serialized_end=2155 + _globals['_HEADERVERSIONNO']._serialized_start=2337 + _globals['_HEADERVERSIONNO']._serialized_end=2369 + _globals['_RENDERPASSATTACHMENTTYPE']._serialized_start=2371 + _globals['_RENDERPASSATTACHMENTTYPE']._serialized_end=2447 + _globals['_IMAGETRANSFERTYPE']._serialized_start=2450 + _globals['_IMAGETRANSFERTYPE']._serialized_end=2582 + _globals['_BUFFERTRANSFERTYPE']._serialized_start=2584 + _globals['_BUFFERTRANSFERTYPE']._serialized_end=2667 + _globals['_ACCELERATIONSTRUCTUREBUILDTYPE']._serialized_start=2669 + _globals['_ACCELERATIONSTRUCTUREBUILDTYPE']._serialized_end=2755 + _globals['_ACCELERATIONSTRUCTURETRANSFERTYPE']._serialized_start=2757 + _globals['_ACCELERATIONSTRUCTURETRANSFERTYPE']._serialized_end=2877 _globals['_HEADER']._serialized_start=38 _globals['_HEADER']._serialized_end=103 _globals['_DEVICEMETADATA']._serialized_start=106 @@ -62,6 +66,10 @@ _globals['_IMAGETRANSFER']._serialized_end=1136 _globals['_BUFFERTRANSFER']._serialized_start=1139 _globals['_BUFFERTRANSFER']._serialized_end=1275 - _globals['_TIMELINERECORD']._serialized_start=1278 - _globals['_TIMELINERECORD']._serialized_end=1823 + _globals['_ACCELERATIONSTRUCTUREBUILD']._serialized_start=1278 + _globals['_ACCELERATIONSTRUCTUREBUILD']._serialized_end=1440 + _globals['_ACCELERATIONSTRUCTURETRANSFER']._serialized_start=1443 + _globals['_ACCELERATIONSTRUCTURETRANSFER']._serialized_end=1609 + _globals['_TIMELINERECORD']._serialized_start=1612 + 
_globals['_TIMELINERECORD']._serialized_end=2335 # @@protoc_insertion_point(module_scope) diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp index 04c2a8d..1b99145 100644 --- a/source_common/trackers/command_buffer.cpp +++ b/source_common/trackers/command_buffer.cpp @@ -136,7 +136,7 @@ uint64_t CommandBuffer::dispatch(int64_t xGroups, int64_t yGroups, int64_t zGrou uint64_t tagID = Tracker::LCSWorkload::assignTagID(); stats.incDispatchCount(); - // Add a workload to the render pass + // Add a workload to the command stream auto workload = std::make_shared(tagID, xGroups, yGroups, zGroups); // Add a command to the layer-side command stream @@ -151,7 +151,7 @@ uint64_t CommandBuffer::traceRays(int64_t xItems, int64_t yItems, int64_t zItems uint64_t tagID = Tracker::LCSWorkload::assignTagID(); stats.incTraceRaysCount(); - // Add a workload to the render pass + // Add a workload to the command stream auto workload = std::make_shared(tagID, xItems, yItems, zItems); // Add a command to the layer-side command stream @@ -166,7 +166,7 @@ uint64_t CommandBuffer::imageTransfer(LCSImageTransfer::Type transferType, int64 uint64_t tagID = Tracker::LCSWorkload::assignTagID(); stats.incImageTransferCount(); - // Add a workload to the render pass + // Add a workload to the command stream auto workload = std::make_shared(tagID, transferType, pixelCount); // Add a command to the layer-side command stream @@ -181,7 +181,7 @@ uint64_t CommandBuffer::bufferTransfer(LCSBufferTransfer::Type transferType, int uint64_t tagID = Tracker::LCSWorkload::assignTagID(); stats.incBufferTransferCount(); - // Add a workload to the render pass + // Add a workload to the command stream auto workload = std::make_shared(tagID, transferType, byteCount); // Add a command to the layer-side command stream @@ -190,6 +190,36 @@ uint64_t CommandBuffer::bufferTransfer(LCSBufferTransfer::Type transferType, int return tagID; } +/* See header for documentation. 
*/ +uint64_t CommandBuffer::accelerationStructureBuild(LCSAccelerationStructureBuild::Type buildType, + int64_t primitiveCount) +{ + uint64_t tagID = Tracker::LCSWorkload::assignTagID(); + + // Add a workload to the command stream + auto workload = std::make_shared(tagID, buildType, primitiveCount); + + // Add a command to the layer-side command stream + workloadCommandStream.emplace_back(LCSInstructionWorkload(workload)); + + return tagID; +} + +/* See header for documentation. */ +uint64_t CommandBuffer::accelerationStructureTransfer(LCSAccelerationStructureTransfer::Type transferType, + int64_t byteCount) +{ + uint64_t tagID = Tracker::LCSWorkload::assignTagID(); + + // Add a workload to the command stream + auto workload = std::make_shared(tagID, transferType, byteCount); + + // Add a command to the layer-side command stream + workloadCommandStream.emplace_back(LCSInstructionWorkload(workload)); + + return tagID; +} + /* See header for documentation. */ void CommandBuffer::executeCommands(CommandBuffer& secondary) { diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp index 17b71c3..d67d05c 100644 --- a/source_common/trackers/command_buffer.hpp +++ b/source_common/trackers/command_buffer.hpp @@ -141,12 +141,32 @@ class CommandBuffer * @brief Capture a transfer where the destination is a buffer. * * @param transferType The type of the transfer. - * @param byteCount The number of pixels written. + * @param byteCount The number of bytes written. * * @return Returns the tagID assigned to this workload. */ uint64_t bufferTransfer(LCSBufferTransfer::Type transferType, int64_t byteCount); + /** + * @brief Capture an acceleration structure build. + * + * @param buildType The type of the build. + * @param primitiveCount The number of primitives in the build. + * + * @return Returns the tagID assigned to this workload. 
+ */ + uint64_t accelerationStructureBuild(LCSAccelerationStructureBuild::Type buildType, int64_t primitiveCount); + + /** + * @brief Capture a transfer where the destination is an acceleration structure. + * + * @param transferType The type of the transfer. + * @param byteCount The number of bytes written. + * + * @return Returns the tagID assigned to this workload. + */ + uint64_t accelerationStructureTransfer(LCSAccelerationStructureTransfer::Type transferType, int64_t byteCount); + /** * @brief Begin a user debug marker range. * diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp index 24610b0..c6b2e45 100644 --- a/source_common/trackers/layer_command_stream.cpp +++ b/source_common/trackers/layer_command_stream.cpp @@ -134,6 +134,60 @@ std::string LCSBufferTransfer::getTransferTypeStr() const } } +/* See header for details. */ +LCSAccelerationStructureBuild::LCSAccelerationStructureBuild(uint64_t _tagID, Type _buildType, int64_t _primitiveCount) + : LCSWorkload(_tagID), + buildType(_buildType), + primitiveCount(_primitiveCount) +{ +} + +/* See header for details. */ +std::string LCSAccelerationStructureBuild::getBuildTypeStr() const +{ + switch (buildType) + { + case Type::unknown: + return "Unknown"; + case Type::fast_build: + return "Fast build"; + case Type::fast_trace: + return "Fast trace"; + default: + assert(false && "Unexpected LCSAccelerationStructureBuild::Type"); + return ""; + } +} + +/* See header for details. */ +LCSAccelerationStructureTransfer::LCSAccelerationStructureTransfer(uint64_t _tagID, + Type _transferType, + int64_t _byteCount) + : LCSWorkload(_tagID), + transferType(_transferType), + byteCount(_byteCount) +{ +} + +/* See header for details. 
*/ +std::string LCSAccelerationStructureTransfer::getTransferTypeStr() const +{ + switch (transferType) + { + case Type::unknown: + return "Unknown"; + case Type::struct_to_struct: + return "Copy acceleration structure"; + case Type::struct_to_mem: + return "Copy acceleration structure to memory"; + case Type::mem_to_struct: + return "Copy memory to acceleration structure"; + default: + assert(false && "Unexpected LCSAccelerationStructureTransfer::Type"); + return ""; + } +} + /* See header for details. */ LCSInstructionMarkerPush::LCSInstructionMarkerPush(const std::string& _label) : label(std::make_shared(_label)) diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp index ded0be7..415a357 100644 --- a/source_common/trackers/layer_command_stream.hpp +++ b/source_common/trackers/layer_command_stream.hpp @@ -411,6 +411,96 @@ class LCSBufferTransfer : public LCSWorkload int64_t byteCount; }; +/** + * @brief Class representing an acceleration structure build workload in the command stream. + */ +class LCSAccelerationStructureBuild : public LCSWorkload +{ +public: + /* Enumerates possible acceleration structure build types */ + enum class Type + { + unknown, + fast_build, + fast_trace + }; + + /** + * @brief Create a new acceleration structure build workload. + * + * Workloads of unknown dimension should use @c primitiveCount of -1. + * + * @param tagID The assigned tagID. + * @param buildType The subtype of the build. + * @param primitiveCount The size of the build, in primitives. 
+ */ + LCSAccelerationStructureBuild(uint64_t tagID, Type buildType, int64_t primitiveCount); + + /** @return The subtype of the build */ + Type getBuildType() const { return buildType; } + + /** @return The subtype of the build */ + std::string getBuildTypeStr() const; + + /** @return The size of the build, in primitives */ + int64_t getPrimitiveCount() const { return primitiveCount; } + +private: + /** + * @brief The subtype of the build. + */ + Type buildType; + + /** + * @brief The number of primitives in the build, or -1 if unknown. + */ + int64_t primitiveCount; +}; + +/** + * @brief Class representing an acceleration structure transfer workload in the command stream. + */ +class LCSAccelerationStructureTransfer : public LCSWorkload +{ +public: + /* Enumerates possible acceleration structure transfer types */ + enum class Type + { + unknown, + struct_to_struct, + struct_to_mem, + mem_to_struct, + }; + + /** + * @brief Create a new acceleration structure transfer workload. + * + * @param tagID The assigned tagID. + * @param transferType The subtype of the transfer. + */ + LCSAccelerationStructureTransfer(uint64_t tagID, Type transferType, int64_t byteCount); + + /** @return The subtype of the transfer */ + Type getTransferType() const { return transferType; } + + /** @return The subtype of the transfer */ + std::string getTransferTypeStr() const; + + /** @return The size of the transfer, in bytes */ + int64_t getByteCount() const { return byteCount; } + +private: + /** + * @brief The subtype of the transfer. + */ + Type transferType; + + /** + * @brief The number of bytes transferred, -1 if unknown. + */ + int64_t byteCount; +}; + /** * @brief Class representing a marker instruction in the command stream that represents a debug label push operation. */ @@ -488,21 +578,24 @@ class LCSInstructionWorkload * @brief Instructions are a variant representing the operation. 
*/ using LCSInstruction = std::variant< - // the instruction is a debug-label push operation + // The instruction is a debug-label push operation LCSInstructionMarkerPush, - // the instruction is a debug-label pop operation + // The instruction is a debug-label pop operation LCSInstructionMarkerPop, - // the instruction represents a renderpass workload operation + // The instruction represents a render pass workload operation LCSInstructionWorkload, - // the instruction represents a continuation of a renderpass workload operation + // The instruction represents a continuation of a render pass workload operation LCSInstructionWorkload, - // the instruction represents a dispatch workload operation + // The instruction represents a dispatch workload operation LCSInstructionWorkload, - // the instruction represents a trace rays workload operation + // The instruction represents a trace rays workload operation LCSInstructionWorkload, - // the instruction represents an image transfer workload operation + // The instruction represents an image transfer workload operation LCSInstructionWorkload, - // the instruction represents a buffer transfer workload operation - LCSInstructionWorkload>; - + // The instruction represents a buffer transfer workload operation + LCSInstructionWorkload, + // The instruction represents an acceleration structure build workload operation + LCSInstructionWorkload, + // The instruction represents an acceleration structure transfer workload operation + LCSInstructionWorkload>; } diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp index 90131b2..022eac9 100644 --- a/source_common/trackers/queue.cpp +++ b/source_common/trackers/queue.cpp @@ -122,13 +122,15 @@ namespace } /** - * @brief Visit a dispatch/trace rays/image transfer/buffer transfer workload instruction + * @brief Visit a dispatch/trace rays/acceleration structure build/any transfer workload instruction * * @param instruction The workload instruction */ template requires(std::is_same_v ||
std::is_same_v - || std::is_same_v || std::is_same_v) + || std::is_same_v || std::is_same_v + || std::is_same_v + || std::is_same_v) void operator()(const LCSInstructionWorkload& instruction) { const auto& workload = instruction.getWorkload(); diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp index 04c5eea..2116964 100644 --- a/source_common/trackers/queue.hpp +++ b/source_common/trackers/queue.hpp @@ -111,6 +111,24 @@ class SubmitCommandWorkloadVisitor * @param debugStack The stack of debug labels that are associated with this buffer transfer */ virtual void operator()(const LCSBufferTransfer& bufferTransfer, const std::vector& debugStack) = 0; + + /** + * @brief Visit an acceleration structure build workload object + * + * @param asBuild The acceleration structure build + * @param debugStack The stack of debug labels that are associated with this acceleration structure build + */ + virtual void operator()(const LCSAccelerationStructureBuild& asBuild, + const std::vector& debugStack) = 0; + + /** + * @brief Visit an acceleration structure transfer workload object + * + * @param asTransfer The acceleration structure transfer + * @param debugStack The stack of debug labels that are associated with this acceleration structure transfer + */ + virtual void operator()(const LCSAccelerationStructureTransfer& asTransfer, + const std::vector& debugStack) = 0; }; /** From 99284d0457f460b2316bbab9396448f6c5b65b3a Mon Sep 17 00:00:00 2001 From: Peter Harris Date: Mon, 24 Feb 2025 20:58:57 +0000 Subject: [PATCH 2/2] Add basic Python decoder --- generator/generate_vulkan_layer.py | 5 +- generator/vk_codegen/source_CMakeLists.txt | 2 +- .../layer_device_functions_transfer.cpp | 18 +++ lglpy/comms/service_gpu_timeline.py | 133 ++++++++++++++++-- lglpy/timeline/data/processed_trace.py | 119 +++++++++++++++- lglpy/timeline/data/raw_trace.py | 70 +++++++-- 6 files changed, 319 insertions(+), 28 deletions(-) diff --git 
a/generator/generate_vulkan_layer.py b/generator/generate_vulkan_layer.py index 689b9fb..cbe306f 100755 --- a/generator/generate_vulkan_layer.py +++ b/generator/generate_vulkan_layer.py @@ -76,7 +76,10 @@ def get_layer_api_name(vendor: str, layer: str) -> str: for char in name: is_uc = char.isupper() - is_last_uc = False if not part else part[-1].isupper() + if not part: + is_last_uc = False + else: + is_last_uc = part[-1].isupper() if (is_uc and not is_last_uc) or not part: if part: diff --git a/generator/vk_codegen/source_CMakeLists.txt b/generator/vk_codegen/source_CMakeLists.txt index 1de8d53..640fa85 100644 --- a/generator/vk_codegen/source_CMakeLists.txt +++ b/generator/vk_codegen/source_CMakeLists.txt @@ -71,4 +71,4 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") COMMENT "Stripped lib${VK_LAYER}.so to ${VK_LAYER_STRIP}") endif() -add_clang_tools() \ No newline at end of file +add_clang_tools() diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp index fe04f84..457d2bf 100644 --- a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp +++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp @@ -594,6 +594,12 @@ VKAPI_ATTR void VKAPI_CALL std::unique_lock lock {g_vulkanLock}; auto* layer = Device::retrieve(commandBuffer); + // TODO: We ideally want to track sizes of the transfers, but this requires + // dispatching vkCmdWriteAccelerationStructuresPropertiesKHR() queries and + // capturing the result "later" which we don't support yet. + // We can approximate the size using vkGetAccelerationStructureBuildSizesKHR(), + // but this returns the build size which may be larger than the size of the + // AS itself which can be smaller (especially if later compacted).
uint64_t tagID = registerAccelerationStructureTransfer(layer, commandBuffer, @@ -619,6 +625,12 @@ VKAPI_ATTR void VKAPI_CALL std::unique_lock lock {g_vulkanLock}; auto* layer = Device::retrieve(commandBuffer); + // TODO: We ideally want to track sizes of the transfers, but this requires + // dispatching vkCmdWriteAccelerationStructuresPropertiesKHR() queries and + // capturing the result "later" which we don't support yet. + // We can approximate the size using vkGetAccelerationStructureBuildSizesKHR(), + // but this returns the build size which may be larger than the size of the + // AS itself which can be smaller (especially if later compacted). uint64_t tagID = registerAccelerationStructureTransfer(layer, commandBuffer, @@ -644,6 +656,12 @@ VKAPI_ATTR void VKAPI_CALL std::unique_lock lock {g_vulkanLock}; auto* layer = Device::retrieve(commandBuffer); + // TODO: We ideally want to track sizes of the transfers, but this requires + // dispatching vkCmdWriteAccelerationStructuresPropertiesKHR() queries and + // capturing the result "later" which we don't support yet. + // We can approximate the size using vkGetAccelerationStructureBuildSizesKHR(), + // but this returns the build size which may be larger than the size of the + // AS itself which can be smaller (especially if later compacted). uint64_t tagID = registerAccelerationStructureTransfer(layer, commandBuffer, diff --git a/lglpy/comms/service_gpu_timeline.py b/lglpy/comms/service_gpu_timeline.py index 41bbeb1..20ce567 100644 --- a/lglpy/comms/service_gpu_timeline.py +++ b/lglpy/comms/service_gpu_timeline.py @@ -105,6 +105,28 @@ class BufferTransferMetadataType(TypedDict): label: list[str] +class ASTransferMetadataType(TypedDict): + ''' + Structured dict type for type hinting. + ''' + type: str + tid: int + subtype: str + byteCount: int + label: list[str] + + +class ASBuildMetadataType(TypedDict): + ''' + Structured dict type for type hinting. 
+ ''' + type: str + tid: int + subtype: str + primitiveCount: int + label: list[str] + + class SubmitMetadataType(TypedDict): ''' Structured dict type for type hinting. @@ -116,7 +138,9 @@ class SubmitMetadataType(TypedDict): | DispatchMetadataType | TraceRaysMetadataType | ImageTransferMetadataType - | BufferTransferMetadataType] + | BufferTransferMetadataType + | ASBuildMetadataType + | ASTransferMetadataType] class FrameMetadataType(TypedDict): @@ -138,8 +162,7 @@ def expect_int(v: int | None) -> int: def map_renderpass_binding(type, index: int | None) -> str: ''' - Map the PB encoded renderpass attachment type to a corresponding description - string + Map the PB encoded render pass attachment type to a description. ''' if type == timeline_pb2.RenderpassAttachmentType.undefined: assert ((index is None) or (index == 0)) @@ -159,8 +182,7 @@ def map_renderpass_binding(type, index: int | None) -> str: def map_image_transfer_type(type) -> str: ''' - Map the PB encoded image transfer type to some corresponding description - string + Map the PB encoded image transfer type to a description. ''' if type == timeline_pb2.ImageTransferType.unknown_image_transfer: return "Unknown" @@ -178,8 +200,7 @@ def map_image_transfer_type(type) -> str: def map_buffer_transfer_type(type) -> str: ''' - Map the PB encoded image transfer type to some corresponding description - string + Map the PB encoded buffer transfer type to a description. ''' if type == timeline_pb2.BufferTransferType.unknown_buffer_transfer: return "Unknown" @@ -191,6 +212,37 @@ def map_buffer_transfer_type(type) -> str: assert False +def map_as_build_type(type) -> str: + ''' + Map the PB encoded acceleration structure build type to a description.
+ ''' + if type == timeline_pb2.AccelerationStructureBuildType.unknown_as_build: + return "Unknown" + elif type == timeline_pb2.AccelerationStructureBuildType.fast_build: + return "Fast build" + elif type == timeline_pb2.AccelerationStructureBuildType.fast_trace: + return "Fast trace" + else: + assert False + + +def map_as_transfer_type(type) -> str: + ''' + Map the PB encoded acceleration structure transfer to a description. + ''' + base_type = timeline_pb2.AccelerationStructureTransferType + if type == base_type.unknown_as_transfer: + return "Unknown" + elif type == base_type.struct_to_struct: + return "Copy acceleration structure" + elif type == base_type.struct_to_mem: + return "Copy acceleration structure to mem" + elif type == base_type.mem_to_struct: + return "Copy mem to acceleration structure" + else: + assert False + + def map_debug_label(labels: list[str] | None) -> list[str]: ''' Normalize the 'debug_label' field from the PB data @@ -444,7 +496,7 @@ def handle_dispatch(self, msg: Any) -> None: assert self.last_submit is not None submit = self.last_submit - # Clear the last renderpass + # Clear the last render pass self.last_render_pass = None # Convert the PB message into our data representation @@ -470,7 +522,7 @@ def handle_trace_rays(self, msg: Any) -> None: assert self.last_submit is not None submit = self.last_submit - # Clear the last renderpass + # Clear the last render pass self.last_render_pass = None # Convert the PB message into our data representation @@ -496,7 +548,7 @@ def handle_image_transfer(self, msg: Any) -> None: assert self.last_submit is not None submit = self.last_submit - # Clear the last renderpass + # Clear the last render pass self.last_render_pass = None # Convert the PB message into our data representation @@ -521,7 +573,7 @@ def handle_buffer_transfer(self, msg: Any) -> None: assert self.last_submit is not None submit = self.last_submit - # Clear the last renderpass + # Clear the last render pass self.last_render_pass
= None # Convert the PB message into our data representation @@ -535,6 +587,56 @@ def handle_buffer_transfer(self, msg: Any) -> None: submit['workloads'].append(buffer_transfer) + def handle_as_build(self, msg: Any) -> None: + ''' + Handle an acceleration structure build workload + + Args: + msg: The Python decode of a Timeline PB payload. + ''' + # Get the active submit to append to + assert self.last_submit is not None + submit = self.last_submit + + # Clear the last render pass + self.last_render_pass = None + + # Convert the PB message into our data representation + as_build: ASBuildMetadataType = { + 'type': 'asbuild', + 'tid': expect_int(msg.tag_id), + 'subtype': map_as_build_type(msg.build_type), + 'primitiveCount': expect_int(msg.primitive_count), + 'label': map_debug_label(msg.debug_label), + } + + submit['workloads'].append(as_build) + + def handle_as_transfer(self, msg: Any) -> None: + ''' + Handle an acceleration structure transfer workload + + Args: + msg: The Python decode of a Timeline PB payload. + ''' + # Get the active submit to append to + assert self.last_submit is not None + submit = self.last_submit + + # Clear the last render pass + self.last_render_pass = None + + # Convert the PB message into our data representation + as_transfer: ASTransferMetadataType = { + 'type': 'astransfer', + 'tid': expect_int(msg.tag_id), + 'subtype': map_as_transfer_type(msg.transfer_type), + 'byteCount': expect_int(msg.byte_count), + 'label': map_debug_label(msg.debug_label), + } + + submit['workloads'].append(as_transfer) + def handle_message(self, message: Message) -> None: ''' Handle a service request from a layer. 
class GPUASBuild(GPUWorkload):
    '''
    Workload class representing an acceleration structure build.

    Attributes:
        build_type: Description of the build, taken from the metadata
            subtype.
        primitive_count: Primitive count taken from the metadata; a value
            of -1 is shown as an unknown count.
    '''

    def __init__(
            self, event: RenderstageEvent, metadata: MetadataASBuild):
        '''
        Acceleration structure build workload in a trace.

        Args:
            event: Parsed render stage event.
            metadata: Parsed metadata annotation.
        '''
        # Let the base class populate the common data
        super().__init__(event, metadata)

        # Metadata is mandatory for this workload so read it unchecked
        self.build_type = metadata.subtype
        self.primitive_count = metadata.primitive_count

    def get_long_label(self) -> str:
        '''
        Get the long form label for this workload.

        Returns:
            Returns the label for use in the UI.
        '''
        # The label name, when there is one, goes on its own leading line
        label_name = self.get_label_name()
        parts = [label_name] if label_name else []

        # A count of -1 gets a placeholder instead of a number
        if self.primitive_count == -1:
            summary = f'{self.build_type} (? primitives)'
        else:
            s = '' if self.primitive_count == 1 else 's'
            summary = (
                f'{self.build_type} ({self.primitive_count} primitive{s})')

        parts.append(summary)
        return '\n'.join(parts)

    def get_short_label(self) -> str:
        '''
        Get the short form label for this workload.

        Returns:
            Returns the label for use in the UI.
        '''
        return self.build_type


class GPUASTransfer(GPUWorkload):
    '''
    Workload class representing an acceleration structure transfer.

    Attributes:
        transfer_type: Description of the transfer, taken from the metadata
            subtype.
        byte_count: Byte count taken from the metadata; a value of -1 is
            shown as an unknown count.
    '''

    def __init__(
            self, event: RenderstageEvent, metadata: MetadataASTransfer):
        '''
        Acceleration structure transfer workload in a trace.

        Args:
            event: Parsed render stage event.
            metadata: Parsed metadata annotation.
        '''
        # Let the base class populate the common data
        super().__init__(event, metadata)

        # Metadata is mandatory for this workload so read it unchecked
        self.transfer_type = metadata.subtype
        self.byte_count = metadata.byte_count

    def get_long_label(self) -> str:
        '''
        Get the long form label for this workload.

        Returns:
            Returns the label for use in the UI.
        '''
        # The label name, when there is one, goes on its own leading line
        label_name = self.get_label_name()
        parts = [label_name] if label_name else []

        # A count of -1 gets a placeholder instead of a number
        if self.byte_count == -1:
            summary = f'{self.transfer_type} (? bytes)'
        else:
            s = '' if self.byte_count == 1 else 's'
            summary = f'{self.transfer_type} ({self.byte_count} byte{s})'

        parts.append(summary)
        return '\n'.join(parts)

    def get_short_label(self) -> str:
        '''
        Get the short form label for this workload.

        Returns:
            Returns the label for use in the UI.
        '''
        return self.transfer_type
class MetadataASBuild(MetadataWorkload):
    '''
    Parsed GPU Timeline layer payload for an acceleration structure build.

    Attributes:
        subtype: Specific type of the build.
        primitive_count: Primitive count read from the 'primitiveCount'
            payload field.
    '''

    def __init__(self, submit: MetadataSubmit, metadata: JSONType):
        '''
        Parsed GPU Timeline layer payload for a single AS build.

        Args:
            submit: The submit information.
            metadata: JSON payload from the layer.
        '''
        super().__init__(submit, metadata)

        # Normalize the payload fields to the types used by the trace model
        raw_subtype = metadata['subtype']
        self.subtype = str(raw_subtype)

        raw_count = metadata['primitiveCount']
        self.primitive_count = int(raw_count)


class MetadataASTransfer(MetadataWorkload):
    '''
    Parsed GPU Timeline layer payload for an acceleration structure
    transfer.

    Attributes:
        subtype: Specific type of the transfer.
        byte_count: Number of bytes written, or -1 if unknown.
    '''

    def __init__(self, submit: MetadataSubmit, metadata: JSONType):
        '''
        Parsed GPU Timeline layer payload for a single acceleration structure
        transfer.

        Args:
            submit: The submit information.
            metadata: JSON payload from the layer.
        '''
        super().__init__(submit, metadata)

        # Normalize the payload fields to the types used by the trace model
        raw_subtype = metadata['subtype']
        self.subtype = str(raw_subtype)

        raw_count = metadata['byteCount']
        self.byte_count = int(raw_count)