From e10063efd1cb7b026437245b8755fdf5051503a4 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Fri, 31 Jan 2025 13:13:49 +0000
Subject: [PATCH 01/11] Refactor layer_command_stream, queue and command_buffer
 to remove dynamic dispatch.

Switch to a variant-based instruction, rather than using dynamic dispatch + enum, as this is more type-safe.

Remove the duplicated workloads object in command_buffer.
---
 source_common/trackers/command_buffer.cpp     |  34 +--
 source_common/trackers/command_buffer.hpp     |   6 -
 .../trackers/layer_command_stream.cpp         |  85 +++-----
 .../trackers/layer_command_stream.hpp         | 199 +++++++++---------
 source_common/trackers/queue.cpp              | 129 +++++++++---
 source_common/trackers/queue.hpp              |  46 ++--
 6 files changed, 266 insertions(+), 233 deletions(-)
diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
index 17b7084..2953525 100644
--- a/source_common/trackers/command_buffer.cpp
+++ b/source_common/trackers/command_buffer.cpp
@@ -25,7 +25,6 @@
 
 #include "trackers/command_buffer.hpp"
 
-#include "framework/utils.hpp"
 #include "utils/misc.hpp"
 
 #include <cassert>
@@ -44,7 +43,6 @@ void CommandBuffer::reset()
 {
     oneTimeSubmit = false;
     stats.reset();
-    workloads.clear();
     workloadCommandStream.clear();
 }
 
@@ -57,22 +55,15 @@ void CommandBuffer::begin(bool _oneTimeSubmit)
 /* See header for documentation. */
 void CommandBuffer::debugMarkerBegin(std::string marker)
 {
-    // Create a workload we can reference later
-    auto workload = std::make_shared<LCSMarker>(marker);
-    workloads.push_back(workload);
-
     // Add command to update queue debug stack on submit
-    auto instr = std::make_pair(LCSOpcode::MARKER_BEGIN, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionMarkerPush(marker));
 }
 
 /* See header for documentation. */
 void CommandBuffer::debugMarkerEnd()
 {
     // Add command with empty workload to update queue debug stack on submit
-    auto workload = std::shared_ptr<LCSWorkload>();
-    auto instr = std::make_pair(LCSOpcode::MARKER_END, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionMarkerPop());
 }
 
 /* See header for documentation. */
@@ -99,11 +90,9 @@ uint64_t CommandBuffer::renderPassBegin(const RenderPass& renderPass,
     auto workload = std::make_shared<LCSRenderPass>(tagID, renderPass, width, height, suspending, oneTimeSubmit);
 
     currentRenderPass = workload;
-    workloads.push_back(workload);
 
     // Add a command to the layer-side command stream
-    auto instr = std::make_pair(LCSOpcode::RENDER_PASS, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
 
     return tagID;
 }
@@ -133,11 +122,9 @@ uint64_t CommandBuffer::dispatch(int64_t xGroups, int64_t yGroups, int64_t zGrou
 
     // Add a workload to the render pass
     auto workload = std::make_shared<LCSDispatch>(tagID, xGroups, yGroups, zGroups);
-    workloads.push_back(workload);
 
     // Add a command to the layer-side command stream
-    auto instr = std::make_pair(LCSOpcode::DISPATCH, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
 
     return tagID;
 }
@@ -150,11 +137,9 @@ uint64_t CommandBuffer::traceRays(int64_t xItems, int64_t yItems, int64_t zItems
 
     // Add a workload to the render pass
     auto workload = std::make_shared<LCSTraceRays>(tagID, xItems, yItems, zItems);
-    workloads.push_back(workload);
 
     // Add a command to the layer-side command stream
-    auto instr = std::make_pair(LCSOpcode::TRACE_RAYS, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
 
     return tagID;
 }
@@ -167,11 +152,9 @@ uint64_t CommandBuffer::imageTransfer(const std::string& transferType, int64_t p
 
     // Add a workload to the render pass
     auto workload = std::make_shared<LCSImageTransfer>(tagID, transferType, pixelCount);
-    workloads.push_back(workload);
 
     // Add a command to the layer-side command stream
-    auto instr = std::make_pair(LCSOpcode::IMAGE_TRANSFER, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
 
     return tagID;
 }
@@ -184,11 +167,9 @@ uint64_t CommandBuffer::bufferTransfer(const std::string& transferType, int64_t
 
     // Add a workload to the render pass
     auto workload = std::make_shared<LCSBufferTransfer>(tagID, transferType, byteCount);
-    workloads.push_back(workload);
 
     // Add a command to the layer-side command stream
-    auto instr = std::make_pair(LCSOpcode::BUFFER_TRANSFER, workload);
-    workloadCommandStream.push_back(instr);
+    workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
 
     return tagID;
 }
@@ -200,7 +181,6 @@ void CommandBuffer::executeCommands(CommandBuffer& secondary)
     stats.mergeCounts(secondary.getStats());
 
     // Integrate secondary layer commands
-    vecAppend(workloads, secondary.workloads);
     vecAppend(workloadCommandStream, secondary.workloadCommandStream);
 }
 
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
index 029cb41..0060520 100644
--- a/source_common/trackers/command_buffer.hpp
+++ b/source_common/trackers/command_buffer.hpp
@@ -49,7 +49,6 @@
 #include <memory>
 #include <string>
 #include <unordered_map>
-#include <utility>
 #include <vector>
 
 #include <vulkan/vulkan.h>
@@ -203,11 +202,6 @@ class CommandBuffer
      */
     std::shared_ptr<LCSRenderPass> currentRenderPass;
 
-    /**
-     * @brief The recorded workloads.
-     */
-    std::vector<std::shared_ptr<LCSWorkload>> workloads;
-
     /**
      * @brief The recorded commands.
      */
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index 9d4cfd0..b04c15b 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -26,6 +26,7 @@
 #include "trackers/layer_command_stream.hpp"
 
 #include <cassert>
+#include <memory>
 
 #include <nlohmann/json.hpp>
 
@@ -41,13 +42,6 @@ LCSWorkload::LCSWorkload(uint64_t _tagID)
 {
 }
 
-/* See header for details. */
-LCSMarker::LCSMarker(const std::string& _label)
-    : LCSWorkload(0),
-      label(_label) {
-
-      };
-
 /* See header for details. */
 LCSRenderPass::LCSRenderPass(uint64_t _tagID,
                              const RenderPass& renderPass,
@@ -67,7 +61,7 @@ LCSRenderPass::LCSRenderPass(uint64_t _tagID,
 }
 
 /* See header for details. */
-std::string LCSRenderPass::getBeginMetadata(const std::vector<std::string>* debugLabel) const
+std::string LCSRenderPass::getBeginMetadata(const std::vector<std::string>& debugLabel) const
 {
     // Draw count for a multi-submit command buffer cannot be reliably
     // associated with a single tagID if restartable across command buffer
@@ -87,9 +81,9 @@ std::string LCSRenderPass::getBeginMetadata(const std::vector<std::string>* debu
         {"drawCallCount", drawCount},
     };
 
-    if (debugLabel && debugLabel->size())
+    if (!debugLabel.empty())
     {
-        metadata["label"] = *debugLabel;
+        metadata["label"] = debugLabel;
     }
 
     // Default is 1, so only store if we need it
@@ -131,8 +125,7 @@ std::string LCSRenderPass::getBeginMetadata(const std::vector<std::string>* debu
 }
 
 /* See header for details. */
-std::string LCSRenderPass::getContinuationMetadata(const std::vector<std::string>* debugLabel,
-                                                   uint64_t tagIDContinuation) const
+std::string LCSRenderPass::getContinuationMetadata(uint64_t tagIDContinuation) const
 {
     json metadata = {
         {"type", "renderpass"},
@@ -140,27 +133,9 @@ std::string LCSRenderPass::getContinuationMetadata(const std::vector<std::string
         {"drawCallCount", drawCallCount},
     };
 
-    if (debugLabel && debugLabel->size())
-    {
-        metadata["label"] = *debugLabel;
-    }
-
     return metadata.dump();
 }
 
-/* See header for details. */
-std::string LCSRenderPass::getMetadata(const std::vector<std::string>* debugLabel, uint64_t tagIDContinuation) const
-{
-    if (tagID)
-    {
-        assert(tagIDContinuation == 0);
-        return getBeginMetadata(debugLabel);
-    }
-
-    assert(tagIDContinuation != 0);
-    return getContinuationMetadata(debugLabel, tagIDContinuation);
-}
-
 /* See header for details. */
 LCSDispatch::LCSDispatch(uint64_t _tagID, int64_t _xGroups, int64_t _yGroups, int64_t _zGroups)
     : LCSWorkload(_tagID),
@@ -171,21 +146,17 @@ LCSDispatch::LCSDispatch(uint64_t _tagID, int64_t _xGroups, int64_t _yGroups, in
 }
 
 /* See header for details. */
-std::string LCSDispatch::getMetadata(const std::vector<std::string>* debugLabel, uint64_t tagIDContinuation) const
+std::string LCSDispatch::getMetadata(const std::vector<std::string>& debugLabel) const
 {
-    UNUSED(tagIDContinuation);
-
-    json metadata = {
-        {"type", "dispatch"},
-        {"tid", tagID},
-        {"xGroups", xGroups},
-        {"yGroups", yGroups},
-        {"zGroups", zGroups},
-    };
+    json metadata = {{"type", "dispatch"},
+                     {"tid", tagID},
+                     {"xGroups", xGroups},
+                     {"yGroups", yGroups},
+                     {"zGroups", zGroups}};
 
-    if (debugLabel && debugLabel->size())
+    if (!debugLabel.empty())
     {
-        metadata["label"] = *debugLabel;
+        metadata["label"] = debugLabel;
     }
 
     return metadata.dump();
@@ -201,10 +172,8 @@ LCSTraceRays::LCSTraceRays(uint64_t _tagID, int64_t _xItems, int64_t _yItems, in
 }
 
 /* See header for details. */
-std::string LCSTraceRays::getMetadata(const std::vector<std::string>* debugLabel, uint64_t tagIDContinuation) const
+std::string LCSTraceRays::getMetadata(const std::vector<std::string>& debugLabel) const
 {
-    UNUSED(tagIDContinuation);
-
     json metadata = {
         {"type", "tracerays"},
         {"tid", tagID},
@@ -213,9 +182,9 @@ std::string LCSTraceRays::getMetadata(const std::vector<std::string>* debugLabel
         {"zItems", zItems},
     };
 
-    if (debugLabel && debugLabel->size())
+    if (!debugLabel.empty())
     {
-        metadata["label"] = *debugLabel;
+        metadata["label"] = debugLabel;
     }
 
     return metadata.dump();
@@ -230,10 +199,8 @@ LCSImageTransfer::LCSImageTransfer(uint64_t _tagID, const std::string& _transfer
 }
 
 /* See header for details. */
-std::string LCSImageTransfer::getMetadata(const std::vector<std::string>* debugLabel, uint64_t tagIDContinuation) const
+std::string LCSImageTransfer::getMetadata(const std::vector<std::string>& debugLabel) const
 {
-    UNUSED(tagIDContinuation);
-
     json metadata = {
         {"type", "imagetransfer"},
         {"tid", tagID},
@@ -241,9 +208,9 @@ std::string LCSImageTransfer::getMetadata(const std::vector<std::string>* debugL
         {"pixelCount", pixelCount},
     };
 
-    if (debugLabel && debugLabel->size())
+    if (!debugLabel.empty())
     {
-        metadata["label"] = *debugLabel;
+        metadata["label"] = debugLabel;
     }
 
     return metadata.dump();
@@ -258,10 +225,8 @@ LCSBufferTransfer::LCSBufferTransfer(uint64_t _tagID, const std::string& _transf
 }
 
 /* See header for details. */
-std::string LCSBufferTransfer::getMetadata(const std::vector<std::string>* debugLabel, uint64_t tagIDContinuation) const
+std::string LCSBufferTransfer::getMetadata(const std::vector<std::string>& debugLabel) const
 {
-    UNUSED(tagIDContinuation);
-
     json metadata = {
         {"type", "buffertransfer"},
         {"tid", tagID},
@@ -269,12 +234,18 @@ std::string LCSBufferTransfer::getMetadata(const std::vector<std::string>* debug
         {"byteCount", byteCount},
     };
 
-    if (debugLabel && debugLabel->size())
+    if (!debugLabel.empty())
     {
-        metadata["label"] = *debugLabel;
+        metadata["label"] = debugLabel;
     }
 
     return metadata.dump();
 }
 
+/* See header for details. */
+LCSInstructionMarkerPush::LCSInstructionMarkerPush(const std::string& _label)
+    : label(std::make_shared<std::string>(_label))
+{
+}
+
 }
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index 87f1e29..4987226 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -44,55 +44,21 @@
 #include <atomic>
 #include <memory>
 #include <string>
+#include <type_traits>
 #include <utility>
+#include <variant>
 #include <vector>
 
 #include <vulkan/vulkan.h>
 
 namespace Tracker
 {
-
-/**
- * @brief Enumeration of layer command stream opcodes.
- */
-enum class LCSOpcode
-{
-    MARKER_BEGIN,
-    MARKER_END,
-    RENDER_PASS,
-    DISPATCH,
-    TRACE_RAYS,
-    BUFFER_TRANSFER,
-    IMAGE_TRANSFER
-};
-
 /**
  * @brief Base class representing a GPU workload in the command stream.
  */
 class LCSWorkload
 {
 public:
-    /**
-     * @brief Create a new workload.
-     *
-     * @param tagID   The assigned tagID.
-     */
-    LCSWorkload(uint64_t tagID);
-
-    /**
-     * @brief Destroy a workload.
-     */
-    virtual ~LCSWorkload() = default;
-
-    /**
-     * @brief Get the metadata for this workload
-     *
-     * @param debugLabel          The debug label stack for the VkQueue at submit time.
-     * @param tagIDContinuation   The ID of the workload if this is a continuation of it.
-     */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const = 0;
-
     /**
      * @brief Get this workload's tagID.
      *
@@ -115,6 +81,19 @@ class LCSWorkload
      */
     uint64_t tagID;
 
+    /**
+     * @brief Create a new workload.
+     *
+     * @param tagID   The assigned tagID.
+     */
+    LCSWorkload(uint64_t tagID);
+
+    /**
+     * @brief Destroy a workload; this is protected since we should never really be dealing with workloads in the
+     * abstract sense (or at least not deleting them as such)
+     */
+    ~LCSWorkload() noexcept = default;
+
 private:
     /**
      * @brief The workload tagID allocator.
@@ -145,11 +124,6 @@ class LCSRenderPass : public LCSWorkload
                   bool suspending,
                   bool oneTimeSubmit);
 
-    /**
-     * @brief Destroy a workload.
-     */
-    virtual ~LCSRenderPass() = default;
-
     /**
      * @brief Is this a suspending render pass?
      *
@@ -164,27 +138,21 @@ class LCSRenderPass : public LCSWorkload
      */
     void setDrawCallCount(uint64_t count) { drawCallCount = count; };
 
-    /* See base class for documentation. */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const;
-
-private:
     /**
      * @brief Get the metadata for this workload if beginning a new render pass.
      *
      * @param debugLabel   The debug label stack of the VkQueue at submit time.
      */
-    std::string getBeginMetadata(const std::vector<std::string>* debugLabel = nullptr) const;
+    std::string getBeginMetadata(const std::vector<std::string>& debugLabel) const;
 
     /**
      * @brief Get the metadata for this workload if continuing an existing render pass.
      *
-     * @param debugLabel          The debug label stack of the VkQueue at submit time.
      * @param tagIDContinuation   The ID of the workload if this is a continuation of it.
      */
-    std::string getContinuationMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                        uint64_t tagIDContinuation = 0) const;
+    std::string getContinuationMetadata(uint64_t tagIDContinuation) const;
 
+private:
     /**
      * @brief Width of this workload, in pixels.
      */
@@ -243,13 +211,11 @@ class LCSDispatch : public LCSWorkload
     LCSDispatch(uint64_t tagID, int64_t xGroups, int64_t yGroups, int64_t zGroups);
 
     /**
-     * @brief Destroy a workload.
+     * @brief Get the metadata for this workload
+     *
+     * @param debugLabel          The debug label stack for the VkQueue at submit time.
      */
-    virtual ~LCSDispatch() = default;
-
-    /* See base class for documentation. */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const;
+    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
 
 private:
     /**
@@ -287,13 +253,11 @@ class LCSTraceRays : public LCSWorkload
     LCSTraceRays(uint64_t tagID, int64_t xItems, int64_t yItems, int64_t zItems);
 
     /**
-     * @brief Destroy a workload.
+     * @brief Get the metadata for this workload
+     *
+     * @param debugLabel          The debug label stack for the VkQueue at submit time.
      */
-    virtual ~LCSTraceRays() = default;
-
-    /* See base class for documentation. */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const;
+    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
 
 private:
     /**
@@ -330,13 +294,11 @@ class LCSImageTransfer : public LCSWorkload
     LCSImageTransfer(uint64_t tagID, const std::string& transferType, int64_t pixelCount);
 
     /**
-     * @brief Destroy a workload.
+     * @brief Get the metadata for this workload
+     *
+     * @param debugLabel          The debug label stack for the VkQueue at submit time.
      */
-    virtual ~LCSImageTransfer() = default;
-
-    /* See base class for documentation. */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const;
+    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
 
 private:
     /**
@@ -369,13 +331,11 @@ class LCSBufferTransfer : public LCSWorkload
     LCSBufferTransfer(uint64_t tagID, const std::string& transferType, int64_t byteCount);
 
     /**
-     * @brief Destroy a workload.
+     * @brief Get the metadata for this workload
+     *
+     * @param debugLabel          The debug label stack for the VkQueue at submit time.
      */
-    virtual ~LCSBufferTransfer() = default;
-
-    /* See base class for documentation. */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const;
+    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
 
 private:
     /**
@@ -390,12 +350,9 @@ class LCSBufferTransfer : public LCSWorkload
 };
 
 /**
- * @brief Class representing a marker workload in the command stream.
- *
- * Note there is no class for a marker end, as is has no payload and can use
- * just the opcode to indicate behavior.
+ * @brief Class representing a marker instruction in the command stream that represents a debug label push operation.
  */
-class LCSMarker : public LCSWorkload
+class LCSInstructionMarkerPush
 {
 public:
     /**
@@ -403,33 +360,85 @@ class LCSMarker : public LCSWorkload
      *
      * @param label   The application debug label.
      */
-    LCSMarker(const std::string& label);
+    LCSInstructionMarkerPush(const std::string& label);
+
+    /**
+     * @brief Get the stored debug label
+     *
+     * @return The label string
+     */
+    const std::string& getLabel() const { return *label; }
 
+private:
     /**
-     * @brief Destroy a workload.
+     * @brief The application debug label.
+     *
+     * The label is stored in a shared pointer to avoid copying the actual string when it is shared between
+     * subcommandbuffers
      */
-    virtual ~LCSMarker() = default;
+    std::shared_ptr<std::string> label;
+};
+
+/**
+ * @brief Class representing a marker instruction in the command stream that represents a debug label pop operation.
+ */
+class LCSInstructionMarkerPop
+{
+    // there are no members, as this type is just a marker within LCSInstruction variant
+};
 
-    /* See base class for documentation. */
-    virtual std::string getMetadata(const std::vector<std::string>* debugLabel = nullptr,
-                                    uint64_t tagIDContinuation = 0) const
+/**
+ * @brief Class representing a workload instruction in the command stream.
+ */
+template<typename WorkloadType>
+requires(std::is_base_of_v<LCSWorkload, WorkloadType>)
+class LCSInstructionWorkload
+{
+public:
+    /**
+     * @brief Create a new workload instruction from a pre-made workload pointer.
+     *
+     * @param wrkload The workload object (must not be null)
+     */
+    LCSInstructionWorkload(std::shared_ptr<WorkloadType> wrkload)
+        : workload(std::move(wrkload))
     {
-        UNUSED(debugLabel);
-        UNUSED(tagIDContinuation);
-        return label;
-    };
+    }
+
+    /**
+     * @brief Get the stored workload
+     *
+     * @return The workload
+     */
+    const WorkloadType& getWorkload() const { return *workload; }
 
 private:
     /**
-     * @brief The application debug label.
+     * @brief The stored workload
+     *
+     * The workload is stored in a shared pointer to avoid copying the actual value when it is shared between
+     * subcommandbuffers
      */
-    std::string label;
+    std::shared_ptr<WorkloadType> workload;
 };
 
 /**
- * @brief Instructions are an opcode with a data pointer.
- *
- * Data pointers may be null for some opcodes.
+ * @brief Instructions are a variant representing the operation.
  */
-using LCSInstruction = std::pair<LCSOpcode, std::shared_ptr<LCSWorkload>>;
+using LCSInstruction = std::variant<
+    // the instruction is a debug-label push operation
+    LCSInstructionMarkerPush,
+    // the instruction is a debug-label pop operation
+    LCSInstructionMarkerPop,
+    // the instruction represents a renderpass workload operation
+    LCSInstructionWorkload<LCSRenderPass>,
+    // the instruction represents a dispatch workload operation
+    LCSInstructionWorkload<LCSDispatch>,
+    // the instruction represents a trace rays workload operation
+    LCSInstructionWorkload<LCSTraceRays>,
+    // the instruction represents an image transfer workload operation
+    LCSInstructionWorkload<LCSImageTransfer>,
+    // the instruction represents a buffer transfer workload operation
+    LCSInstructionWorkload<LCSBufferTransfer>>;
+
 }
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
index 6acd15b..45a915e 100644
--- a/source_common/trackers/queue.cpp
+++ b/source_common/trackers/queue.cpp
@@ -25,68 +25,131 @@
 
 #include "trackers/queue.hpp"
 
+#include "trackers/layer_command_stream.hpp"
+#include "utils/misc.hpp"
+
 #include <cassert>
+#include <variant>
+#include <vector>
 
 namespace Tracker
 {
-/* See header for details. */
-Queue::Queue(VkQueue _handle)
-    : handle(_handle) {
-
-      };
-
-/* See header for details. */
-void Queue::runSubmitCommandStream(const std::vector<LCSInstruction>& stream,
-                                   std::function<void(const std::string&)> callback)
+namespace
 {
-    for (auto& instr : stream)
+
+    /**
+     * @brief A visitor implementation that processes each command stream instruction, and
+     *        correctly updates the Queue's state, as well as serializing workload objects
+     *        into the message data callback.
+     */
+    class SubmitCommandInstructionVisitor
     {
-        LCSOpcode opCode = instr.first;
-        const LCSWorkload* opData = instr.second.get();
+    public:
+        SubmitCommandInstructionVisitor(QueueState& _queueState, std::function<void(const std::string&)> _callback)
+            : queueState(_queueState),
+              callback(_callback)
+        {
+        }
+
+        // visitor should not be copied or moved from
+        SubmitCommandInstructionVisitor(const SubmitCommandInstructionVisitor&) = delete;
+        SubmitCommandInstructionVisitor(SubmitCommandInstructionVisitor&&) noexcept = delete;
+        SubmitCommandInstructionVisitor& operator=(const SubmitCommandInstructionVisitor&) = delete;
+        SubmitCommandInstructionVisitor& operator=(SubmitCommandInstructionVisitor&&) noexcept = delete;
 
-        if (opCode == LCSOpcode::MARKER_BEGIN)
+        /**
+         * @brief Visit a debug-label push marker instruction
+         *
+         * @param instruction The push instruction
+         */
+        void operator()(const LCSInstructionMarkerPush& instruction)
         {
-            debugStack.push_back(opData->getMetadata());
+            queueState.debugStack.emplace_back(instruction.getLabel());
         }
-        else if (opCode == LCSOpcode::MARKER_END)
+
+        /**
+         * @brief Visit a debug-label pop marker instruction
+         *
+         * @param instruction The pop instruction
+         */
+        void operator()(const LCSInstructionMarkerPop& instruction)
         {
-            debugStack.pop_back();
+            UNUSED(instruction);
+
+            queueState.debugStack.pop_back();
         }
-        else if (opCode == LCSOpcode::RENDER_PASS)
+
+        /**
+         * @brief Visit a renderpass workload instruction
+         *
+         * @param instruction The workload instruction
+         */
+        void operator()(const LCSInstructionWorkload<LCSRenderPass>& instruction)
         {
-            auto* workload = dynamic_cast<const LCSRenderPass*>(opData);
-            uint64_t tagID = workload->getTagID();
+            const auto& workload = instruction.getWorkload();
+            const auto tagID = workload.getTagID();
 
             // Workload is a new render pass
             if (tagID > 0)
             {
-                assert(lastRenderPassTagID == 0);
-                callback(workload->getMetadata(&debugStack));
+                assert(queueState.lastRenderPassTagID == 0);
+                callback(workload.getBeginMetadata(queueState.debugStack));
 
-                lastRenderPassTagID = 0;
-                if (workload->isSuspending())
+                queueState.lastRenderPassTagID = 0;
+                if (workload.isSuspending())
                 {
-                    lastRenderPassTagID = tagID;
+                    queueState.lastRenderPassTagID = tagID;
                 }
             }
             // Workload is a continuation
             else
             {
-                assert(lastRenderPassTagID != 0);
-                callback(workload->getMetadata(nullptr, lastRenderPassTagID));
-                if (!workload->isSuspending())
+                assert(queueState.lastRenderPassTagID != 0);
+                callback(workload.getContinuationMetadata(queueState.lastRenderPassTagID));
+                if (!workload.isSuspending())
                 {
-                    lastRenderPassTagID = 0;
+                    queueState.lastRenderPassTagID = 0;
                 }
             }
         }
-        else if ((opCode == LCSOpcode::DISPATCH) || (opCode == LCSOpcode::TRACE_RAYS)
-                 || (opCode == LCSOpcode::IMAGE_TRANSFER) || (opCode == LCSOpcode::BUFFER_TRANSFER))
+
+        /**
+         * @brief Visit a dispatch/trace rays/image transfer/buffer transfer workload instruction
+         *
+         * @param instruction The workload instruction
+         */
+        template<typename WorkloadType>
+        requires(std::is_same_v<WorkloadType, LCSDispatch> || std::is_same_v<WorkloadType, LCSTraceRays>
+                 || std::is_same_v<WorkloadType, LCSImageTransfer> || std::is_same_v<WorkloadType, LCSBufferTransfer>)
+        void operator()(const LCSInstructionWorkload<WorkloadType>& instruction)
         {
-            uint64_t tagID = opData->getTagID();
-            std::string log = joinString(debugStack, "|");
-            callback(opData->getMetadata(&debugStack, tagID));
+            const auto& workload = instruction.getWorkload();
+
+            callback(workload.getMetadata(queueState.debugStack));
         }
+
+    private:
+        QueueState& queueState;
+        std::function<void(const std::string&)> callback;
+    };
+
+}
+
+/* See header for details. */
+Queue::Queue(VkQueue _handle)
+    : state(_handle) {
+
+      };
+
+/* See header for details. */
+void Queue::runSubmitCommandStream(const std::vector<LCSInstruction>& stream,
+                                   std::function<void(const std::string&)> callback)
+{
+    SubmitCommandInstructionVisitor visitor {state, callback};
+
+    for (auto& instr : stream)
+    {
+        std::visit(visitor, instr);
     }
 }
 
diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp
index 98a5f2f..6fefae4 100644
--- a/source_common/trackers/queue.hpp
+++ b/source_common/trackers/queue.hpp
@@ -41,10 +41,8 @@
 
 #pragma once
 
-#include "framework/utils.hpp"
 #include "trackers/layer_command_stream.hpp"
 
-#include <atomic>
 #include <functional>
 #include <string>
 #include <vector>
@@ -53,25 +51,22 @@
 
 namespace Tracker
 {
-
 /**
- * @brief The state tracker for a queue.
+ * Metadata tracked by the queue when it emits commands, that can be
+ * shared with the LCSInstruction visitor object during instruction processing
  */
-class Queue
+struct QueueState
 {
-public:
-    Queue(VkQueue handle);
-
     /**
-     * @brief Execute a layer command stream.
+     * @brief Construct the state object
      *
-     * @param stream     The layer command stream to execute.
-     * @param callback   The callback to pass submitted workloads to.
+     * @param queue The queue which the state tracks
      */
-    void runSubmitCommandStream(const std::vector<LCSInstruction>& stream,
-                                std::function<void(const std::string&)> callback);
+    QueueState(VkQueue queue)
+        : handle(queue)
+    {
+    }
 
-private:
     /**
      * The handle of the native queue we are wrapping.
      */
@@ -80,7 +75,7 @@ class Queue
     /**
      * @brief The stack of user debug labels for this queue.
      */
-    std::vector<std::string> debugStack;
+    std::vector<std::string> debugStack {};
 
     /**
      * @brief The last non-zero render pass tagID submitted.
@@ -88,4 +83,25 @@ class Queue
     uint64_t lastRenderPassTagID {0};
 };
 
+/**
+ * @brief The state tracker for a queue.
+ */
+class Queue
+{
+public:
+    Queue(VkQueue handle);
+
+    /**
+     * @brief Execute a layer command stream.
+     *
+     * @param stream     The layer command stream to execute.
+     * @param callback   The callback to pass submitted workloads to.
+     */
+    void runSubmitCommandStream(const std::vector<LCSInstruction>& stream,
+                                std::function<void(const std::string&)> callback);
+
+private:
+    QueueState state;
+};
+
 }

From 7a4e20b25e3c42ddeb2ca40d1a3b8d9b83252123 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Fri, 31 Jan 2025 14:00:07 +0000
Subject: [PATCH 02/11] Split LCSRenderPassContinuation from LCSRenderPass.

No need to carry around the extra state unnecessarily.

This also makes it clear from the type, rather than from the magic
tagID value, that the renderpass is a continuation. This makes
refactoring to separate out the workload as a data object
from operations on that data safer, since they will not
need to propagate this magic knowledge forward.
---
 source_common/trackers/command_buffer.cpp     |  40 +++++--
 source_common/trackers/command_buffer.hpp     |   2 +-
 .../trackers/layer_command_stream.cpp         |  14 ++-
 .../trackers/layer_command_stream.hpp         | 107 ++++++++++++------
 source_common/trackers/queue.cpp              |  44 ++++---
 5 files changed, 135 insertions(+), 72 deletions(-)

diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
index 2953525..3a5da3c 100644
--- a/source_common/trackers/command_buffer.cpp
+++ b/source_common/trackers/command_buffer.cpp
@@ -73,28 +73,44 @@ uint64_t CommandBuffer::renderPassBegin(const RenderPass& renderPass,
                                         bool resuming,
                                         bool suspending)
 {
-    uint64_t tagID {0};
-
     assert(!currentRenderPass);
 
-    // Assign ID and update the stats tracker for new render passes only
+    // Record the current draw call count so that the delta can be computed at
+    // the end of the renderpass; this gives the number of draw calls in that pass
+    renderPassStartDrawCount = stats.getDrawCallCount();
+
+    // Create the workload object and populate with config information
     if (!resuming)
     {
-        tagID = Tracker::LCSWorkload::assignTagID();
+        // Assign ID and update the stats tracker for new render passes only
+        const auto tagID = Tracker::LCSWorkload::assignTagID();
         stats.incRenderPassCount();
-    }
 
-    // Populate render pass with config information
-    renderPassStartDrawCount = stats.getDrawCallCount();
+        // Create a new renderpass object
+        const auto workload =
+            std::make_shared<LCSRenderPass>(tagID, renderPass, width, height, suspending, oneTimeSubmit);
 
-    auto workload = std::make_shared<LCSRenderPass>(tagID, renderPass, width, height, suspending, oneTimeSubmit);
+        // Track the workload as it will be modified at the end of the renderpass
+        currentRenderPass = workload;
 
-    currentRenderPass = workload;
+        // Add a command to the layer-side command stream
+        workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
 
-    // Add a command to the layer-side command stream
-    workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
+        return tagID;
+    }
+    else
+    {
+        // Create a renderpass continuation object
+        const auto workload = std::make_shared<LCSRenderPassContinuation>(suspending);
 
-    return tagID;
+        // Track the workload as it will be modified at the end of the renderpass
+        currentRenderPass = workload;
+
+        // Add a command to the layer-side command stream
+        workloadCommandStream.emplace_back(LCSInstructionWorkload(workload));
+
+        return 0;
+    }
 }
 
 /* See header for documentation. */
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
index 0060520..67caf6d 100644
--- a/source_common/trackers/command_buffer.hpp
+++ b/source_common/trackers/command_buffer.hpp
@@ -200,7 +200,7 @@ class CommandBuffer
     /**
      * @brief The current render pass if we are in one.
      */
-    std::shared_ptr<LCSRenderPass> currentRenderPass;
+    std::shared_ptr<LCSRenderPassBase> currentRenderPass;
 
     /**
      * @brief The recorded commands.
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index b04c15b..bf64761 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -42,6 +42,13 @@ LCSWorkload::LCSWorkload(uint64_t _tagID)
 {
 }
 
+/* See header for details. */
+LCSRenderPassBase::LCSRenderPassBase(uint64_t _tagID, bool _suspending)
+    : LCSWorkload(_tagID),
+      suspending(_suspending)
+{
+}
+
 /* See header for details. */
 LCSRenderPass::LCSRenderPass(uint64_t _tagID,
                              const RenderPass& renderPass,
@@ -49,10 +56,9 @@ LCSRenderPass::LCSRenderPass(uint64_t _tagID,
                              uint32_t _height,
                              bool _suspending,
                              bool _oneTimeSubmit)
-    : LCSWorkload(_tagID),
+    : LCSRenderPassBase(_tagID, _suspending),
       width(_width),
       height(_height),
-      suspending(_suspending),
       oneTimeSubmit(_oneTimeSubmit)
 {
     // Copy these as the render pass object may be transient.
@@ -61,7 +67,7 @@ LCSRenderPass::LCSRenderPass(uint64_t _tagID,
 }
 
 /* See header for details. */
-std::string LCSRenderPass::getBeginMetadata(const std::vector<std::string>& debugLabel) const
+std::string LCSRenderPass::getMetadata(const std::vector<std::string>& debugLabel) const
 {
     // Draw count for a multi-submit command buffer cannot be reliably
     // associated with a single tagID if restartable across command buffer
@@ -125,7 +131,7 @@ std::string LCSRenderPass::getBeginMetadata(const std::vector<std::string>& debu
 }
 
 /* See header for details. */
-std::string LCSRenderPass::getContinuationMetadata(uint64_t tagIDContinuation) const
+std::string LCSRenderPassContinuation::getMetadata(uint64_t tagIDContinuation) const
 {
     json metadata = {
         {"type", "renderpass"},
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index 4987226..78717b4 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -39,13 +39,11 @@
 #pragma once
 
 #include "trackers/render_pass.hpp"
-#include "utils/misc.hpp"
 
 #include <atomic>
 #include <memory>
 #include <string>
 #include <type_traits>
-#include <utility>
 #include <variant>
 #include <vector>
 
@@ -101,10 +99,53 @@ class LCSWorkload
     static std::atomic<uint64_t> nextTagID;
 };
 
+/**
+ * @brief Common base class for classes representing render pass workload in the command stream.
+ */
+class LCSRenderPassBase : public LCSWorkload
+{
+public:
+    /**
+     * @brief Is this a suspending render pass?
+     *
+     * @return @c true if this instance suspends rather than ends.
+     */
+    bool isSuspending() const { return suspending; }
+
+    /**
+     * @brief Update this workload with the final draw count.
+     *
+     * @param count   The number of draw calls tracked by the command buffer.
+     */
+    void setDrawCallCount(uint64_t count) { drawCallCount = count; }
+
+protected:
+    /**
+     * @brief The number of draw calls in the render pass.
+     *
+     * Note: This is updated by ther command buffer tracker when the render
+     * pass is suspended or ended.
+     */
+    uint64_t drawCallCount {0};
+
+    /**
+     * @brief Is this workload suspending rather than ending?
+     */
+    bool suspending;
+
+    /**
+     * @brief Construct the common renderbase workload
+     *
+     * @param tagID           The assigned tagID.
+     * @param suspending      Is this a render pass part that suspends later?
+     */
+    LCSRenderPassBase(uint64_t tagID, bool suspending);
+};
+
 /**
  * @brief Class representing a render pass workload in the command stream.
  */
-class LCSRenderPass : public LCSWorkload
+class LCSRenderPass : public LCSRenderPassBase
 {
 public:
     /**
@@ -125,34 +166,18 @@ class LCSRenderPass : public LCSWorkload
                   bool oneTimeSubmit);
 
     /**
-     * @brief Is this a suspending render pass?
-     *
-     * @return @c true if this instance suspends rather than ends.
-     */
-    bool isSuspending() const { return suspending; };
-
-    /**
-     * @brief Update this workload with the final draw count.
-     *
-     * @param count   The number of draw calls tracked by the command buffer.
-     */
-    void setDrawCallCount(uint64_t count) { drawCallCount = count; };
-
-    /**
-     * @brief Get the metadata for this workload if beginning a new render pass.
+     * @brief Get the metadata for this render pass workload.
      *
      * @param debugLabel   The debug label stack of the VkQueue at submit time.
      */
-    std::string getBeginMetadata(const std::vector<std::string>& debugLabel) const;
+    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
 
+private:
     /**
-     * @brief Get the metadata for this workload if continuing an existing render pass.
-     *
-     * @param tagIDContinuation   The ID of the workload if this is a continuation of it.
+     * @brief The attachments for this render pass.
      */
-    std::string getContinuationMetadata(uint64_t tagIDContinuation) const;
+    std::vector<RenderPassAttachment> attachments;
 
-private:
     /**
      * @brief Width of this workload, in pixels.
      */
@@ -164,32 +189,38 @@ class LCSRenderPass : public LCSWorkload
     uint32_t height;
 
     /**
-     * @brief Is this workload suspending rather than ending?
+     * @brief The number of subpasses in the render pass.
      */
-    bool suspending;
+    uint32_t subpassCount;
 
     /**
      * @brief Is this workload in a one-time-submit command buffer?
      */
     bool oneTimeSubmit;
+};
 
+/**
+ * @brief Class representing the continuation of a split render pass workload continuation in the command stream.
+ */
+class LCSRenderPassContinuation : public LCSRenderPassBase
+{
+public:
     /**
-     * @brief The number of subpasses in the render pass.
-     */
-    uint32_t subpassCount;
-
-    /**
-     * @brief The number of draw calls in the render pass.
+     * @brief Create a new workload representing a split render pass.
      *
-     * Note: This is updated by ther command buffer tracker when the render
-     * pass is suspended or ended.
+     * @param _suspending      Is this a render pass part that suspends later?
      */
-    uint64_t drawCallCount {0};
+    LCSRenderPassContinuation(bool _suspending)
+        : LCSRenderPassBase(0, _suspending)
+    {
+    }
 
     /**
-     * @brief The attachments for this render pass.
+     * @brief Get the metadata for this render pass continuation workload.
+     *
+     * @param tagIDContinuation   The ID of the workload if this is a continuation of it.
      */
-    std::vector<RenderPassAttachment> attachments;
+    std::string getMetadata(uint64_t tagIDContinuation) const;
 };
 
 /**
@@ -432,6 +463,8 @@ using LCSInstruction = std::variant<
     LCSInstructionMarkerPop,
     // the instruction represents a renderpass workload operation
     LCSInstructionWorkload<LCSRenderPass>,
+    // the instruction represents a continuation of a renderpass workload operation
+    LCSInstructionWorkload<LCSRenderPassContinuation>,
     // the instruction represents a dispatch workload operation
     LCSInstructionWorkload<LCSDispatch>,
     // the instruction represents a trace rays workload operation
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
index 45a915e..eda2e11 100644
--- a/source_common/trackers/queue.cpp
+++ b/source_common/trackers/queue.cpp
@@ -90,26 +90,34 @@ namespace
             const auto tagID = workload.getTagID();
 
             // Workload is a new render pass
-            if (tagID > 0)
-            {
-                assert(queueState.lastRenderPassTagID == 0);
-                callback(workload.getBeginMetadata(queueState.debugStack));
+            assert(tagID > 0);
+            assert(queueState.lastRenderPassTagID == 0);
 
-                queueState.lastRenderPassTagID = 0;
-                if (workload.isSuspending())
-                {
-                    queueState.lastRenderPassTagID = tagID;
-                }
-            }
-            // Workload is a continuation
-            else
+            callback(workload.getMetadata(queueState.debugStack));
+
+            queueState.lastRenderPassTagID = (workload.isSuspending() ? tagID : 0);
+        }
+
+        /**
+         * @brief Visit a renderpass continuation workload instruction
+         *
+         * @param instruction The workload instruction
+         */
+        void operator()(const LCSInstructionWorkload<LCSRenderPassContinuation>& instruction)
+        {
+            const auto& workload = instruction.getWorkload();
+            const auto tagID = workload.getTagID();
+
+            UNUSED(tagID); // other than for the assert
+
+            assert(tagID == 0);
+            assert(queueState.lastRenderPassTagID != 0);
+
+            callback(workload.getMetadata(queueState.lastRenderPassTagID));
+
+            if (!workload.isSuspending())
             {
-                assert(queueState.lastRenderPassTagID != 0);
-                callback(workload.getContinuationMetadata(queueState.lastRenderPassTagID));
-                if (!workload.isSuspending())
-                {
-                    queueState.lastRenderPassTagID = 0;
-                }
+                queueState.lastRenderPassTagID = 0;
             }
         }
 

From 8bce9db913a8ee1cdbec842a9f6670fb24812081 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Fri, 31 Jan 2025 14:52:40 +0000
Subject: [PATCH 03/11] Move timeline layer driver payload creation and
 transmission into WorkloadMetadataEmitterVisitor.

This centralizes the construction of metadata payloads into one place,
decoupling the metadata format from the layer driver handler functions.
---
 layer_gpu_timeline/source/CMakeLists.txt      |   3 +-
 layer_gpu_timeline/source/device.cpp          |  19 +--
 layer_gpu_timeline/source/device.hpp          |  11 +-
 .../source/layer_device_functions_queue.cpp   |  66 ++++------
 .../source/workload_metadata_builder.cpp      | 118 ++++++++++++++++++
 .../source/workload_metadata_builder.hpp      | 117 +++++++++++++++++
 source_common/trackers/queue.cpp              |  24 ++--
 source_common/trackers/queue.hpp              |  70 ++++++++++-
 8 files changed, 341 insertions(+), 87 deletions(-)
 create mode 100644 layer_gpu_timeline/source/workload_metadata_builder.cpp
 create mode 100644 layer_gpu_timeline/source/workload_metadata_builder.hpp

diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
index 4c302e7..e4946e7 100644
--- a/layer_gpu_timeline/source/CMakeLists.txt
+++ b/layer_gpu_timeline/source/CMakeLists.txt
@@ -53,7 +53,8 @@ add_library(
         layer_device_functions_render_pass.cpp
         layer_device_functions_trace_rays.cpp
         layer_device_functions_transfer.cpp
-        timeline_comms.cpp)
+        timeline_comms.cpp
+        workload_metadata_builder.cpp)
 
 target_include_directories(
     ${VK_LAYER} PRIVATE
diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp
index e721ca5..2e63bf2 100644
--- a/layer_gpu_timeline/source/device.cpp
+++ b/layer_gpu_timeline/source/device.cpp
@@ -28,18 +28,13 @@
 #include "comms/comms_module.hpp"
 #include "framework/utils.hpp"
 #include "instance.hpp"
+#include "workload_metadata_builder.hpp"
 
-#include <array>
-#include <fstream>
-#include <iostream>
 #include <vector>
 
-#include <nlohmann/json.hpp>
 #include <sys/stat.h>
 #include <unistd.h>
 
-using json = nlohmann::json;
-
 /**
  * @brief The dispatch lookup for all of the created Vulkan devices.
  */
@@ -125,15 +120,5 @@ Device::Device(Instance* _instance,
 
     pid_t processPID = getpid();
 
-    json deviceMetadata {
-        {"type", "device"},
-        {"pid", static_cast<uint32_t>(processPID)},
-        {"device", reinterpret_cast<uintptr_t>(device)},
-        {"deviceName", name},
-        {"driverMajor", major},
-        {"driverMinor", minor},
-        {"driverPatch", patch},
-    };
-
-    commsWrapper->txMessage(deviceMetadata.dump());
+    WorkloadMetadataEmitterVisitor::emitMetadata(*this, processPID, major, minor, patch, std::move(name));
 }
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
index 9253db0..151e786 100644
--- a/layer_gpu_timeline/source/device.hpp
+++ b/layer_gpu_timeline/source/device.hpp
@@ -131,18 +131,11 @@ class Device
     ~Device() = default;
 
     /**
-     * @brief Callback for sending messages on frame boundary.
+     * @brief Callback for sending some message for the device
      *
      * @param message   The message to send.
      */
-    void onFrame(const std::string& message) { commsWrapper->txMessage(message); }
-
-    /**
-     * @brief Callback for sending messages on workload submit to a queue.
-     *
-     * @param message   The message to send.
-     */
-    void onWorkloadSubmit(const std::string& message) { commsWrapper->txMessage(message); }
+    void txMessage(const std::string& message) { commsWrapper->txMessage(message); }
 
     /**
      * @brief Get the cumulative stats for this device.
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
index a1ed3f1..15cef57 100644
--- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -25,17 +25,13 @@
 
 #include "device.hpp"
 #include "framework/device_dispatch_table.hpp"
-#include "utils/misc.hpp"
+#include "trackers/queue.hpp"
+#include "workload_metadata_builder.hpp"
 
 #include <mutex>
 
-#include <nlohmann/json.hpp>
 #include <time.h>
 
-using json = nlohmann::json;
-
-using namespace std::placeholders;
-
 extern std::mutex g_vulkanLock;
 
 /**
@@ -66,34 +62,26 @@ static uint64_t getClockMonotonicRaw()
 /**
  * @brief Emit the queue submit time metadata.
  *
- * @param queue      The queue being submitted to.
- * @param callback   The data emit callback.
+ * @param queue             The queue being submitted to.
+ * @param workloadVisitor   The data emit callback.
  */
-static void emitQueueMetadata(VkDevice device, VkQueue queue, std::function<void(const std::string&)> callback)
+static void emitQueueMetadata(VkDevice device, VkQueue queue, WorkloadMetadataEmitterVisitor& workloadVisitor)
 {
-    // Write the queue submit metadata
-    json submitMetadata {
-        {"type", "submit"},
-        {"device", reinterpret_cast<uintptr_t>(device)},
-        {"queue", reinterpret_cast<uintptr_t>(queue)},
-        {"timestamp", getClockMonotonicRaw()},
-    };
-
-    callback(submitMetadata.dump());
+    workloadVisitor.emitSubmit(device, queue, getClockMonotonicRaw());
 }
 
 /**
  * @brief Emit the command buffer submit time metadata.
  *
- * @param layer           The layer context.
- * @param queue           The queue being submitted to.
- * @param commandBuffer   The command buffer being submitted.
- * @param callback        The data emit callback.
+ * @param layer             The layer context.
+ * @param queue             The queue being submitted to.
+ * @param commandBuffer     The command buffer being submitted.
+ * @param workloadVisitor   The data emit callback.
  */
 static void emitCommandBufferMetadata(Device& layer,
                                       VkQueue queue,
                                       VkCommandBuffer commandBuffer,
-                                      std::function<void(const std::string&)> callback)
+                                      Tracker::SubmitCommandWorkloadVisitor& workloadVisitor)
 {
     // Fetch layer proxies for this workload
     auto& tracker = layer.getStateTracker();
@@ -102,7 +90,7 @@ static void emitCommandBufferMetadata(Device& layer,
 
     // Play the layer command stream into the queue
     const auto& LCS = trackCB.getSubmitCommandStream();
-    trackQueue.runSubmitCommandStream(LCS, callback);
+    trackQueue.runSubmitCommandStream(LCS, workloadVisitor);
 }
 
 /* See Vulkan API for documentation. */
@@ -120,14 +108,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(VkQueue queue,
 
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially to the host tool
-    json frame {
-        {"type", "frame"},
-        {"device", reinterpret_cast<uintptr_t>(layer->device)},
-        {"fid", tracker.totalStats.getFrameCount()},
-        {"timestamp", getClockMonotonicRaw()},
-    };
-
-    layer->onFrame(frame.dump());
+    WorkloadMetadataEmitterVisitor::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw());
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -145,13 +126,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
     std::unique_lock<std::mutex> lock {g_vulkanLock};
     auto* layer = Device::retrieve(queue);
 
-    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
-
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
+    WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, onSubmit);
+    emitQueueMetadata(layer->device, queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -160,7 +140,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
         for (uint32_t j = 0; j < submit.commandBufferCount; j++)
         {
             VkCommandBuffer commandBuffer = submit.pCommandBuffers[j];
-            emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
+            emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
         }
     }
 
@@ -180,13 +160,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
     std::unique_lock<std::mutex> lock {g_vulkanLock};
     auto* layer = Device::retrieve(queue);
 
-    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
-
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
+    WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, onSubmit);
+    emitQueueMetadata(layer->device, queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -195,7 +174,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
         for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
         {
             VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
-            emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
+            emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
         }
     }
 
@@ -215,13 +194,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
     std::unique_lock<std::mutex> lock {g_vulkanLock};
     auto* layer = Device::retrieve(queue);
 
-    auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);
-
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
+    WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, onSubmit);
+    emitQueueMetadata(layer->device, queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -230,7 +208,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
         for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
         {
             VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
-            emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
+            emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
         }
     }
 
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.cpp b/layer_gpu_timeline/source/workload_metadata_builder.cpp
new file mode 100644
index 0000000..f782fca
--- /dev/null
+++ b/layer_gpu_timeline/source/workload_metadata_builder.cpp
@@ -0,0 +1,118 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024-2025 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+#include "workload_metadata_builder.hpp"
+
+#include "utils/misc.hpp"
+
+#include <string>
+
+#include <nlohmann/json.hpp>
+
+using json = nlohmann::json;
+
+void WorkloadMetadataEmitterVisitor::emitMetadata(Device& layerDevice,
+                                                  uint32_t pid,
+                                                  uint32_t major,
+                                                  uint32_t minor,
+                                                  uint32_t patch,
+                                                  std::string name)
+{
+    json deviceMetadata {
+        {"type", "device"},
+        {"pid", pid},
+        {"device", reinterpret_cast<uintptr_t>(layerDevice.device)},
+        {"deviceName", std::move(name)},
+        {"driverMajor", major},
+        {"driverMinor", minor},
+        {"driverPatch", patch},
+    };
+
+    layerDevice.txMessage(deviceMetadata.dump());
+}
+
+void WorkloadMetadataEmitterVisitor::emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp)
+{
+    json frame {
+        {"type", "frame"},
+        {"device", reinterpret_cast<uintptr_t>(device.device)},
+        {"fid", frameNumber},
+        {"timestamp", timestamp},
+    };
+
+    device.txMessage(frame.dump());
+}
+
+void WorkloadMetadataEmitterVisitor::emitSubmit(VkDevice device, VkQueue queue, uint64_t timestamp)
+{
+    // Write the queue submit metadata
+    json submitMetadata {
+        {"type", "submit"},
+        {"device", reinterpret_cast<uintptr_t>(device)},
+        {"queue", reinterpret_cast<uintptr_t>(queue)},
+        {"timestamp", timestamp},
+    };
+
+    layerDevice.txMessage(submitMetadata.dump());
+}
+
+void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPass& renderpass,
+                                                const std::vector<std::string>& debugStack)
+{
+    layerDevice.txMessage(renderpass.getMetadata(debugStack));
+}
+
+void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPassContinuation& continuation,
+                                                const std::vector<std::string>& debugStack,
+                                                uint64_t renderpassTagID)
+{
+    UNUSED(debugStack);
+
+    layerDevice.txMessage(continuation.getMetadata(renderpassTagID));
+}
+
+void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSDispatch& dispatch,
+                                                const std::vector<std::string>& debugStack)
+{
+    layerDevice.txMessage(dispatch.getMetadata(debugStack));
+}
+
+void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSTraceRays& traceRays,
+                                                const std::vector<std::string>& debugStack)
+{
+    layerDevice.txMessage(traceRays.getMetadata(debugStack));
+}
+
+void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSImageTransfer& imageTransfer,
+                                                const std::vector<std::string>& debugStack)
+{
+    layerDevice.txMessage(imageTransfer.getMetadata(debugStack));
+}
+
+void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSBufferTransfer& bufferTransfer,
+                                                const std::vector<std::string>& debugStack)
+{
+    layerDevice.txMessage(bufferTransfer.getMetadata(debugStack));
+}
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.hpp b/layer_gpu_timeline/source/workload_metadata_builder.hpp
new file mode 100644
index 0000000..5b40993
--- /dev/null
+++ b/layer_gpu_timeline/source/workload_metadata_builder.hpp
@@ -0,0 +1,117 @@
+/*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2024-2025 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ */
+
+/**
+ * \file
+ * Methods for transforming layer command stream workloads into metadata messages
+ * for transmission to the host.
+ *
+ * Role summary
+ * ============
+ *
+ * These methods convert a command stream workload into some metadata payload
+ * message that can then be sent to the host.
+ */
+
+#pragma once
+
+#include "device.hpp"
+#include "trackers/layer_command_stream.hpp"
+#include "trackers/queue.hpp"
+
+#include <cstdint>
+
+#include <vulkan/vulkan_core.h>
+
+class WorkloadMetadataEmitterVisitor : public Tracker::SubmitCommandWorkloadVisitor
+{
+public:
+    /**
+     * @brief Called once when the layer is first created to produce the "metadata" frame for that layer device
+     *
+     * @param device The device object that the payloads are produced for, and to which they are passed for transmission
+     * @param pid The process ID of this process
+     * @param major The driver major version
+     * @param minor The driver minor version
+     * @param patch The driver patch version
+     * @param name The device name
+     */
+    static void emitMetadata(Device& device,
+                             uint32_t pid,
+                             uint32_t major,
+                             uint32_t minor,
+                             uint32_t patch,
+                             std::string name);
+
+    /**
+     * @brief Called at the start of a frame, delimiting the subsequent items from any later frame
+     *
+     * @param device The device object that the payloads are produced for, and to which they are passed for transmission
+     * @param frameNumber The frame number that uniquely identifies this frame
+     * @param timestamp The timestamp of the frame
+     */
+    static void emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp);
+
+    /**
+     * Construct a new workload metadata emitter that will output paylaods for the provided device
+     *
+     * @param device The device object that the payloads are produced for, and to which they are passed for transmission
+     */
+    WorkloadMetadataEmitterVisitor(Device& device)
+        : layerDevice(device)
+    {
+    }
+
+    // visitor should not be copied or moved from
+    WorkloadMetadataEmitterVisitor(const WorkloadMetadataEmitterVisitor&) = delete;
+    WorkloadMetadataEmitterVisitor(WorkloadMetadataEmitterVisitor&&) noexcept = delete;
+    WorkloadMetadataEmitterVisitor& operator=(const WorkloadMetadataEmitterVisitor&) = delete;
+    WorkloadMetadataEmitterVisitor& operator=(WorkloadMetadataEmitterVisitor&&) noexcept = delete;
+
+    // methods from the visitor interface
+    void operator()(const Tracker::LCSRenderPass& renderpass, const std::vector<std::string>& debugStack) override;
+    void operator()(const Tracker::LCSRenderPassContinuation& continuation,
+                    const std::vector<std::string>& debugStack,
+                    uint64_t renderpassTagID) override;
+    void operator()(const Tracker::LCSDispatch& dispatch, const std::vector<std::string>& debugStack) override;
+    void operator()(const Tracker::LCSTraceRays& traceRays, const std::vector<std::string>& debugStack) override;
+    void operator()(const Tracker::LCSImageTransfer& imageTransfer,
+                    const std::vector<std::string>& debugStack) override;
+    void operator()(const Tracker::LCSBufferTransfer& bufferTransfer,
+                    const std::vector<std::string>& debugStack) override;
+
+    /**
+     * @brief Called at the start of the submit to emit a "Submit" record, delimiting the subsequent items from any
+     * later submit
+     *
+     * @param device The device the submit belongs to
+     * @param queue The queue that was submitted to
+     * @param timestamp The timestamp of the submission
+     */
+    void emitSubmit(VkDevice device, VkQueue queue, uint64_t timestamp);
+
+private:
+    Device& layerDevice;
+};
diff --git a/source_common/trackers/queue.cpp b/source_common/trackers/queue.cpp
index eda2e11..90131b2 100644
--- a/source_common/trackers/queue.cpp
+++ b/source_common/trackers/queue.cpp
@@ -45,9 +45,9 @@ namespace
     class SubmitCommandInstructionVisitor
     {
     public:
-        SubmitCommandInstructionVisitor(QueueState& _queueState, std::function<void(const std::string&)> _callback)
+        SubmitCommandInstructionVisitor(QueueState& _queueState, SubmitCommandWorkloadVisitor& _workload_visitor)
             : queueState(_queueState),
-              callback(_callback)
+              workload_visitor(_workload_visitor)
         {
         }
 
@@ -93,7 +93,7 @@ namespace
             assert(tagID > 0);
             assert(queueState.lastRenderPassTagID == 0);
 
-            callback(workload.getMetadata(queueState.debugStack));
+            workload_visitor(workload, queueState.debugStack);
 
             queueState.lastRenderPassTagID = (workload.isSuspending() ? tagID : 0);
         }
@@ -113,7 +113,7 @@ namespace
             assert(tagID == 0);
             assert(queueState.lastRenderPassTagID != 0);
 
-            callback(workload.getMetadata(queueState.lastRenderPassTagID));
+            workload_visitor(workload, queueState.debugStack, queueState.lastRenderPassTagID);
 
             if (!workload.isSuspending())
             {
@@ -133,31 +133,31 @@ namespace
         {
             const auto& workload = instruction.getWorkload();
 
-            callback(workload.getMetadata(queueState.debugStack));
+            workload_visitor(workload, queueState.debugStack);
         }
 
     private:
         QueueState& queueState;
-        std::function<void(const std::string&)> callback;
+        SubmitCommandWorkloadVisitor& workload_visitor;
     };
 
 }
 
 /* See header for details. */
 Queue::Queue(VkQueue _handle)
-    : state(_handle) {
-
-      };
+    : state(_handle)
+{
+}
 
 /* See header for details. */
 void Queue::runSubmitCommandStream(const std::vector<LCSInstruction>& stream,
-                                   std::function<void(const std::string&)> callback)
+                                   SubmitCommandWorkloadVisitor& workload_visitor)
 {
-    SubmitCommandInstructionVisitor visitor {state, callback};
+    SubmitCommandInstructionVisitor instruction_visitor {state, workload_visitor};
 
     for (auto& instr : stream)
     {
-        std::visit(visitor, instr);
+        std::visit(instruction_visitor, instr);
     }
 }
 
diff --git a/source_common/trackers/queue.hpp b/source_common/trackers/queue.hpp
index 6fefae4..04c5eea 100644
--- a/source_common/trackers/queue.hpp
+++ b/source_common/trackers/queue.hpp
@@ -43,7 +43,7 @@
 
 #include "trackers/layer_command_stream.hpp"
 
-#include <functional>
+#include <cstdint>
 #include <string>
 #include <vector>
 
@@ -51,6 +51,68 @@
 
 namespace Tracker
 {
+/**
+ * @brief Represents the interface to some workload visitor that can be passed to Queue::runSubmitCommandStream
+ * and which will be called once per item within the submitted command stream for that queue.
+ */
+class SubmitCommandWorkloadVisitor
+{
+public:
+    /** @brief Destructor for the visitor */
+    virtual ~SubmitCommandWorkloadVisitor() noexcept = default;
+
+    /**
+     * @brief Visit a renderpass workload object
+     *
+     * @param renderpass The renderpass
+     * @param debugStack The stack of debug labels that are associated with this renderpass
+     */
+    virtual void operator()(const LCSRenderPass& renderpass, const std::vector<std::string>& debugStack) = 0;
+
+    /**
+     * @brief Visit a renderpass continuation workload object
+     *
+     * @param continuation The renderpass continuation
+     * @param debugStack The stack of debug labels that are associated with this renderpass
+     * @param renderpassTagID The renderpass tag that the continuation was associated with
+     */
+    virtual void operator()(const LCSRenderPassContinuation& continuation,
+                            const std::vector<std::string>& debugStack,
+                            uint64_t renderpassTagID) = 0;
+
+    /**
+     * @brief Visit a dispatch workload object
+     *
+     * @param dispatch The dispatch
+     * @param debugStack The stack of debug labels that are associated with this dispatch
+     */
+    virtual void operator()(const LCSDispatch& dispatch, const std::vector<std::string>& debugStack) = 0;
+
+    /**
+     * @brief Visit a trace rays workload object
+     *
+     * @param traceRays The trace rays
+     * @param debugStack The stack of debug labels that are associated with this trace rays
+     */
+    virtual void operator()(const LCSTraceRays& traceRays, const std::vector<std::string>& debugStack) = 0;
+
+    /**
+     * @brief Visit an image transfer workload object
+     *
+     * @param imageTransfer The image transfer
+     * @param debugStack The stack of debug labels that are associated with this image transfer
+     */
+    virtual void operator()(const LCSImageTransfer& imageTransfer, const std::vector<std::string>& debugStack) = 0;
+
+    /**
+     * @brief Visit a buffer transfer workload object
+     *
+     * @param bufferTransfer The buffer transfer
+     * @param debugStack The stack of debug labels that are associated with this buffer transfer
+     */
+    virtual void operator()(const LCSBufferTransfer& bufferTransfer, const std::vector<std::string>& debugStack) = 0;
+};
+
 /**
  * Metadata tracked by the queue when it emits commands, that can be
  * shared with the LCSInstruction visitor object during instruction processing
@@ -94,11 +156,11 @@ class Queue
     /**
      * @brief Execute a layer command stream.
      *
-     * @param stream     The layer command stream to execute.
-     * @param callback   The callback to pass submitted workloads to.
+     * @param stream            The layer command stream to execute.
+     * @param workload_visitor  The visitor to pass submitted workloads to.
      */
     void runSubmitCommandStream(const std::vector<LCSInstruction>& stream,
-                                std::function<void(const std::string&)> callback);
+                                SubmitCommandWorkloadVisitor& workload_visitor);
 
 private:
     QueueState state;

From fc6335b259b690bbe9e0b986dcf759ffa0a5f44d Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Fri, 31 Jan 2025 15:14:31 +0000
Subject: [PATCH 04/11] Remove getMetadata from layer_command_stream.

The getMetadata logic is now moved into the centralized serializer for the layer driver in workload_metadata_builder, which
fully decouples the state tracker from the timeline layer's data serialization.
---
 .../source/workload_metadata_builder.cpp      | 199 +++++++++++++++++-
 .../trackers/layer_command_stream.cpp         | 152 -------------
 .../trackers/layer_command_stream.hpp         |  81 +++----
 3 files changed, 237 insertions(+), 195 deletions(-)

diff --git a/layer_gpu_timeline/source/workload_metadata_builder.cpp b/layer_gpu_timeline/source/workload_metadata_builder.cpp
index f782fca..8252d5c 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.cpp
+++ b/layer_gpu_timeline/source/workload_metadata_builder.cpp
@@ -25,6 +25,7 @@
 
 #include "workload_metadata_builder.hpp"
 
+#include "trackers/layer_command_stream.hpp"
 #include "utils/misc.hpp"
 
 #include <string>
@@ -33,6 +34,192 @@
 
 using json = nlohmann::json;
 
+namespace
+{
+/**
+ * @brief Serialize the metadata for this render pass workload.
+ *
+ * @param renderpass The renderpass to serialize
+ * @param debugLabel The debug label stack of the VkQueue at submit time.
+ */
+std::string serialize(const Tracker::LCSRenderPass& renderpass, const std::vector<std::string>& debugLabel)
+{
+    // Draw count for a multi-submit command buffer cannot be reliably
+    // associated with a single tagID if restartable across command buffer
+    // boundaries because different command buffer submit combinations can
+    // result in different draw counts for the same starting tagID.
+    int64_t drawCount = static_cast<int64_t>(renderpass.getDrawCallCount());
+
+    if (!renderpass.isOneTimeSubmit() && renderpass.isSuspending())
+    {
+        drawCount = -1;
+    }
+
+    json metadata = {
+        {"type", "renderpass"},
+        {"tid", renderpass.getTagID()},
+        {"width", renderpass.getWidth()},
+        {"height", renderpass.getHeight()},
+        {"drawCallCount", drawCount},
+    };
+
+    if (!debugLabel.empty())
+    {
+        metadata["label"] = debugLabel;
+    }
+
+    // Default is 1, so only store if we need it
+    if (const auto spc = renderpass.getSubpassCount(); spc != 1)
+    {
+        metadata["subpassCount"] = spc;
+    }
+
+    json attachPoints = json::array();
+
+    for (const auto& attachment : renderpass.getAttachments())
+    {
+        json attachPoint {
+            {"binding", attachment.getAttachmentStr()},
+        };
+
+        // Default is false, so only serialize if we need it
+        if (attachment.isLoaded())
+        {
+            attachPoint["load"] = true;
+        }
+
+        // Default is true, so only serialize if we need it
+        if (!attachment.isStored())
+        {
+            attachPoint["store"] = false;
+        }
+
+        // Default is false, so only serialize if we need it
+        if (attachment.isResolved())
+        {
+            attachPoint["resolve"] = true;
+        }
+
+        attachPoints.push_back(attachPoint);
+    }
+
+    metadata["attachments"] = attachPoints;
+
+    return metadata.dump();
+}
+
+/**
+ * @brief Serialize the metadata for this render pass continuation workload.
+ *
+ * @param continuation The renderpass continuation to serialize
+ * @param tagIDContinuation The ID of the workload if this is a continuation of it.
+ */
+std::string serialize(const Tracker::LCSRenderPassContinuation& continuation, uint64_t tagIDContinuation)
+{
+    json metadata = {
+        {"type", "renderpass"},
+        {"tid", tagIDContinuation},
+        {"drawCallCount", continuation.getDrawCallCount()},
+    };
+
+    return metadata.dump();
+}
+
+/**
+ * @brief Get the metadata for this workload
+ *
+ * @param dispatch The dispatch to serialize
+ * @param debugLabel The debug label stack for the VkQueue at submit time.
+ */
+std::string serialize(const Tracker::LCSDispatch& dispatch, const std::vector<std::string>& debugLabel)
+{
+    json metadata = {
+        {"type", "dispatch"},
+        {"tid", dispatch.getTagID()},
+        {"xGroups", dispatch.getXGroups()},
+        {"yGroups", dispatch.getYGroups()},
+        {"zGroups", dispatch.getZGroups()},
+    };
+
+    if (!debugLabel.empty())
+    {
+        metadata["label"] = debugLabel;
+    }
+
+    return metadata.dump();
+}
+
+/**
+ * @brief Get the metadata for this workload
+ *
+ * @param traceRays The trace rays to serialize
+ * @param debugLabel The debug label stack for the VkQueue at submit time.
+ */
+std::string serialize(const Tracker::LCSTraceRays& traceRays, const std::vector<std::string>& debugLabel)
+{
+    json metadata = {
+        {"type", "tracerays"},
+        {"tid", traceRays.getTagID()},
+        {"xItems", traceRays.getXItems()},
+        {"yItems", traceRays.getYItems()},
+        {"zItems", traceRays.getZItems()},
+    };
+
+    if (!debugLabel.empty())
+    {
+        metadata["label"] = debugLabel;
+    }
+
+    return metadata.dump();
+}
+
+/**
+ * @brief Get the metadata for this workload
+ *
+ * @param imageTransfer The image transfer to serialize
+ * @param debugLabel The debug label stack for the VkQueue at submit time.
+ */
+std::string serialize(const Tracker::LCSImageTransfer& imageTransfer, const std::vector<std::string>& debugLabel)
+{
+    json metadata = {
+        {"type", "imagetransfer"},
+        {"tid", imageTransfer.getTagID()},
+        {"subtype", imageTransfer.getTransferType()},
+        {"pixelCount", imageTransfer.getPixelCount()},
+    };
+
+    if (!debugLabel.empty())
+    {
+        metadata["label"] = debugLabel;
+    }
+
+    return metadata.dump();
+}
+
+/**
+ * @brief Get the metadata for this workload
+ *
+ * @param bufferTransfer The buffer transfer to serialize
+ * @param debugLabel The debug label stack for the VkQueue at submit time.
+ */
+std::string serialize(const Tracker::LCSBufferTransfer& bufferTransfer, const std::vector<std::string>& debugLabel)
+{
+    json metadata = {
+        {"type", "buffertransfer"},
+        {"tid", bufferTransfer.getTagID()},
+        {"subtype", bufferTransfer.getTransferType()},
+        {"byteCount", bufferTransfer.getByteCount()},
+    };
+
+    if (!debugLabel.empty())
+    {
+        metadata["label"] = debugLabel;
+    }
+
+    return metadata.dump();
+}
+}
+
 void WorkloadMetadataEmitterVisitor::emitMetadata(Device& layerDevice,
                                                   uint32_t pid,
                                                   uint32_t major,
@@ -81,7 +268,7 @@ void WorkloadMetadataEmitterVisitor::emitSubmit(VkDevice device, VkQueue queue,
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPass& renderpass,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(renderpass.getMetadata(debugStack));
+    layerDevice.txMessage(serialize(renderpass, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPassContinuation& continuation,
@@ -90,29 +277,29 @@ void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPassCont
 {
     UNUSED(debugStack);
 
-    layerDevice.txMessage(continuation.getMetadata(renderpassTagID));
+    layerDevice.txMessage(serialize(continuation, renderpassTagID));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSDispatch& dispatch,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(dispatch.getMetadata(debugStack));
+    layerDevice.txMessage(serialize(dispatch, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSTraceRays& traceRays,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(traceRays.getMetadata(debugStack));
+    layerDevice.txMessage(serialize(traceRays, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSImageTransfer& imageTransfer,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(imageTransfer.getMetadata(debugStack));
+    layerDevice.txMessage(serialize(imageTransfer, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSBufferTransfer& bufferTransfer,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(bufferTransfer.getMetadata(debugStack));
+    layerDevice.txMessage(serialize(bufferTransfer, debugStack));
 }
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index bf64761..973b502 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -28,10 +28,6 @@
 #include <cassert>
 #include <memory>
 
-#include <nlohmann/json.hpp>
-
-using json = nlohmann::json;
-
 namespace Tracker
 {
 /* See header for details. */
@@ -66,82 +62,6 @@ LCSRenderPass::LCSRenderPass(uint64_t _tagID,
     attachments = renderPass.getAttachments();
 }
 
-/* See header for details. */
-std::string LCSRenderPass::getMetadata(const std::vector<std::string>& debugLabel) const
-{
-    // Draw count for a multi-submit command buffer cannot be reliably
-    // associated with a single tagID if restartable across command buffer
-    // boundaries because different command buffer submit combinations can
-    // result in different draw counts for the same starting tagID.
-    int64_t drawCount = static_cast<int64_t>(drawCallCount);
-    if (!oneTimeSubmit && suspending)
-    {
-        drawCount = -1;
-    }
-
-    json metadata = {
-        {"type", "renderpass"},
-        {"tid", tagID},
-        {"width", width},
-        {"height", height},
-        {"drawCallCount", drawCount},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    // Default is 1, so only store if we need it
-    if (subpassCount != 1)
-    {
-        metadata["subpassCount"] = subpassCount;
-    }
-
-    json attachPoints = json::array();
-    for (const auto& attachment : attachments)
-    {
-        json attachPoint {
-            {"binding", attachment.getAttachmentStr()},
-        };
-
-        // Default is false, so only serialize if we need it
-        if (attachment.isLoaded())
-        {
-            attachPoint["load"] = true;
-        }
-
-        // Default is true, so only serialize if we need it
-        if (!attachment.isStored())
-        {
-            attachPoint["store"] = false;
-        }
-
-        // Default is false, so only serialize if we need it
-        if (attachment.isResolved())
-        {
-            attachPoint["resolve"] = true;
-        }
-
-        attachPoints.push_back(attachPoint);
-    }
-
-    metadata["attachments"] = attachPoints;
-    return metadata.dump();
-}
-
-/* See header for details. */
-std::string LCSRenderPassContinuation::getMetadata(uint64_t tagIDContinuation) const
-{
-    json metadata = {
-        {"type", "renderpass"},
-        {"tid", tagIDContinuation},
-        {"drawCallCount", drawCallCount},
-    };
-
-    return metadata.dump();
-}
-
 /* See header for details. */
 LCSDispatch::LCSDispatch(uint64_t _tagID, int64_t _xGroups, int64_t _yGroups, int64_t _zGroups)
     : LCSWorkload(_tagID),
@@ -151,23 +71,6 @@ LCSDispatch::LCSDispatch(uint64_t _tagID, int64_t _xGroups, int64_t _yGroups, in
 {
 }
 
-/* See header for details. */
-std::string LCSDispatch::getMetadata(const std::vector<std::string>& debugLabel) const
-{
-    json metadata = {{"type", "dispatch"},
-                     {"tid", tagID},
-                     {"xGroups", xGroups},
-                     {"yGroups", yGroups},
-                     {"zGroups", zGroups}};
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
-}
-
 /* See header for details. */
 LCSTraceRays::LCSTraceRays(uint64_t _tagID, int64_t _xItems, int64_t _yItems, int64_t _zItems)
     : LCSWorkload(_tagID),
@@ -177,25 +80,6 @@ LCSTraceRays::LCSTraceRays(uint64_t _tagID, int64_t _xItems, int64_t _yItems, in
 {
 }
 
-/* See header for details. */
-std::string LCSTraceRays::getMetadata(const std::vector<std::string>& debugLabel) const
-{
-    json metadata = {
-        {"type", "tracerays"},
-        {"tid", tagID},
-        {"xItems", xItems},
-        {"yItems", yItems},
-        {"zItems", zItems},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
-}
-
 /* See header for details. */
 LCSImageTransfer::LCSImageTransfer(uint64_t _tagID, const std::string& _transferType, int64_t _pixelCount)
     : LCSWorkload(_tagID),
@@ -204,24 +88,6 @@ LCSImageTransfer::LCSImageTransfer(uint64_t _tagID, const std::string& _transfer
 {
 }
 
-/* See header for details. */
-std::string LCSImageTransfer::getMetadata(const std::vector<std::string>& debugLabel) const
-{
-    json metadata = {
-        {"type", "imagetransfer"},
-        {"tid", tagID},
-        {"subtype", transferType},
-        {"pixelCount", pixelCount},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
-}
-
 /* See header for details. */
 LCSBufferTransfer::LCSBufferTransfer(uint64_t _tagID, const std::string& _transferType, int64_t _byteCount)
     : LCSWorkload(_tagID),
@@ -230,24 +96,6 @@ LCSBufferTransfer::LCSBufferTransfer(uint64_t _tagID, const std::string& _transf
 {
 }
 
-/* See header for details. */
-std::string LCSBufferTransfer::getMetadata(const std::vector<std::string>& debugLabel) const
-{
-    json metadata = {
-        {"type", "buffertransfer"},
-        {"tid", tagID},
-        {"subtype", transferType},
-        {"byteCount", byteCount},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
-}
-
 /* See header for details. */
 LCSInstructionMarkerPush::LCSInstructionMarkerPush(const std::string& _label)
     : label(std::make_shared<std::string>(_label))
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index 78717b4..550ac7d 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -41,6 +41,7 @@
 #include "trackers/render_pass.hpp"
 
 #include <atomic>
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <type_traits>
@@ -119,6 +120,9 @@ class LCSRenderPassBase : public LCSWorkload
      */
     void setDrawCallCount(uint64_t count) { drawCallCount = count; }
 
+    /** @return The number of draw calls in this renderpass */
+    uint64_t getDrawCallCount() const { return drawCallCount; }
+
 protected:
     /**
      * @brief The number of draw calls in the render pass.
@@ -165,12 +169,20 @@ class LCSRenderPass : public LCSRenderPassBase
                   bool suspending,
                   bool oneTimeSubmit);
 
-    /**
-     * @brief Get the metadata for this render pass workload.
-     *
-     * @param debugLabel   The debug label stack of the VkQueue at submit time.
-     */
-    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
+    /** @return The width of the renderpass in pixels */
+    uint32_t getWidth() const { return width; }
+
+    /** @return The height of the renderpass in pixels */
+    uint32_t getHeight() const { return height; }
+
+    /** @return The number of subpasses */
+    uint32_t getSubpassCount() const { return subpassCount; }
+
+    /** @return True if it is a one-time submit renderpass */
+    bool isOneTimeSubmit() const { return oneTimeSubmit; }
+
+    /** @return The list of attachments */
+    const std::vector<RenderPassAttachment>& getAttachments() const { return attachments; }
 
 private:
     /**
@@ -214,13 +226,6 @@ class LCSRenderPassContinuation : public LCSRenderPassBase
         : LCSRenderPassBase(0, _suspending)
     {
     }
-
-    /**
-     * @brief Get the metadata for this render pass continuation workload.
-     *
-     * @param tagIDContinuation   The ID of the workload if this is a continuation of it.
-     */
-    std::string getMetadata(uint64_t tagIDContinuation) const;
 };
 
 /**
@@ -241,12 +246,14 @@ class LCSDispatch : public LCSWorkload
      */
     LCSDispatch(uint64_t tagID, int64_t xGroups, int64_t yGroups, int64_t zGroups);
 
-    /**
-     * @brief Get the metadata for this workload
-     *
-     * @param debugLabel          The debug label stack for the VkQueue at submit time.
-     */
-    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
+    /** @return The number of work groups in the X dimension, or -1 if unknown. */
+    int64_t getXGroups() const { return xGroups; }
+
+    /** @return The number of work groups in the y dimension, or -1 if unknown. */
+    int64_t getYGroups() const { return yGroups; }
+
+    /** @return The number of work groups in the z dimension, or -1 if unknown. */
+    int64_t getZGroups() const { return zGroups; }
 
 private:
     /**
@@ -283,12 +290,14 @@ class LCSTraceRays : public LCSWorkload
      */
     LCSTraceRays(uint64_t tagID, int64_t xItems, int64_t yItems, int64_t zItems);
 
-    /**
-     * @brief Get the metadata for this workload
-     *
-     * @param debugLabel          The debug label stack for the VkQueue at submit time.
-     */
-    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
+    /** @return The number of work items in the X dimension, or -1 if unknown. */
+    int64_t getXItems() const { return xItems; }
+
+    /** @return The number of work items in the y dimension, or -1 if unknown. */
+    int64_t getYItems() const { return yItems; }
+
+    /** @return The number of work items in the z dimension, or -1 if unknown. */
+    int64_t getZItems() const { return zItems; }
 
 private:
     /**
@@ -324,12 +333,11 @@ class LCSImageTransfer : public LCSWorkload
      */
     LCSImageTransfer(uint64_t tagID, const std::string& transferType, int64_t pixelCount);
 
-    /**
-     * @brief Get the metadata for this workload
-     *
-     * @param debugLabel          The debug label stack for the VkQueue at submit time.
-     */
-    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
+    /** @return The subtype of the transfer */
+    const std::string& getTransferType() const { return transferType; }
+
+    /** @return The size of the transfer, in pixels */
+    int64_t getPixelCount() const { return pixelCount; }
 
 private:
     /**
@@ -361,12 +369,11 @@ class LCSBufferTransfer : public LCSWorkload
      */
     LCSBufferTransfer(uint64_t tagID, const std::string& transferType, int64_t byteCount);
 
-    /**
-     * @brief Get the metadata for this workload
-     *
-     * @param debugLabel          The debug label stack for the VkQueue at submit time.
-     */
-    std::string getMetadata(const std::vector<std::string>& debugLabel) const;
+    /** @return The subtype of the transfer */
+    const std::string& getTransferType() const { return transferType; }
+
+    /** @return The size of the transfer, in bytes */
+    int64_t getByteCount() const { return byteCount; }
 
 private:
     /**

From dca18133502e2bd04ed6bb86ad7be5060c1ddba5 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Mon, 3 Feb 2025 12:54:02 +0000
Subject: [PATCH 05/11] Fixup review comments

---
 .../source/workload_metadata_builder.cpp      | 26 +++++++++----------
 .../source/workload_metadata_builder.hpp      | 15 ++++++-----
 .../trackers/layer_command_stream.hpp         | 18 ++++++-------
 3 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/layer_gpu_timeline/source/workload_metadata_builder.cpp b/layer_gpu_timeline/source/workload_metadata_builder.cpp
index 8252d5c..b4451c7 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.cpp
+++ b/layer_gpu_timeline/source/workload_metadata_builder.cpp
@@ -220,7 +220,7 @@ std::string serialize(const Tracker::LCSBufferTransfer& bufferTransfer, const st
 }
 }
 
-void WorkloadMetadataEmitterVisitor::emitMetadata(Device& layerDevice,
+void WorkloadMetadataEmitterVisitor::emitMetadata(Device& device,
                                                   uint32_t pid,
                                                   uint32_t major,
                                                   uint32_t minor,
@@ -230,14 +230,14 @@ void WorkloadMetadataEmitterVisitor::emitMetadata(Device& layerDevice,
     json deviceMetadata {
         {"type", "device"},
         {"pid", pid},
-        {"device", reinterpret_cast<uintptr_t>(layerDevice.device)},
+        {"device", reinterpret_cast<uintptr_t>(device.device)},
         {"deviceName", std::move(name)},
         {"driverMajor", major},
         {"driverMinor", minor},
         {"driverPatch", patch},
     };
 
-    layerDevice.txMessage(deviceMetadata.dump());
+    device.txMessage(deviceMetadata.dump());
 }
 
 void WorkloadMetadataEmitterVisitor::emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp)
@@ -252,23 +252,23 @@ void WorkloadMetadataEmitterVisitor::emitFrame(Device& device, uint64_t frameNum
     device.txMessage(frame.dump());
 }
 
-void WorkloadMetadataEmitterVisitor::emitSubmit(VkDevice device, VkQueue queue, uint64_t timestamp)
+void WorkloadMetadataEmitterVisitor::emitSubmit(VkDevice vkDevice, VkQueue vkQueue, uint64_t timestamp)
 {
     // Write the queue submit metadata
     json submitMetadata {
         {"type", "submit"},
-        {"device", reinterpret_cast<uintptr_t>(device)},
-        {"queue", reinterpret_cast<uintptr_t>(queue)},
+        {"device", reinterpret_cast<uintptr_t>(vkDevice)},
+        {"queue", reinterpret_cast<uintptr_t>(vkQueue)},
         {"timestamp", timestamp},
     };
 
-    layerDevice.txMessage(submitMetadata.dump());
+    device.txMessage(submitMetadata.dump());
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPass& renderpass,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(serialize(renderpass, debugStack));
+    device.txMessage(serialize(renderpass, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPassContinuation& continuation,
@@ -277,29 +277,29 @@ void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPassCont
 {
     UNUSED(debugStack);
 
-    layerDevice.txMessage(serialize(continuation, renderpassTagID));
+    device.txMessage(serialize(continuation, renderpassTagID));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSDispatch& dispatch,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(serialize(dispatch, debugStack));
+    device.txMessage(serialize(dispatch, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSTraceRays& traceRays,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(serialize(traceRays, debugStack));
+    device.txMessage(serialize(traceRays, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSImageTransfer& imageTransfer,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(serialize(imageTransfer, debugStack));
+    device.txMessage(serialize(imageTransfer, debugStack));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSBufferTransfer& bufferTransfer,
                                                 const std::vector<std::string>& debugStack)
 {
-    layerDevice.txMessage(serialize(bufferTransfer, debugStack));
+    device.txMessage(serialize(bufferTransfer, debugStack));
 }
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.hpp b/layer_gpu_timeline/source/workload_metadata_builder.hpp
index 5b40993..3a324a6 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.hpp
+++ b/layer_gpu_timeline/source/workload_metadata_builder.hpp
@@ -77,10 +77,11 @@ class WorkloadMetadataEmitterVisitor : public Tracker::SubmitCommandWorkloadVisi
     /**
      * Construct a new workload metadata emitter that will output paylaods for the provided device
      *
-     * @param device The device object that the payloads are produced for, and to which they are passed for transmission
+     * @param _device The device object that the payloads are produced for, and to which they are passed for
+     * transmission
      */
-    WorkloadMetadataEmitterVisitor(Device& device)
-        : layerDevice(device)
+    WorkloadMetadataEmitterVisitor(Device& _device)
+        : device(_device)
     {
     }
 
@@ -106,12 +107,12 @@ class WorkloadMetadataEmitterVisitor : public Tracker::SubmitCommandWorkloadVisi
      * @brief Called at the start of the submit to emit a "Submit" record, delimiting the subsequent items from any
      * later submit
      *
-     * @param device The device the submit belongs to
-     * @param queue The queue that was submitted to
+     * @param vkDevice The device the submit belongs to
+     * @param vkQueue The queue that was submitted to
      * @param timestamp The timestamp of the submission
      */
-    void emitSubmit(VkDevice device, VkQueue queue, uint64_t timestamp);
+    void emitSubmit(VkDevice vkDevice, VkQueue vkQueue, uint64_t timestamp);
 
 private:
-    Device& layerDevice;
+    Device& device;
 };
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index 550ac7d..b0273ff 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -124,6 +124,14 @@ class LCSRenderPassBase : public LCSWorkload
     uint64_t getDrawCallCount() const { return drawCallCount; }
 
 protected:
+    /**
+     * @brief Construct the common renderbase workload
+     *
+     * @param tagID           The assigned tagID.
+     * @param suspending      Is this a render pass part that suspends later?
+     */
+    LCSRenderPassBase(uint64_t tagID, bool suspending);
+
     /**
      * @brief The number of draw calls in the render pass.
      *
@@ -136,14 +144,6 @@ class LCSRenderPassBase : public LCSWorkload
      * @brief Is this workload suspending rather than ending?
      */
     bool suspending;
-
-    /**
-     * @brief Construct the common renderbase workload
-     *
-     * @param tagID           The assigned tagID.
-     * @param suspending      Is this a render pass part that suspends later?
-     */
-    LCSRenderPassBase(uint64_t tagID, bool suspending);
 };
 
 /**
@@ -411,7 +411,7 @@ class LCSInstructionMarkerPush
     /**
      * @brief The application debug label.
      *
-     * The label is stored in a shared point to avoid copying the actual string when it is shared between
+     * The label is stored in a shared pointer to avoid copying the actual string when it is shared between
      * subcommandbuffers
      */
     std::shared_ptr<std::string> label;

From e8d844594c7d4e9b4468dbef71e5adb261e82623 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Tue, 4 Feb 2025 09:51:44 +0000
Subject: [PATCH 06/11] Rework workload_metadata_builder to emit protobuf
 messages instead of JSON.

 * Uses "protopuf" C++ header only library for encoding the messages
   as they are simple and we want to avoid a larger dependency on
   libprotobuf and the necessary build-time changes to build
   or require protoc for host and libprotobuf for target.
---
 .gitmodules                                   |   3 +
 layer_gpu_timeline/CMakeLists.txt             |   4 +
 layer_gpu_timeline/source/CMakeLists.txt      |   6 +-
 layer_gpu_timeline/source/device.hpp          |   2 +-
 .../source/layer_device_functions_queue.cpp   |  10 +-
 .../layer_device_functions_transfer.cpp       |  50 +-
 layer_gpu_timeline/source/timeline_comms.cpp  |   4 +-
 layer_gpu_timeline/source/timeline_comms.hpp  |   2 +-
 .../source/workload_metadata_builder.cpp      | 550 +++++++++++++-----
 .../source/workload_metadata_builder.hpp      |   5 +-
 layer_gpu_timeline/timeline.proto             | 218 +++++++
 source_common/trackers/command_buffer.cpp     |   4 +-
 source_common/trackers/command_buffer.hpp     |   4 +-
 .../trackers/layer_command_stream.cpp         |  42 +-
 .../trackers/layer_command_stream.hpp         |  36 +-
 source_common/trackers/render_pass.hpp        |   7 +
 source_third_party/protopuf                   |   1 +
 17 files changed, 755 insertions(+), 193 deletions(-)
 create mode 100644 layer_gpu_timeline/timeline.proto
 create mode 160000 source_third_party/protopuf

diff --git a/.gitmodules b/.gitmodules
index 5ac8d86..2eed470 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,6 @@
 [submodule "source_third_party/gtest"]
 	path = source_third_party/gtest
 	url = https://github.com/google/googletest
+[submodule "source_third_party/protopuf"]
+	path = source_third_party/protopuf
+	url = https://github.com/PragmaTwice/protopuf.git
diff --git a/layer_gpu_timeline/CMakeLists.txt b/layer_gpu_timeline/CMakeLists.txt
index eef8fef..52214cd 100644
--- a/layer_gpu_timeline/CMakeLists.txt
+++ b/layer_gpu_timeline/CMakeLists.txt
@@ -35,6 +35,10 @@ set(LGL_CONFIG_LOG 1)
 include(../source_common/compiler_helper.cmake)
 include(../cmake/clang-tools.cmake)
 
+# TPIP
+set(BUILD_TESTS OFF)
+add_subdirectory(../source_third_party/protopuf "source_third_party/protopuf")
+
 # Build steps
 add_subdirectory(../source_common/comms source_common/comms)
 add_subdirectory(../source_common/framework source_common/framework)
diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
index e4946e7..76f1bb8 100644
--- a/layer_gpu_timeline/source/CMakeLists.txt
+++ b/layer_gpu_timeline/source/CMakeLists.txt
@@ -65,7 +65,8 @@ target_include_directories(
 target_include_directories(
     ${VK_LAYER} SYSTEM PRIVATE
         ../../source_third_party/
-        ../../source_third_party/khronos/vulkan/include/)
+        ../../source_third_party/khronos/vulkan/include/
+        ../../source_third_party/protopuf/include/)
 
 lgl_set_build_options(${VK_LAYER})
 
@@ -74,7 +75,8 @@ target_link_libraries(
         lib_layer_comms
         lib_layer_framework
         lib_layer_trackers
-        $<$<PLATFORM_ID:Android>:log>)
+        $<$<PLATFORM_ID:Android>:log>
+        protopuf)
 
 if (CMAKE_BUILD_TYPE STREQUAL "Release")
     add_custom_command(
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
index 151e786..e82a5ab 100644
--- a/layer_gpu_timeline/source/device.hpp
+++ b/layer_gpu_timeline/source/device.hpp
@@ -135,7 +135,7 @@ class Device
      *
      * @param message   The message to send.
      */
-    void txMessage(const std::string& message) { commsWrapper->txMessage(message); }
+    void txMessage(Comms::MessageData&& message) { commsWrapper->txMessage(std::move(message)); }
 
     /**
      * @brief Get the cumulative stats for this device.
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
index 15cef57..95522af 100644
--- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -65,9 +65,9 @@ static uint64_t getClockMonotonicRaw()
  * @param queue             The queue being submitted to.
  * @param workloadVisitor   The data emit callback.
  */
-static void emitQueueMetadata(VkDevice device, VkQueue queue, WorkloadMetadataEmitterVisitor& workloadVisitor)
+static void emitQueueMetadata(VkQueue queue, WorkloadMetadataEmitterVisitor& workloadVisitor)
 {
-    workloadVisitor.emitSubmit(device, queue, getClockMonotonicRaw());
+    workloadVisitor.emitSubmit(queue, getClockMonotonicRaw());
 }
 
 /**
@@ -131,7 +131,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
     WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, workloadVisitor);
+    emitQueueMetadata(queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -165,7 +165,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
     WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, workloadVisitor);
+    emitQueueMetadata(queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
@@ -199,7 +199,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
     WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
 
     // Add queue-level metadata
-    emitQueueMetadata(layer->device, queue, workloadVisitor);
+    emitQueueMetadata(queue, workloadVisitor);
 
     // Add per-command buffer metadata
     for (uint32_t i = 0; i < submitCount; i++)
diff --git a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
index 791a1f1..253c970 100644
--- a/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_transfer.cpp
@@ -26,6 +26,7 @@
 #include "device.hpp"
 #include "device_utils.hpp"
 #include "framework/device_dispatch_table.hpp"
+#include "trackers/layer_command_stream.hpp"
 
 #include <memory>
 #include <mutex>
@@ -45,7 +46,7 @@ extern std::mutex g_vulkanLock;
  */
 static uint64_t registerBufferTransfer(Device* layer,
                                        VkCommandBuffer commandBuffer,
-                                       const std::string& transferType,
+                                       Tracker::LCSBufferTransfer::Type transferType,
                                        int64_t byteCount)
 {
     auto& tracker = layer->getStateTracker();
@@ -65,7 +66,7 @@ static uint64_t registerBufferTransfer(Device* layer,
  */
 static uint64_t registerImageTransfer(Device* layer,
                                       VkCommandBuffer commandBuffer,
-                                      const std::string& transferType,
+                                      Tracker::LCSImageTransfer::Type transferType,
                                       int64_t pixelCount)
 {
     auto& tracker = layer->getStateTracker();
@@ -97,7 +98,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdFillBuffer<user_tag>(VkCommandBuffer comma
         byteCount = -2;
     }
 
-    uint64_t tagID = registerBufferTransfer(layer, commandBuffer, "Fill buffer", byteCount);
+    uint64_t tagID =
+        registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::fill_buffer, byteCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -125,7 +127,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearColorImage<user_tag>(VkCommandBuffer
     // TODO: Add image tracking so we can turn image and pRanges into pixels
     int64_t pixelCount = -1;
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Clear image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::clear_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -153,7 +156,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdClearDepthStencilImage<user_tag>(VkCommand
     // TODO: Add image tracking so we can turn image and pRanges into pixels
     int64_t pixelCount = -1;
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Clear image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::clear_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -183,7 +187,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer<user_tag>(VkCommandBuffer comma
         byteCount += static_cast<int64_t>(pRegions[i].size);
     }
 
-    uint64_t tagID = registerBufferTransfer(layer, commandBuffer, "Copy buffer", byteCount);
+    uint64_t tagID =
+        registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::copy_buffer, byteCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -210,7 +215,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2<user_tag>(VkCommandBuffer comm
         byteCount += static_cast<int64_t>(pCopyBufferInfo->pRegions[i].size);
     }
 
-    uint64_t tagID = registerBufferTransfer(layer, commandBuffer, "Copy buffer", byteCount);
+    uint64_t tagID =
+        registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::copy_buffer, byteCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -237,7 +243,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBuffer2KHR<user_tag>(VkCommandBuffer c
         byteCount += static_cast<int64_t>(pCopyBufferInfo->pRegions[i].size);
     }
 
-    uint64_t tagID = registerBufferTransfer(layer, commandBuffer, "Copy buffer", byteCount);
+    uint64_t tagID =
+        registerBufferTransfer(layer, commandBuffer, Tracker::LCSBufferTransfer::Type::copy_buffer, byteCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -271,7 +278,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyBufferToImage<user_tag>(VkCommandBuffe
         pixelCount += rPixelCount;
     }
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy buffer to image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_buffer_to_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -302,7 +310,8 @@ VKAPI_ATTR void VKAPI_CALL
         pixelCount += rPixelCount;
     }
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy buffer to image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_buffer_to_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -333,7 +342,8 @@ VKAPI_ATTR void VKAPI_CALL
         pixelCount += rPixelCount;
     }
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy buffer to image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_buffer_to_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -368,7 +378,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage<user_tag>(VkCommandBuffer comman
         pixelCount += rPixelCount;
     }
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -399,7 +410,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2<user_tag>(VkCommandBuffer comma
         pixelCount += rPixelCount;
     }
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -429,7 +441,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImage2KHR<user_tag>(VkCommandBuffer co
         pixelCount += rPixelCount;
     }
 
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy image", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -467,7 +480,8 @@ VKAPI_ATTR void VKAPI_CALL layer_vkCmdCopyImageToBuffer<user_tag>(VkCommandBuffe
     // type, which means this should be a bufferTransfer reporting size in
     // bytes. Without image tracking we only have pixels, so for now we report
     // as "Copy image" and report size in pixels.
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy image to buffer", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image_to_buffer, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -502,7 +516,8 @@ VKAPI_ATTR void VKAPI_CALL
     // type, which means this should be a bufferTransfer reporting size in
     // bytes. Without image tracking we only have pixels, so for now we report
     // as "Copy image" and report size in pixels.
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy image to buffer", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image_to_buffer, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -537,7 +552,8 @@ VKAPI_ATTR void VKAPI_CALL
     // type, which means this should be a bufferTransfer reporting size in
     // bytes. Without image tracking we only have pixels, so for now we report
     // as "Copy image" and report size in pixels.
-    uint64_t tagID = registerImageTransfer(layer, commandBuffer, "Copy image to buffer", pixelCount);
+    uint64_t tagID =
+        registerImageTransfer(layer, commandBuffer, Tracker::LCSImageTransfer::Type::copy_image_to_buffer, pixelCount);
 
     // Release the lock to call into the driver
     lock.unlock();
diff --git a/layer_gpu_timeline/source/timeline_comms.cpp b/layer_gpu_timeline/source/timeline_comms.cpp
index 6ad1e40..309e84d 100644
--- a/layer_gpu_timeline/source/timeline_comms.cpp
+++ b/layer_gpu_timeline/source/timeline_comms.cpp
@@ -38,7 +38,7 @@ TimelineComms::TimelineComms(Comms::CommsInterface& _comms)
 }
 
 /* See header for documentation. */
-void TimelineComms::txMessage(const std::string& message)
+void TimelineComms::txMessage(Comms::MessageData&& message)
 {
     // Message endpoint is not available
     if (endpoint == 0)
@@ -46,6 +46,6 @@ void TimelineComms::txMessage(const std::string& message)
         return;
     }
 
-    auto data = std::make_unique<Comms::MessageData>(message.begin(), message.end());
+    auto data = std::make_unique<Comms::MessageData>(std::move(message));
     comms.txAsync(endpoint, std::move(data));
 }
diff --git a/layer_gpu_timeline/source/timeline_comms.hpp b/layer_gpu_timeline/source/timeline_comms.hpp
index c06537b..15bc518 100644
--- a/layer_gpu_timeline/source/timeline_comms.hpp
+++ b/layer_gpu_timeline/source/timeline_comms.hpp
@@ -54,7 +54,7 @@ class TimelineComms
      *
      * @param message   The message to send.
      */
-    void txMessage(const std::string& message);
+    void txMessage(Comms::MessageData&& message);
 
 private:
     /**
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.cpp b/layer_gpu_timeline/source/workload_metadata_builder.cpp
index b4451c7..3b35ae6 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.cpp
+++ b/layer_gpu_timeline/source/workload_metadata_builder.cpp
@@ -25,87 +25,352 @@
 
 #include "workload_metadata_builder.hpp"
 
+#include "comms/comms_interface.hpp"
 #include "trackers/layer_command_stream.hpp"
+#include "trackers/render_pass.hpp"
 #include "utils/misc.hpp"
 
+#include <cstddef>
+#include <cstdint>
 #include <string>
 
-#include <nlohmann/json.hpp>
-
-using json = nlohmann::json;
+#include <protopuf/field.h>
+#include <protopuf/message.h>
+#include <protopuf/skip.h>
+
+/* The metadata packet that is sent once for a given VkDevice, before any other
+ * packet related to that Device and describes the VkDevice / VkPhysicalDevice
+ * etc */
+using DeviceMetadata = pp::message<
+    /* The VkDevice handle */
+    pp::uint64_field<"id", 1>,
+    /* The PID of the process that the layer driver was loaded into */
+    pp::uint32_field<"process_id", 2>,
+    /* The major version that came from the VkPhysicalDeviceProperties for that VkDevice */
+    pp::uint32_field<"major_version", 3>,
+    /* The minor version that came from the VkPhysicalDeviceProperties for that VkDevice */
+    pp::uint32_field<"minor_version", 4>,
+    /* The patch version that came from the VkPhysicalDeviceProperties for that VkDevice */
+    pp::uint32_field<"patch_version", 5>,
+    /* The name that came from the VkPhysicalDeviceProperties for that VkDevice */
+    pp::string_field<"name", 6>>;
+
+/* A frame definition message */
+using Frame = pp::message<
+    /* The unique counter / identifier for this new frame */
+    pp::uint64_field<"id", 1>,
+    /* The VkDevice that the frame belongs to */
+    pp::uint64_field<"device", 2>,
+    /* The timestamp (in NS, CLOCK_MONOTONIC_RAW) of the point where QueuePresent was called */
+    pp::uint64_field<"timestamp", 3>>;
+
+/* A submit message */
+using Submit = pp::message<
+    /* The timestamp of the submission (in NS, CLOCK_MONOTONIC_RAW) */
+    pp::uint64_field<"timestamp", 1>,
+    /* The VkDevice that the submit belongs to */
+    pp::uint64_field<"device", 2>,
+    /* The VkQueue that the submit was made to */
+    pp::uint64_field<"queue", 3>>;
+
+/* Enumerates the possible attachment types a renderpass can have */
+enum class RenderpassAttachmentType
+{
+    undefined = 0,
+    color = 1,
+    depth = 2,
+    stencil = 3,
+};
+
+/* Describe an attachment to a renderpass */
+using RenderpassAttachment = pp::message<
+    /* The attachment type */
+    pp::enum_field<"type", 1, RenderpassAttachmentType>,
+    /* For color attachments, its index, otherwise should be zero/unspecified */
+    pp::uint32_field<"index", 2>,
+    /* True if the attachment was *not* loaded (this is inverted since usually
+       things are loaded, so this saves a field in the data) */
+    pp::bool_field<"not_loaded", 3>,
+    /* True if the attachment was *not* stored (this is inverted since usually
+       things are stored, so this saves a field in the data) */
+    pp::bool_field<"not_stored", 4>,
+    /* True if the attachment was resolved (this is *not* inverted since usually
+       things are not resolved, so this saves a field in the data) */
+    pp::bool_field<"resolved", 5>>;
+
+/* Start a new renderpass */
+using BeginRenderpass = pp::message<
+    /* The unique identifier for this new renderpass */
+    pp::uint64_field<"tag_id", 1>,
+    /* The dimensions of the renderpass' attachments */
+    pp::uint32_field<"width", 2>,
+    pp::uint32_field<"height", 3>,
+    /* The number of drawcalls in the renderpass */
+    pp::uint32_field<"draw_call_count", 4>,
+    /* The subpass count */
+    pp::uint32_field<"subpass_count", 5>,
+    /* Any user defined debug labels associated with the renderpass */
+    pp::string_field<"debug_label", 6, pp::repeated>,
+    /* Any attachments associated with the renderpass */
+    pp::message_field<"attachments", 7, RenderpassAttachment, pp::repeated>>;
+
+/* Continue a split renderpass */
+using ContinueRenderpass = pp::message<
+    /* The unique identifier for the renderpass that is being continued */
+    pp::uint64_field<"tag_id", 1>,
+    /* The number of drawcalls to add to the total in the renderpass */
+    pp::uint32_field<"draw_call_count", 2>,
+    /* Any user defined debug labels to add to the renderpass */
+    pp::string_field<"debug_label", 3, pp::repeated>>;
+
+/* A dispatch object submission */
+using Dispatch = pp::message<
+    /* The unique identifier for this operation */
+    pp::uint64_field<"tag_id", 1>,
+    /* The dimensions of the dispatch */
+    pp::int64_field<"x_groups", 2>,
+    pp::int64_field<"y_groups", 3>,
+    pp::int64_field<"z_groups", 4>,
+    /* Any user defined debug labels associated with the dispatch */
+    pp::string_field<"debug_label", 5, pp::repeated>>;
+
+/* A trace rays object submission */
+using TraceRays = pp::message<
+    /* The unique identifier for this operation */
+    pp::uint64_field<"tag_id", 1>,
+    /* The dimensions of the operation */
+    pp::int64_field<"x_items", 2>,
+    pp::int64_field<"y_items", 3>,
+    pp::int64_field<"z_items", 4>,
+    /* Any user defined debug labels associated with the trace rays operation */
+    pp::string_field<"debug_label", 5, pp::repeated>>;
+
+/* Enumerates possible image transfer types */
+enum class ImageTransferType
+{
+    unknown_image_transfer = 0,
+    clear_image = 1,
+    copy_image = 2,
+    copy_buffer_to_image = 3,
+    copy_image_to_buffer = 4,
+};
+
+/* An image transfer submission */
+using ImageTransfer = pp::message<
+    /* The unique identifier for this operation */
+    pp::uint64_field<"tag_id", 1>,
+    /* The number of pixels being transferred */
+    pp::int64_field<"pixel_count", 2>,
+    /* The image transfer type */
+    pp::enum_field<"transfer_type", 3, ImageTransferType>,
+    /* Any user defined debug labels associated with the image transfer */
+    pp::string_field<"debug_label", 4, pp::repeated>>;
+
+/* Enumerates possible buffer transfer types */
+enum class BufferTransferType
+{
+    unknown_buffer_transfer = 0,
+    fill_buffer = 1,
+    copy_buffer = 2,
+};
+
+/* A buffer transfer submission */
+using BufferTransfer = pp::message<
+    /* The unique identifier for this operation */
+    pp::uint64_field<"tag_id", 1>,
+    /* The number of bytes being transferred */
+    pp::int64_field<"byte_count", 2>,
+    /* The buffer transfer type */
+    pp::enum_field<"transfer_type", 3, BufferTransferType>,
+    /* Any user defined debug labels associated with the buffer transfer */
+    pp::string_field<"debug_label", 4, pp::repeated>>;
+
+/* The data payload message that wraps all other messages */
+using TimelineRecord = pp::message<pp::message_field<"metadata", 1, DeviceMetadata>,
+                                   pp::message_field<"frame", 2, Frame>,
+                                   pp::message_field<"submit", 3, Submit>,
+                                   pp::message_field<"renderpass", 4, BeginRenderpass>,
+                                   pp::message_field<"continue_renderpass", 5, ContinueRenderpass>,
+                                   pp::message_field<"dispatch", 6, Dispatch>,
+                                   pp::message_field<"trace_rays", 7, TraceRays>,
+                                   pp::message_field<"image_transfer", 8, ImageTransfer>,
+                                   pp::message_field<"buffer_transfer", 9, BufferTransfer>>;
 
 namespace
 {
 /**
- * @brief Serialize the metadata for this render pass workload.
+ * A helper to pack some message into a TimelineRecord and then encode it to the
+ * corresponding protobuf byte sequence.
  *
- * @param renderpass The renderpass to serialize
- * @param debugLabel The debug label stack of the VkQueue at submit time.
+ * @param c The record field name
+ * @param f The record field value
+ * @return The encoded byte sequence
  */
-std::string serialize(const Tracker::LCSRenderPass& renderpass, const std::vector<std::string>& debugLabel)
+template<pp::basic_fixed_string F, typename T>
+Comms::MessageData packBuffer(pp::constant<F> c, T&& f)
 {
-    // Draw count for a multi-submit command buffer cannot be reliably
-    // associated with a single tagID if restartable across command buffer
-    // boundaries because different command buffer submit combinations can
-    // result in different draw counts for the same starting tagID.
-    int64_t drawCount = static_cast<int64_t>(renderpass.getDrawCallCount());
+    using namespace pp;
+
+    TimelineRecord record {};
+    record[c] = std::forward<T>(f); // f is a forwarding reference: moves rvalues, copies lvalues
 
-    if (!renderpass.isOneTimeSubmit() && renderpass.isSuspending())
+    // allocate storage for the message
+    Comms::MessageData buffer {};
+    buffer.resize(skipper<message_coder<TimelineRecord>>::encode_skip(record));
+
+    const auto bufferBytes = bytes(reinterpret_cast<std::byte*>(buffer.data()), buffer.size());
+    const auto encodeResult = message_coder<TimelineRecord>::encode(record, bufferBytes);
+    assert(encodeResult.has_value());
+
+    const auto& bufferEnd = *encodeResult;
+    const auto usedLength = begin_diff(bufferEnd, bufferBytes);
+    buffer.resize(usedLength);
+
+    return buffer;
+}
+
+/**
+ * @brief Map the state-tracker enum value that describes the renderpass attachment name to some pair of
+ * protocol values, being the attachment type, and optional attachment index.
+ *
+ * @param name The name value to map to the pair of type and index
+ * @return A pair, where the first value is the corresponding attachment type, and the second value is
+ * the corresponding attachment index (or nullopt in the case the index is not relevant).
+ */
+constexpr std::pair<RenderpassAttachmentType, std::optional<uint32_t>> mapRenderpassAttachmentName(
+    Tracker::RenderPassAttachName name)
+{
+    switch (name)
     {
-        drawCount = -1;
+    case Tracker::RenderPassAttachName::COLOR0:
+        return {RenderpassAttachmentType::color, 0};
+    case Tracker::RenderPassAttachName::COLOR1:
+        return {RenderpassAttachmentType::color, 1};
+    case Tracker::RenderPassAttachName::COLOR2:
+        return {RenderpassAttachmentType::color, 2};
+    case Tracker::RenderPassAttachName::COLOR3:
+        return {RenderpassAttachmentType::color, 3};
+    case Tracker::RenderPassAttachName::COLOR4:
+        return {RenderpassAttachmentType::color, 4};
+    case Tracker::RenderPassAttachName::COLOR5:
+        return {RenderpassAttachmentType::color, 5};
+    case Tracker::RenderPassAttachName::COLOR6:
+        return {RenderpassAttachmentType::color, 6};
+    case Tracker::RenderPassAttachName::COLOR7:
+        return {RenderpassAttachmentType::color, 7};
+    case Tracker::RenderPassAttachName::DEPTH:
+        return {RenderpassAttachmentType::depth, std::nullopt};
+    case Tracker::RenderPassAttachName::STENCIL:
+        return {RenderpassAttachmentType::stencil, std::nullopt};
+    default:
+        assert(false && "What is this attachment name?");
+        return {RenderpassAttachmentType::undefined, std::nullopt};
     }
+}
 
-    json metadata = {
-        {"type", "renderpass"},
-        {"tid", renderpass.getTagID()},
-        {"width", renderpass.getWidth()},
-        {"height", renderpass.getHeight()},
-        {"drawCallCount", drawCount},
-    };
-
-    if (!debugLabel.empty())
+/**
+ * @brief Map the state-tracker enum value that describes the buffer transfer type into the protocol encoded value
+ *
+ * NB: Whilst we are currently just replicating one enum value into another (which the compiler should be smart enough
+ * to fix), we do it this way to ensure we decouple the state-tracker from the protobuf encoding, since we don't want to
+ * accidentally change some enum wire-value in the future.
+ *
+ * @param type The type enum to convert
+ * @return The wire value enum to store in the protobuf message
+ */
+constexpr BufferTransferType mapBufferTransferType(Tracker::LCSBufferTransfer::Type type)
+{
+    switch (type)
     {
-        metadata["label"] = debugLabel;
+    case Tracker::LCSBufferTransfer::Type::unknown:
+        return BufferTransferType::unknown_buffer_transfer;
+    case Tracker::LCSBufferTransfer::Type::fill_buffer:
+        return BufferTransferType::fill_buffer;
+    case Tracker::LCSBufferTransfer::Type::copy_buffer:
+        return BufferTransferType::copy_buffer;
+    default:
+        assert(false && "Unexpected LCSBufferTransfer::Type");
+        return BufferTransferType::unknown_buffer_transfer;
     }
+}
 
-    // Default is 1, so only store if we need it
-    if (const auto spc = renderpass.getSubpassCount(); spc != 1)
+/**
+ * @brief Map the state-tracker enum value that describes the image transfer type into the protocol encoded value
+ *
+ * NB: Whilst we are currently just replicating one enum value into another (which the compiler should be smart enough
+ * to fix), we do it this way to ensure we decouple the state-tracker from the protobuf encoding, since we don't want to
+ * accidentally change some enum wire-value in the future.
+ *
+ * @param type The type enum to convert
+ * @return The wire value enum to store in the protobuf message
+ */
+constexpr ImageTransferType mapImageTransferType(Tracker::LCSImageTransfer::Type type)
+{
+    switch (type)
     {
-        metadata["subpassCount"] = spc;
+    case Tracker::LCSImageTransfer::Type::unknown:
+        return ImageTransferType::unknown_image_transfer;
+    case Tracker::LCSImageTransfer::Type::clear_image:
+        return ImageTransferType::clear_image;
+    case Tracker::LCSImageTransfer::Type::copy_image:
+        return ImageTransferType::copy_image;
+    case Tracker::LCSImageTransfer::Type::copy_buffer_to_image:
+        return ImageTransferType::copy_buffer_to_image;
+    case Tracker::LCSImageTransfer::Type::copy_image_to_buffer:
+        return ImageTransferType::copy_image_to_buffer;
+    default:
+        assert(false && "Unexpected LCSImageTransfer::Type");
+        return ImageTransferType::unknown_image_transfer;
     }
+}
+
+/**
+ * @brief Serialize the metadata for this render pass workload.
+ *
+ * @param renderpass The renderpass to serialize
+ * @param debugLabel The debug label stack of the VkQueue at submit time.
+ */
+Comms::MessageData serialize(const Tracker::LCSRenderPass& renderpass, const std::vector<std::string>& debugLabel)
+{
+    using namespace pp;
 
-    json attachPoints = json::array();
+    // Draw count for a multi-submit command buffer cannot be reliably
+    // associated with a single tagID if restartable across command buffer
+    // boundaries because different command buffer submit combinations can
+    // result in different draw counts for the same starting tagID.
+    const auto drawCount = (!renderpass.isOneTimeSubmit() && renderpass.isSuspending()
+                                ? -1
+                                : static_cast<int64_t>(renderpass.getDrawCallCount()));
 
-    for (const auto& attachment : renderpass.getAttachments())
+    // make the attachments array
+    const auto& attachments = renderpass.getAttachments();
+    std::vector<RenderpassAttachment> attachmentsMsg {};
+    attachmentsMsg.reserve(attachments.size());
+
+    for (const auto& attachment : attachments)
     {
-        json attachPoint {
-            {"binding", attachment.getAttachmentStr()},
-        };
-
-        // Default is false, so only serialize if we need it
-        if (attachment.isLoaded())
-        {
-            attachPoint["load"] = true;
-        }
-
-        // Default is true, so only serialize if we need it
-        if (!attachment.isStored())
-        {
-            attachPoint["store"] = false;
-        }
-
-        // Default is false, so only serialize if we need it
-        if (attachment.isResolved())
-        {
-            attachPoint["resolve"] = true;
-        }
-
-        attachPoints.push_back(attachPoint);
+        const auto [type, index] = mapRenderpassAttachmentName(attachment.getAttachmentName());
+
+        attachmentsMsg.emplace_back(type,
+                                    index,
+                                    // "not_loaded" / "not_stored" are inverted flags: only send them (as true) when
+                                    // the attachment is NOT loaded / stored, since that is the uncommon case
+                                    (attachment.isLoaded() ? std::nullopt : std::make_optional(true)),
+                                    (attachment.isStored() ? std::nullopt : std::make_optional(true)),
+                                    // resolved is not inverted since that is the uncommon case
+                                    (attachment.isResolved() ? std::make_optional(true) : std::nullopt));
     }
 
-    metadata["attachments"] = attachPoints;
-
-    return metadata.dump();
+    return packBuffer("renderpass"_f,
+                      BeginRenderpass {
+                          renderpass.getTagID(),
+                          renderpass.getWidth(),
+                          renderpass.getHeight(),
+                          drawCount,
+                          renderpass.getSubpassCount(),
+                          debugLabel,
+                          std::move(attachmentsMsg),
+                      });
 }
 
 /**
@@ -114,15 +379,16 @@ std::string serialize(const Tracker::LCSRenderPass& renderpass, const std::vecto
  * @param continuation The renderpass continuation to serialize
  * @param tagIDContinuation The ID of the workload if this is a continuation of it.
  */
-std::string serialize(const Tracker::LCSRenderPassContinuation& continuation, uint64_t tagIDContinuation)
+Comms::MessageData serialize(const Tracker::LCSRenderPassContinuation& continuation, uint64_t tagIDContinuation)
 {
-    json metadata = {
-        {"type", "renderpass"},
-        {"tid", tagIDContinuation},
-        {"drawCallCount", continuation.getDrawCallCount()},
-    };
-
-    return metadata.dump();
+    using namespace pp;
+
+    return packBuffer("continue_renderpass"_f,
+                      ContinueRenderpass {
+                          tagIDContinuation,
+                          continuation.getDrawCallCount(),
+                          {},
+                      });
 }
 
 /**
@@ -131,22 +397,18 @@ std::string serialize(const Tracker::LCSRenderPassContinuation& continuation, ui
  * @param dispatch The dispatch to serialize
  * @param debugLabel The debug label stack for the VkQueue at submit time.
  */
-std::string serialize(const Tracker::LCSDispatch& dispatch, const std::vector<std::string>& debugLabel)
+Comms::MessageData serialize(const Tracker::LCSDispatch& dispatch, const std::vector<std::string>& debugLabel)
 {
-    json metadata = {
-        {"type", "dispatch"},
-        {"tid", dispatch.getTagID()},
-        {"xGroups", dispatch.getXGroups()},
-        {"yGroups", dispatch.getYGroups()},
-        {"zGroups", dispatch.getZGroups()},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
+    using namespace pp;
+
+    return packBuffer("dispatch"_f,
+                      Dispatch {
+                          dispatch.getTagID(),
+                          dispatch.getXGroups(),
+                          dispatch.getYGroups(),
+                          dispatch.getZGroups(),
+                          debugLabel,
+                      });
 }
 
 /**
@@ -155,22 +417,18 @@ std::string serialize(const Tracker::LCSDispatch& dispatch, const std::vector<st
  * @param traceRays The trace rays to serialize
  * @param debugLabel The debug label stack for the VkQueue at submit time.
  */
-std::string serialize(const Tracker::LCSTraceRays& traceRays, const std::vector<std::string>& debugLabel)
+Comms::MessageData serialize(const Tracker::LCSTraceRays& traceRays, const std::vector<std::string>& debugLabel)
 {
-    json metadata = {
-        {"type", "tracerays"},
-        {"tid", traceRays.getTagID()},
-        {"xItems", traceRays.getXItems()},
-        {"yItems", traceRays.getYItems()},
-        {"zItems", traceRays.getZItems()},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
+    using namespace pp;
+
+    return packBuffer("trace_rays"_f,
+                      TraceRays {
+                          traceRays.getTagID(),
+                          traceRays.getXItems(),
+                          traceRays.getYItems(),
+                          traceRays.getZItems(),
+                          debugLabel,
+                      });
 }
 
 /**
@@ -179,21 +437,17 @@ std::string serialize(const Tracker::LCSTraceRays& traceRays, const std::vector<
  * @param imageTransfer The image transfer to serialize
  * @param debugLabel The debug label stack for the VkQueue at submit time.
  */
-std::string serialize(const Tracker::LCSImageTransfer& imageTransfer, const std::vector<std::string>& debugLabel)
+Comms::MessageData serialize(const Tracker::LCSImageTransfer& imageTransfer, const std::vector<std::string>& debugLabel)
 {
-    json metadata = {
-        {"type", "imagetransfer"},
-        {"tid", imageTransfer.getTagID()},
-        {"subtype", imageTransfer.getTransferType()},
-        {"pixelCount", imageTransfer.getPixelCount()},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
+    using namespace pp;
+
+    return packBuffer("image_transfer"_f,
+                      ImageTransfer {
+                          imageTransfer.getTagID(),
+                          imageTransfer.getPixelCount(),
+                          mapImageTransferType(imageTransfer.getTransferType()),
+                          debugLabel,
+                      });
 }
 
 /**
@@ -202,21 +456,18 @@ std::string serialize(const Tracker::LCSImageTransfer& imageTransfer, const std:
  * @param bufferTransfer The buffer transfer to serialize
  * @param debugLabel The debug label stack for the VkQueue at submit time.
  */
-std::string serialize(const Tracker::LCSBufferTransfer& bufferTransfer, const std::vector<std::string>& debugLabel)
+Comms::MessageData serialize(const Tracker::LCSBufferTransfer& bufferTransfer,
+                             const std::vector<std::string>& debugLabel)
 {
-    json metadata = {
-        {"type", "buffertransfer"},
-        {"tid", bufferTransfer.getTagID()},
-        {"subtype", bufferTransfer.getTransferType()},
-        {"byteCount", bufferTransfer.getByteCount()},
-    };
-
-    if (!debugLabel.empty())
-    {
-        metadata["label"] = debugLabel;
-    }
-
-    return metadata.dump();
+    using namespace pp;
+
+    return packBuffer("buffer_transfer"_f,
+                      BufferTransfer {
+                          bufferTransfer.getTagID(),
+                          bufferTransfer.getByteCount(),
+                          mapBufferTransferType(bufferTransfer.getTransferType()),
+                          debugLabel,
+                      });
 }
 }
 
@@ -227,42 +478,41 @@ void WorkloadMetadataEmitterVisitor::emitMetadata(Device& device,
                                                   uint32_t patch,
                                                   std::string name)
 {
-    json deviceMetadata {
-        {"type", "device"},
-        {"pid", pid},
-        {"device", reinterpret_cast<uintptr_t>(device.device)},
-        {"deviceName", std::move(name)},
-        {"driverMajor", major},
-        {"driverMinor", minor},
-        {"driverPatch", patch},
-    };
-
-    device.txMessage(deviceMetadata.dump());
+    using namespace pp;
+
+    device.txMessage(packBuffer("metadata"_f,
+                                DeviceMetadata {
+                                    reinterpret_cast<uintptr_t>(device.device),
+                                    pid,
+                                    major,
+                                    minor,
+                                    patch,
+                                    std::move(name),
+                                }));
 }
 
 void WorkloadMetadataEmitterVisitor::emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp)
 {
-    json frame {
-        {"type", "frame"},
-        {"device", reinterpret_cast<uintptr_t>(device.device)},
-        {"fid", frameNumber},
-        {"timestamp", timestamp},
-    };
-
-    device.txMessage(frame.dump());
+    using namespace pp;
+
+    device.txMessage(packBuffer("frame"_f,
+                                Frame {
+                                    frameNumber,
+                                    reinterpret_cast<uintptr_t>(device.device),
+                                    timestamp,
+                                }));
 }
 
-void WorkloadMetadataEmitterVisitor::emitSubmit(VkDevice vkDevice, VkQueue vkQueue, uint64_t timestamp)
+void WorkloadMetadataEmitterVisitor::emitSubmit(VkQueue queue, uint64_t timestamp)
 {
-    // Write the queue submit metadata
-    json submitMetadata {
-        {"type", "submit"},
-        {"device", reinterpret_cast<uintptr_t>(vkDevice)},
-        {"queue", reinterpret_cast<uintptr_t>(vkQueue)},
-        {"timestamp", timestamp},
-    };
-
-    device.txMessage(submitMetadata.dump());
+    using namespace pp;
+
+    device.txMessage(packBuffer("submit"_f,
+                                Submit {
+                                    timestamp,
+                                    reinterpret_cast<uintptr_t>(device.device),
+                                    reinterpret_cast<uintptr_t>(queue),
+                                }));
 }
 
 void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPass& renderpass,
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.hpp b/layer_gpu_timeline/source/workload_metadata_builder.hpp
index 3a324a6..10c61b6 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.hpp
+++ b/layer_gpu_timeline/source/workload_metadata_builder.hpp
@@ -107,11 +107,10 @@ class WorkloadMetadataEmitterVisitor : public Tracker::SubmitCommandWorkloadVisi
      * @brief Called at the start of the submit to emit a "Submit" record, delimiting the subsequent items from any
      * later submit
      *
-     * @param vkDevice The device the submit belongs to
-     * @param vkQueue The queue that was submitted to
+     * @param queue The queue that was submitted to
      * @param timestamp The timestamp of the submission
      */
-    void emitSubmit(VkDevice vkDevice, VkQueue vkQueue, uint64_t timestamp);
+    void emitSubmit(VkQueue queue, uint64_t timestamp);
 
 private:
     Device& device;
diff --git a/layer_gpu_timeline/timeline.proto b/layer_gpu_timeline/timeline.proto
new file mode 100644
index 0000000..e3f56d1
--- /dev/null
+++ b/layer_gpu_timeline/timeline.proto
@@ -0,0 +1,218 @@
+/* Copyright (C) 2025 by Arm Limited. All rights reserved. */
+
+/*
+ * The layer driver will emit an ordered sequence of `TimelineRecord` messages.
+ * The messages are not nested in the protocol, but the ordering allows a nested structure to be recovered.
+ * `Frame` messages form the outer level structure; each `Frame` marks the boundary between one sequence of events and the next; all subsequent messages that are not `Frame` messages are children of that `Frame`.
+ * `Submit` messages form the next level structure within a `Frame`; all subsequent messages that are not `Submit` or `Frame` messages are children of that `Submit`.
+ * All other messages are children of the last received `Submit`.
+ * `BeginRenderpass` and `ContinueRenderpass` are a special case; where a `ContinueRenderpass` is seen it should be merged into the last received `BeginRenderpass` within that `Submit` that has the same `tag_id` value.
+ * It is guaranteed that you will not receive a `ContinueRenderpass` unless the preceding `BeginRenderpass` was received (though it is valid to have a sequence of `ContinueRenderpass` for the same `BeginRenderpass`).
+ *
+ * Therefore the sequence of messages like:
+ * Frame
+ * Submit
+ * BeginRenderpass
+ * BeginRenderpass
+ * Submit
+ * BeginRenderpass
+ * Dispatch
+ * BeginRenderpass
+ * Frame
+ * Submit
+ * BeginRenderpass
+ * Dispatch
+ * BeginRenderpass
+ * Submit
+ * BeginRenderpass
+ * ContinueRenderpass
+ * BeginRenderpass
+ *
+ * Will become:
+ *
+ * +-> Frame
+ * |   +-> Submit
+ * |   |   +-> Renderpass
+ * |   |   +-> Renderpass
+ * |   +-> Submit
+ * |       +-> Renderpass
+ * |       +-> Dispatch
+ * |       +-> Renderpass
+ * +-> Frame
+ *     +-> Submit
+ *     |   +-> Renderpass
+ *     |   +-> Dispatch
+ *     |   +-> Renderpass
+ *     +-> Submit
+ *         +-> Renderpass (BeginRenderpass+ContinueRenderpass)
+ *         +-> Renderpass
+ */
+
+syntax = "proto3";
+
+package gpulayers.timeline;
+
+option optimize_for = LITE_RUNTIME;
+
+/* The metadata packet that is sent once for a given VkDevice, before any other packet related to that device; it describes the VkDevice / VkPhysicalDevice etc */
+message DeviceMetadata {
+    /* The VkDevice handle */
+    uint64 id = 1;
+    /* The PID of the process that the layer driver was loaded into */
+    uint32 process_id = 2;
+    /* The major version that came from the VkPhysicalDeviceProperties for that VkDevice */
+    uint32 major_version = 3;
+    /* The minor version that came from the VkPhysicalDeviceProperties for that VkDevice */
+    uint32 minor_version = 4;
+    /* The patch version that came from the VkPhysicalDeviceProperties for that VkDevice */
+    uint32 patch_version = 5;
+    /* The name that came from the VkPhysicalDeviceProperties for that VkDevice */
+    string name = 6;
+}
+
+/* A frame definition message */
+message Frame {
+    /* The unique counter / identifier for this new frame */
+    uint64 id = 1;
+    /* The VkDevice that the frame belongs to */
+    uint64 device = 2;
+    /* The timestamp (in NS, CLOCK_MONOTONIC_RAW) of the point where QueuePresent was called */
+    uint64 timestamp = 3;
+}
+
+/* A submit message */
+message Submit {
+    /* The timestamp of the submission (in NS, CLOCK_MONOTONIC_RAW) */
+    uint64 timestamp = 1;
+    /* The VkDevice that the submit belongs to */
+    uint64 device = 2;
+    /* The VkQueue that the submit belongs to */
+    uint64 queue = 3;
+}
+
+/* Enumerates the possible attachment types a renderpass can have */
+enum RenderpassAttachmentType {
+    undefined = 0;
+    color = 1;
+    depth = 2;
+    stencil = 3;
+}
+
+/* Describe an attachment to a renderpass */
+message RenderpassAttachment {
+    /* The attachment type */
+    RenderpassAttachmentType type = 1;
+    /* For color attachments, its index, otherwise should be zero/unspecified */
+    uint32 index = 2;
+    /* True if the attachment was *not* loaded (this is inverted since usually things are loaded, so this saves a field in the data) */
+    bool not_loaded = 3;
+    /* True if the attachment was *not* stored (this is inverted since usually things are stored, so this saves a field in the data) */
+    bool not_stored = 4;
+    /* True if the attachment was resolved (this is *not* inverted since usually things are not resolved, so this saves a field in the data) */
+    bool resolved = 5;
+}
+
+/* Start a new renderpass */
+message BeginRenderpass {
+    /* The unique identifier for this new renderpass */
+    uint64 tag_id = 1;
+    /* The dimensions of the renderpass' attachments */
+    uint32 width = 2;
+    uint32 height = 3;
+    /* The number of drawcalls in the renderpass */
+    uint32 draw_call_count = 4;
+    /* The subpass count */
+    uint32 subpass_count = 5;
+    /* Any user defined debug labels associated with the renderpass */
+    repeated string debug_label = 6;
+    /* Any attachments associated with the renderpass */
+    repeated RenderpassAttachment attachments = 7;
+}
+
+/* Continue a split renderpass */
+message ContinueRenderpass {
+    /* The unique identifier for the renderpass that is being continued */
+    uint64 tag_id = 1;
+    /* The number of drawcalls to add to the total in the renderpass */
+    uint32 draw_call_count = 2;
+    /* Any user defined debug labels to add to the renderpass */
+    repeated string debug_label = 3;
+}
+
+/* A dispatch object submission */
+message Dispatch {
+    /* The unique identifier for this operation */
+    uint64 tag_id = 1;
+    /* The dimensions of the dispatch */
+    int64 x_groups = 2;
+    int64 y_groups = 3;
+    int64 z_groups = 4;
+    /* Any user defined debug labels associated with the dispatch */
+    repeated string debug_label = 5;
+}
+
+/* A trace rays object submission */
+message TraceRays {
+    /* The unique identifier for this operation */
+    uint64 tag_id = 1;
+    /* The dimensions of the operation */
+    int64 x_items = 2;
+    int64 y_items = 3;
+    int64 z_items = 4;
+    /* Any user defined debug labels associated with the trace rays operation */
+    repeated string debug_label = 5;
+}
+
+/* Enumerates possible image transfer types */
+enum ImageTransferType {
+    unknown_image_transfer = 0;
+    clear_image = 1;
+    copy_image = 2;
+    copy_buffer_to_image = 3;
+    copy_image_to_buffer = 4;
+}
+
+/* An image transfer submission */
+message ImageTransfer {
+    /* The unique identifier for this operation */
+    uint64 tag_id = 1;
+    /* The number of pixels being transferred */
+    int64 pixel_count = 2;
+    /* The image transfer type */
+    ImageTransferType transfer_type = 3;
+    /* Any user defined debug labels associated with the image transfer */
+    repeated string debug_label = 4;
+}
+
+
+/* Enumerates possible buffer transfer types */
+enum BufferTransferType {
+    unknown_buffer_transfer = 0;
+    fill_buffer = 1;
+    copy_buffer = 2;
+}
+
+/* A buffer transfer submission */
+message BufferTransfer {
+    /* The unique identifier for this operation */
+    uint64 tag_id = 1;
+    /* The number of bytes being transferred */
+    int64 byte_count = 2;
+    /* The buffer transfer type */
+    BufferTransferType transfer_type = 3;
+    /* Any user defined debug labels associated with the buffer transfer */
+    repeated string debug_label = 4;
+}
+
+/* The data payload message that wraps all other messages */
+message TimelineRecord {
+    DeviceMetadata metadata = 1;
+    Frame frame = 2;
+    Submit submit = 3;
+    BeginRenderpass renderpass = 4;
+    ContinueRenderpass continue_renderpass = 5;
+    Dispatch dispatch = 6;
+    TraceRays trace_rays = 7;
+    ImageTransfer image_transfer = 8;
+    BufferTransfer buffer_transfer = 9;
+}
diff --git a/source_common/trackers/command_buffer.cpp b/source_common/trackers/command_buffer.cpp
index 3a5da3c..04c2a8d 100644
--- a/source_common/trackers/command_buffer.cpp
+++ b/source_common/trackers/command_buffer.cpp
@@ -161,7 +161,7 @@ uint64_t CommandBuffer::traceRays(int64_t xItems, int64_t yItems, int64_t zItems
 }
 
 /* See header for documentation. */
-uint64_t CommandBuffer::imageTransfer(const std::string& transferType, int64_t pixelCount)
+uint64_t CommandBuffer::imageTransfer(LCSImageTransfer::Type transferType, int64_t pixelCount)
 {
     uint64_t tagID = Tracker::LCSWorkload::assignTagID();
     stats.incImageTransferCount();
@@ -176,7 +176,7 @@ uint64_t CommandBuffer::imageTransfer(const std::string& transferType, int64_t p
 }
 
 /* See header for documentation. */
-uint64_t CommandBuffer::bufferTransfer(const std::string& transferType, int64_t byteCount)
+uint64_t CommandBuffer::bufferTransfer(LCSBufferTransfer::Type transferType, int64_t byteCount)
 {
     uint64_t tagID = Tracker::LCSWorkload::assignTagID();
     stats.incBufferTransferCount();
diff --git a/source_common/trackers/command_buffer.hpp b/source_common/trackers/command_buffer.hpp
index 67caf6d..17b71c3 100644
--- a/source_common/trackers/command_buffer.hpp
+++ b/source_common/trackers/command_buffer.hpp
@@ -135,7 +135,7 @@ class CommandBuffer
      *
      * @return Returns the tagID assigned to this workload.
      */
-    uint64_t imageTransfer(const std::string& transferType, int64_t pixelCount);
+    uint64_t imageTransfer(LCSImageTransfer::Type transferType, int64_t pixelCount);
 
     /**
      * @brief Capture a transfer where the destination is a buffer.
@@ -145,7 +145,7 @@ class CommandBuffer
      *
      * @return Returns the tagID assigned to this workload.
      */
-    uint64_t bufferTransfer(const std::string& transferType, int64_t byteCount);
+    uint64_t bufferTransfer(LCSBufferTransfer::Type transferType, int64_t byteCount);
 
     /**
      * @brief Begin a user debug marker range.
diff --git a/source_common/trackers/layer_command_stream.cpp b/source_common/trackers/layer_command_stream.cpp
index 973b502..24610b0 100644
--- a/source_common/trackers/layer_command_stream.cpp
+++ b/source_common/trackers/layer_command_stream.cpp
@@ -81,7 +81,7 @@ LCSTraceRays::LCSTraceRays(uint64_t _tagID, int64_t _xItems, int64_t _yItems, in
 }
 
 /* See header for details. */
-LCSImageTransfer::LCSImageTransfer(uint64_t _tagID, const std::string& _transferType, int64_t _pixelCount)
+LCSImageTransfer::LCSImageTransfer(uint64_t _tagID, Type _transferType, int64_t _pixelCount)
     : LCSWorkload(_tagID),
       transferType(_transferType),
       pixelCount(_pixelCount)
@@ -89,13 +89,51 @@ LCSImageTransfer::LCSImageTransfer(uint64_t _tagID, const std::string& _transfer
 }
 
 /* See header for details. */
-LCSBufferTransfer::LCSBufferTransfer(uint64_t _tagID, const std::string& _transferType, int64_t _byteCount)
+std::string LCSImageTransfer::getTransferTypeStr() const
+{
+    switch (transferType)
+    {
+    case Type::unknown:
+        return "Unknown";
+    case Type::clear_image:
+        return "Clear image";
+    case Type::copy_image:
+        return "Copy image";
+    case Type::copy_buffer_to_image:
+        return "Copy buffer to image";
+    case Type::copy_image_to_buffer:
+        return "Copy image to buffer";
+    default:
+        assert(false && "Unexpected LCSImageTransfer::Type");
+        return "<invalid>";
+    }
+}
+
+/* See header for details. */
+LCSBufferTransfer::LCSBufferTransfer(uint64_t _tagID, Type _transferType, int64_t _byteCount)
     : LCSWorkload(_tagID),
       transferType(_transferType),
       byteCount(_byteCount)
 {
 }
 
+/* See header for details. */
+std::string LCSBufferTransfer::getTransferTypeStr() const
+{
+    switch (transferType)
+    {
+    case Type::unknown:
+        return "Unknown";
+    case Type::fill_buffer:
+        return "Fill buffer";
+    case Type::copy_buffer:
+        return "Copy buffer";
+    default:
+        assert(false && "Unexpected LCSBufferTransfer::Type");
+        return "<invalid>";
+    }
+}
+
 /* See header for details. */
 LCSInstructionMarkerPush::LCSInstructionMarkerPush(const std::string& _label)
     : label(std::make_shared<std::string>(_label))
diff --git a/source_common/trackers/layer_command_stream.hpp b/source_common/trackers/layer_command_stream.hpp
index b0273ff..ded0be7 100644
--- a/source_common/trackers/layer_command_stream.hpp
+++ b/source_common/trackers/layer_command_stream.hpp
@@ -322,6 +322,16 @@ class LCSTraceRays : public LCSWorkload
 class LCSImageTransfer : public LCSWorkload
 {
 public:
+    /* Enumerates possible image transfer types */
+    enum class Type
+    {
+        unknown,
+        clear_image,
+        copy_image,
+        copy_buffer_to_image,
+        copy_image_to_buffer,
+    };
+
     /**
      * @brief Create a new image transfer workload.
      *
@@ -331,10 +341,13 @@ class LCSImageTransfer : public LCSWorkload
      * @param transferType   The subtype of the transfer.
      * @param pixelCount     The size of the transfer, in pixels.
      */
-    LCSImageTransfer(uint64_t tagID, const std::string& transferType, int64_t pixelCount);
+    LCSImageTransfer(uint64_t tagID, Type transferType, int64_t pixelCount);
+
+    /** @return The subtype of the transfer */
+    Type getTransferType() const { return transferType; }
 
     /** @return The subtype of the transfer */
-    const std::string& getTransferType() const { return transferType; }
+    std::string getTransferTypeStr() const;
 
     /** @return The size of the transfer, in pixels */
     int64_t getPixelCount() const { return pixelCount; }
@@ -343,7 +356,7 @@ class LCSImageTransfer : public LCSWorkload
     /**
      * @brief The subtype of the transfer.
      */
-    std::string transferType;
+    Type transferType;
 
     /**
      * @brief The number of pixels transferred, or -1 if unknown.
@@ -357,6 +370,14 @@ class LCSImageTransfer : public LCSWorkload
 class LCSBufferTransfer : public LCSWorkload
 {
 public:
+    /* Enumerates possible buffer transfer types */
+    enum class Type
+    {
+        unknown,
+        fill_buffer,
+        copy_buffer,
+    };
+
     /**
      * @brief Create a new buffer transfer workload.
      *
@@ -367,10 +388,13 @@ class LCSBufferTransfer : public LCSWorkload
      * @param transferType   The subtype of the transfer.
      * @param byteCount      The size of the transfer, in bytes.
      */
-    LCSBufferTransfer(uint64_t tagID, const std::string& transferType, int64_t byteCount);
+    LCSBufferTransfer(uint64_t tagID, Type transferType, int64_t byteCount);
+
+    /** @return The subtype of the transfer */
+    Type getTransferType() const { return transferType; }
 
     /** @return The subtype of the transfer */
-    const std::string& getTransferType() const { return transferType; }
+    std::string getTransferTypeStr() const;
 
     /** @return The size of the transfer, in bytes */
     int64_t getByteCount() const { return byteCount; }
@@ -379,7 +403,7 @@ class LCSBufferTransfer : public LCSWorkload
     /**
      * @brief The subtype of the transfer.
      */
-    std::string transferType;
+    Type transferType;
 
     /**
      * @brief The number of bytes transferred, -1 if unknown, -2 if whole buffer.
diff --git a/source_common/trackers/render_pass.hpp b/source_common/trackers/render_pass.hpp
index c45c634..4dafd93 100644
--- a/source_common/trackers/render_pass.hpp
+++ b/source_common/trackers/render_pass.hpp
@@ -87,6 +87,13 @@ class RenderPassAttachment
      */
     std::string getAttachmentStr() const;
 
+    /**
+     * @brief Get the name of the attachment point.
+     *
+     * @return The attachment point name.
+     */
+    RenderPassAttachName getAttachmentName() const { return name; }
+
     /**
      * @brief Is this attachment loaded at the start of the render pass?
      *
diff --git a/source_third_party/protopuf b/source_third_party/protopuf
new file mode 160000
index 0000000..60f5511
--- /dev/null
+++ b/source_third_party/protopuf
@@ -0,0 +1 @@
+Subproject commit 60f55110ea0ccf9827f24334043c9c778580bf07

From 05a39153804845a96e7fbe5565103f96b3358fbd Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Tue, 4 Feb 2025 13:40:06 +0000
Subject: [PATCH 07/11] Rework service_gpu_timeline to process the raw protobuf
 frames into JSON objects that the viewer expects

---
 lglpy/comms/service_gpu_timeline.py           | 356 +++++++++++++++---
 .../timeline/protos/layer_driver/__init__.py  |   0
 .../protos/layer_driver/timeline_pb2.py       |  63 ++++
 3 files changed, 370 insertions(+), 49 deletions(-)
 create mode 100644 lglpy/timeline/protos/layer_driver/__init__.py
 create mode 100644 lglpy/timeline/protos/layer_driver/timeline_pb2.py

diff --git a/lglpy/comms/service_gpu_timeline.py b/lglpy/comms/service_gpu_timeline.py
index 9359291..601f00f 100644
--- a/lglpy/comms/service_gpu_timeline.py
+++ b/lglpy/comms/service_gpu_timeline.py
@@ -32,26 +32,167 @@
 from typing import Any, TypedDict
 
 from lglpy.comms.server import Message
+from lglpy.timeline.protos.layer_driver import timeline_pb2
+
+
+class RenderpassAttachmentMetadataType(TypedDict):
+    '''
+    Structured dict type for type hinting.
+    '''
+    binding: str
+    load: bool
+    store: bool
+    resolve: bool
+
+
+class RenderpassMetadataType(TypedDict):
+    '''
+    Structured dict type for type hinting.
+    '''
+    type: str
+    tid: int
+    width: int
+    height: int
+    drawCallCount: int
+    subpassCount: int
+    label: list[str]
+    attachments: list[RenderpassAttachmentMetadataType]
+
+
+class DispatchMetadataType(TypedDict):
+    '''
+    Structured dict type for type hinting.
+    '''
+    type: str
+    tid: int
+    xGroups: int
+    yGroups: int
+    zGroups: int
+    label: list[str]
+
+
+class TraceRaysMetadataType(TypedDict):
+    '''
+    Structured dict type for type hinting.
+    '''
+    type: str
+    tid: int
+    xItems: int
+    yItems: int
+    zItems: int
+    label: list[str]
+
+
+class ImageTransferMetadataType(TypedDict):
+    '''
+    Structured dict type for type hinting.
+    '''
+    type: str
+    tid: int
+    subtype: str
+    pixelCount: int
+    label: list[str]
+
+
+class BufferTransferMetadataType(TypedDict):
+    '''
+    Structured dict type for type hinting.
+    '''
+    type: str
+    tid: int
+    subtype: str
+    byteCount: int
+    label: list[str]
 
 
 class SubmitMetadataType(TypedDict):
     '''
     Structured dict type for type hinting.
     '''
+    device: int
     queue: int
     timestamp: int
-    workloads: list[Any]
+    workloads: list[RenderpassMetadataType|DispatchMetadataType|TraceRaysMetadataType|ImageTransferMetadataType|BufferTransferMetadataType]
 
 
 class FrameMetadataType(TypedDict):
     '''
     Structured dict type for type hinting.
     '''
+    device: int
     frame: int
     presentTimestamp: int
     submits: list[SubmitMetadataType]
 
 
+def expect_int(v : int|None) -> int:
+    if v is None:
+        return 0
+    assert isinstance(v, int)
+    return v
+
+
+def map_renderpass_binding(type : timeline_pb2.RenderpassAttachmentType, index : int|None) -> str:
+    '''
+    Map the PB encoded renderpass attachment type to a corresponding description string
+    '''
+    if type == timeline_pb2.RenderpassAttachmentType.undefined:
+        assert((index is None) or (index == 0))
+        return "U"
+    elif type == timeline_pb2.RenderpassAttachmentType.color:
+        assert(index is not None)
+        return f"C{index}"
+    elif type == timeline_pb2.RenderpassAttachmentType.depth:
+        assert((index is None) or (index == 0))
+        return "D"
+    elif type == timeline_pb2.RenderpassAttachmentType.stencil:
+        assert((index is None) or (index == 0))
+        return "S"
+    else:
+        assert False
+
+
+def map_image_transfer_type(type : timeline_pb2.ImageTransferType) -> str:
+    '''
+    Map the PB encoded image transfer type to some corresponding description string
+    '''
+    if type == timeline_pb2.ImageTransferType.unknown_image_transfer:
+        return "Unknown"
+    elif type == timeline_pb2.ImageTransferType.clear_image:
+        return "Clear image"
+    elif type == timeline_pb2.ImageTransferType.copy_image:
+        return "Copy image"
+    elif type == timeline_pb2.ImageTransferType.copy_buffer_to_image:
+        return "Copy buffer to image"
+    elif type == timeline_pb2.ImageTransferType.copy_image_to_buffer:
+        return "Copy image to buffer"
+    else:
+        assert False
+
+
+def map_buffer_transfer_type(type : timeline_pb2.BufferTransferType) -> str:
+    '''
+    Map the PB encoded buffer transfer type to some corresponding description string
+    '''
+    if type == timeline_pb2.BufferTransferType.unknown_buffer_transfer:
+        return "Unknown"
+    elif type == timeline_pb2.BufferTransferType.fill_buffer:
+        return "Fill buffer"
+    elif type == timeline_pb2.BufferTransferType.copy_buffer:
+        return "Copy buffer"
+    else:
+        assert False
+
+
+def map_debug_label(labels : list[str]|None) -> list[str]:
+    '''
+    Normalize the 'debug_label' field from the PB data
+    '''
+    if labels is None:
+        return []
+    return [str(l) for l in labels] # need to convert it to a list from a RepeatedScalarContainer
+
+
 class GPUTimelineService:
     '''
     A service for handling network comms from the layer_gpu_timeline layer.
@@ -71,6 +212,7 @@ def __init__(self, file_path: str, verbose: bool = False):
         # Create a default frame record
         # End time written on queuePresent
         self.frame: FrameMetadataType = {
+            'device': 0,
             'frame': 0,
             'presentTimestamp': 0,
             'submits': []
@@ -94,16 +236,17 @@ def handle_device(self, msg: Any) -> None:
         Handle a device config packet.
 
         Args:
-            msg: The Python decode of a JSON payload.
+            msg: The Python decode of a Timeline PB payload.
         '''
         # Reset the local frame state for the next frame
-        major = msg["driverMajor"]
-        minor = msg["driverMinor"]
-        patch = msg["driverPatch"]
+        major = expect_int(msg.major_version)
+        minor = expect_int(msg.minor_version)
+        patch = expect_int(msg.patch_version)
 
         if self.verbose:
-            print(f'Device: {msg["deviceName"]}')
-            print(f'Driver: r{major}p{minor} ({patch})')
+            print(f'Device:  {msg.name} (0x{msg.id:02X})')
+            print(f'Driver:  r{major}p{minor} ({patch})')
+            print(f'Process: {msg.process_id}')
 
     def handle_frame(self, msg: Any) -> None:
         '''
@@ -113,12 +256,13 @@ def handle_frame(self, msg: Any) -> None:
         reset the frame tracker ready for the next frame.
 
         Args:
-            msg: The Python decode of a JSON payload.
+            msg: The Python decode of a Timeline PB payload.
         '''
         # Update end time of the current frame
-        self.frame['presentTimestamp'] = msg['timestamp']
+        self.frame['presentTimestamp'] = expect_int(msg.timestamp)
 
         # Write frame packet to the file
+        # FIXME: No need to write the first empty frame?
         last_frame = json.dumps(self.frame).encode('utf-8')
         length = struct.pack('<I', len(last_frame))
 
@@ -126,8 +270,9 @@ def handle_frame(self, msg: Any) -> None:
         self.file_handle.write(last_frame)
 
         # Reset the local frame state for the next frame
-        next_frame = msg['fid']
+        next_frame = expect_int(msg.id)
         self.frame = {
+            'device': expect_int(msg.device),
             'frame': next_frame,
             'presentTimestamp': 0,
             'submits': []
@@ -141,12 +286,13 @@ def handle_submit(self, msg: Any) -> None:
         Handle a submit boundary workload.
 
         Args:
-            msg: The Python decode of a JSON payload.
+            msg: The Python decode of a Timeline PB payload.
         '''
         # Write frame packet to the file
         submit: SubmitMetadataType = {
-            'queue': msg['queue'],
-            'timestamp': msg['timestamp'],
+            'device': expect_int(msg.device),
+            'queue': expect_int(msg.queue),
+            'timestamp': expect_int(msg.timestamp),
             'workloads': []
         }
 
@@ -157,12 +303,49 @@ def handle_render_pass(self, msg: Any) -> None:
         '''
         Handle a render pass workload.
 
+        Render passes may generate multiple messages if suspended and resumed
+        when using Vulkan 1.3 dynamic render passes; this message may be
+        followed by zero or more continuation messages that will be
+        merged into this renderpass.
+
+        Args:
+            msg: The Python decode of a Timeline PB payload.
+        '''
+        submit = self.frame['submits'][-1]
+
+        # Convert the PB message into our data representation
+        renderpass: RenderpassMetadataType = {
+            'type': 'renderpass',
+            'tid': expect_int(msg.tag_id),
+            'width': expect_int(msg.width),
+            'height': expect_int(msg.height),
+            'drawCallCount': expect_int(msg.draw_call_count),
+            'subpassCount': expect_int(msg.subpass_count),
+            'label': map_debug_label(msg.debug_label),
+            'attachments': []
+        }
+
+        for pb_attachment in msg.attachments:
+            attachment: RenderpassAttachmentMetadataType = {
+                'binding': map_renderpass_binding(pb_attachment.type, pb_attachment.index),
+                'load': not bool(pb_attachment.not_loaded),
+                'store': not bool(pb_attachment.not_stored),
+                'resolve': bool(pb_attachment.resolved),
+            }
+            renderpass['attachments'].append(attachment)
+
+        submit['workloads'].append(renderpass)
+
+    def handle_render_pass_continuation(self, msg: Any) -> None:
+        '''
+        Handle a render pass workload continuation.
+
         Render passes may generate multiple messages if suspended and resumed
         when using Vulkan 1.3 dynamic render passes, so merge those into a
         single workload.
 
         Args:
-            msg: The Python decode of a JSON payload.
+            msg: The Python decode of a Timeline PB payload.
         '''
         submit = self.frame['submits'][-1]
 
@@ -173,59 +356,134 @@ def handle_render_pass(self, msg: Any) -> None:
             if last_workload['type'] == 'renderpass':
                 last_render_pass = last_workload
 
-        # If this is a continuation then merge records
-        if last_render_pass and (last_render_pass['tid'] == msg['tid']):
-            # Don't accumulate if tag_id is flagged as ambiguous
-            if last_render_pass['drawCallCount'] != -1:
-                last_render_pass['drawCallCount'] += msg['drawCallCount']
+        # Validate that this is a continuation of the last renderpass
+        assert last_render_pass and (last_render_pass['tid'] == expect_int(msg.tag_id))
 
-        # Otherwise this is a new record
-        else:
-            submit['workloads'].append(msg)
+        # Don't accumulate if tag_id is flagged as ambiguous
+        if last_render_pass['drawCallCount'] >= 0:
+            last_render_pass['drawCallCount'] += expect_int(msg.draw_call_count)
 
-    def handle_generic(self, msg: Any) -> None:
+    def handle_dispatch(self, msg: Any) -> None:
         '''
-        Handle a generic workload that needs no special handling.
+        Handle a dispatch workload
 
         Args:
-            msg: The Python decode of a JSON payload.
+            msg: The Python decode of a Timeline PB payload.
         '''
         submit = self.frame['submits'][-1]
-        submit['workloads'].append(msg)
 
-    def handle_message(self, message: Message) -> None:
+        # Convert the PB message into our data representation
+        dispatch: DispatchMetadataType = {
+            'type': 'dispatch',
+            'tid': expect_int(msg.tag_id),
+            'xGroups': expect_int(msg.x_groups),
+            'yGroups': expect_int(msg.y_groups),
+            'zGroups': expect_int(msg.z_groups),
+            'label': map_debug_label(msg.debug_label),
+        }
+
+        submit['workloads'].append(dispatch)
+
+    def handle_trace_rays(self, msg: Any) -> None:
         '''
-        Handle a service request from a layer.
+        Handle a trace rays workload
 
-        Note that this service only expects pushed TX or TX_ASYNC messages, so
-        never provides a response.
+        Args:
+            msg: The Python decode of a Timeline PB payload.
         '''
-        encoded_payload = message.payload.decode('utf-8')
-        payload = json.loads(encoded_payload)
+        submit = self.frame['submits'][-1]
 
-        generic_payload_types = {
-            'dispatch',
-            'tracerays',
-            'imagetransfer',
-            'buffertransfer'
+        # Convert the PB message into our data representation
+        trace_rays: TraceRaysMetadataType = {
+            'type': 'tracerays',
+            'tid': expect_int(msg.tag_id),
+            'xItems': expect_int(msg.x_items),
+            'yItems': expect_int(msg.y_items),
+            'zItems': expect_int(msg.z_items),
+            'label': map_debug_label(msg.debug_label),
         }
 
-        payload_type = payload['type']
+        submit['workloads'].append(trace_rays)
 
-        if payload_type == 'device':
-            self.handle_device(payload)
+    def handle_image_transfer(self, msg: Any) -> None:
+        '''
+        Handle an image transfer workload
 
-        elif payload_type == 'frame':
-            self.handle_frame(payload)
+        Args:
+            msg: The Python decode of a Timeline PB payload.
+        '''
+        submit = self.frame['submits'][-1]
 
-        elif payload_type == 'submit':
-            self.handle_submit(payload)
+        # Convert the PB message into our data representation
+        image_transfer: ImageTransferMetadataType = {
+            'type': 'imagetransfer',
+            'tid': expect_int(msg.tag_id),
+            'subtype': map_image_transfer_type(msg.transfer_type),
+            'pixelCount': expect_int(msg.pixel_count),
+            'label': map_debug_label(msg.debug_label),
+        }
 
-        elif payload_type == 'renderpass':
-            self.handle_render_pass(payload)
+        submit['workloads'].append(image_transfer)
 
-        elif payload_type in generic_payload_types:
-            self.handle_generic(payload)
+    def handle_buffer_transfer(self, msg: Any) -> None:
+        '''
+        Handle a buffer transfer workload
+
+        Args:
+            msg: The Python decode of a Timeline PB payload.
+        '''
+        submit = self.frame['submits'][-1]
 
+        # Convert the PB message into our data representation
+        buffer_transfer: BufferTransferMetadataType = {
+            'type': 'buffertransfer',
+            'tid': expect_int(msg.tag_id),
+            'subtype': map_buffer_transfer_type(msg.transfer_type),
+            'byteCount': expect_int(msg.byte_count),
+            'label': map_debug_label(msg.debug_label),
+        }
+
+        submit['workloads'].append(buffer_transfer)
+
+    def handle_message(self, message: Message) -> None:
+        '''
+        Handle a service request from a layer.
+
+        Note that this service only expects pushed TX or TX_ASYNC messages, so
+        never provides a response.
+        '''
+        pb_record = timeline_pb2.TimelineRecord()  # pylint: disable=no-member
+        pb_record.ParseFromString(message.payload)
+
+        # Assert there is at most one member message
+        assert((int(pb_record.HasField('metadata'))
+               + int(pb_record.HasField('frame'))
+               + int(pb_record.HasField('submit'))
+               + int(pb_record.HasField('renderpass'))
+               + int(pb_record.HasField('continue_renderpass'))
+               + int(pb_record.HasField('dispatch'))
+               + int(pb_record.HasField('trace_rays'))
+               + int(pb_record.HasField('image_transfer'))
+               + int(pb_record.HasField('buffer_transfer'))) <= 1)
+
+        # Process the message
+        if pb_record.HasField('metadata'):
+            self.handle_device(pb_record.metadata)
+        elif pb_record.HasField('frame'):
+            self.handle_frame(pb_record.frame)
+        elif pb_record.HasField('submit'):
+            self.handle_submit(pb_record.submit)
+        elif pb_record.HasField('renderpass'):
+            self.handle_render_pass(pb_record.renderpass)
+        elif pb_record.HasField('continue_renderpass'):
+            self.handle_render_pass_continuation(pb_record.continue_renderpass)
+        elif pb_record.HasField('dispatch'):
+            self.handle_dispatch(pb_record.dispatch)
+        elif pb_record.HasField('trace_rays'):
+            self.handle_trace_rays(pb_record.trace_rays)
+        elif pb_record.HasField('image_transfer'):
+            self.handle_image_transfer(pb_record.image_transfer)
+        elif pb_record.HasField('buffer_transfer'):
+            self.handle_buffer_transfer(pb_record.buffer_transfer)
         else:
-            assert False, f'Unknown payload type {payload_type}'
+            assert False, f'Unknown payload {pb_record}'
diff --git a/lglpy/timeline/protos/layer_driver/__init__.py b/lglpy/timeline/protos/layer_driver/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lglpy/timeline/protos/layer_driver/timeline_pb2.py b/lglpy/timeline/protos/layer_driver/timeline_pb2.py
new file mode 100644
index 0000000..ca71621
--- /dev/null
+++ b/lglpy/timeline/protos/layer_driver/timeline_pb2.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+# Generated by the protocol buffer compiler.  DO NOT EDIT!
+# NO CHECKED-IN PROTOBUF GENCODE
+# source: timeline.proto
+# Protobuf Python Version: 5.29.2
+"""Generated protocol buffer code."""
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import descriptor_pool as _descriptor_pool
+from google.protobuf import runtime_version as _runtime_version
+from google.protobuf import symbol_database as _symbol_database
+from google.protobuf.internal import builder as _builder
+_runtime_version.ValidateProtobufRuntimeVersion(
+    _runtime_version.Domain.PUBLIC,
+    5,
+    29,
+    2,
+    '',
+    'timeline.proto'
+)
+# @@protoc_insertion_point(imports)
+
+_sym_db = _symbol_database.Default()
+
+
+
+
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0etimeline.proto\x12\x12gpulayers.timeline\"\x83\x01\n\x0e\x44\x65viceMetadata\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nprocess_id\x18\x02 \x01(\r\x12\x15\n\rmajor_version\x18\x03 \x01(\r\x12\x15\n\rminor_version\x18\x04 \x01(\r\x12\x15\n\rpatch_version\x18\x05 \x01(\r\x12\x0c\n\x04name\x18\x06 \x01(\t\"6\n\x05\x46rame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\x11\n\ttimestamp\x18\x03 \x01(\x04\":\n\x06Submit\x12\x11\n\ttimestamp\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\r\n\x05queue\x18\x03 \x01(\x04\"\x9b\x01\n\x14RenderpassAttachment\x12:\n\x04type\x18\x01 \x01(\x0e\x32,.gpulayers.timeline.RenderpassAttachmentType\x12\r\n\x05index\x18\x02 \x01(\r\x12\x12\n\nnot_loaded\x18\x03 \x01(\x08\x12\x12\n\nnot_stored\x18\x04 \x01(\x08\x12\x10\n\x08resolved\x18\x05 \x01(\x08\"\xc4\x01\n\x0f\x42\x65ginRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x17\n\x0f\x64raw_call_count\x18\x04 \x01(\r\x12\x15\n\rsubpass_count\x18\x05 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x06 \x03(\t\x12=\n\x0b\x61ttachments\x18\x07 \x03(\x0b\x32(.gpulayers.timeline.RenderpassAttachment\"R\n\x12\x43ontinueRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x17\n\x0f\x64raw_call_count\x18\x02 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x03 \x03(\t\"e\n\x08\x44ispatch\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x10\n\x08x_groups\x18\x02 \x01(\x03\x12\x10\n\x08y_groups\x18\x03 \x01(\x03\x12\x10\n\x08z_groups\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"c\n\tTraceRays\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x0f\n\x07x_items\x18\x02 \x01(\x03\x12\x0f\n\x07y_items\x18\x03 \x01(\x03\x12\x0f\n\x07z_items\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"\x87\x01\n\rImageTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x13\n\x0bpixel_count\x18\x02 
\x01(\x03\x12<\n\rtransfer_type\x18\x03 \x01(\x0e\x32%.gpulayers.timeline.ImageTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\x88\x01\n\x0e\x42ufferTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x12\n\nbyte_count\x18\x02 \x01(\x03\x12=\n\rtransfer_type\x18\x03 \x01(\x0e\x32&.gpulayers.timeline.BufferTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xf5\x03\n\x0eTimelineRecord\x12\x34\n\x08metadata\x18\x01 \x01(\x0b\x32\".gpulayers.timeline.DeviceMetadata\x12(\n\x05\x66rame\x18\x02 \x01(\x0b\x32\x19.gpulayers.timeline.Frame\x12*\n\x06submit\x18\x03 \x01(\x0b\x32\x1a.gpulayers.timeline.Submit\x12\x37\n\nrenderpass\x18\x04 \x01(\x0b\x32#.gpulayers.timeline.BeginRenderpass\x12\x43\n\x13\x63ontinue_renderpass\x18\x05 \x01(\x0b\x32&.gpulayers.timeline.ContinueRenderpass\x12.\n\x08\x64ispatch\x18\x06 \x01(\x0b\x32\x1c.gpulayers.timeline.Dispatch\x12\x31\n\ntrace_rays\x18\x07 \x01(\x0b\x32\x1d.gpulayers.timeline.TraceRays\x12\x39\n\x0eimage_transfer\x18\x08 \x01(\x0b\x32!.gpulayers.timeline.ImageTransfer\x12;\n\x0f\x62uffer_transfer\x18\t \x01(\x0b\x32\".gpulayers.timeline.BufferTransfer*L\n\x18RenderpassAttachmentType\x12\r\n\tundefined\x10\x00\x12\t\n\x05\x63olor\x10\x01\x12\t\n\x05\x64\x65pth\x10\x02\x12\x0b\n\x07stencil\x10\x03*\x84\x01\n\x11ImageTransferType\x12\x1a\n\x16unknown_image_transfer\x10\x00\x12\x0f\n\x0b\x63lear_image\x10\x01\x12\x0e\n\ncopy_image\x10\x02\x12\x18\n\x14\x63opy_buffer_to_image\x10\x03\x12\x18\n\x14\x63opy_image_to_buffer\x10\x04*S\n\x12\x42ufferTransferType\x12\x1b\n\x17unknown_buffer_transfer\x10\x00\x12\x0f\n\x0b\x66ill_buffer\x10\x01\x12\x0f\n\x0b\x63opy_buffer\x10\x02\x42\x02H\x03\x62\x06proto3')
+
+_globals = globals()
+_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'timeline_pb2', _globals)
+if not _descriptor._USE_C_DESCRIPTORS:
+  _globals['DESCRIPTOR']._loaded_options = None
+  _globals['DESCRIPTOR']._serialized_options = b'H\003'
+  _globals['_RENDERPASSATTACHMENTTYPE']._serialized_start=1714
+  _globals['_RENDERPASSATTACHMENTTYPE']._serialized_end=1790
+  _globals['_IMAGETRANSFERTYPE']._serialized_start=1793
+  _globals['_IMAGETRANSFERTYPE']._serialized_end=1925
+  _globals['_BUFFERTRANSFERTYPE']._serialized_start=1927
+  _globals['_BUFFERTRANSFERTYPE']._serialized_end=2010
+  _globals['_DEVICEMETADATA']._serialized_start=39
+  _globals['_DEVICEMETADATA']._serialized_end=170
+  _globals['_FRAME']._serialized_start=172
+  _globals['_FRAME']._serialized_end=226
+  _globals['_SUBMIT']._serialized_start=228
+  _globals['_SUBMIT']._serialized_end=286
+  _globals['_RENDERPASSATTACHMENT']._serialized_start=289
+  _globals['_RENDERPASSATTACHMENT']._serialized_end=444
+  _globals['_BEGINRENDERPASS']._serialized_start=447
+  _globals['_BEGINRENDERPASS']._serialized_end=643
+  _globals['_CONTINUERENDERPASS']._serialized_start=645
+  _globals['_CONTINUERENDERPASS']._serialized_end=727
+  _globals['_DISPATCH']._serialized_start=729
+  _globals['_DISPATCH']._serialized_end=830
+  _globals['_TRACERAYS']._serialized_start=832
+  _globals['_TRACERAYS']._serialized_end=931
+  _globals['_IMAGETRANSFER']._serialized_start=934
+  _globals['_IMAGETRANSFER']._serialized_end=1069
+  _globals['_BUFFERTRANSFER']._serialized_start=1072
+  _globals['_BUFFERTRANSFER']._serialized_end=1208
+  _globals['_TIMELINERECORD']._serialized_start=1211
+  _globals['_TIMELINERECORD']._serialized_end=1712
+# @@protoc_insertion_point(module_scope)

From bbc8ecb622c0fe37269af8b810a645751a405695 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Wed, 5 Feb 2025 09:51:24 +0000
Subject: [PATCH 08/11] Track GPU timeline PB submits per-device.

The protocol supports multiple devices, as does the API,
but historically we just assumed a single device.
Modify the state tracking in service_gpu_timeline.py so
that we collect frames and submits on a per-device
basis.
---
 lglpy/comms/service_gpu_timeline.py | 106 ++++++++++++++++++++--------
 1 file changed, 78 insertions(+), 28 deletions(-)

diff --git a/lglpy/comms/service_gpu_timeline.py b/lglpy/comms/service_gpu_timeline.py
index 601f00f..f773e06 100644
--- a/lglpy/comms/service_gpu_timeline.py
+++ b/lglpy/comms/service_gpu_timeline.py
@@ -193,6 +193,30 @@ def map_debug_label(labels : list[str]|None) -> list[str]:
     return [str(l) for l in labels] # need to convert it to a list from a RepeatedScalarContainer
 
 
+class GPUDeviceState:
+    '''
+    Holds per device state.
+
+    Typically there will only be one physical device, and one corresponding VkDevice,
+    but the protocol and API are both designed to support multiple devices so abstract
+    that here.
+
+    Args:
+        device_id: The device id associated with the state object
+    '''
+    def __init__(self, device_id: int):
+        '''
+        Initialize the state for a single device
+        '''
+        # Create a default frame record
+        # End time written on queuePresent
+        self.frame: FrameMetadataType = {
+            'device': device_id,
+            'frame': 0,
+            'presentTimestamp': 0,
+            'submits': []
+        }
+
 class GPUTimelineService:
     '''
     A service for handling network comms from the layer_gpu_timeline layer.
@@ -205,23 +229,23 @@ def __init__(self, file_path: str, verbose: bool = False):
         Args:
             file_path: File to write on the filesystem
             verbose: Should this use verbose logging?
-
-        Returns:
-            The endpoint name.
         '''
-        # Create a default frame record
-        # End time written on queuePresent
-        self.frame: FrameMetadataType = {
-            'device': 0,
-            'frame': 0,
-            'presentTimestamp': 0,
-            'submits': []
-        }
-
+        self.devices : dict[int,GPUDeviceState] = dict()
+        self.last_submit : SubmitMetadataType|None = None
         # pylint: disable=consider-using-with
         self.file_handle = open(file_path, 'wb')
         self.verbose = verbose
 
+    def get_device(self, device: int) -> GPUDeviceState:
+        '''
+        Get or create a device object with the specified handle
+        '''
+        device = self.devices.get(device, None)
+        if device is None:
+            device = GPUDeviceState(device)
+            self.devices[device] = device
+        return device
+
     def get_service_name(self) -> str:
         '''
         Get the service endpoint name.
@@ -239,12 +263,18 @@ def handle_device(self, msg: Any) -> None:
             msg: The Python decode of a Timeline PB payload.
         '''
         # Reset the local frame state for the next frame
-        major = expect_int(msg.major_version)
-        minor = expect_int(msg.minor_version)
-        patch = expect_int(msg.patch_version)
+        device_id = expect_int(msg.id)
+        self.devices[device_id] = GPUDeviceState(device_id)
+
+        # This clears the last submit; expect a submit message before any new workloads
+        self.last_submit = None
+
 
         if self.verbose:
-            print(f'Device:  {msg.name} (0x{msg.id:02X})')
+            major = expect_int(msg.major_version)
+            minor = expect_int(msg.minor_version)
+            patch = expect_int(msg.patch_version)
+            print(f'Device:  {msg.name} (0x{device_id:02X})')
             print(f'Driver:  r{major}p{minor} ({patch})')
             print(f'Process: {msg.process_id}')
 
@@ -258,12 +288,19 @@ def handle_frame(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
+        # Get the device object
+        device_id = expect_int(msg.device)
+        device = self.get_device(device_id)
+
+        # This clears the last submit; expect a submit message before any new workloads
+        self.last_submit = None
+
         # Update end time of the current frame
-        self.frame['presentTimestamp'] = expect_int(msg.timestamp)
+        device.frame['presentTimestamp'] = expect_int(msg.timestamp)
 
         # Write frame packet to the file
         # FIXME: No need to write the first empty frame?
-        last_frame = json.dumps(self.frame).encode('utf-8')
+        last_frame = json.dumps(device.frame).encode('utf-8')
         length = struct.pack('<I', len(last_frame))
 
         self.file_handle.write(length)
@@ -271,15 +308,15 @@ def handle_frame(self, msg: Any) -> None:
 
         # Reset the local frame state for the next frame
         next_frame = expect_int(msg.id)
-        self.frame = {
-            'device': expect_int(msg.device),
+        device.frame = {
+            'device': device_id,
             'frame': next_frame,
             'presentTimestamp': 0,
             'submits': []
         }
 
         if self.verbose and (next_frame % 100 == 0):
-            print(f'Starting frame {next_frame} ...')
+            print(f'Starting frame {next_frame} for 0x{device_id:02X} ...')
 
     def handle_submit(self, msg: Any) -> None:
         '''
@@ -288,6 +325,9 @@ def handle_submit(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
+        # Get the device object
+        device = self.get_device(expect_int(msg.device))
+
         # Write frame packet to the file
         submit: SubmitMetadataType = {
             'device': expect_int(msg.device),
@@ -297,7 +337,11 @@ def handle_submit(self, msg: Any) -> None:
         }
 
         # Reset the local frame state for the next frame
-        self.frame['submits'].append(submit)
+        device.frame['submits'].append(submit)
+
+        # Track this new submit object; all subsequent workloads will attach to it
+        # up to the point of the next submit/frame/device
+        self.last_submit = submit
 
     def handle_render_pass(self, msg: Any) -> None:
         '''
@@ -311,7 +355,8 @@ def handle_render_pass(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        submit = self.frame['submits'][-1]
+        assert self.last_submit is not None
+        submit = self.last_submit
 
         # Convert the PB message into our data representation
         renderpass: RenderpassMetadataType = {
@@ -347,7 +392,8 @@ def handle_render_pass_continuation(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        submit = self.frame['submits'][-1]
+        assert self.last_submit is not None
+        submit = self.last_submit
 
         # Find the last workload
         last_render_pass = None
@@ -370,7 +416,8 @@ def handle_dispatch(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        submit = self.frame['submits'][-1]
+        assert self.last_submit is not None
+        submit = self.last_submit
 
         # Convert the PB message into our data representation
         dispatch: DispatchMetadataType = {
@@ -391,7 +438,8 @@ def handle_trace_rays(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        submit = self.frame['submits'][-1]
+        assert self.last_submit is not None
+        submit = self.last_submit
 
         # Convert the PB message into our data representation
         trace_rays: TraceRaysMetadataType = {
@@ -412,7 +460,8 @@ def handle_image_transfer(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        submit = self.frame['submits'][-1]
+        assert self.last_submit is not None
+        submit = self.last_submit
 
         # Convert the PB message into our data representation
         image_transfer: ImageTransferMetadataType = {
@@ -432,7 +481,8 @@ def handle_buffer_transfer(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        submit = self.frame['submits'][-1]
+        assert self.last_submit is not None
+        submit = self.last_submit
 
         # Convert the PB message into our data representation
         buffer_transfer: BufferTransferMetadataType = {

From c00e2877b739df4b8876fff2ebb378b557c0a1a7 Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Wed, 5 Feb 2025 10:51:44 +0000
Subject: [PATCH 09/11] Fixed all pycodestyle and mypy warnings and errors

---
 .mypy.ini                           |   5 +-
 .pycodestyle.ini                    |   2 +-
 lglpy/comms/service_gpu_timeline.py | 133 +++++++++++++++++-----------
 lglpy/ui/console.py                 |   2 +-
 4 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/.mypy.ini b/.mypy.ini
index bcc0ffe..726b660 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -1,10 +1,13 @@
 [mypy]
-exclude = lglpy/timeline/protos/.*\.py
+exclude = lglpy/timeline/protos/
 ignore_missing_imports = True
 disable_error_code = annotation-unchecked
 
 [mypy-lglpy.timeline.data.raw_trace]
 disable_error_code = attr-defined
 
+[mypy-lglpy.comms.service_gpu_timeline]
+disable_error_code = attr-defined
+
 [mypy-google.*]
 ignore_missing_imports = True
diff --git a/.pycodestyle.ini b/.pycodestyle.ini
index b1cd005..35fd3b1 100644
--- a/.pycodestyle.ini
+++ b/.pycodestyle.ini
@@ -1,4 +1,4 @@
 [pycodestyle]
 exclude = lglpy/timeline/protos
-ignore = E402,E126,E127
+ignore = E402,E126,E127,W503
 max-line-length = 80
diff --git a/lglpy/comms/service_gpu_timeline.py b/lglpy/comms/service_gpu_timeline.py
index f773e06..17170a8 100644
--- a/lglpy/comms/service_gpu_timeline.py
+++ b/lglpy/comms/service_gpu_timeline.py
@@ -112,7 +112,11 @@ class SubmitMetadataType(TypedDict):
     device: int
     queue: int
     timestamp: int
-    workloads: list[RenderpassMetadataType|DispatchMetadataType|TraceRaysMetadataType|ImageTransferMetadataType|BufferTransferMetadataType]
+    workloads: list[RenderpassMetadataType
+                    | DispatchMetadataType
+                    | TraceRaysMetadataType
+                    | ImageTransferMetadataType
+                    | BufferTransferMetadataType]
 
 
 class FrameMetadataType(TypedDict):
@@ -125,36 +129,38 @@ class FrameMetadataType(TypedDict):
     submits: list[SubmitMetadataType]
 
 
-def expect_int(v : int|None) -> int:
+def expect_int(v: int | None) -> int:
     if v is None:
         return 0
     assert isinstance(v, int)
     return v
 
 
-def map_renderpass_binding(type : timeline_pb2.RenderpassAttachmentType, index : int|None) -> str:
+def map_renderpass_binding(type, index: int | None) -> str:
     '''
-    Map the PB encoded renderpass attachment type to a corresponding description string
+    Map the PB encoded renderpass attachment type to a corresponding description
+    string
     '''
     if type == timeline_pb2.RenderpassAttachmentType.undefined:
-        assert((index is None) or (index == 0))
+        assert ((index is None) or (index == 0))
         return "U"
     elif type == timeline_pb2.RenderpassAttachmentType.color:
-        assert(index is not None)
+        assert (index is not None)
         return f"C{index}"
     elif type == timeline_pb2.RenderpassAttachmentType.depth:
-        assert((index is None) or (index == 0))
+        assert ((index is None) or (index == 0))
         return "D"
     elif type == timeline_pb2.RenderpassAttachmentType.stencil:
-        assert((index is None) or (index == 0))
+        assert ((index is None) or (index == 0))
         return "S"
     else:
         assert False
 
 
-def map_image_transfer_type(type : timeline_pb2.ImageTransferType) -> str:
+def map_image_transfer_type(type) -> str:
     '''
-    Map the PB encoded image transfer type to some corresponding description string
+    Map the PB encoded image transfer type to some corresponding description
+    string
     '''
     if type == timeline_pb2.ImageTransferType.unknown_image_transfer:
         return "Unknown"
@@ -170,9 +176,10 @@ def map_image_transfer_type(type : timeline_pb2.ImageTransferType) -> str:
         assert False
 
 
-def map_buffer_transfer_type(type : timeline_pb2.BufferTransferType) -> str:
+def map_buffer_transfer_type(type) -> str:
     '''
-    Map the PB encoded image transfer type to some corresponding description string
+    Map the PB encoded image transfer type to some corresponding description
+    string
     '''
     if type == timeline_pb2.BufferTransferType.unknown_buffer_transfer:
         return "Unknown"
@@ -184,26 +191,28 @@ def map_buffer_transfer_type(type : timeline_pb2.BufferTransferType) -> str:
         assert False
 
 
-def map_debug_label(labels : list[str]|None) -> list[str]:
+def map_debug_label(labels: list[str] | None) -> list[str]:
     '''
     Normalize the 'debug_label' field from the PB data
     '''
     if labels is None:
         return []
-    return [str(l) for l in labels] # need to convert it to a list from a RepeatedScalarContainer
+    # need to convert it to a list from a RepeatedScalarContainer
+    return [str(label) for label in labels]
 
 
 class GPUDeviceState:
     '''
     Holds per device state.
 
-    Typically there will only be one physical device, and one corresponding VkDevice,
-    but the protocol and API are both designed to support multiple devices so abstract
-    that here.
+    Typically there will only be one physical device, and one corresponding
+    VkDevice, but the protocol and API are both designed to support multiple
+    devices so abstract that here.
 
     Args:
         device_id: The device id associated with the state object
     '''
+
     def __init__(self, device_id: int):
         '''
         Initialize the state for a single device
@@ -217,6 +226,7 @@ def __init__(self, device_id: int):
             'submits': []
         }
 
+
 class GPUTimelineService:
     '''
     A service for handling network comms from the layer_gpu_timeline layer.
@@ -230,8 +240,9 @@ def __init__(self, file_path: str, verbose: bool = False):
             file_path: File to write on the filesystem
             verbose: Should this use verbose logging?
         '''
-        self.devices : dict[int,GPUDeviceState] = dict()
-        self.last_submit : SubmitMetadataType|None = None
+        self.devices: dict[int, GPUDeviceState] = dict()
+        self.last_submit: SubmitMetadataType | None = None
+        self.last_render_pass: RenderpassMetadataType | None = None
         # pylint: disable=consider-using-with
         self.file_handle = open(file_path, 'wb')
         self.verbose = verbose
@@ -239,12 +250,12 @@ def __init__(self, file_path: str, verbose: bool = False):
     def get_device(self, device: int) -> GPUDeviceState:
         '''
         Get or create a device object with the specified handle
-        '''
-        device = self.devices.get(device, None)
-        if device is None:
-            device = GPUDeviceState(device)
-            self.devices[device] = device
-        return device
+        '''
+        state = self.devices.get(device, None)
+        if state is None:
+            state = GPUDeviceState(device)
+            self.devices[device] = state
+        return state
 
     def get_service_name(self) -> str:
         '''
@@ -266,9 +277,10 @@ def handle_device(self, msg: Any) -> None:
         device_id = expect_int(msg.id)
         self.devices[device_id] = GPUDeviceState(device_id)
 
-        # This clears the last submit; expect a submit message before any new workloads
+        # This clears the last submit; expect a submit message before any new
+        # workloads
         self.last_submit = None
-
+        self.last_render_pass = None
 
         if self.verbose:
             major = expect_int(msg.major_version)
@@ -292,8 +304,10 @@ def handle_frame(self, msg: Any) -> None:
         device_id = expect_int(msg.device)
         device = self.get_device(device_id)
 
-        # This clears the last submit; expect a submit message before any new workloads
+        # This clears the last submit; expect a submit message before any new
+        # workloads
         self.last_submit = None
+        self.last_render_pass = None
 
         # Update end time of the current frame
         device.frame['presentTimestamp'] = expect_int(msg.timestamp)
@@ -339,9 +353,10 @@ def handle_submit(self, msg: Any) -> None:
         # Reset the local frame state for the next frame
         device.frame['submits'].append(submit)
 
-        # Track this new submit object; all subsequent workloads will attach to it
-        # up to the point of the next submit/frame/device
+        # Track this new submit object; all subsequent workloads will attach to
+        # it up to the point of the next submit/frame/device
         self.last_submit = submit
+        self.last_render_pass = None
 
     def handle_render_pass(self, msg: Any) -> None:
         '''
@@ -372,7 +387,8 @@ def handle_render_pass(self, msg: Any) -> None:
 
         for pb_attachment in msg.attachments:
             attachment: RenderpassAttachmentMetadataType = {
-                'binding': map_renderpass_binding(pb_attachment.type, pb_attachment.index),
+                'binding': map_renderpass_binding(pb_attachment.type,
+                                                  pb_attachment.index),
                 'load': not bool(pb_attachment.not_loaded),
                 'store': not bool(pb_attachment.not_stored),
                 'resolve': bool(pb_attachment.resolved),
@@ -381,6 +397,9 @@ def handle_render_pass(self, msg: Any) -> None:
 
         submit['workloads'].append(renderpass)
 
+        # Save it, for any continuations
+        self.last_render_pass = renderpass
+
     def handle_render_pass_continuation(self, msg: Any) -> None:
         '''
         Handle a render pass workload continuation.
@@ -392,22 +411,14 @@ def handle_render_pass_continuation(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
-        assert self.last_submit is not None
-        submit = self.last_submit
-
-        # Find the last workload
-        last_render_pass = None
-        if submit['workloads']:
-            last_workload = submit['workloads'][-1]
-            if last_workload['type'] == 'renderpass':
-                last_render_pass = last_workload
-
         # Validate that this is a continuation of the last renderpass
-        assert last_render_pass and (last_render_pass['tid'] == expect_int(msg.tag_id))
+        assert ((self.last_render_pass is not None)
+                and (self.last_render_pass['tid'] == expect_int(msg.tag_id)))
 
         # Don't accumulate if tag_id is flagged as ambiguous
-        if last_render_pass['drawCallCount'] >= 0:
-            last_render_pass['drawCallCount'] += expect_int(msg.draw_call_count)
+        if self.last_render_pass['drawCallCount'] >= 0:
+            dcc = expect_int(msg.draw_call_count)
+            self.last_render_pass['drawCallCount'] += dcc
 
     def handle_dispatch(self, msg: Any) -> None:
         '''
@@ -416,9 +427,13 @@ def handle_dispatch(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
+        # Get the active submit to append to
         assert self.last_submit is not None
         submit = self.last_submit
 
+        # Clear the last renderpass
+        self.last_render_pass = None
+
         # Convert the PB message into our data representation
         dispatch: DispatchMetadataType = {
             'type': 'dispatch',
@@ -438,9 +453,13 @@ def handle_trace_rays(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
+        # Get the active submit to append to
         assert self.last_submit is not None
         submit = self.last_submit
 
+        # Clear the last renderpass
+        self.last_render_pass = None
+
         # Convert the PB message into our data representation
         trace_rays: TraceRaysMetadataType = {
             'type': 'tracerays',
@@ -460,9 +479,13 @@ def handle_image_transfer(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
+        # Get the active submit to append to
         assert self.last_submit is not None
         submit = self.last_submit
 
+        # Clear the last renderpass
+        self.last_render_pass = None
+
         # Convert the PB message into our data representation
         image_transfer: ImageTransferMetadataType = {
             'type': 'imagetransfer',
@@ -481,9 +504,13 @@ def handle_buffer_transfer(self, msg: Any) -> None:
         Args:
             msg: The Python decode of a Timeline PB payload.
         '''
+        # Get the active submit to append to
         assert self.last_submit is not None
         submit = self.last_submit
 
+        # Clear the last renderpass
+        self.last_render_pass = None
+
         # Convert the PB message into our data representation
         buffer_transfer: BufferTransferMetadataType = {
             'type': 'buffertransfer',
@@ -506,15 +533,15 @@ def handle_message(self, message: Message) -> None:
         pb_record.ParseFromString(message.payload)
 
         # Assert there is at most one member message
-        assert((int(pb_record.HasField('metadata'))
-               + int(pb_record.HasField('frame'))
-               + int(pb_record.HasField('submit'))
-               + int(pb_record.HasField('renderpass'))
-               + int(pb_record.HasField('continue_renderpass'))
-               + int(pb_record.HasField('dispatch'))
-               + int(pb_record.HasField('trace_rays'))
-               + int(pb_record.HasField('image_transfer'))
-               + int(pb_record.HasField('buffer_transfer'))) <= 1)
+        assert ((int(pb_record.HasField('metadata'))
+                 + int(pb_record.HasField('frame'))
+                 + int(pb_record.HasField('submit'))
+                 + int(pb_record.HasField('renderpass'))
+                 + int(pb_record.HasField('continue_renderpass'))
+                 + int(pb_record.HasField('dispatch'))
+                 + int(pb_record.HasField('trace_rays'))
+                 + int(pb_record.HasField('image_transfer'))
+                 + int(pb_record.HasField('buffer_transfer'))) <= 1)
 
         # Process the message
         if pb_record.HasField('metadata'):
diff --git a/lglpy/ui/console.py b/lglpy/ui/console.py
index ceafa26..849cca9 100644
--- a/lglpy/ui/console.py
+++ b/lglpy/ui/console.py
@@ -67,7 +67,7 @@ def select_from_menu(title: str, options: list[str]) -> Optional[int]:
             print(f'Select a {title}')
             chars = int(math.log10(len(options))) + 1
             for i, entry in enumerate(options):
-                print(f'    {i+1:{chars}}) {entry}')
+                print(f'    {i + 1:{chars}}) {entry}')
 
             print(f'    {0:{chars}}) Exit menu')
 

From 656ff4c7d8d563f193e7f70d1db3fb613b94be0d Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Fri, 7 Feb 2025 10:27:22 +0000
Subject: [PATCH 10/11] Added Header frame to protocol to identify version.
 Renamed workload_metadata_builder to timeline_protobuf_encoder to better
 reflect what it does

---
 docs/updating_protobuf_files.md               | 15 ++++
 layer_gpu_timeline/source/CMakeLists.txt      |  2 +-
 layer_gpu_timeline/source/device.cpp          |  4 +-
 .../source/layer_device_functions_queue.cpp   | 12 +--
 layer_gpu_timeline/source/timeline_comms.cpp  |  4 +
 ...lder.cpp => timeline_protobuf_encoder.cpp} | 87 ++++++++++++-------
 ...lder.hpp => timeline_protobuf_encoder.hpp} | 27 ++++--
 layer_gpu_timeline/timeline.proto             | 37 ++++++--
 lglpy/comms/service_gpu_timeline.py           | 20 ++++-
 .../protos/layer_driver/timeline_pb2.py       | 62 ++++++-------
 10 files changed, 184 insertions(+), 86 deletions(-)
 create mode 100644 docs/updating_protobuf_files.md
 rename layer_gpu_timeline/source/{workload_metadata_builder.cpp => timeline_protobuf_encoder.cpp} (87%)
 rename layer_gpu_timeline/source/{workload_metadata_builder.hpp => timeline_protobuf_encoder.hpp} (83%)

diff --git a/docs/updating_protobuf_files.md b/docs/updating_protobuf_files.md
new file mode 100644
index 0000000..192a99a
--- /dev/null
+++ b/docs/updating_protobuf_files.md
@@ -0,0 +1,15 @@
+# Updating the generated protobuf (de)serialization code
+
+This project uses protobufs for (de)serialization of certain data:
+
+ * In the raw GPU timeline messages sent from `layer_gpu_timeline` to the host.
+ * In the Perfetto data collected from the device.
+
+Python decoders for those protocols are pre-generated and stored in the sources
+under `lglpy/timeline/protos`.
+
+To regenerate or update the timeline protocol files use:
+
+        protoc -I layer_gpu_timeline/                           \
+            --python_out=lglpy/timeline/protos/layer_driver/    \
+            layer_gpu_timeline/timeline.proto
diff --git a/layer_gpu_timeline/source/CMakeLists.txt b/layer_gpu_timeline/source/CMakeLists.txt
index 76f1bb8..1ca212b 100644
--- a/layer_gpu_timeline/source/CMakeLists.txt
+++ b/layer_gpu_timeline/source/CMakeLists.txt
@@ -54,7 +54,7 @@ add_library(
         layer_device_functions_trace_rays.cpp
         layer_device_functions_transfer.cpp
         timeline_comms.cpp
-        workload_metadata_builder.cpp)
+        timeline_protobuf_encoder.cpp)
 
 target_include_directories(
     ${VK_LAYER} PRIVATE
diff --git a/layer_gpu_timeline/source/device.cpp b/layer_gpu_timeline/source/device.cpp
index 2e63bf2..c676df8 100644
--- a/layer_gpu_timeline/source/device.cpp
+++ b/layer_gpu_timeline/source/device.cpp
@@ -28,7 +28,7 @@
 #include "comms/comms_module.hpp"
 #include "framework/utils.hpp"
 #include "instance.hpp"
-#include "workload_metadata_builder.hpp"
+#include "timeline_protobuf_encoder.hpp"
 
 #include <vector>
 
@@ -120,5 +120,5 @@ Device::Device(Instance* _instance,
 
     pid_t processPID = getpid();
 
-    WorkloadMetadataEmitterVisitor::emitMetadata(*this, processPID, major, minor, patch, std::move(name));
+    TimelineProtobufEncoder::emitMetadata(*this, processPID, major, minor, patch, std::move(name));
 }
diff --git a/layer_gpu_timeline/source/layer_device_functions_queue.cpp b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
index 95522af..65f4ca5 100644
--- a/layer_gpu_timeline/source/layer_device_functions_queue.cpp
+++ b/layer_gpu_timeline/source/layer_device_functions_queue.cpp
@@ -25,8 +25,8 @@
 
 #include "device.hpp"
 #include "framework/device_dispatch_table.hpp"
+#include "timeline_protobuf_encoder.hpp"
 #include "trackers/queue.hpp"
-#include "workload_metadata_builder.hpp"
 
 #include <mutex>
 
@@ -65,7 +65,7 @@ static uint64_t getClockMonotonicRaw()
  * @param queue             The queue being submitted to.
  * @param workloadVisitor   The data emit callback.
  */
-static void emitQueueMetadata(VkQueue queue, WorkloadMetadataEmitterVisitor& workloadVisitor)
+static void emitQueueMetadata(VkQueue queue, TimelineProtobufEncoder& workloadVisitor)
 {
     workloadVisitor.emitSubmit(queue, getClockMonotonicRaw());
 }
@@ -108,7 +108,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(VkQueue queue,
 
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially to the host tool
-    WorkloadMetadataEmitterVisitor::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw());
+    TimelineProtobufEncoder::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw());
 
     // Release the lock to call into the driver
     lock.unlock();
@@ -128,7 +128,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
 
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
-    WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
+    TimelineProtobufEncoder workloadVisitor {*layer};
 
     // Add queue-level metadata
     emitQueueMetadata(queue, workloadVisitor);
@@ -162,7 +162,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
 
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
-    WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
+    TimelineProtobufEncoder workloadVisitor {*layer};
 
     // Add queue-level metadata
     emitQueueMetadata(queue, workloadVisitor);
@@ -196,7 +196,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
 
     // This is run with the lock held to ensure that all queue submit
     // messages are sent sequentially and contiguously to the host tool
-    WorkloadMetadataEmitterVisitor workloadVisitor {*layer};
+    TimelineProtobufEncoder workloadVisitor {*layer};
 
     // Add queue-level metadata
     emitQueueMetadata(queue, workloadVisitor);
diff --git a/layer_gpu_timeline/source/timeline_comms.cpp b/layer_gpu_timeline/source/timeline_comms.cpp
index 309e84d..b016428 100644
--- a/layer_gpu_timeline/source/timeline_comms.cpp
+++ b/layer_gpu_timeline/source/timeline_comms.cpp
@@ -25,6 +25,8 @@
 
 #include "timeline_comms.hpp"
 
+#include "timeline_protobuf_encoder.hpp"
+
 #include <memory>
 
 /* See header for documentation. */
@@ -34,6 +36,8 @@ TimelineComms::TimelineComms(Comms::CommsInterface& _comms)
     if (comms.isConnected())
     {
         endpoint = comms.getEndpointID("GPUTimeline");
+
+        TimelineProtobufEncoder::emitHeaderMessage(*this);
     }
 }
 
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.cpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp
similarity index 87%
rename from layer_gpu_timeline/source/workload_metadata_builder.cpp
rename to layer_gpu_timeline/source/timeline_protobuf_encoder.cpp
index 3b35ae6..41d3d1d 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.cpp
+++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp
@@ -23,7 +23,7 @@
  * ----------------------------------------------------------------------------
  */
 
-#include "workload_metadata_builder.hpp"
+#include "timeline_protobuf_encoder.hpp"
 
 #include "comms/comms_interface.hpp"
 #include "trackers/layer_command_stream.hpp"
@@ -38,6 +38,20 @@
 #include <protopuf/message.h>
 #include <protopuf/skip.h>
 
+/* Possible version numbers that can be sent by the header */
+enum class HeaderVersionNo
+{
+    /* The only version currently defined */
+    version_1 = 0,
+};
+
+/* The connection header message sent to identify the version of the timeline protocol used. This should be sent exactly
+ * once per connection */
+using Header = pp::message<
+    /* The only mandatory field of this message. This identifies the version. Any subsequent fields are
+     * versioned based on this. All additional fields must be optional and additive (no removals) */
+    pp::enum_field<"version_no", 1, HeaderVersionNo>>;
+
 /* The metadata packet that is sent once for a given VkDevice, before any other
  * packet related to that Device and describes the VkDevice / VkPhysicalDevice
  * etc */
@@ -186,15 +200,16 @@ using BufferTransfer = pp::message<
     pp::string_field<"debug_label", 4, pp::repeated>>;
 
 /* The data payload message that wraps all other messages */
-using TimelineRecord = pp::message<pp::message_field<"metadata", 1, DeviceMetadata>,
-                                   pp::message_field<"frame", 2, Frame>,
-                                   pp::message_field<"submit", 3, Submit>,
-                                   pp::message_field<"renderpass", 4, BeginRenderpass>,
-                                   pp::message_field<"continue_renderpass", 5, ContinueRenderpass>,
-                                   pp::message_field<"dispatch", 6, Dispatch>,
-                                   pp::message_field<"trace_rays", 7, TraceRays>,
-                                   pp::message_field<"image_transfer", 8, ImageTransfer>,
-                                   pp::message_field<"buffer_transfer", 9, BufferTransfer>>;
+using TimelineRecord = pp::message<pp::message_field<"header", 1, Header>,
+                                   pp::message_field<"metadata", 2, DeviceMetadata>,
+                                   pp::message_field<"frame", 3, Frame>,
+                                   pp::message_field<"submit", 4, Submit>,
+                                   pp::message_field<"renderpass", 5, BeginRenderpass>,
+                                   pp::message_field<"continue_renderpass", 6, ContinueRenderpass>,
+                                   pp::message_field<"dispatch", 7, Dispatch>,
+                                   pp::message_field<"trace_rays", 8, TraceRays>,
+                                   pp::message_field<"image_transfer", 9, ImageTransfer>,
+                                   pp::message_field<"buffer_transfer", 10, BufferTransfer>>;
 
 namespace
 {
@@ -471,12 +486,22 @@ Comms::MessageData serialize(const Tracker::LCSBufferTransfer& bufferTransfer,
 }
 }
 
-void WorkloadMetadataEmitterVisitor::emitMetadata(Device& device,
-                                                  uint32_t pid,
-                                                  uint32_t major,
-                                                  uint32_t minor,
-                                                  uint32_t patch,
-                                                  std::string name)
+void TimelineProtobufEncoder::emitHeaderMessage(TimelineComms& comms)
+{
+    using namespace pp;
+
+    comms.txMessage(packBuffer("header"_f,
+                               Header {
+                                   HeaderVersionNo::version_1,
+                               }));
+}
+
+void TimelineProtobufEncoder::emitMetadata(Device& device,
+                                           uint32_t pid,
+                                           uint32_t major,
+                                           uint32_t minor,
+                                           uint32_t patch,
+                                           std::string name)
 {
     using namespace pp;
 
@@ -491,7 +516,7 @@ void WorkloadMetadataEmitterVisitor::emitMetadata(Device& device,
                                 }));
 }
 
-void WorkloadMetadataEmitterVisitor::emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp)
+void TimelineProtobufEncoder::emitFrame(Device& device, uint64_t frameNumber, uint64_t timestamp)
 {
     using namespace pp;
 
@@ -503,7 +528,7 @@ void WorkloadMetadataEmitterVisitor::emitFrame(Device& device, uint64_t frameNum
                                 }));
 }
 
-void WorkloadMetadataEmitterVisitor::emitSubmit(VkQueue queue, uint64_t timestamp)
+void TimelineProtobufEncoder::emitSubmit(VkQueue queue, uint64_t timestamp)
 {
     using namespace pp;
 
@@ -515,41 +540,41 @@ void WorkloadMetadataEmitterVisitor::emitSubmit(VkQueue queue, uint64_t timestam
                                 }));
 }
 
-void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPass& renderpass,
-                                                const std::vector<std::string>& debugStack)
+void TimelineProtobufEncoder::operator()(const Tracker::LCSRenderPass& renderpass,
+                                         const std::vector<std::string>& debugStack)
 {
     device.txMessage(serialize(renderpass, debugStack));
 }
 
-void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSRenderPassContinuation& continuation,
-                                                const std::vector<std::string>& debugStack,
-                                                uint64_t renderpassTagID)
+void TimelineProtobufEncoder::operator()(const Tracker::LCSRenderPassContinuation& continuation,
+                                         const std::vector<std::string>& debugStack,
+                                         uint64_t renderpassTagID)
 {
     UNUSED(debugStack);
 
     device.txMessage(serialize(continuation, renderpassTagID));
 }
 
-void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSDispatch& dispatch,
-                                                const std::vector<std::string>& debugStack)
+void TimelineProtobufEncoder::operator()(const Tracker::LCSDispatch& dispatch,
+                                         const std::vector<std::string>& debugStack)
 {
     device.txMessage(serialize(dispatch, debugStack));
 }
 
-void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSTraceRays& traceRays,
-                                                const std::vector<std::string>& debugStack)
+void TimelineProtobufEncoder::operator()(const Tracker::LCSTraceRays& traceRays,
+                                         const std::vector<std::string>& debugStack)
 {
     device.txMessage(serialize(traceRays, debugStack));
 }
 
-void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSImageTransfer& imageTransfer,
-                                                const std::vector<std::string>& debugStack)
+void TimelineProtobufEncoder::operator()(const Tracker::LCSImageTransfer& imageTransfer,
+                                         const std::vector<std::string>& debugStack)
 {
     device.txMessage(serialize(imageTransfer, debugStack));
 }
 
-void WorkloadMetadataEmitterVisitor::operator()(const Tracker::LCSBufferTransfer& bufferTransfer,
-                                                const std::vector<std::string>& debugStack)
+void TimelineProtobufEncoder::operator()(const Tracker::LCSBufferTransfer& bufferTransfer,
+                                         const std::vector<std::string>& debugStack)
 {
     device.txMessage(serialize(bufferTransfer, debugStack));
 }
diff --git a/layer_gpu_timeline/source/workload_metadata_builder.hpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp
similarity index 83%
rename from layer_gpu_timeline/source/workload_metadata_builder.hpp
rename to layer_gpu_timeline/source/timeline_protobuf_encoder.hpp
index 10c61b6..4e74060 100644
--- a/layer_gpu_timeline/source/workload_metadata_builder.hpp
+++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp
@@ -38,6 +38,7 @@
 #pragma once
 
 #include "device.hpp"
+#include "timeline_comms.hpp"
 #include "trackers/layer_command_stream.hpp"
 #include "trackers/queue.hpp"
 
@@ -45,9 +46,23 @@
 
 #include <vulkan/vulkan_core.h>
 
-class WorkloadMetadataEmitterVisitor : public Tracker::SubmitCommandWorkloadVisitor
+/**
+ * Encodes various protocol messages in protobuf format for transmission to the
+ * host
+ */
+class TimelineProtobufEncoder : public Tracker::SubmitCommandWorkloadVisitor
 {
 public:
+    /**
+     * @brief Called once when the GPU timeline comms connection is first established.
+     *
+     * Outputs a header message into the stream identifying the version of the layer driver protocol
+     * used.
+     *
+     * @param comms The comms interface used to transmit the message
+     */
+    static void emitHeaderMessage(TimelineComms& comms);
+
     /**
      * @brief Called once when the layer is first created to produce the "metadata" frame for that layer device
      *
@@ -80,16 +95,16 @@ class WorkloadMetadataEmitterVisitor : public Tracker::SubmitCommandWorkloadVisi
      * @param _device The device object that the payloads are produced for, and to which they are passed for
      * transmission
      */
-    WorkloadMetadataEmitterVisitor(Device& _device)
+    TimelineProtobufEncoder(Device& _device)
         : device(_device)
     {
     }
 
     // visitor should not be copied or moved from
-    WorkloadMetadataEmitterVisitor(const WorkloadMetadataEmitterVisitor&) = delete;
-    WorkloadMetadataEmitterVisitor(WorkloadMetadataEmitterVisitor&&) noexcept = delete;
-    WorkloadMetadataEmitterVisitor& operator=(const WorkloadMetadataEmitterVisitor&) = delete;
-    WorkloadMetadataEmitterVisitor& operator=(WorkloadMetadataEmitterVisitor&&) noexcept = delete;
+    TimelineProtobufEncoder(const TimelineProtobufEncoder&) = delete;
+    TimelineProtobufEncoder(TimelineProtobufEncoder&&) noexcept = delete;
+    TimelineProtobufEncoder& operator=(const TimelineProtobufEncoder&) = delete;
+    TimelineProtobufEncoder& operator=(TimelineProtobufEncoder&&) noexcept = delete;
 
     // methods from the visitor interface
     void operator()(const Tracker::LCSRenderPass& renderpass, const std::vector<std::string>& debugStack) override;
diff --git a/layer_gpu_timeline/timeline.proto b/layer_gpu_timeline/timeline.proto
index e3f56d1..f2a596b 100644
--- a/layer_gpu_timeline/timeline.proto
+++ b/layer_gpu_timeline/timeline.proto
@@ -48,12 +48,30 @@
  *         +-> Renderpass
  */
 
+
+// !!NB!!: This file is not used to generate the C++ bindings. Instead, the
+// `protopuf` library is used. Therefore any changes must be manually reflected
+// in the C++ code. (See TimelineProtobufEncoder)
+
 syntax = "proto3";
 
 package gpulayers.timeline;
 
 option optimize_for = LITE_RUNTIME;
 
+/* Possible version numbers that can be sent by the header */
+enum HeaderVersionNo {
+    /* The only version currently defined */
+    version_1 = 0;
+}
+
+/* The connection header message sent to identify the version of the timeline protocol used. This should be sent exactly once per connection */
+message Header {
+    /* The only mandatory field of this message. This identifies the version. Any subsequent fields are versioned based on this.
+     * All additional fields must be optional and additive (no removals) */
+    HeaderVersionNo version_no = 1;
+}
+
 /* The metadata packet that is sent once for a given VkDevice, before any other packet related to that Device and describes the VkDevice / VkPhysicalDevice etc */
 message DeviceMetadata {
     /* The VkDevice handle */
@@ -206,13 +224,14 @@ message BufferTransfer {
 
 /* The data payload message that wraps all other messages */
 message TimelineRecord {
-    DeviceMetadata metadata = 1;
-    Frame frame = 2;
-    Submit submit = 3;
-    BeginRenderpass renderpass = 4;
-    ContinueRenderpass continue_renderpass = 5;
-    Dispatch dispatch = 6;
-    TraceRays trace_rays = 7;
-    ImageTransfer image_transfer = 8;
-    BufferTransfer buffer_transfer = 9;
+    Header header = 1;
+    DeviceMetadata metadata = 2;
+    Frame frame = 3;
+    Submit submit = 4;
+    BeginRenderpass renderpass = 5;
+    ContinueRenderpass continue_renderpass = 6;
+    Dispatch dispatch = 7;
+    TraceRays trace_rays = 8;
+    ImageTransfer image_transfer = 9;
+    BufferTransfer buffer_transfer = 10;
 }
diff --git a/lglpy/comms/service_gpu_timeline.py b/lglpy/comms/service_gpu_timeline.py
index 17170a8..41bbeb1 100644
--- a/lglpy/comms/service_gpu_timeline.py
+++ b/lglpy/comms/service_gpu_timeline.py
@@ -246,6 +246,7 @@ def __init__(self, file_path: str, verbose: bool = False):
         # pylint: disable=consider-using-with
         self.file_handle = open(file_path, 'wb')
         self.verbose = verbose
+        self.seen_header = False
 
     def get_device(self, device: int) -> GPUDeviceState:
         '''
@@ -266,6 +267,18 @@ def get_service_name(self) -> str:
         '''
         return 'GPUTimeline'
 
+    def handle_header(self, msg: Any) -> None:
+        '''
+        Handle the header packet.
+
+        Args:
+            msg: The Python decode of a Timeline PB payload.
+        '''
+        assert msg.version_no == timeline_pb2.HeaderVersionNo.version_1
+        assert not self.seen_header
+
+        self.seen_header = True
+
     def handle_device(self, msg: Any) -> None:
         '''
         Handle a device config packet.
@@ -533,7 +546,8 @@ def handle_message(self, message: Message) -> None:
         pb_record.ParseFromString(message.payload)
 
         # Assert there is at most one member message
-        assert ((int(pb_record.HasField('metadata'))
+        assert ((int(pb_record.HasField('header'))
+                 + int(pb_record.HasField('metadata'))
                  + int(pb_record.HasField('frame'))
                  + int(pb_record.HasField('submit'))
                  + int(pb_record.HasField('renderpass'))
@@ -544,7 +558,9 @@ def handle_message(self, message: Message) -> None:
                  + int(pb_record.HasField('buffer_transfer'))) <= 1)
 
         # Process the message
-        if pb_record.HasField('metadata'):
+        if pb_record.HasField('header'):
+            self.handle_header(pb_record.header)
+        elif pb_record.HasField('metadata'):
             self.handle_device(pb_record.metadata)
         elif pb_record.HasField('frame'):
             self.handle_frame(pb_record.frame)
diff --git a/lglpy/timeline/protos/layer_driver/timeline_pb2.py b/lglpy/timeline/protos/layer_driver/timeline_pb2.py
index ca71621..7ade6a9 100644
--- a/lglpy/timeline/protos/layer_driver/timeline_pb2.py
+++ b/lglpy/timeline/protos/layer_driver/timeline_pb2.py
@@ -24,7 +24,7 @@
 
 
 
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0etimeline.proto\x12\x12gpulayers.timeline\"\x83\x01\n\x0e\x44\x65viceMetadata\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nprocess_id\x18\x02 \x01(\r\x12\x15\n\rmajor_version\x18\x03 \x01(\r\x12\x15\n\rminor_version\x18\x04 \x01(\r\x12\x15\n\rpatch_version\x18\x05 \x01(\r\x12\x0c\n\x04name\x18\x06 \x01(\t\"6\n\x05\x46rame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\x11\n\ttimestamp\x18\x03 \x01(\x04\":\n\x06Submit\x12\x11\n\ttimestamp\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\r\n\x05queue\x18\x03 \x01(\x04\"\x9b\x01\n\x14RenderpassAttachment\x12:\n\x04type\x18\x01 \x01(\x0e\x32,.gpulayers.timeline.RenderpassAttachmentType\x12\r\n\x05index\x18\x02 \x01(\r\x12\x12\n\nnot_loaded\x18\x03 \x01(\x08\x12\x12\n\nnot_stored\x18\x04 \x01(\x08\x12\x10\n\x08resolved\x18\x05 \x01(\x08\"\xc4\x01\n\x0f\x42\x65ginRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x17\n\x0f\x64raw_call_count\x18\x04 \x01(\r\x12\x15\n\rsubpass_count\x18\x05 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x06 \x03(\t\x12=\n\x0b\x61ttachments\x18\x07 \x03(\x0b\x32(.gpulayers.timeline.RenderpassAttachment\"R\n\x12\x43ontinueRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x17\n\x0f\x64raw_call_count\x18\x02 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x03 \x03(\t\"e\n\x08\x44ispatch\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x10\n\x08x_groups\x18\x02 \x01(\x03\x12\x10\n\x08y_groups\x18\x03 \x01(\x03\x12\x10\n\x08z_groups\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"c\n\tTraceRays\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x0f\n\x07x_items\x18\x02 \x01(\x03\x12\x0f\n\x07y_items\x18\x03 \x01(\x03\x12\x0f\n\x07z_items\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"\x87\x01\n\rImageTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x13\n\x0bpixel_count\x18\x02 
\x01(\x03\x12<\n\rtransfer_type\x18\x03 \x01(\x0e\x32%.gpulayers.timeline.ImageTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\x88\x01\n\x0e\x42ufferTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x12\n\nbyte_count\x18\x02 \x01(\x03\x12=\n\rtransfer_type\x18\x03 \x01(\x0e\x32&.gpulayers.timeline.BufferTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xf5\x03\n\x0eTimelineRecord\x12\x34\n\x08metadata\x18\x01 \x01(\x0b\x32\".gpulayers.timeline.DeviceMetadata\x12(\n\x05\x66rame\x18\x02 \x01(\x0b\x32\x19.gpulayers.timeline.Frame\x12*\n\x06submit\x18\x03 \x01(\x0b\x32\x1a.gpulayers.timeline.Submit\x12\x37\n\nrenderpass\x18\x04 \x01(\x0b\x32#.gpulayers.timeline.BeginRenderpass\x12\x43\n\x13\x63ontinue_renderpass\x18\x05 \x01(\x0b\x32&.gpulayers.timeline.ContinueRenderpass\x12.\n\x08\x64ispatch\x18\x06 \x01(\x0b\x32\x1c.gpulayers.timeline.Dispatch\x12\x31\n\ntrace_rays\x18\x07 \x01(\x0b\x32\x1d.gpulayers.timeline.TraceRays\x12\x39\n\x0eimage_transfer\x18\x08 \x01(\x0b\x32!.gpulayers.timeline.ImageTransfer\x12;\n\x0f\x62uffer_transfer\x18\t \x01(\x0b\x32\".gpulayers.timeline.BufferTransfer*L\n\x18RenderpassAttachmentType\x12\r\n\tundefined\x10\x00\x12\t\n\x05\x63olor\x10\x01\x12\t\n\x05\x64\x65pth\x10\x02\x12\x0b\n\x07stencil\x10\x03*\x84\x01\n\x11ImageTransferType\x12\x1a\n\x16unknown_image_transfer\x10\x00\x12\x0f\n\x0b\x63lear_image\x10\x01\x12\x0e\n\ncopy_image\x10\x02\x12\x18\n\x14\x63opy_buffer_to_image\x10\x03\x12\x18\n\x14\x63opy_image_to_buffer\x10\x04*S\n\x12\x42ufferTransferType\x12\x1b\n\x17unknown_buffer_transfer\x10\x00\x12\x0f\n\x0b\x66ill_buffer\x10\x01\x12\x0f\n\x0b\x63opy_buffer\x10\x02\x42\x02H\x03\x62\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0etimeline.proto\x12\x12gpulayers.timeline\"A\n\x06Header\x12\x37\n\nversion_no\x18\x01 \x01(\x0e\x32#.gpulayers.timeline.HeaderVersionNo\"\x83\x01\n\x0e\x44\x65viceMetadata\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x12\n\nprocess_id\x18\x02 \x01(\r\x12\x15\n\rmajor_version\x18\x03 \x01(\r\x12\x15\n\rminor_version\x18\x04 \x01(\r\x12\x15\n\rpatch_version\x18\x05 \x01(\r\x12\x0c\n\x04name\x18\x06 \x01(\t\"6\n\x05\x46rame\x12\n\n\x02id\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\x11\n\ttimestamp\x18\x03 \x01(\x04\":\n\x06Submit\x12\x11\n\ttimestamp\x18\x01 \x01(\x04\x12\x0e\n\x06\x64\x65vice\x18\x02 \x01(\x04\x12\r\n\x05queue\x18\x03 \x01(\x04\"\x9b\x01\n\x14RenderpassAttachment\x12:\n\x04type\x18\x01 \x01(\x0e\x32,.gpulayers.timeline.RenderpassAttachmentType\x12\r\n\x05index\x18\x02 \x01(\r\x12\x12\n\nnot_loaded\x18\x03 \x01(\x08\x12\x12\n\nnot_stored\x18\x04 \x01(\x08\x12\x10\n\x08resolved\x18\x05 \x01(\x08\"\xc4\x01\n\x0f\x42\x65ginRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\r\n\x05width\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\x17\n\x0f\x64raw_call_count\x18\x04 \x01(\r\x12\x15\n\rsubpass_count\x18\x05 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x06 \x03(\t\x12=\n\x0b\x61ttachments\x18\x07 \x03(\x0b\x32(.gpulayers.timeline.RenderpassAttachment\"R\n\x12\x43ontinueRenderpass\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x17\n\x0f\x64raw_call_count\x18\x02 \x01(\r\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x03 \x03(\t\"e\n\x08\x44ispatch\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x10\n\x08x_groups\x18\x02 \x01(\x03\x12\x10\n\x08y_groups\x18\x03 \x01(\x03\x12\x10\n\x08z_groups\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 \x03(\t\"c\n\tTraceRays\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x0f\n\x07x_items\x18\x02 \x01(\x03\x12\x0f\n\x07y_items\x18\x03 \x01(\x03\x12\x0f\n\x07z_items\x18\x04 \x01(\x03\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x05 
\x03(\t\"\x87\x01\n\rImageTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x13\n\x0bpixel_count\x18\x02 \x01(\x03\x12<\n\rtransfer_type\x18\x03 \x01(\x0e\x32%.gpulayers.timeline.ImageTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\x88\x01\n\x0e\x42ufferTransfer\x12\x0e\n\x06tag_id\x18\x01 \x01(\x04\x12\x12\n\nbyte_count\x18\x02 \x01(\x03\x12=\n\rtransfer_type\x18\x03 \x01(\x0e\x32&.gpulayers.timeline.BufferTransferType\x12\x13\n\x0b\x64\x65\x62ug_label\x18\x04 \x03(\t\"\xa1\x04\n\x0eTimelineRecord\x12*\n\x06header\x18\x01 \x01(\x0b\x32\x1a.gpulayers.timeline.Header\x12\x34\n\x08metadata\x18\x02 \x01(\x0b\x32\".gpulayers.timeline.DeviceMetadata\x12(\n\x05\x66rame\x18\x03 \x01(\x0b\x32\x19.gpulayers.timeline.Frame\x12*\n\x06submit\x18\x04 \x01(\x0b\x32\x1a.gpulayers.timeline.Submit\x12\x37\n\nrenderpass\x18\x05 \x01(\x0b\x32#.gpulayers.timeline.BeginRenderpass\x12\x43\n\x13\x63ontinue_renderpass\x18\x06 \x01(\x0b\x32&.gpulayers.timeline.ContinueRenderpass\x12.\n\x08\x64ispatch\x18\x07 \x01(\x0b\x32\x1c.gpulayers.timeline.Dispatch\x12\x31\n\ntrace_rays\x18\x08 \x01(\x0b\x32\x1d.gpulayers.timeline.TraceRays\x12\x39\n\x0eimage_transfer\x18\t \x01(\x0b\x32!.gpulayers.timeline.ImageTransfer\x12;\n\x0f\x62uffer_transfer\x18\n \x01(\x0b\x32\".gpulayers.timeline.BufferTransfer* \n\x0fHeaderVersionNo\x12\r\n\tversion_1\x10\x00*L\n\x18RenderpassAttachmentType\x12\r\n\tundefined\x10\x00\x12\t\n\x05\x63olor\x10\x01\x12\t\n\x05\x64\x65pth\x10\x02\x12\x0b\n\x07stencil\x10\x03*\x84\x01\n\x11ImageTransferType\x12\x1a\n\x16unknown_image_transfer\x10\x00\x12\x0f\n\x0b\x63lear_image\x10\x01\x12\x0e\n\ncopy_image\x10\x02\x12\x18\n\x14\x63opy_buffer_to_image\x10\x03\x12\x18\n\x14\x63opy_image_to_buffer\x10\x04*S\n\x12\x42ufferTransferType\x12\x1b\n\x17unknown_buffer_transfer\x10\x00\x12\x0f\n\x0b\x66ill_buffer\x10\x01\x12\x0f\n\x0b\x63opy_buffer\x10\x02\x42\x02H\x03\x62\x06proto3')
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -32,32 +32,36 @@
 if not _descriptor._USE_C_DESCRIPTORS:
   _globals['DESCRIPTOR']._loaded_options = None
   _globals['DESCRIPTOR']._serialized_options = b'H\003'
-  _globals['_RENDERPASSATTACHMENTTYPE']._serialized_start=1714
-  _globals['_RENDERPASSATTACHMENTTYPE']._serialized_end=1790
-  _globals['_IMAGETRANSFERTYPE']._serialized_start=1793
-  _globals['_IMAGETRANSFERTYPE']._serialized_end=1925
-  _globals['_BUFFERTRANSFERTYPE']._serialized_start=1927
-  _globals['_BUFFERTRANSFERTYPE']._serialized_end=2010
-  _globals['_DEVICEMETADATA']._serialized_start=39
-  _globals['_DEVICEMETADATA']._serialized_end=170
-  _globals['_FRAME']._serialized_start=172
-  _globals['_FRAME']._serialized_end=226
-  _globals['_SUBMIT']._serialized_start=228
-  _globals['_SUBMIT']._serialized_end=286
-  _globals['_RENDERPASSATTACHMENT']._serialized_start=289
-  _globals['_RENDERPASSATTACHMENT']._serialized_end=444
-  _globals['_BEGINRENDERPASS']._serialized_start=447
-  _globals['_BEGINRENDERPASS']._serialized_end=643
-  _globals['_CONTINUERENDERPASS']._serialized_start=645
-  _globals['_CONTINUERENDERPASS']._serialized_end=727
-  _globals['_DISPATCH']._serialized_start=729
-  _globals['_DISPATCH']._serialized_end=830
-  _globals['_TRACERAYS']._serialized_start=832
-  _globals['_TRACERAYS']._serialized_end=931
-  _globals['_IMAGETRANSFER']._serialized_start=934
-  _globals['_IMAGETRANSFER']._serialized_end=1069
-  _globals['_BUFFERTRANSFER']._serialized_start=1072
-  _globals['_BUFFERTRANSFER']._serialized_end=1208
-  _globals['_TIMELINERECORD']._serialized_start=1211
-  _globals['_TIMELINERECORD']._serialized_end=1712
+  _globals['_HEADERVERSIONNO']._serialized_start=1825
+  _globals['_HEADERVERSIONNO']._serialized_end=1857
+  _globals['_RENDERPASSATTACHMENTTYPE']._serialized_start=1859
+  _globals['_RENDERPASSATTACHMENTTYPE']._serialized_end=1935
+  _globals['_IMAGETRANSFERTYPE']._serialized_start=1938
+  _globals['_IMAGETRANSFERTYPE']._serialized_end=2070
+  _globals['_BUFFERTRANSFERTYPE']._serialized_start=2072
+  _globals['_BUFFERTRANSFERTYPE']._serialized_end=2155
+  _globals['_HEADER']._serialized_start=38
+  _globals['_HEADER']._serialized_end=103
+  _globals['_DEVICEMETADATA']._serialized_start=106
+  _globals['_DEVICEMETADATA']._serialized_end=237
+  _globals['_FRAME']._serialized_start=239
+  _globals['_FRAME']._serialized_end=293
+  _globals['_SUBMIT']._serialized_start=295
+  _globals['_SUBMIT']._serialized_end=353
+  _globals['_RENDERPASSATTACHMENT']._serialized_start=356
+  _globals['_RENDERPASSATTACHMENT']._serialized_end=511
+  _globals['_BEGINRENDERPASS']._serialized_start=514
+  _globals['_BEGINRENDERPASS']._serialized_end=710
+  _globals['_CONTINUERENDERPASS']._serialized_start=712
+  _globals['_CONTINUERENDERPASS']._serialized_end=794
+  _globals['_DISPATCH']._serialized_start=796
+  _globals['_DISPATCH']._serialized_end=897
+  _globals['_TRACERAYS']._serialized_start=899
+  _globals['_TRACERAYS']._serialized_end=998
+  _globals['_IMAGETRANSFER']._serialized_start=1001
+  _globals['_IMAGETRANSFER']._serialized_end=1136
+  _globals['_BUFFERTRANSFER']._serialized_start=1139
+  _globals['_BUFFERTRANSFER']._serialized_end=1275
+  _globals['_TIMELINERECORD']._serialized_start=1278
+  _globals['_TIMELINERECORD']._serialized_end=1823
 # @@protoc_insertion_point(module_scope)

From 94efee16cf6715849789df0c62f732ded30d2dec Mon Sep 17 00:00:00 2001
From: Ben Gainey <ben.gainey@arm.com>
Date: Fri, 7 Feb 2025 13:41:39 +0000
Subject: [PATCH 11/11] Cleanup review feedback

---
 docs/updating_protobuf_files.md               |  4 ++
 layer_gpu_timeline/source/device.hpp          |  2 +-
 .../source/timeline_protobuf_encoder.cpp      |  2 +-
 .../source/timeline_protobuf_encoder.hpp      |  2 +-
 layer_gpu_timeline/timeline.proto             | 68 +++++++++++++++----
 5 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/docs/updating_protobuf_files.md b/docs/updating_protobuf_files.md
index 192a99a..be50089 100644
--- a/docs/updating_protobuf_files.md
+++ b/docs/updating_protobuf_files.md
@@ -13,3 +13,7 @@ To regenerate or update the timeline protocol files use:
         protoc -I layer_gpu_timeline/                           \
             --python_out=lglpy/timeline/protos/layer_driver/    \
             layer_gpu_timeline/timeline.proto
+
+- - -
+
+_Copyright © 2025, Arm Limited and contributors._
diff --git a/layer_gpu_timeline/source/device.hpp b/layer_gpu_timeline/source/device.hpp
index e82a5ab..31196c3 100644
--- a/layer_gpu_timeline/source/device.hpp
+++ b/layer_gpu_timeline/source/device.hpp
@@ -131,7 +131,7 @@ class Device
     ~Device() = default;
 
     /**
-     * @brief Callback for sending some message for the device
+     * @brief Callback for sending some message for the device.
      *
      * @param message   The message to send.
      */
diff --git a/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp
index 41d3d1d..c69c58e 100644
--- a/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp
+++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.cpp
@@ -1,7 +1,7 @@
 /*
  * SPDX-License-Identifier: MIT
  * ----------------------------------------------------------------------------
- * Copyright (c) 2024-2025 Arm Limited
+ * Copyright (c) 2025 Arm Limited
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
diff --git a/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp
index 4e74060..721007f 100644
--- a/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp
+++ b/layer_gpu_timeline/source/timeline_protobuf_encoder.hpp
@@ -1,7 +1,7 @@
 /*
  * SPDX-License-Identifier: MIT
  * ----------------------------------------------------------------------------
- * Copyright (c) 2024-2025 Arm Limited
+ * Copyright (c) 2025 Arm Limited
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
diff --git a/layer_gpu_timeline/timeline.proto b/layer_gpu_timeline/timeline.proto
index f2a596b..c9deaca 100644
--- a/layer_gpu_timeline/timeline.proto
+++ b/layer_gpu_timeline/timeline.proto
@@ -1,13 +1,52 @@
-/* Copyright (C) 2025 by Arm Limited. All rights reserved. */
-
 /*
+ * SPDX-License-Identifier: MIT
+ * ----------------------------------------------------------------------------
+ * Copyright (c) 2025 Arm Limited
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ * ----------------------------------------------------------------------------
+ *
+ * Layer protocol notes:
+ *
  * The layer driver will emit an ordered sequence of `TimelineRecord` messages.
- * The messages are not nested in the protocol, but the ordering allows a nested structure to be recovered.
- * `Frame` messages form the outer level structure; each `Frame` marks the boundary of one sequence of events and another; all subsequent messages that are not `Frame` messages are children of that `Frame`.
- * `Submit` messages form the next level structure within a `Frame`; all subsequent messages that are not `Submit` or `Frame` messages are children of that `Submit`.
- * All other messages are children of the last received `Submit`.
- * `BeginRenderpass` and `ContinueRenderpass` are a special case; where a `ContinueRenderpass` is seen it should be merged into the last received `BeginRenderpass` within that `Submit` that has the same `tag_id` value.
- * It is guaranteed that you will not receive a `ContinueRenderpass` unless the proceeding `BeginRenderpass` was received (though it is valid to have a sequence of `ContinueRenderpass` for the same `BeginRenderpass`).
+ * The messages are not nested in the protocol, but the ordering allows a nested
+ * structure to be recovered.
+ *
+ *  - `Header` messages are sent once as the first message and identify the
+ *    version of the layer driver protocol used.
+ *  - `DeviceMetadata` messages are sent once per VkDevice and provide
+ *    metadata such as driver version.
+ *  - `Frame` messages form the outer level structure; each `Frame` marks the
+ *    boundary between one sequence of events and the next; all subsequent
+ *    messages that are not `Frame` messages are children of that `Frame`.
+ *  - `Submit` messages form the next level structure within a `Frame`; all
+ *    subsequent messages that are not `Submit` or `Frame` messages are children
+ *    of that `Submit`.
+ *  - All other messages are children of the last received `Submit`.
+ *
+ * `BeginRenderpass` and `ContinueRenderpass` are a special case; where a
+ * `ContinueRenderpass` is seen it should be merged into the last received
+ * `BeginRenderpass` within that `Submit` that has the same `tag_id` value.
+ *
+ * It is guaranteed that you will not receive a `ContinueRenderpass` unless the
+ * preceding `BeginRenderpass` was received (though it is valid to have a
+ * sequence of `ContinueRenderpass` for the same `BeginRenderpass`).
  *
  * Therefore the sequence of messages like:
  * Frame
@@ -46,13 +85,14 @@
  *     +-> Submit
  *         +-> Renderpass (BeginRenderpass+ContinueRenderpass)
  *         +-> Renderpass
+ *
+ * !!NB!!
+ * ------
+ * This file is not used to generate the C++ bindings. Instead the `protopuf`
+ * library is used. Therefore any changes must be manually reflected in the C++
+ * code. (See TimelineProtobufEncoder)
  */
 
-
-// !!NB!!: This file is not used to generate the C++ bindings. Instead `protopuf`
-// library is used. Therefore any changes must be manually reflected in the C++ 
-// code. (See TimelineProtobufEncoder)
-
 syntax = "proto3";
 
 package gpulayers.timeline;
@@ -67,7 +107,7 @@ enum HeaderVersionNo {
 
 /* The connection header message sent to identify the version of the timeline protocol used. This should be sent exactly once per connection */
 message Header {
-    /* The only mandatory field of this message. This identifys the version. Any subsequent fields are versioned based on this. 
+    /* The only mandatory field of this message. This identifies the version. Any subsequent fields are versioned based on this.
      * All additional fields must be optional and additive (no removals) */
     HeaderVersionNo version_no = 1;
 }