Skip to content

Commit ebf444e

Browse files
make the queue reference all resources used during previous submits and use the MultiTimelineEventHandlerST to let go of them after they're no longer pending
1 parent 8ce6e0c commit ebf444e

File tree

7 files changed

+156
-45
lines changed

7 files changed

+156
-45
lines changed

include/nbl/video/CThreadSafeQueueAdapter.h

Lines changed: 45 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,34 +12,14 @@ namespace nbl::video
1212
*/
1313
class CThreadSafeQueueAdapter final : public IQueue
1414
{
15+
friend class ILogicalDevice; // to access the destructor
1516
friend class ISwapchain;
16-
protected:
17-
std::unique_ptr<IQueue> originalQueue = nullptr;
18-
mutable std::mutex m;
19-
20-
inline RESULT submit_impl(const std::span<const SSubmitInfo> _submits) override
21-
{
22-
IQueue* msvcIsDumb = originalQueue.get();
23-
return msvcIsDumb->submit_impl(_submits);
24-
}
2517

2618
public:
27-
inline CThreadSafeQueueAdapter(ILogicalDevice* originDevice, std::unique_ptr<IQueue>&& original)
28-
: IQueue(originDevice, original->getFamilyIndex(),original->getFlags(),original->getPriority()), originalQueue(std::move(original)) {}
19+
inline CThreadSafeQueueAdapter(ILogicalDevice* originDevice, IQueue* const original)
20+
: IQueue(originDevice,original->getFamilyIndex(),original->getFlags(),original->getPriority()), originalQueue(original) {}
2921
inline CThreadSafeQueueAdapter() : IQueue(nullptr, 0, CREATE_FLAGS::PROTECTED_BIT, 0.f) {}
3022

31-
inline RESULT waitIdle() const override
32-
{
33-
std::lock_guard g(m);
34-
return originalQueue->waitIdle();
35-
}
36-
37-
inline RESULT submit(const std::span<const SSubmitInfo> _submits) override
38-
{
39-
std::lock_guard g(m);
40-
return originalQueue->submit(_submits);
41-
}
42-
4323
inline bool startCapture() override
4424
{
4525
std::lock_guard g(m);
@@ -67,15 +47,56 @@ class CThreadSafeQueueAdapter final : public IQueue
6747
return originalQueue->endDebugMarker();
6848
}
6949

50+
inline RESULT submit(const std::span<const SSubmitInfo> _submits) override
51+
{
52+
std::lock_guard g(m);
53+
return originalQueue->submit(_submits);
54+
}
55+
56+
inline RESULT waitIdle() override
57+
{
58+
std::lock_guard g(m);
59+
return originalQueue->waitIdle();
60+
}
61+
62+
inline uint32_t cullResources() override
63+
{
64+
std::lock_guard g(m);
65+
return originalQueue->cullResources();
66+
}
67+
7068
inline IQueue* getUnderlyingQueue() const
7169
{
72-
return originalQueue.get();
70+
return originalQueue;
7371
}
7472

7573
inline const void* getNativeHandle() const
7674
{
7775
return originalQueue->getNativeHandle();
7876
}
77+
78+
protected:
79+
inline ~CThreadSafeQueueAdapter()
80+
{
81+
delete originalQueue;
82+
}
83+
84+
// These shall never be called, they're here just to stop the class being pure virtual
85+
inline RESULT submit_impl(const std::span<const SSubmitInfo> _submits) override
86+
{
87+
assert(false);
88+
return originalQueue->submit_impl(_submits);
89+
}
90+
inline RESULT waitIdle_impl() const override
91+
{
92+
assert(false);
93+
return originalQueue->waitIdle_impl();
94+
}
95+
96+
97+
// used to use unique_ptr here, but it needed `~IQueue` to be public, which requires a custom deleter, etc.
98+
IQueue* const originalQueue = nullptr;
99+
mutable std::mutex m;
79100
};
80101

81102
}

include/nbl/video/IQueue.h

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ namespace nbl::video
88

99
class IGPUCommandBuffer;
1010

11+
template<typename Functor,bool RefcountTheDevice>
12+
class MultiTimelineEventHandlerST;
13+
1114
class IQueue : public core::Interface, public core::Unmovable
1215
{
1316
public:
@@ -70,7 +73,7 @@ class IQueue : public core::Interface, public core::Unmovable
7073
DEVICE_LOST,
7174
OTHER_ERROR
7275
};
73-
//
76+
// We actually make our Queue Abstraction keep track of Commandbuffers and Semaphores used in a submit until its done
7477
struct SSubmitInfo
7578
{
7679
struct SSemaphoreInfo
@@ -94,34 +97,78 @@ class IQueue : public core::Interface, public core::Unmovable
9497

9598
inline bool valid() const
9699
{
97-
// any two being empty is wrong
98-
if (commandBuffers.empty() && signalSemaphores.empty()) // wait and do nothing
99-
return false;
100-
if (waitSemaphores.empty() && signalSemaphores.empty()) // work without sync
101-
return false;
102-
if (waitSemaphores.empty() && commandBuffers.empty()) // signal without doing work first
100+
// need at least one semaphore to keep the submit resources alive
101+
if (signalSemaphores.empty())
103102
return false;
104103
return true;
105104
}
106105
};
107-
virtual RESULT submit(const std::span<const SSubmitInfo> _submits);
108-
//
109-
virtual RESULT waitIdle() const = 0;
106+
NBL_API2 virtual RESULT submit(const std::span<const SSubmitInfo> _submits);
107+
// Wait idle also makes sure all past submits drop their resources
108+
NBL_API2 virtual RESULT waitIdle();
109+
// You can call this to force an early check on whether past submits have completed and hasten when the refcount gets dropped.
110+
// Normally its the next call to `submit` that polls the event timeline for completions.
111+
NBL_API2 virtual uint32_t cullResources();
110112

111113
// we cannot derive from IBackendObject because we can't derive from IReferenceCounted
112114
inline const ILogicalDevice* getOriginDevice() const {return m_originDevice;}
113115
inline bool wasCreatedBy(const ILogicalDevice* device) const {return device==m_originDevice;}
114116
// Vulkan: const VkQueue*
115117
virtual const void* getNativeHandle() const = 0;
118+
119+
// only public because MultiTimelineEventHandlerST needs to know about it
120+
class DeferredSubmitResourceDrop final
121+
{
122+
using smart_ptr = core::smart_refctd_ptr<IBackendObject>;
123+
core::smart_refctd_dynamic_array<smart_ptr> m_resources;
124+
125+
public:
126+
inline DeferredSubmitResourceDrop(const SSubmitInfo& info)
127+
{
128+
// We could actually not hold any signal semaphore because you're expected to use the signal result somewhere else.
129+
// However it's possible to you might only wait on one from the set and then drop the rest (UB)
130+
m_resources = core::make_refctd_dynamic_array<decltype(m_resources)>(info.signalSemaphores.size()-1+info.commandBuffers.size()+info.waitSemaphores.size());
131+
auto outRes = m_resources->data();
132+
for (const auto& sema : info.waitSemaphores)
133+
*(outRes++) = smart_ptr(sema.semaphore);
134+
for (const auto& cb : info.commandBuffers)
135+
*(outRes++) = smart_ptr(cb.cmdbuf);
136+
// We don't hold the last signal semaphore, because the timeline does as an Event trigger.
137+
for (auto i=0u; i<info.signalSemaphores.size()-1; i++)
138+
*(outRes++) = smart_ptr(info.signalSemaphores[i].semaphore);
139+
}
140+
DeferredSubmitResourceDrop(const DeferredSubmitResourceDrop& other) = delete;
141+
inline DeferredSubmitResourceDrop(DeferredSubmitResourceDrop&& other) : m_resources(nullptr)
142+
{
143+
this->operator=(std::move(other));
144+
}
145+
146+
DeferredSubmitResourceDrop& operator=(const DeferredSubmitResourceDrop& other) = delete;
147+
inline DeferredSubmitResourceDrop& operator=(DeferredSubmitResourceDrop&& other)
148+
{
149+
m_resources = std::move(other.m_resources);
150+
m_resources = nullptr;
151+
return *this;
152+
}
153+
154+
// always exhaustive poll, because we need to get rid of resources ASAP
155+
inline void operator()()
156+
{
157+
m_resources = nullptr;
158+
}
159+
};
116160

117161
protected:
118-
//! `flags` takes bits from E_CREATE_FLAGS
119-
inline IQueue(const ILogicalDevice* originDevice, const uint32_t _famIx, const core::bitflag<CREATE_FLAGS> _flags, const float _priority)
120-
: m_originDevice(originDevice), m_familyIndex(_famIx), m_priority(_priority), m_flags(_flags) {}
162+
NBL_API2 IQueue(ILogicalDevice* originDevice, const uint32_t _famIx, const core::bitflag<CREATE_FLAGS> _flags, const float _priority);
163+
// WARNING: Due to ordering of destructors, its the Base Class that implements `waitIdle_impl` that needs to call `waitIdle`!
164+
NBL_API2 ~IQueue();
121165

122166
friend class CThreadSafeQueueAdapter;
123167
virtual RESULT submit_impl(const std::span<const SSubmitInfo> _submits) = 0;
168+
virtual RESULT waitIdle_impl() const = 0;
124169

170+
// Refcounts all resources used by Pending Submits, gets occasionally cleared out
171+
std::unique_ptr<MultiTimelineEventHandlerST<DeferredSubmitResourceDrop,false>> m_submittedResources;
125172
const ILogicalDevice* m_originDevice;
126173
const uint32_t m_familyIndex;
127174
const float m_priority;

include/nbl/video/ISwapchain.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ class ISwapchain : public IBackendObject
515515
m_frameResources = nullptr;
516516
}
517517
};
518+
518519
protected:
519520
ISwapchain(core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params, const uint8_t imageCount, core::smart_refctd_ptr<ISwapchain>&& oldSwapchain);
520521
virtual inline ~ISwapchain()

src/nbl/video/CVulkanLogicalDevice.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ m_vkdev(vkdev), m_devf(vkdev), m_deferred_op_mempool(NODES_PER_BLOCK_DEFERRED_OP
3636
m_devf.vk.vkGetDeviceQueue(m_vkdev,i,j,&q);
3737

3838
const uint32_t ix = offset+j;
39-
auto queue = std::make_unique<CVulkanQueue>(this,rdoc,static_cast<const CVulkanConnection*>(m_api.get())->getInternalObject(),q,i,flags,priority);
40-
(*m_queues)[ix] = new CThreadSafeQueueAdapter(this,std::move(queue));
39+
auto queue = new CVulkanQueue(this,rdoc,static_cast<const CVulkanConnection*>(m_api.get())->getInternalObject(),q,i,flags,priority);
40+
(*m_queues)[ix] = new CThreadSafeQueueAdapter(this,queue);
4141
}
4242
}
4343

src/nbl/video/CVulkanQueue.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
namespace nbl::video
88
{
99

10-
auto CVulkanQueue::waitIdle() const -> RESULT
10+
auto CVulkanQueue::waitIdle_impl() const -> RESULT
1111
{
1212
return getResultFrom(static_cast<const CVulkanLogicalDevice*>(m_originDevice)->getFunctionTable()->vk.vkQueueWaitIdle(m_vkQueue));
1313
}

src/nbl/video/CVulkanQueue.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class ILogicalDevice;
1515
class CVulkanQueue final : public IQueue
1616
{
1717
public:
18-
inline CVulkanQueue(const ILogicalDevice* logicalDevice, renderdoc_api_t* rdoc, VkInstance vkinst, VkQueue vkq, const uint32_t _famIx, const core::bitflag<IQueue::CREATE_FLAGS> _flags, const float _priority)
18+
inline CVulkanQueue(ILogicalDevice* logicalDevice, renderdoc_api_t* rdoc, VkInstance vkinst, VkQueue vkq, const uint32_t _famIx, const core::bitflag<IQueue::CREATE_FLAGS> _flags, const float _priority)
1919
: IQueue(logicalDevice, _famIx, _flags, _priority), m_vkQueue(vkq), m_rdoc_api(rdoc), m_vkInstance(vkinst) {}
2020

2121
static inline RESULT getResultFrom(const VkResult result)
@@ -33,7 +33,6 @@ class CVulkanQueue final : public IQueue
3333
}
3434
return RESULT::OTHER_ERROR;
3535
}
36-
RESULT waitIdle() const override;
3736

3837
bool startCapture() override;
3938
bool endCapture() override;
@@ -45,7 +44,14 @@ class CVulkanQueue final : public IQueue
4544
inline VkQueue getInternalObject() const {return m_vkQueue;}
4645

4746
private:
47+
inline ~CVulkanQueue()
48+
{
49+
// need to ensure all submits end to stop spinning in the Base Class destructor
50+
waitIdle_impl();
51+
}
52+
4853
RESULT submit_impl(const std::span<const SSubmitInfo> _submits) override;
54+
RESULT waitIdle_impl() const override;
4955

5056
renderdoc_api_t* m_rdoc_api;
5157
VkInstance m_vkInstance;

src/nbl/video/IQueue.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
#include "nbl/video/IQueue.h"
22
#include "nbl/video/IGPUCommandBuffer.h"
33
#include "nbl/video/ILogicalDevice.h"
4+
#include "nbl/video/TimelineEventHandlers.h"
45

56
namespace nbl::video
67
{
78

9+
IQueue::IQueue(ILogicalDevice* originDevice, const uint32_t _famIx, const core::bitflag<CREATE_FLAGS> _flags, const float _priority)
10+
: m_originDevice(originDevice), m_familyIndex(_famIx), m_priority(_priority), m_flags(_flags)
11+
{
12+
m_submittedResources = std::make_unique<MultiTimelineEventHandlerST<DeferredSubmitResourceDrop,false>>(originDevice);
13+
}
14+
815
auto IQueue::submit(const std::span<const SSubmitInfo> _submits) -> RESULT
916
{
1017
if (_submits.empty())
@@ -78,11 +85,40 @@ auto IQueue::submit(const std::span<const SSubmitInfo> _submits) -> RESULT
7885
auto result = submit_impl(_submits);
7986
if (result!=RESULT::SUCCESS)
8087
return result;
81-
// mark cmdbufs as done (wrongly but conservatively wrong)
88+
// poll for whatever is done, free up memory ASAP
89+
// we poll everything (full GC run) because we would release memory very slowly otherwise
90+
m_submittedResources->poll();
91+
//
8292
for (const auto& submit : _submits)
83-
for (const auto& commandBuffer : submit.commandBuffers)
84-
commandBuffer.cmdbuf->m_state = commandBuffer.cmdbuf->getRecordingFlags().hasFlags(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT) ? IGPUCommandBuffer::STATE::INVALID:IGPUCommandBuffer::STATE::EXECUTABLE;
93+
{
94+
// hold onto the semaphores and commandbuffers until the submit signals the last semaphore
95+
const auto& lastSignal = submit.signalSemaphores.back();
96+
m_submittedResources->latch({.semaphore=lastSignal.semaphore,.value=lastSignal.value},DeferredSubmitResourceDrop(submit));
97+
// Mark cmdbufs as done (wrongly but conservatively wrong)
98+
// We can't use `m_submittedResources` to mark them done, because the functor may run "late" in the future, after the cmdbuf has already been validly reset or resubmitted
99+
for (const auto& commandBuffer : submit.commandBuffers)
100+
commandBuffer.cmdbuf->m_state = commandBuffer.cmdbuf->getRecordingFlags().hasFlags(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT) ? IGPUCommandBuffer::STATE::INVALID:IGPUCommandBuffer::STATE::EXECUTABLE;
101+
}
85102
return result;
86103
}
87104

105+
auto IQueue::waitIdle() -> RESULT
106+
{
107+
const auto retval = waitIdle_impl();
108+
// everything will be murdered because every submitted semaphore so far will signal
109+
m_submittedResources->abortAll();
110+
return retval;
111+
}
112+
113+
uint32_t IQueue::cullResources()
114+
{
115+
return m_submittedResources->poll().eventsLeft;
116+
}
117+
118+
IQueue::~IQueue()
119+
{
120+
// if you find yourself spinning a lot here, its because the base class hasn't called waitIdle_impl()
121+
//while (cullResources()) {} // deleter of `m_submittedResources` calls dtor which will do this
122+
}
123+
88124
}

0 commit comments

Comments
 (0)