Skip to content

Commit 54a4f53

Browse files
Fix a lot of things in CResizableSurface mostly by promoting semaphore wait/signal stage masks to ALL_COMMANDS_BIT due to KhronosGroup/Vulkan-Docs#2319
1 parent 7660fde commit 54a4f53

File tree

2 files changed

+54
-42
lines changed

2 files changed

+54
-42
lines changed

include/nbl/video/utilities/CResizableSurface.h

Lines changed: 53 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,16 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
4545
protected:
4646
friend class IResizableSurface;
4747

48-
// Returns what stage the submit signal semaphore should signal from for the presentation to wait on.
48+
// Returns what Pipeline Stages will be used for performing the `tripleBufferPresent`
49+
// We use this to set the waitStage for the Semaphore Wait, but only when ownership does not need to be acquired.
50+
virtual core::bitflag<asset::PIPELINE_STAGE_FLAGS> getTripleBufferPresentStages() const = 0;
51+
52+
// Returns what if the commands to present `source` into `dstIx` Swapchain Image were successfully recorded to `cmdbuf` and can be submitted
4953
// The `cmdbuf` is already begun when given to the callback, and will be ended outside.
5054
// User is responsible for transitioning the image layouts (most notably the swapchain), acquiring ownership etc.
5155
// Performance Tip: DO NOT transition the layout of `source` inside this callback, have it already in the correct Layout you need!
5256
// However, if `qFamToAcquireSrcFrom!=IQueue::FamilyIgnored`, you need to acquire the ownership of the `source.image`
53-
virtual asset::PIPELINE_STAGE_FLAGS tripleBufferPresent(IGPUCommandBuffer* cmdbuf, const SPresentSource& source, const uint8_t dstIx, const uint32_t qFamToAcquireSrcFrom) = 0;
57+
virtual bool tripleBufferPresent(IGPUCommandBuffer* cmdbuf, const SPresentSource& source, const uint8_t dstIx, const uint32_t qFamToAcquireSrcFrom) = 0;
5458
};
5559

5660
//
@@ -98,20 +102,24 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
98102
// Ofcourse if the target addresses of the atomic counters of wait values differ, no lock needed!
99103
inline std::unique_lock<std::mutex> pseudoAcquire(std::atomic_uint64_t* pPresentSemaphoreWaitValue)
100104
{
101-
if (pPresentSemaphoreWaitValue!=m_pLastPresentSignalValue)
105+
if (pPresentSemaphoreWaitValue!=m_lastPresent.pPresentSemaphoreWaitValue)
102106
return {}; // no lock
103107
return std::unique_lock<std::mutex>(m_swapchainResourcesMutex);
104108
}
105109

106-
struct SPresentInfo
110+
struct SCachedPresentInfo
107111
{
108-
inline operator bool() const {return source.image;}
112+
inline operator bool() const {return source.image && waitSemaphore && waitValue && pPresentSemaphoreWaitValue;}
109113

110-
SPresentSource source;
114+
SPresentSource source = {};
111115
// only allow waiting for one semaphore, because there's only one source to present!
112-
IQueue::SSubmitInfo::SSemaphoreInfo wait;
116+
ISemaphore* waitSemaphore = nullptr;
117+
uint64_t waitValue = 0;
113118
// what value will be signalled by the enqueued Triple Buffer Presents so far
114-
std::atomic_uint64_t* pPresentSemaphoreWaitValue;
119+
std::atomic_uint64_t* pPresentSemaphoreWaitValue = nullptr;
120+
};
121+
struct SPresentInfo : SCachedPresentInfo
122+
{
115123
uint32_t mostRecentFamilyOwningSource; // TODO: change to uint8_t
116124
core::IReferenceCounted* frameResources;
117125
};
@@ -126,7 +134,7 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
126134
else
127135
{
128136
guard = std::unique_lock<std::mutex>(m_swapchainResourcesMutex);
129-
assert(presentInfo.pPresentSemaphoreWaitValue!=m_pLastPresentSignalValue);
137+
assert(presentInfo.pPresentSemaphoreWaitValue!=m_lastPresent.pPresentSemaphoreWaitValue);
130138
}
131139
// The only thing we want to do under the mutex, is just enqueue a triple buffer present and a swapchain present, its not a lot.
132140
// Only acquire ownership if the Present queue is different to the current one.
@@ -156,13 +164,7 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
156164

157165
// The triple present enqueue operations are fast enough to be done under a mutex, this is safer on some platforms. You need to "race to present" to avoid a flicker.
158166
// Queue family ownership acquire not needed, done by the the very first present when `m_lastPresentSource` wasset.
159-
return present_impl({
160-
.source = m_lastPresentSource,
161-
.wait = m_lastPresentWait,
162-
.pPresentSemaphoreWaitValue = m_pLastPresentSignalValue,
163-
.mostRecentFamilyOwningSource = getAssignedQueue()->getFamilyIndex(),
164-
.frameResources=nullptr
165-
},false);
167+
return present_impl({{m_lastPresent},getAssignedQueue()->getFamilyIndex(),nullptr},false);
166168
}
167169

168170
protected:
@@ -179,10 +181,11 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
179181
std::unique_lock guard(m_swapchainResourcesMutex);
180182
static_cast<ICallback*>(m_cb)->setSwapchainRecreator(nullptr);
181183

182-
m_lastPresentWait = {};
183-
m_lastPresentSource = {};
184-
m_lastPresentSemaphore = nullptr;
184+
m_lastPresent.source = {};
185185
m_lastPresentSourceImage = nullptr;
186+
m_lastPresent.waitSemaphore = nullptr;
187+
m_lastPresent.waitValue = 0;
188+
m_lastPresentSemaphore = nullptr;
186189

187190
if (m_presentSemaphore)
188191
{
@@ -194,7 +197,7 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
194197
}
195198

196199
std::fill(m_cmdbufs.begin(),m_cmdbufs.end(),nullptr);
197-
m_pLastPresentSignalValue = nullptr;
200+
m_lastPresent.pPresentSemaphoreWaitValue = {};
198201
m_presentSemaphore = nullptr;
199202
}
200203

@@ -311,21 +314,31 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
311314
return false;
312315

313316
// now that an image is acquired, we HAVE TO present
317+
m_lastPresentSourceImage = core::smart_refctd_ptr<IGPUImage>(presentInfo.source.image);
318+
m_lastPresentSemaphore = core::smart_refctd_ptr<ISemaphore>(presentInfo.waitSemaphore);
319+
m_lastPresent = presentInfo;
320+
// in case of failure, most we can do is just not submit an invalidated commandbuffer with the triple buffer present
314321
bool willBlit = true;
322+
315323
const auto acquireCount = getAcquireCount();
316324
const IQueue::SSubmitInfo::SSemaphoreInfo waitSemaphores[2] = {
317-
presentInfo.wait,
325+
{
326+
.semaphore = presentInfo.waitSemaphore,
327+
.value = presentInfo.waitValue,
328+
// If we need to Acquire Ownership of the Triple Buffer Source we need a Happens-Before between the Semaphore wait and the Acquire Ownership
329+
// https://github.com/KhronosGroup/Vulkan-Docs/issues/2319
330+
// else we need to know what stage starts reading from the Triple Buffer Source
331+
.stageMask = acquireOwnership ? asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS:swapchainResources->getTripleBufferPresentStages()
332+
},
318333
{
319334
.semaphore = getAcquireSemaphore(),
320335
.value = acquireCount,
321-
.stageMask = asset::PIPELINE_STAGE_FLAGS::NONE // presentation engine usage isn't a stage
336+
// There 99% surely probably be a Layout Transition of the acquired image away from PRESENT_SRC,
337+
// so need an ALL->NONE dependency between acquire and semaphore wait
338+
// https://github.com/KhronosGroup/Vulkan-Docs/issues/2319
339+
.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
322340
}
323341
};
324-
m_lastPresentSourceImage = core::smart_refctd_ptr<IGPUImage>(presentInfo.source.image);
325-
m_lastPresentSemaphore = core::smart_refctd_ptr<ISemaphore>(presentInfo.wait.semaphore);
326-
m_pLastPresentSignalValue = presentInfo.pPresentSemaphoreWaitValue;
327-
m_lastPresentSource = presentInfo.source;
328-
m_lastPresentWait = presentInfo.wait;
329342

330343
//
331344
auto queue = getAssignedQueue();
@@ -350,19 +363,19 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
350363
auto cmdbuf = m_cmdbufs[cmdBufIx].get();
351364

352365
willBlit &= cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
353-
// now enqueue the Triple Buffer Present
354-
const IQueue::SSubmitInfo::SSemaphoreInfo presented[1] = {
355-
{
356-
.semaphore = m_presentSemaphore.get(),
357-
.value = acquireCount,
358-
// don't need to predicate with `willBlit` because if `willBlit==false` cmdbuf not properly begun and validation will fail
359-
.stageMask = swapchainResources->tripleBufferPresent(cmdbuf,m_lastPresentSource,imageIx,acquireOwnership ? queue->getFamilyIndex():IQueue::FamilyIgnored)
360-
}
361-
};
362-
willBlit &= bool(presented[1].stageMask.value);
366+
willBlit &= swapchainResources->tripleBufferPresent(cmdbuf,presentInfo.source,imageIx,acquireOwnership ? queue->getFamilyIndex():IQueue::FamilyIgnored);
363367
willBlit &= cmdbuf->end();
364368

365369
const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[1] = {{.cmdbuf=cmdbuf}};
370+
// now enqueue the Triple Buffer Present
371+
const IQueue::SSubmitInfo::SSemaphoreInfo presented[1] = {{
372+
.semaphore = m_presentSemaphore.get(),
373+
.value = acquireCount,
374+
// There 99% surely probably be a Layout Transition of the acquired image to PRESENT_SRC,
375+
// so need a NONE->ALL dependency between that and semaphore signal
376+
// https://github.com/KhronosGroup/Vulkan-Docs/issues/2319
377+
.stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS
378+
}};
366379
const IQueue::SSubmitInfo submitInfos[1] = {{
367380
.waitSemaphores = waitSemaphores,
368381
.commandBuffers = commandBuffers,
@@ -374,7 +387,7 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
374387
if (willBlit)
375388
{
376389
// let the others know we've enqueued another TBP
377-
const auto oldVal = m_pLastPresentSignalValue->exchange(acquireCount);
390+
const auto oldVal = m_lastPresent.pPresentSemaphoreWaitValue->exchange(acquireCount);
378391
assert(oldVal<acquireCount);
379392

380393
auto frameResources = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<ISwapchain::void_refctd_ptr>>(2);
@@ -403,9 +416,8 @@ class NBL_API2 IResizableSurface : public ISimpleManagedSurface
403416
// Why do we delay the swapchain recreate present until the rendering of the most recent present source is done? Couldn't we present whatever latest Triple Buffer is done?
404417
// No because there can be presents enqueued whose wait semaphores have not signalled yet, meaning there could be images presented in the future.
405418
// Unless you like your frames to go backwards in time in a special "rewind glitch" you need to blit the frame that has not been presented yet or is the same as most recently enqueued.
406-
IQueue::SSubmitInfo::SSemaphoreInfo m_lastPresentWait = {};
407-
SPresentSource m_lastPresentSource = {};
408-
std::atomic_uint64_t* m_pLastPresentSignalValue = nullptr;
419+
SCachedPresentInfo m_lastPresent = {};
420+
// extras for lifetime preservation till next immediate spontaneous triple buffer present
409421
core::smart_refctd_ptr<ISemaphore> m_lastPresentSemaphore = {};
410422
core::smart_refctd_ptr<IGPUImage> m_lastPresentSourceImage = {};
411423
};

0 commit comments

Comments
 (0)