Skip to content

Commit e2b7852

Browse files
Finally work out that this one assert didn't make sense, and make everything wait on a TS instead of a Fence
1 parent 0370c13 commit e2b7852

File tree

3 files changed

+106
-75
lines changed

3 files changed

+106
-75
lines changed

include/nbl/video/CVulkanSwapchain.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ class CVulkanSwapchain final : public ISwapchain
3030
const uint32_t imageCount,
3131
core::smart_refctd_ptr<CVulkanSwapchain>&& oldSwapchain,
3232
const VkSwapchainKHR swapchain,
33-
const VkSemaphore* const _adaptorSemaphores,
34-
const VkFence* const _prePresentFences
33+
const VkSemaphore* const _acquireAdaptorSemaphores,
34+
const VkSemaphore* const _prePresentSemaphores,
35+
const VkSemaphore* const _presentAdaptorSemaphores
3536
);
3637
~CVulkanSwapchain();
3738

@@ -45,8 +46,9 @@ class CVulkanSwapchain final : public ISwapchain
4546
const VkSwapchainKHR m_vkSwapchainKHR;
4647
VkImage m_images[ISwapchain::MaxImages];
4748
VkSemaphore m_acquireAdaptorSemaphores[ISwapchain::MaxImages];
48-
VkFence m_prePresentFences[ISwapchain::MaxImages];
49+
VkSemaphore m_prePresentSemaphores[ISwapchain::MaxImages];
4950
VkSemaphore m_presentAdaptorSemaphores[ISwapchain::MaxImages];
51+
uint64_t m_perImageAcquireCount[ISwapchain::MaxImages] = { 0 };
5052
// nasty way to fight UB of the Vulkan spec
5153
bool m_needToWaitIdle = true;
5254
};

include/nbl/video/ISwapchain.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,6 @@ class ISwapchain : public IBackendObject
361361
case ACQUIRE_IMAGE_RESULT::SUCCESS: [[fallthrough]];
362362
case ACQUIRE_IMAGE_RESULT::SUBOPTIMAL:
363363
m_acquireCounter++;
364-
//assert(!unacquired(out_imgIx)); // should happen in the impl anyway
365364
{
366365
auto semaphoreArray = core::make_refctd_dynamic_array<core::smart_refctd_dynamic_array<void_refctd_ptr>>(info.signalSemaphores.size());
367366
for (auto i=0ull; i<info.signalSemaphores.size(); i++)
@@ -495,7 +494,7 @@ class ISwapchain : public IBackendObject
495494
virtual core::smart_refctd_ptr<ISwapchain> recreate_impl(SSharedCreationParams&& params) = 0;
496495

497496
// The user needs to hold onto the old swapchain by themselves as well, because without the `KHR_swapchain_maintenance1` extension:
498-
// - Images cannot be "unacquired", so all already acquired images of a swapchain (even if its retired) must be presented
497+
// - Images cannot be "de-acquired", so all already acquired images of a swapchain (even if its retired) must be presented
499498
// - No way to query that the presentation is finished, so we can't destroy a swapchain just because we issued a present on all previously acquired images
500499
// This means new swapchain should hold onto the old swapchain for `getImageCount()` acquires or until an acquisition error.
501500
// But we don't handle the acquisition error case because we just presume swapchain will be recreated and eventually there will be one with enough acquisitions.

src/nbl/video/CVulkanSwapchain.cpp

Lines changed: 100 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -122,36 +122,33 @@ core::smart_refctd_ptr<CVulkanSwapchain> CVulkanSwapchain::create(core::smart_re
122122
return nullptr;
123123

124124
//
125-
VkSemaphore adaptorSemaphores[2*ISwapchain::MaxImages];
126-
{
127-
VkSemaphoreCreateInfo info = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,nullptr};
128-
info.flags = 0u;
129-
for (auto i=0u; i<2*imageCount; i++)
130-
if (vk.vkCreateSemaphore(vk_device,&info,nullptr,adaptorSemaphores+i)!=VK_SUCCESS)
131-
{
132-
// handle successful allocs before failure
133-
for (auto j=0u; j<i; j++)
134-
vk.vkDestroySemaphore(vk_device,adaptorSemaphores[j],nullptr);
135-
return nullptr;
136-
}
137-
}
138-
VkFence prePresentFences[ISwapchain::MaxImages];
125+
const VkSemaphoreTypeCreateInfo timelineType = {
126+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
127+
.pNext = nullptr,
128+
.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
129+
.initialValue = 0
130+
};
131+
const VkSemaphoreCreateInfo timelineInfo = {
132+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
133+
.pNext = &timelineType,
134+
.flags = 0
135+
};
136+
const VkSemaphoreCreateInfo adaptorInfo = {
137+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
138+
.pNext = nullptr,
139+
.flags = 0
140+
};
141+
VkSemaphore semaphores[ISwapchain::MaxImages];
142+
for (auto i=0u; i<3*imageCount; i++)
143+
if (vk.vkCreateSemaphore(vk_device,i<imageCount ? (&timelineInfo):(&adaptorInfo),nullptr,semaphores+i)!=VK_SUCCESS)
139144
{
140-
VkFenceCreateInfo info = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,nullptr};
141-
info.flags = VK_FENCE_CREATE_SIGNALED_BIT; // makes life easier on the very first acquire and in the destructor
142-
for (auto i=0u; i<imageCount; i++)
143-
if (vk.vkCreateFence(vk_device,&info,nullptr,prePresentFences+i)!=VK_SUCCESS)
144-
{
145-
// handle successful allocs before failure
146-
for (auto j=0u; j<ISwapchain::MaxImages; j++)
147-
vk.vkDestroySemaphore(vk_device,adaptorSemaphores[j],nullptr);
148-
for (auto j=0u; j<i; j++)
149-
vk.vkDestroyFence(vk_device,prePresentFences[j],nullptr);
150-
return nullptr;
151-
}
145+
// handle successful allocs before failure
146+
for (auto j=0u; j<i; j++)
147+
vk.vkDestroySemaphore(vk_device,semaphores[j],nullptr);
148+
return nullptr;
152149
}
153150

154-
return core::smart_refctd_ptr<CVulkanSwapchain>(new CVulkanSwapchain(std::move(device),std::move(params),imageCount,std::move(oldSwapchain),vk_swapchain,adaptorSemaphores,prePresentFences),core::dont_grab);
151+
return core::smart_refctd_ptr<CVulkanSwapchain>(new CVulkanSwapchain(std::move(device),std::move(params),imageCount,std::move(oldSwapchain),vk_swapchain,semaphores+imageCount,semaphores,semaphores+2*imageCount),core::dont_grab);
155152
}
156153

157154
CVulkanSwapchain::CVulkanSwapchain(
@@ -160,8 +157,9 @@ CVulkanSwapchain::CVulkanSwapchain(
160157
const uint32_t imageCount,
161158
core::smart_refctd_ptr<CVulkanSwapchain>&& oldSwapchain,
162159
const VkSwapchainKHR swapchain,
163-
const VkSemaphore* const _adaptorSemaphores,
164-
const VkFence* const _prePresentFences
160+
const VkSemaphore* const _acquireAdaptorSemaphores,
161+
const VkSemaphore* const _prePresentSemaphores,
162+
const VkSemaphore* const _presentAdaptorSemaphores
165163
) : ISwapchain(std::move(logicalDevice),std::move(params),imageCount,std::move(oldSwapchain)),
166164
m_imgMemRequirements{.size=0,.memoryTypeBits=0x0u,.alignmentLog2=63,.prefersDedicatedAllocation=true,.requiresDedicatedAllocation=true}, m_vkSwapchainKHR(swapchain)
167165
{
@@ -176,9 +174,9 @@ CVulkanSwapchain::CVulkanSwapchain(
176174
assert(retval==VK_SUCCESS && dummy==getImageCount());
177175
}
178176

179-
std::copy_n(_adaptorSemaphores,imageCount,m_acquireAdaptorSemaphores);
180-
std::copy_n(_prePresentFences,imageCount,m_prePresentFences);
181-
std::copy_n(_adaptorSemaphores+imageCount,imageCount,m_presentAdaptorSemaphores);
177+
std::copy_n(_acquireAdaptorSemaphores,imageCount,m_acquireAdaptorSemaphores);
178+
std::copy_n(_prePresentSemaphores,imageCount,m_prePresentSemaphores);
179+
std::copy_n(_presentAdaptorSemaphores,imageCount,m_presentAdaptorSemaphores);
182180
}
183181

184182
CVulkanSwapchain::~CVulkanSwapchain()
@@ -192,15 +190,22 @@ CVulkanSwapchain::~CVulkanSwapchain()
192190
if (m_needToWaitIdle)
193191
vk.vkDeviceWaitIdle(vk_device);
194192

193+
const VkSemaphoreWaitInfo info = {
194+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
195+
.pNext = nullptr,
196+
.flags = 0,
197+
.semaphoreCount = getImageCount(),
198+
.pSemaphores = m_prePresentSemaphores,
199+
.pValues = m_perImageAcquireCount
200+
};
195201
while (true) // if you find yourself spinning here forever, it might be because you didn't present an already acquired image
196-
if (vk.vkWaitForFences(vk_device,getImageCount(),m_prePresentFences,true,~0ull)!=VK_TIMEOUT)
202+
if (vk.vkWaitSemaphores(vk_device,&info,~0ull)!=VK_TIMEOUT)
197203
break;
198-
for (auto i=0; i<getImageCount(); i++)
199-
vk.vkDestroyFence(vk_device,m_prePresentFences[i],nullptr);
200204

201205
for (auto i=0u; i<getImageCount(); i++)
202206
{
203207
vk.vkDestroySemaphore(vk_device,m_acquireAdaptorSemaphores[i],nullptr);
208+
vk.vkDestroySemaphore(vk_device,m_prePresentSemaphores[i],nullptr);
204209
vk.vkDestroySemaphore(vk_device,m_presentAdaptorSemaphores[i],nullptr);
205210
}
206211

@@ -210,8 +215,12 @@ CVulkanSwapchain::~CVulkanSwapchain()
210215
bool CVulkanSwapchain::unacquired(const uint8_t imageIndex) const
211216
{
212217
const CVulkanLogicalDevice* vulkanDevice = static_cast<const CVulkanLogicalDevice*>(getOriginDevice());
213-
// Only lets us know if we're after a successful CPU call to acquire an image, and before the GPU finishes all work blocking the present!
214-
return vulkanDevice->getFunctionTable()->vk.vkGetFenceStatus(vulkanDevice->getInternalObject(),m_prePresentFences[imageIndex])!=VK_NOT_READY;
218+
219+
uint64_t value=0;
220+
// Only lets us know if we're not between after a successful CPU call to acquire an image, and before the GPU finishes all work blocking the present!
221+
if (vulkanDevice->getFunctionTable()->vk.vkGetSemaphoreCounterValue(vulkanDevice->getInternalObject(),m_prePresentSemaphores[imageIndex],&value)!=VK_SUCCESS)
222+
return true;
223+
return value>=m_perImageAcquireCount[imageIndex];
215224
}
216225

217226
auto CVulkanSwapchain::acquireNextImage_impl(const SAcquireInfo& info, uint32_t* const out_imgIx) -> ACQUIRE_IMAGE_RESULT
@@ -234,15 +243,22 @@ auto CVulkanSwapchain::acquireNextImage_impl(const SAcquireInfo& info, uint32_t*
234243
.deviceIndex = 0u // TODO: later obtain device index from swapchain
235244
};
236245

237-
bool suboptimal = false;
238246

239247
auto& vk = vulkanDevice->getFunctionTable()->vk;
240-
const VkDevice vk_device = vulkanDevice->getInternalObject();
248+
249+
// The presentation engine may not have finished reading from the image at the time it is acquired, so the application must use semaphore and/or fence
250+
// to ensure that the image layout and contents are not modified until the presentation engine reads have completed.
251+
// Once vkAcquireNextImageKHR successfully acquires an image, the semaphore signal operation referenced by semaphore is submitted for execution.
252+
// If vkAcquireNextImageKHR does not successfully acquire an image, semaphore and fence are unaffected.
253+
bool suboptimal = false;
241254
{
255+
const VkDevice vk_device = vulkanDevice->getInternalObject();
256+
242257
VkAcquireNextImageInfoKHR acquire = {VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,nullptr};
243258
acquire.swapchain = m_vkSwapchainKHR;
244259
acquire.timeout = info.timeout;
245260
acquire.semaphore = adaptorInfo.semaphore;
261+
// Fence is redundant, we can Host-wait on the timeline semaphore latched by the binary adaptor semaphore instead
246262
acquire.fence = VK_NULL_HANDLE;
247263
acquire.deviceMask = 0x1u<<adaptorInfo.deviceIndex;
248264
switch (vk.vkAcquireNextImage2KHR(vk_device,&acquire,out_imgIx))
@@ -262,17 +278,15 @@ auto CVulkanSwapchain::acquireNextImage_impl(const SAcquireInfo& info, uint32_t*
262278
return ACQUIRE_IMAGE_RESULT::_ERROR;
263279
}
264280
}
281+
265282
const auto imgIx = *out_imgIx;
266-
267-
// The previous present on the image MUST have had finished
268-
// TODO: why do I crash here? maybe cause I don't wait on the acquired fence!
269-
assert(unacquired(imgIx));
270-
// Now put the fence into UNSIGNALLED state where it will stay until "just before" the present
271-
{
272-
const bool result = vk.vkResetFences(vk_device,1,m_prePresentFences+imgIx)==VK_SUCCESS;
273-
// If this goes wrong, we are lost without KHR_swapchain_maintenance1 because there's no way to release acquired images without presenting them!
274-
assert(result);
275-
}
283+
// There's a certain problem `assert(unacquired(imgIx))`, because:
284+
// - we can't assert anything before the acquire, as:
285+
// + we don't know the `imgIx` before we issue the acquire call on the CPU
286+
// + it's okay for the present to not be done yet, because only when acquire signals the semaphore/fence it lets us know image is ready for use
287+
// - after the acquire returns on the CPU the image may not be ready and present could still be reading from it, unknowable without an acquire fence
288+
// + we'd have to have a special fence just for acquire and `assert(unacquired || !acquireFenceReady)` before incrementing the acquire/TS counter
289+
m_perImageAcquireCount[imgIx]++;
276290

277291
core::vector<VkSemaphoreSubmitInfo> signalInfos(info.signalSemaphores.size(),{VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,nullptr});
278292
for (auto i=0u; i<info.signalSemaphores.size(); i++)
@@ -304,19 +318,10 @@ auto CVulkanSwapchain::present_impl(const SPresentInfo& info) -> PRESENT_RESULT
304318
if (!vulkanQueue)
305319
return PRESENT_RESULT::FATAL_ERROR;
306320

321+
// We make the present wait on an empty submit so we can signal our image timeline semaphores that let us work out `unacquired()`
307322
auto& vk = vulkanDevice->getFunctionTable()->vk;
308323
const auto vk_device = vulkanDevice->getInternalObject();
309324

310-
// We make the present wait on an empty submit so we can signal our "not-acquired" fence
311-
VkSemaphoreSubmitInfo adaptorInfo = {
312-
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
313-
.pNext = nullptr,
314-
.semaphore = m_presentAdaptorSemaphores[info.imgIndex],
315-
.value = 0,// value is ignored because the adaptors are binary
316-
.stageMask = VK_PIPELINE_STAGE_2_NONE,
317-
.deviceIndex = 0u // TODO: later
318-
};
319-
320325
// Optionally the submit ties timeline semaphore waits with itself
321326
core::vector<VkSemaphoreSubmitInfo> waitInfos(info.waitSemaphores.size(),{VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,nullptr});
322327
for (auto i=0u; i<info.waitSemaphores.size(); i++)
@@ -329,29 +334,54 @@ auto CVulkanSwapchain::present_impl(const SPresentInfo& info) -> PRESENT_RESULT
329334
waitInfos[i].value = info.waitSemaphores[i].value;
330335
waitInfos[i].deviceIndex = 0u;
331336
}
337+
338+
const auto prePresentSema = m_prePresentSemaphores[info.imgIndex];
339+
auto& presentAdaptorSema = m_presentAdaptorSemaphores[info.imgIndex];
340+
// First signal that we're done writing to the swapchain, then signal the binary sema for presentation
341+
const VkSemaphoreSubmitInfo signalInfos[2] = {
342+
{
343+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
344+
.pNext = nullptr,
345+
.semaphore = prePresentSema,
346+
.value = m_perImageAcquireCount[info.imgIndex],
347+
.stageMask = VK_PIPELINE_STAGE_2_NONE,
348+
.deviceIndex = 0u // TODO: later
349+
},
350+
{
351+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
352+
.pNext = nullptr,
353+
.semaphore = presentAdaptorSema,
354+
.value = 0,// value is ignored because the adaptors are binary
355+
.stageMask = VK_PIPELINE_STAGE_2_NONE,
356+
.deviceIndex = 0u // TODO: later
357+
}
358+
};
332359

333360
// Perform the empty submit
334361
{
335362
VkSubmitInfo2 submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO_2,nullptr};
336363
submit.waitSemaphoreInfoCount = info.waitSemaphores.size();
337364
submit.pWaitSemaphoreInfos = waitInfos.data();
338365
submit.commandBufferInfoCount = 0u;
339-
submit.signalSemaphoreInfoCount = 1u;
340-
submit.pSignalSemaphoreInfos = &adaptorInfo;
341-
if (vk.vkQueueSubmit2(vulkanQueue->getInternalObject(),1u,&submit,m_prePresentFences[info.imgIndex])!=VK_SUCCESS)
366+
submit.signalSemaphoreInfoCount = 2u;
367+
submit.pSignalSemaphoreInfos = signalInfos;
368+
if (vk.vkQueueSubmit2(vulkanQueue->getInternalObject(),1u,&submit,VK_NULL_HANDLE)!=VK_SUCCESS)
342369
{
343-
// need to recreate because can't signal a Fence from the Host
344-
vk.vkDestroyFence(vk_device,m_prePresentFences[info.imgIndex],nullptr);
345-
VkFenceCreateInfo createInfo = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,nullptr};
346-
createInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; // makes life easier on the very first acquire and in the destructor
347-
vk.vkCreateFence(vk_device,&createInfo,nullptr,m_prePresentFences+info.imgIndex);
370+
// Increment the semaphore on the Host to make sure we don't figure as "unacquired" forever
371+
const VkSemaphoreSignalInfo signalInfo = {
372+
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
373+
.pNext = nullptr,
374+
.semaphore = prePresentSema,
375+
.value = m_perImageAcquireCount[info.imgIndex]
376+
};
377+
vk.vkSignalSemaphore(vk_device,&signalInfo);
348378
return PRESENT_RESULT::FATAL_ERROR;
349379
}
350380
}
351381

352382
VkPresentInfoKHR presentInfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,nullptr};
353383
presentInfo.waitSemaphoreCount = 1u;
354-
presentInfo.pWaitSemaphores = &adaptorInfo.semaphore;
384+
presentInfo.pWaitSemaphores = &presentAdaptorSema;
355385
presentInfo.swapchainCount = 1u;
356386
presentInfo.pSwapchains = &m_vkSwapchainKHR;
357387
presentInfo.pImageIndices = &info.imgIndex;
@@ -367,11 +397,11 @@ auto CVulkanSwapchain::present_impl(const SPresentInfo& info) -> PRESENT_RESULT
367397
case VK_ERROR_OUT_OF_DEVICE_MEMORY:
368398
// as per the spec, only these "hard" errors don't result in the binary semaphore getting waited on
369399
vk.vkQueueWaitIdle(vulkanQueue->getInternalObject());
370-
vk.vkDestroySemaphore(vk_device,adaptorInfo.semaphore,nullptr);
400+
vk.vkDestroySemaphore(vk_device,presentAdaptorSema,nullptr);
371401
{
372402
VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,nullptr};
373403
createInfo.flags = 0u;
374-
vk.vkCreateSemaphore(vk_device,&createInfo,nullptr,m_presentAdaptorSemaphores+info.imgIndex);
404+
vk.vkCreateSemaphore(vk_device,&createInfo,nullptr,&presentAdaptorSema);
375405
}
376406
[[fallthrough]];
377407
case VK_ERROR_DEVICE_LOST:

0 commit comments

Comments
 (0)