Skip to content

Commit ff6ac48

Browse files
Port `downloadBufferRangeViaStagingBuffer`
1 parent 42080a5 commit ff6ac48

File tree

1 file changed

+32
-73
lines changed

1 file changed

+32
-73
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 32 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -298,16 +298,15 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
298298
uint32_t localOffset = StreamingTransientDataBufferMT<>::invalid_value;
299299
m_defaultUploadBuffer.get()->multi_allocate(std::chrono::steady_clock::now()+std::chrono::microseconds(500u),1u,&localOffset,&allocationSize,&m_allocationAlignment);
300300
// copy only the unpadded part
301-
if (localOffset != StreamingTransientDataBufferMT<>::invalid_value)
301+
if (localOffset!=StreamingTransientDataBufferMT<>::invalid_value)
302302
{
303303
const void* dataPtr = reinterpret_cast<const uint8_t*>(data) + uploadedSize;
304304
memcpy(reinterpret_cast<uint8_t*>(m_defaultUploadBuffer->getBufferPointer()) + localOffset, dataPtr, subSize);
305305
}
306-
// keep trying again
307-
if (localOffset == StreamingTransientDataBufferMT<>::invalid_value)
306+
else
308307
{
309308
nextSubmit.overflowSubmit();
310-
continue;
309+
continue; // keep trying again
311310
}
312311
// some platforms expose non-coherent host-visible GPU memory, so writes need to be flushed explicitly
313312
if (m_defaultUploadBuffer.get()->needsManualFlushOrInvalidate())
@@ -402,9 +401,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
402401
StreamingTransientDataBufferMT<>* m_downstreamingBuffer;
403402
const size_t m_dstOffset;
404403
};
405-
#if 0 // TODO: port
404+
406405
//! Calls the callback to copy the data to a destination Offset
407-
//! * IMPORTANT: To make the copies ready, IUtility::getDefaultDownStreamingBuffer()->cull_frees() should be called after the `submissionFence` is signaled.
406+
//! * IMPORTANT: To make all the callbacks execute, IUtility::getDefaultDownStreamingBuffer()->cull_frees() should be called after the `nextSubmit.signalSemaphores.front()` is signaled.
408407
//! If the allocation from staging memory fails due to large image size or fragmentation, then this function may need to submit the command buffer via the `submissionQueue` and then signal the fence.
409408
//! Returns:
410409
//! IQueue::SSubmitInfo to use for command buffer submission instead of `intendedNextSubmit`.
@@ -438,50 +437,44 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
438437
//! * submissionQueue must point to a valid IQueue
439438
//! * submissionFence must point to a valid IGPUFence
440439
//! * submissionFence must be in `UNSIGNALED` state
441-
[[nodiscard("Use The New IQueue::SubmitInfo")]] inline IQueue::SSubmitInfo downloadBufferRangeViaStagingBuffer(
442-
const std::function<data_consumption_callback_t>& consumeCallback, const asset::SBufferRange<IGPUBuffer>& srcBufferRange,
443-
IQueue* submissionQueue, IGPUFence* submissionFence, IQueue::SSubmitInfo intendedNextSubmit = {}
444-
)
440+
inline bool downloadBufferRangeViaStagingBuffer(const std::function<data_consumption_callback_t>& consumeCallback, SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange<IGPUBuffer>& srcBufferRange)
445441
{
446-
if (!intendedNextSubmit.isValid() || intendedNextSubmit.commandBufferCount <= 0u)
442+
if (!srcBufferRange.isValid() || !srcBufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_SRC_BIT))
447443
{
448-
// TODO: log error -> intendedNextSubmit is invalid
449-
assert(false);
450-
return intendedNextSubmit;
444+
m_logger.log("Invalid `srcBufferRange` or buffer has no `EUF_TRANSFER_SRC_BIT` usage flag, cannot `downloadBufferRangeViaStagingBuffer`!",system::ILogger::ELL_ERROR);
445+
return false;
451446
}
452447

453-
// Use the last command buffer in intendedNextSubmit, it should be in recording state
454-
auto& cmdbuf = intendedNextSubmit.commandBuffers[intendedNextSubmit.commandBufferCount - 1];
455-
456-
assert(cmdbuf->getState() == IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable());
457-
assert(cmdbuf->getRecordingFlags().hasFlags(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT));
448+
if (!nextSubmit.valid())
449+
{
450+
m_logger.log(nextSubmit.ErrorText, system::ILogger::ELL_ERROR);
451+
return false;
452+
}
458453

459454
const auto& limits = m_device->getPhysicalDevice()->getLimits();
460455
const uint32_t optimalTransferAtom = limits.maxResidentInvocations*sizeof(uint32_t);
461456

462-
auto* cmdpool = cmdbuf->getPool();
463-
assert(cmdpool->getQueueFamilyIndex() == submissionQueue->getFamilyIndex());
464-
457+
auto cmdbuf = nextSubmit.frontHalf.getScratchCommandBuffer();
465458
// Basically downloadedSize is downloadRecordedIntoCommandBufferSize :D
466-
for (size_t downloadedSize = 0ull; downloadedSize < srcBufferRange.size;)
459+
for (size_t downloadedSize=0ull; downloadedSize<srcBufferRange.size;)
467460
{
468461
const size_t notDownloadedSize = srcBufferRange.size - downloadedSize;
469462
// how large we can make the allocation
470-
uint32_t maxFreeBlock = m_defaultDownloadBuffer.get()->max_size();
463+
const uint32_t maxFreeBlock = m_defaultDownloadBuffer->max_size();
471464
// get allocation size
472-
const uint32_t allocationSize = getAllocationSizeForStreamingBuffer(notDownloadedSize, m_allocationAlignment, maxFreeBlock, optimalTransferAtom);
473-
const uint32_t copySize = core::min(allocationSize, notDownloadedSize);
465+
const uint32_t allocationSize = getAllocationSizeForStreamingBuffer(notDownloadedSize,m_allocationAlignment,maxFreeBlock,optimalTransferAtom);
466+
const uint32_t copySize = core::min(allocationSize,notDownloadedSize);
474467

475468
uint32_t localOffset = StreamingTransientDataBufferMT<>::invalid_value;
476469
m_defaultDownloadBuffer.get()->multi_allocate(std::chrono::steady_clock::now()+std::chrono::microseconds(500u),1u,&localOffset,&allocationSize,&m_allocationAlignment);
477470

478-
if (localOffset != StreamingTransientDataBufferMT<>::invalid_value)
471+
if (localOffset!=StreamingTransientDataBufferMT<>::invalid_value)
479472
{
480473
IGPUCommandBuffer::SBufferCopy copy;
481474
copy.srcOffset = srcBufferRange.offset + downloadedSize;
482475
copy.dstOffset = localOffset;
483476
copy.size = copySize;
484-
cmdbuf->copyBuffer(srcBufferRange.buffer.get(), m_defaultDownloadBuffer.get()->getBuffer(), 1u, &copy);
477+
cmdbuf->copyBuffer(srcBufferRange.buffer.get(),m_defaultDownloadBuffer->getBuffer(),1u,&copy);
485478

486479
auto dataConsumer = core::make_smart_refctd_ptr<CDownstreamingDataConsumer>(
487480
IDeviceMemoryAllocation::MemoryRange(localOffset,copySize),
@@ -490,63 +483,29 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
490483
m_defaultDownloadBuffer.get(),
491484
downloadedSize
492485
);
493-
m_defaultDownloadBuffer.get()->multi_deallocate(1u, &localOffset, &allocationSize, core::smart_refctd_ptr<IGPUFence>(submissionFence), &dataConsumer.get());
486+
m_defaultDownloadBuffer.get()->multi_deallocate(1u,&localOffset,&allocationSize,nextSubmit.getScratchSemaphoreNextWait(),&dataConsumer.get());
494487

495488
downloadedSize += copySize;
496489
}
497-
else
498-
{
499-
// but first submit the already buffered up copies
500-
cmdbuf->end();
501-
IQueue::SSubmitInfo submit = intendedNextSubmit;
502-
submit.signalSemaphoreCount = 0u;
503-
submit.pSignalSemaphores = nullptr;
504-
assert(submit.isValid());
505-
submissionQueue->submit(1u, &submit, submissionFence);
506-
m_device->blockForFences(1u, &submissionFence);
507-
508-
intendedNextSubmit.commandBufferCount = 1u;
509-
intendedNextSubmit.commandBuffers = &cmdbuf;
510-
intendedNextSubmit.waitSemaphoreCount = 0u;
511-
intendedNextSubmit.pWaitSemaphores = nullptr;
512-
intendedNextSubmit.pWaitDstStageMask = nullptr;
513-
514-
// before resetting we need poll all events in the allocator's deferred free list
515-
m_defaultDownloadBuffer->cull_frees();
516-
// we can reset the fence and commandbuffer because we fully wait for the GPU to finish here
517-
m_device->resetFences(1u, &submissionFence);
518-
cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
519-
cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
520-
}
490+
else // but first submit the already buffered up copies
491+
nextSubmit.overflowSubmit();
521492
}
522-
return intendedNextSubmit;
493+
return true;
523494
}
524495

525496
//! This function is a specialization of the `downloadBufferRangeViaStagingBufferAutoSubmit` function above.
526497
//! Additionally waits for the fence
527498
//! WARNING: This function blocks CPU and stalls the GPU!
528-
inline void downloadBufferRangeViaStagingBufferAutoSubmit(
529-
const asset::SBufferRange<IGPUBuffer>& srcBufferRange, void* data,
530-
IQueue* submissionQueue, const IQueue::SSubmitInfo& submitInfo = {}
531-
)
499+
inline bool downloadBufferRangeViaStagingBufferAutoSubmit(const SIntendedSubmitInfo::SFrontHalf& submit,const asset::SBufferRange<IGPUBuffer>& srcBufferRange, void* data)
532500
{
533-
if (!submitInfo.isValid())
534-
{
535-
// TODO: log error
536-
assert(false);
537-
return;
538-
}
539-
540-
541-
auto fence = m_device->createFence(IGPUFence::ECF_UNSIGNALED);
542-
downloadBufferRangeViaStagingBufferAutoSubmit(std::function<data_consumption_callback_t>(default_data_consumption_callback_t(data)), srcBufferRange, submissionQueue, fence.get(), submitInfo);
543-
auto* fenceptr = fence.get();
544-
m_device->blockForFences(1u, &fenceptr);
501+
if (!autoSubmitAndBlock(submit,[&](SIntendedSubmitInfo& nextSubmit){return downloadBufferRangeViaStagingBuffer(default_data_consumption_callback_t(data),nextSubmit,srcBufferRange);}))
502+
return false;
545503

546-
//! TODO: NOTE this method cannot be turned into a pure autoSubmitAndBlock + lambda because there's stuff to do AFTER the semaphore wait~!
547-
m_defaultDownloadBuffer->cull_frees(); // its while(poll()) {} now IIRC
504+
//! NOTE this method cannot be turned into a pure autoSubmitAndBlock + lambda because there's stuff to do AFTER the semaphore wait~!
505+
m_defaultDownloadBuffer->cull_frees();
506+
return true;
548507
}
549-
#endif
508+
550509
// --------------
551510
// buildAccelerationStructures
552511
// --------------

0 commit comments

Comments
 (0)