Skip to content

Commit 00182c7

Browse files
API draft
1 parent 9147392 commit 00182c7

File tree

2 files changed

+101
-47
lines changed

2 files changed

+101
-47
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 94 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,75 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
211211
updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange<IGPUBuffer>{0u, params.size, core::smart_refctd_ptr(buffer)}, data, queue);
212212
return buffer;
213213
}
214+
#endif
214215

216+
// Describes the submit the caller intends to make next: a queue plus spans of wait semaphores,
// command buffers and signal semaphores.
// The LAST entry of `commandBuffers` is used as the "scratch" command buffer and the FIRST entry
// of `signalSemaphores` is used as the "scratch" semaphore (see the accessors below).
struct SIntendedSubmitInfo final
217+
{
218+
public:
219+
// Returns true only when:
// - `queue` is set and both `commandBuffers` and `signalSemaphores` are non-empty,
// - the scratch command buffer is resettable,
// - the scratch command buffer's recording flags include `ONE_TIME_SUBMIT_BIT`,
// - every command buffer's pool has the same queue family index as `queue`.
inline bool valid() const
220+
{
221+
// the emptiness checks must come first, the scratch accessors below call `back()` on the span
if (!queue || commandBuffers.empty() || signalSemaphores.empty())
222+
return false;
223+
if (!getScratchCommandBuffer()->isResettable())
224+
return false;
225+
if (!getScratchCommandBuffer()->getRecordingFlags().hasFlags(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT))
226+
return false;
227+
for (const auto& info : commandBuffers)
228+
if (info.cmdbuf->getPool()->getQueueFamilyIndex()!=queue->getFamilyIndex())
229+
return false;
230+
return true;
231+
}
232+
233+
// The scratch command buffer is the last one in `commandBuffers`, it should be in recording state.
// `commandBuffers` must not be empty when calling these (checked by `valid()`).
inline IGPUCommandBuffer* getScratchCommandBuffer() {return commandBuffers.back().cmdbuf;}
235+
inline const IGPUCommandBuffer* getScratchCommandBuffer() const {return commandBuffers.back().cmdbuf;}
236+
237+
// Wait info built from the first signal semaphore and the value currently stored for it.
// `signalSemaphores` must not be empty when calling this (checked by `valid()`).
inline ISemaphore::SWaitInfo getScratchSemaphoreNextWait() const {return {signalSemaphores.front().semaphore,signalSemaphores.front().value};}
238+
239+
// Implicit conversion to a queue submit info referencing the same three spans
inline operator IQueue::SSubmitInfo() const
240+
{
241+
return {
242+
.waitSemaphores = waitSemaphores,
243+
.commandBuffers = commandBuffers,
244+
.signalSemaphores = signalSemaphores
245+
};
246+
}
247+
248+
// Ends and submits everything recorded so far, blocks on the host until the scratch semaphore is signalled,
// then resets and re-begins the scratch command buffer so recording can continue.
// Afterwards `waitSemaphores` is empty and `commandBuffers` only contains the scratch command buffer.
inline void overflowSubmit()
249+
{
250+
auto cmdbuf = getScratchCommandBuffer();
251+
auto& scratchSemaphore = signalSemaphores.front();
252+
// but first submit the already buffered up copies
cmdbuf->end();
254+
IQueue::SSubmitInfo submit = *this;
255+
// we only signal the scratch semaphore (the first one in `signalSemaphores`) for this intermediate submit
submit.signalSemaphores = {&scratchSemaphore,1};
257+
assert(submit.isValid());
258+
queue->submit({&submit,1});
259+
// We wait (stall) on the immediately preceding submission's timeline semaphore signal value and increase it for the next signaller
// (post-increment: `info` gets the value that was just submitted, the stored value is bumped afterwards)
{
261+
const ISemaphore::SWaitInfo info = {scratchSemaphore.semaphore,scratchSemaphore.value++};
262+
const_cast<ILogicalDevice*>(cmdbuf->getOriginDevice())->blockForSemaphores({&info,1});
263+
}
264+
// we've already waited on the Host for the semaphores, no use waiting twice
waitSemaphores = {};
266+
// since all the commandbuffers have submitted already we only reuse the last one
commandBuffers = {&commandBuffers.back(),1};
268+
// we will still signal the same set in the future
cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
270+
cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
271+
}
272+
273+
274+
// queue that `overflowSubmit` submits to, its family index must match every command buffer's pool (see `valid()`)
IQueue* queue = {};
275+
// semaphores the submit waits on, cleared by `overflowSubmit`
std::span<const IQueue::SSubmitInfo::SSemaphoreInfo> waitSemaphores = {};
276+
// command buffers to submit, the last one is the scratch command buffer
std::span<IQueue::SSubmitInfo::SCommandBufferInfo> commandBuffers = {};
277+
// semaphores to signal, the first one is the scratch semaphore whose value `overflowSubmit` increments
std::span<IQueue::SSubmitInfo::SSemaphoreInfo> signalSemaphores = {};
278+
279+
private:
280+
friend class IUtilities;
281+
// message logged by `IUtilities` when `valid()` fails, defined out of line
static const char* ErrorText;
282+
};
215283
// --------------
216284
// updateBufferRangeViaStagingBuffer
217285
// --------------
@@ -230,7 +298,10 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
230298
//! ** The last command buffer will be used to record the copy commands
231299
//! - submissionQueue: IQueue used to submit, when needed.
232300
//! Note: This parameter is required but may not be used if there is no need to submit
233-
//! - submissionFence:
301+
//! - scratchSemaphore:
302+
//! - since you've already decided on the semaphores you'll wait and signal in the `intendedNextSubmit`, we need an extra semaphore to "stitch together" the submit if we split it
303+
304+
234305
//! - This is the fence you will use to submit the copies to, this allows freeing up space in stagingBuffer when the fence is signalled, indicating that the copy has finished.
235306
//! - This fence will be in `UNSIGNALED` state after exiting the function. (It will reset after each implicit submit)
236307
//! - This fence may be used for CommandBuffer submissions using `submissionQueue` inside the function.
@@ -249,31 +320,26 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
249320
//! * submissionFence must point to a valid IGPUFence
250321
//! * submissionFence must be in `UNSIGNALED` state
251322
//! ** IUtility::getDefaultUpStreamingBuffer()->cull_frees() should be called before resetting the submissionFence and after fence is signaled.
252-
[[nodiscard("Use The New IQueue::SubmitInfo")]] inline IQueue::SSubmitInfo updateBufferRangeViaStagingBuffer(
253-
const asset::SBufferRange<IGPUBuffer>& bufferRange, const void* data,
254-
IQueue* submissionQueue, IGPUFence* submissionFence, IQueue::SSubmitInfo intendedNextSubmit
255-
)
323+
inline bool updateBufferRangeViaStagingBuffer(SIntendedSubmitInfo& nextSubmit, const asset::SBufferRange<IGPUBuffer>& bufferRange, const void* data)
256324
{
257-
if(!intendedNextSubmit.isValid() || intendedNextSubmit.commandBufferCount <= 0u)
325+
if (!bufferRange.isValid() || !bufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_DST_BIT))
258326
{
259-
// TODO: log error -> intendedNextSubmit is invalid
260-
assert(false);
261-
return intendedNextSubmit;
327+
m_logger.log("Invalid `bufferRange` or buffer has no `EUF_TRANSFER_DST_BIT` usage flag, cannot `updateBufferRangeViaStagingBuffer`!", system::ILogger::ELL_ERROR);
328+
return false;
329+
}
330+
331+
if (!nextSubmit.valid())
332+
{
333+
m_logger.log(nextSubmit.ErrorText,system::ILogger::ELL_ERROR);
334+
return false;
262335
}
263336

264337
const auto& limits = m_device->getPhysicalDevice()->getLimits();
265-
const uint32_t optimalTransferAtom = limits.maxResidentInvocations*sizeof(uint32_t);
266-
267-
// Use the last command buffer in intendedNextSubmit, it should be in recording state
268-
auto& cmdbuf = intendedNextSubmit.commandBuffers[intendedNextSubmit.commandBufferCount-1];
269-
auto* cmdpool = cmdbuf->getPool();
270-
assert(cmdbuf->isResettable());
271-
assert(cmdpool->getQueueFamilyIndex() == submissionQueue->getFamilyIndex());
272-
assert(cmdbuf->getRecordingFlags().hasFlags(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT));
273-
assert(bufferRange.buffer->getCreationParams().usage.hasFlags(asset::IBuffer::EUF_TRANSFER_DST_BIT));
338+
const uint32_t optimalTransferAtom = limits.maxResidentInvocations * sizeof(uint32_t);
274339

340+
auto cmdbuf = nextSubmit.getScratchCommandBuffer();
275341
// no pipeline barriers necessary because write and optional flush happens before submit, and memory allocation is reclaimed after fence signal
276-
for (size_t uploadedSize = 0ull; uploadedSize < bufferRange.size;)
342+
for (size_t uploadedSize=0ull; uploadedSize<bufferRange.size;)
277343
{
278344
// how much hasn't been uploaded yet
279345
const size_t size = bufferRange.size-uploadedSize;
@@ -295,46 +361,28 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
295361
// keep trying again
296362
if (localOffset == StreamingTransientDataBufferMT<>::invalid_value)
297363
{
298-
// but first sumbit the already buffered up copies
299-
cmdbuf->end();
300-
IQueue::SSubmitInfo submit = intendedNextSubmit;
301-
submit.signalSemaphoreCount = 0u;
302-
submit.pSignalSemaphores = nullptr;
303-
assert(submit.isValid());
304-
submissionQueue->submit(1u, &submit, submissionFence);
305-
m_device->blockForFences(1u, &submissionFence);
306-
intendedNextSubmit.commandBufferCount = 1u;
307-
intendedNextSubmit.commandBuffers = &cmdbuf;
308-
intendedNextSubmit.waitSemaphoreCount = 0u;
309-
intendedNextSubmit.pWaitSemaphores = nullptr;
310-
intendedNextSubmit.pWaitDstStageMask = nullptr;
311-
// before resetting we need poll all events in the allocator's deferred free list
312-
m_defaultUploadBuffer->cull_frees();
313-
// we can reset the fence and commandbuffer because we fully wait for the GPU to finish here
314-
m_device->resetFences(1u, &submissionFence);
315-
cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT);
316-
cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);
364+
nextSubmit.overflowSubmit();
317365
continue;
318366
}
319367
// some platforms expose non-coherent host-visible GPU memory, so writes need to be flushed explicitly
320368
if (m_defaultUploadBuffer.get()->needsManualFlushOrInvalidate())
321369
{
322-
auto flushRange = AlignedMappedMemoryRange(m_defaultUploadBuffer.get()->getBuffer()->getBoundMemory(),localOffset,subSize,limits.nonCoherentAtomSize);
370+
auto flushRange = AlignedMappedMemoryRange(m_defaultUploadBuffer.get()->getBuffer()->getBoundMemory().memory,localOffset,subSize,limits.nonCoherentAtomSize);
323371
m_device->flushMappedMemoryRanges(1u,&flushRange);
324372
}
325373
// after we make sure writes are in GPU memory (visible to GPU) and not still in a cache, we can copy using the GPU to device-only memory
326374
IGPUCommandBuffer::SBufferCopy copy;
327375
copy.srcOffset = localOffset;
328-
copy.dstOffset = bufferRange.offset + uploadedSize;
376+
copy.dstOffset = bufferRange.offset+uploadedSize;
329377
copy.size = subSize;
330378
cmdbuf->copyBuffer(m_defaultUploadBuffer.get()->getBuffer(), bufferRange.buffer.get(), 1u, &copy);
331-
// this doesn't actually free the memory, the memory is queued up to be freed only after the GPU fence/event is signalled
332-
m_defaultUploadBuffer.get()->multi_deallocate(1u,&localOffset,&allocationSize,core::smart_refctd_ptr<IGPUFence>(submissionFence),&cmdbuf); // can queue with a reset but not yet pending fence, just fine
379+
// this doesn't actually free the memory, the memory is queued up to be freed only after the `scratchSemaphore` reaches a value a future submit will signal
380+
m_defaultUploadBuffer.get()->multi_deallocate(1u,&localOffset,&allocationSize,nextSubmit.getScratchSemaphoreNextWait(),&cmdbuf);
333381
uploadedSize += subSize;
334382
}
335-
return intendedNextSubmit;
383+
return true;
336384
}
337-
385+
#if 0
338386
//! This function is a specialization of the `updateBufferRangeViaStagingBuffer` function above.
339387
//! Submission of the commandBuffer to submissionQueue happens automatically, no need for the user to handle submit
340388
//! WARNING: Don't use this function in hot loops or to do batch updates, it's merely a convenience for one-off uploads
@@ -373,7 +421,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
373421
}
374422

375423
//! This function is a specialization of the `updateBufferRangeViaStagingBufferAutoSubmit` function above.
376-
//! Additionally waits for the fence
424+
//! Additionally waits for the upload right away
377425
//! WARNING: This function blocks CPU and stalls the GPU!
378426
inline void updateBufferRangeViaStagingBufferAutoSubmit(
379427
const asset::SBufferRange<IGPUBuffer>& bufferRange, const void* data,
@@ -391,10 +439,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
391439
updateBufferRangeViaStagingBufferAutoSubmit(bufferRange, data, submissionQueue, fence.get(), submitInfo);
392440
m_device->blockForFences(1u, &fence.get());
393441
}
394-
442+
#endif
395443

396444
// pipelineBarrierAutoSubmit?
397-
#endif
398445

399446
// --------------
400447
// downloadBufferRangeViaStagingBuffer

src/nbl/video/utilities/IUtilities.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44

55
namespace nbl::video
66
{
7+
// Diagnostic text logged by `IUtilities::updateBufferRangeViaStagingBuffer` when `SIntendedSubmitInfo::valid()` returns false.
// NOTE(review): `valid()` also rejects a null `queue`, which is not listed among the reasons below.
const char* IUtilities::SIntendedSubmitInfo::ErrorText = R"===(Invalid `IUtilities::SIntendedSubmitInfo`, possible reasons are:
8+
- No `commandBuffers` or `signalSemaphores` given in respective spans
9+
- `commandBuffer.back()` is not Resettable
10+
- `commandBuffer.back()` is not already begun with ONE_TIME_SUBMIT_BIT
11+
- one of the `commandBuffer`s' Pool's Queue Family Index doesn't match `queue`'s Family
12+
)===";
13+
714
#if 0 // TODO: port
815
IQueue::SSubmitInfo IUtilities::updateImageViaStagingBuffer(
916
asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, asset::IImage::LAYOUT currentDstImageLayout, const core::SRange<const asset::IImage::SBufferCopy>& regions,

0 commit comments

Comments
 (0)