Skip to content

Commit e700683

Browse files
Fix a huge bug in the downloadBufferRangeViaStagingBufferAutoSubmit's data consumption callback invocation.
Also other little QoL things
1 parent 8443812 commit e700683

File tree

8 files changed

+71
-79
lines changed

8 files changed

+71
-79
lines changed

include/nbl/asset/IDescriptorSetLayout.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,9 @@ class IDescriptorSetLayout : public virtual core::IReferenceCounted
278278
{
279279
bindings[i].binding = i;
280280
bindings[i].type = type;
281-
bindings[i].count = counts ? counts[i]:1u;
281+
bindings[i].createFlags = SBinding::E_CREATE_FLAGS::ECF_NONE;
282282
bindings[i].stageFlags = stageAccessFlags ? stageAccessFlags[i]:asset::IShader::ESS_ALL;
283+
bindings[i].count = counts ? counts[i]:1u;
283284
bindings[i].samplers = nullptr;
284285
}
285286
}

include/nbl/video/alloc/CAsyncSingleBufferSubAllocator.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -105,13 +105,13 @@ class CAsyncSingleBufferSubAllocator
105105

106106
inline void operator()()
107107
{
108+
auto* const objHoldIt = reinterpret_cast<core::smart_refctd_ptr<const core::IReferenceCounted>*>(rangeData+numAllocs*2u);
109+
for (size_t i=0u; i<numAllocs; i++)
110+
objHoldIt[i] = nullptr;
108111
#ifdef _NBL_DEBUG
109112
assert(composed && rangeData);
110113
#endif // _NBL_DEBUG
111114
composed->multi_deallocate(numAllocs,rangeData,rangeData+numAllocs);
112-
auto* const objHoldIt = reinterpret_cast<core::smart_refctd_ptr<const core::IReferenceCounted>*>(rangeData+numAllocs*2u);
113-
for (size_t i=0u; i<numAllocs; i++)
114-
objHoldIt[i] = nullptr;
115115
}
116116

117117
private:
@@ -133,14 +133,14 @@ class CAsyncSingleBufferSubAllocator
133133
inline IGPUBuffer* getBuffer() {return m_composed.getBuffer();}
134134
inline const IGPUBuffer* getBuffer() const {return m_composed.getBuffer();}
135135

136-
//!
137-
inline void cull_frees() noexcept
136+
//! Returns free events still outstanding
137+
inline uint32_t cull_frees() noexcept
138138
{
139139
#ifdef _NBL_DEBUG
140140
std::unique_lock<std::recursive_mutex> tLock(stAccessVerfier,std::try_to_lock_t());
141141
assert(tLock.owns_lock());
142142
#endif // _NBL_DEBUG
143-
deferredFrees.cullEvents(0u);
143+
return deferredFrees.cullEvents(0u);
144144
}
145145

146146
//! Returns max possible currently allocatable single allocation size, without having to wait for GPU more

include/nbl/video/alloc/StreamingTransientDataBuffer.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ class StreamingTransientDataBuffer
5959
inline void* getBufferPointer() noexcept {return getBuffer()->getBoundMemory()->getMappedPointer();}
6060

6161
//
62-
inline void cull_frees() noexcept {m_composed.cull_frees();}
62+
inline uint32_t cull_frees() noexcept {return m_composed.cull_frees();}
6363

6464
//
6565
inline size_type max_size() noexcept {return m_composed.max_size();}
@@ -158,11 +158,12 @@ class StreamingTransientDataBufferMT : public core::IReferenceCounted
158158
}
159159

160160
//! you should really `this->get_lock()` if you need the guarantee that you'll be able to allocate a block of this size!
161-
inline void cull_frees() noexcept
161+
inline uint32_t cull_frees() noexcept
162162
{
163163
lock.lock();
164-
m_composed.cull_frees();
164+
auto retval = m_composed.cull_frees();
165165
lock.unlock();
166+
return retval;
166167
}
167168

168169
//! you should really `this->get_lock()` if you need the guarantee that you'll be able to allocate a block of this size!

include/nbl/video/alloc/SubAllocatedDataBuffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,13 @@ class SubAllocatedDataBuffer : protected core::impl::FriendOfHeterogenousMemoryA
195195
}
196196

197197
//!
198-
inline void cull_frees() noexcept
198+
inline uint32_t cull_frees() noexcept
199199
{
200200
#ifdef _NBL_DEBUG
201201
std::unique_lock<std::recursive_mutex> tLock(stAccessVerfier,std::try_to_lock_t());
202202
assert(tLock.owns_lock());
203203
#endif // _NBL_DEBUG
204-
deferredFrees.cullEvents(0u);
204+
return deferredFrees.cullEvents(0u);
205205
}
206206

207207
//! Returns max possible currently allocatable single allocation size, without having to wait for GPU more

include/nbl/video/utilities/ICommandPoolCache.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class ICommandPoolCache : public core::IReferenceCounted
5757
if (fence)
5858
m_deferredResets.addEvent(GPUEventWrapper(device,std::move(fence)),DeferredCommandPoolResetter(this,poolIx));
5959
else
60-
releaseSet(device,poolIx);
60+
releaseSet(poolIx);
6161
}
6262

6363
// only public because GPUDeferredEventHandlerST needs to know about it
@@ -118,15 +118,12 @@ class ICommandPoolCache : public core::IReferenceCounted
118118
delete[] m_cache;
119119
}
120120

121-
void releaseSet(ILogicalDevice* device, const uint32_t poolIx);
121+
void releaseSet(const uint32_t poolIx);
122122

123123
core::smart_refctd_ptr<IGPUCommandPool>* m_cache;
124124
void* m_reserved;
125125
CommandPoolAllocator m_cmdPoolAllocator;
126126
GPUDeferredEventHandlerST<DeferredCommandPoolResetter> m_deferredResets;
127-
// TODO: after CommandPool resetting, get rid of these
128-
const uint32_t m_queueFamilyIx;
129-
const IGPUCommandPool::E_CREATE_FLAGS m_flags;
130127
};
131128

132129
}

include/nbl/video/utilities/IUtilities.h

Lines changed: 49 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,47 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
600600
void* m_dstPtr;
601601
};
602602

603+
//! Used in downloadBufferRangeViaStagingBuffer multi_deallocate objectsToHold,
604+
//! Calls the std::function callback in the destructor, because the allocator will hold on to this object and drop it when it's safe (fence is signalled and submit has finished)
605+
class CDownstreamingDataConsumer final : public core::IReferenceCounted
606+
{
607+
public:
608+
CDownstreamingDataConsumer(
609+
const IDeviceMemoryAllocation::MemoryRange& copyRange,
610+
const std::function<data_consumption_callback_t>& consumeCallback,
611+
core::smart_refctd_ptr<IGPUCommandBuffer>&& cmdBuffer,
612+
StreamingTransientDataBufferMT<>* downstreamingBuffer,
613+
size_t dstOffset=0
614+
) : m_copyRange(copyRange)
615+
, m_consumeCallback(consumeCallback)
616+
, m_cmdBuffer(core::smart_refctd_ptr<IGPUCommandBuffer>(cmdBuffer))
617+
, m_downstreamingBuffer(downstreamingBuffer)
618+
, m_dstOffset(dstOffset)
619+
{}
620+
621+
~CDownstreamingDataConsumer()
622+
{
623+
assert(m_downstreamingBuffer);
624+
auto device = const_cast<ILogicalDevice*>(m_downstreamingBuffer->getBuffer()->getOriginDevice());
625+
if (m_downstreamingBuffer->needsManualFlushOrInvalidate())
626+
{
627+
const auto nonCoherentAtomSize = device->getPhysicalDevice()->getLimits().nonCoherentAtomSize;
628+
auto flushRange = AlignedMappedMemoryRange(m_downstreamingBuffer->getBuffer()->getBoundMemory(), m_copyRange.offset, m_copyRange.length, nonCoherentAtomSize);
629+
device->invalidateMappedMemoryRanges(1u, &flushRange);
630+
}
631+
// Call the function
632+
const uint8_t* copySrc = reinterpret_cast<uint8_t*>(m_downstreamingBuffer->getBufferPointer()) + m_copyRange.offset;
633+
m_consumeCallback(m_dstOffset, copySrc, m_copyRange.length);
634+
}
635+
636+
private:
637+
const IDeviceMemoryAllocation::MemoryRange m_copyRange;
638+
std::function<data_consumption_callback_t> m_consumeCallback;
639+
const core::smart_refctd_ptr<const IGPUCommandBuffer> m_cmdBuffer; // because the command buffer submitting the copy shouldn't go out of scope while the copy isn't finished
640+
StreamingTransientDataBufferMT<>* m_downstreamingBuffer;
641+
const size_t m_dstOffset;
642+
};
643+
603644
//! Calls the callback to copy the data to a destination Offset
604645
//! * IMPORTANT: To make the copies ready, IUtility::getDefaultDownStreamingBuffer()->cull_frees() should be called after the `submissionFence` is signaled.
605646
//! If the allocation from staging memory fails due to large image size or fragmentation, then this function may need to submit the command buffer via the `submissionQueue` and then signal the fence.
@@ -680,8 +721,13 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
680721
copy.size = copySize;
681722
cmdbuf->copyBuffer(srcBufferRange.buffer.get(), m_defaultDownloadBuffer.get()->getBuffer(), 1u, &copy);
682723

683-
auto dataConsumer = core::make_smart_refctd_ptr<CDownstreamingDataConsumer>(downloadedSize, IDeviceMemoryAllocation::MemoryRange(localOffset, copySize), consumeCallback, cmdbuf, m_defaultDownloadBuffer.get(), core::smart_refctd_ptr(m_device));
684-
724+
auto dataConsumer = core::make_smart_refctd_ptr<CDownstreamingDataConsumer>(
725+
IDeviceMemoryAllocation::MemoryRange(localOffset,copySize),
726+
consumeCallback,
727+
core::smart_refctd_ptr<IGPUCommandBuffer>(cmdbuf),
728+
m_defaultDownloadBuffer.get(),
729+
downloadedSize
730+
);
685731
m_defaultDownloadBuffer.get()->multi_deallocate(1u, &localOffset, &allocationSize, core::smart_refctd_ptr<IGPUFence>(submissionFence), &dataConsumer.get());
686732

687733
downloadedSize += copySize;
@@ -958,63 +1004,14 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
9581004
core::smart_refctd_ptr<IGPUCommandBuffer> m_newCommandBuffer; // if necessary, then need to hold reference to.
9591005
};
9601006

961-
//! Used in downloadBufferRangeViaStagingBuffer multi_deallocate objectsToHold,
962-
//! Calls the std::function callback in destructor because allocator will hold on to this object and drop it when it's safe (fence is singnalled and submit has finished)
963-
class CDownstreamingDataConsumer final : public core::IReferenceCounted
964-
{
965-
public:
966-
CDownstreamingDataConsumer(
967-
size_t dstOffset,
968-
const IDeviceMemoryAllocation::MemoryRange& copyRange,
969-
const std::function<data_consumption_callback_t>& consumeCallback,
970-
IGPUCommandBuffer* cmdBuffer,
971-
StreamingTransientDataBufferMT<>* downstreamingBuffer,
972-
core::smart_refctd_ptr<ILogicalDevice>&& device
973-
)
974-
: m_dstOffset(dstOffset)
975-
, m_copyRange(copyRange)
976-
, m_consumeCallback(consumeCallback)
977-
, m_cmdBuffer(core::smart_refctd_ptr<IGPUCommandBuffer>(cmdBuffer))
978-
, m_downstreamingBuffer(downstreamingBuffer)
979-
, m_device(std::move(device))
980-
{}
981-
982-
~CDownstreamingDataConsumer()
983-
{
984-
if (m_downstreamingBuffer != nullptr)
985-
{
986-
if (m_downstreamingBuffer->needsManualFlushOrInvalidate())
987-
{
988-
const auto nonCoherentAtomSize = m_device->getPhysicalDevice()->getLimits().nonCoherentAtomSize;
989-
auto flushRange = AlignedMappedMemoryRange(m_downstreamingBuffer->getBuffer()->getBoundMemory(), m_copyRange.offset, m_copyRange.length, nonCoherentAtomSize);
990-
m_device->invalidateMappedMemoryRanges(1u, &flushRange);
991-
}
992-
// Call the function
993-
const uint8_t* copySrc = reinterpret_cast<uint8_t*>(m_downstreamingBuffer->getBufferPointer()) + m_copyRange.offset;
994-
m_consumeCallback(m_dstOffset, copySrc, m_copyRange.length);
995-
}
996-
else
997-
{
998-
assert(false);
999-
}
1000-
}
1001-
private:
1002-
const size_t m_dstOffset;
1003-
const IDeviceMemoryAllocation::MemoryRange m_copyRange;
1004-
std::function<data_consumption_callback_t> m_consumeCallback;
1005-
core::smart_refctd_ptr<ILogicalDevice> m_device;
1006-
const core::smart_refctd_ptr<const IGPUCommandBuffer> m_cmdBuffer; // because command buffer submiting the copy shouldn't go out of scope when copy isn't finished
1007-
StreamingTransientDataBufferMT<>* m_downstreamingBuffer = nullptr;
1008-
};
1009-
10101007
core::smart_refctd_ptr<ILogicalDevice> m_device;
10111008

10121009
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultDownloadBuffer;
10131010
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultUploadBuffer;
10141011

10151012
core::smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
10161013
core::smart_refctd_ptr<CScanner> m_scanner;
1017-
};
1014+
};
10181015

10191016
class ImageRegionIterator
10201017
{

src/nbl/video/utilities/ICommandPoolCache.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,22 @@ using namespace video;
77

88

99
ICommandPoolCache::ICommandPoolCache(ILogicalDevice* device, const uint32_t queueFamilyIx, const IGPUCommandPool::E_CREATE_FLAGS _flags, const uint32_t capacity)
10-
: m_reserved(malloc(CommandPoolAllocator::reserved_size(1u,capacity,1u))), m_cmdPoolAllocator(m_reserved,0u,0u,1u,capacity,1u), m_deferredResets(), m_queueFamilyIx(queueFamilyIx), m_flags(_flags)
10+
: m_reserved(malloc(CommandPoolAllocator::reserved_size(1u,capacity,1u))), m_cmdPoolAllocator(m_reserved,0u,0u,1u,capacity,1u), m_deferredResets()
1111
{
1212
m_cache = new core::smart_refctd_ptr<IGPUCommandPool>[capacity];
1313
for (auto i=0u; i<getCapacity(); i++)
14-
m_cache[i] = device->createCommandPool(m_queueFamilyIx,m_flags);
14+
m_cache[i] = device->createCommandPool(queueFamilyIx,_flags);
1515
}
1616

17-
void ICommandPoolCache::releaseSet(ILogicalDevice* device, const uint32_t poolIx)
17+
void ICommandPoolCache::releaseSet(const uint32_t poolIx)
1818
{
19-
// TODO: support CommandPool resetting!
20-
//m_cache[m_poolIx]->reset();
21-
// temporary workaround
22-
m_cache[poolIx] = device->createCommandPool(m_queueFamilyIx,m_flags);
23-
m_cmdPoolAllocator.free_addr(poolIx,1u);
19+
m_cache[poolIx]->reset();
2420
}
2521

2622
void ICommandPoolCache::DeferredCommandPoolResetter::operator()()
2723
{
2824
#ifdef _NBL_DEBUG
2925
assert(m_cache && m_poolIx<m_cache->getCapacity());
3026
#endif // _NBL_DEBUG
31-
m_cache->releaseSet(const_cast<ILogicalDevice*>(m_cache->m_cache[m_poolIx]->getOriginDevice()),m_poolIx);
27+
m_cache->releaseSet(m_poolIx);
3228
}

0 commit comments

Comments
 (0)