Skip to content

Commit 2c35289

Browse files
Update SubAllocatedDescriptorSet.h
Some suggestions to optimize and make stuff correct
1 parent 54250a6 commit 2c35289

File tree

1 file changed

+59
-93
lines changed

1 file changed

+59
-93
lines changed

include/nbl/video/alloc/SubAllocatedDescriptorSet.h

Lines changed: 59 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -34,24 +34,27 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
3434
memcpy(m_addresses->data(), addresses, count * sizeof(value_type));
3535
}
3636

37-
// Just does the de-allocation
38-
inline void operator()()
37+
//
38+
inline auto getWorstCaseCount() const {return m_addresses->size();}
39+
40+
// Just does the de-allocation, note that the parameter is a reference
41+
inline void operator()(IGPUDescriptorSet::SDropDescriptorSet* &outNullify)
3942
{
40-
// isn't assert already debug-only?
4143
#ifdef _NBL_DEBUG
4244
assert(m_composed);
4345
#endif // _NBL_DEBUG
44-
m_composed->multi_deallocate(m_binding, m_addresses->size(), m_addresses->data());
46+
outNullify = m_composed->multi_deallocate(outNullify, m_binding, m_addresses->size(), m_addresses->data());
47+
m_composed->m_totalDeferredFrees -= getWorstCaseCount();
4548
}
4649

4750
// Takes count of allocations we want to free up as reference, true is returned if
4851
// the amount of allocations freed was >= allocationsToFreeUp
4952
// False is returned if there are more allocations to free up
50-
inline bool operator()(size_type& allocationsToFreeUp)
53+
inline bool operator()(size_type& allocationsToFreeUp, IGPUDescriptorSet::SDropDescriptorSet* &outNullify)
5154
{
52-
auto prevCount = m_addresses->size();
53-
operator()();
54-
auto totalFreed = m_addresses->size() - prevCount;
55+
auto prevNullify = outNullify;
56+
operator()(outNullify);
57+
auto totalFreed = outNullify-prevNullify;
5558

5659
// This does the same logic as bool operator()(size_type&) on
5760
// CAsyncSingleBufferSubAllocator
@@ -63,7 +66,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
6366
}
6467
protected:
6568
core::smart_refctd_dynamic_array<value_type> m_addresses;
66-
SubAllocatedDescriptorSet* m_composed;
69+
SubAllocatedDescriptorSet* m_composed; // TODO: shouldn't be called `composed`, maybe `parent` or something
6770
uint32_t m_binding;
6871
};
6972
using EventHandler = MultiTimelineEventHandlerST<DeferredFreeFunctor>;
@@ -93,6 +96,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
9396
std::map<uint32_t, SubAllocDescriptorSetRange> m_allocatableRanges = {};
9497
core::smart_refctd_ptr<video::IGPUDescriptorSet> m_descriptorSet;
9598
core::smart_refctd_ptr<video::ILogicalDevice> m_logicalDevice;
99+
value_type m_totalDeferredFrees = 0;
96100

97101
#ifdef _NBL_DEBUG
98102
std::recursive_mutex stAccessVerfier;
@@ -141,7 +145,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
141145
m_descriptorSet = std::move(descriptorSet);
142146
}
143147

144-
~SubAllocatedDescriptorSet()
148+
inline ~SubAllocatedDescriptorSet()
145149
{
146150
for (uint32_t i = 0; i < m_allocatableRanges.size(); i++)
147151
{
@@ -155,9 +159,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
155159
}
156160

157161
// whether that binding index can be sub-allocated
158-
bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); }
162+
inline bool isBindingAllocatable(uint32_t binding) { return m_allocatableRanges.find(binding) != m_allocatableRanges.end(); }
159163

160-
AddressAllocator* getBindingAllocator(uint32_t binding)
164+
inline AddressAllocator* getBindingAllocator(uint32_t binding)
161165
{
162166
auto range = m_allocatableRanges.find(binding);
163167
// Check if this binding has an allocator
@@ -169,36 +173,26 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
169173
// main methods
170174

171175
#ifdef _NBL_DEBUG
172-
std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard()
176+
inline std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard()
173177
{
174178
std::unique_lock<std::recursive_mutex> tLock(stAccessVerfier,std::try_to_lock_t());
175179
assert(tLock.owns_lock());
176180
return tLock;
177181
}
178182
#else
179-
bool stAccessVerifyDebugGuard() { return false; }
183+
inline bool stAccessVerifyDebugGuard() { return false; }
180184
#endif
181185

182-
video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); }
186+
inline video::IGPUDescriptorSet* getDescriptorSet() { return m_descriptorSet.get(); }
183187

184188
//! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value`
185-
inline size_type try_multi_allocate(
186-
uint32_t binding,
187-
size_type count,
188-
video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
189-
video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
190-
value_type* outAddresses
191-
)
189+
inline size_type try_multi_allocate(const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
192190
{
193191
auto debugGuard = stAccessVerifyDebugGuard();
194192

193+
// we assume you've validated that the binding is allocatable before trying this
195194
auto allocator = getBindingAllocator(binding);
196195

197-
std::vector<video::IGPUDescriptorSet::SWriteDescriptorSet> writes;
198-
std::vector<video::IGPUDescriptorSet::SDescriptorInfo> infos;
199-
writes.reserve(count);
200-
infos.reserve(count);
201-
202196
size_type unallocatedSize = 0u;
203197
for (size_type i=0; i<count; i++)
204198
{
@@ -211,35 +205,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
211205
unallocatedSize = count - i;
212206
break;
213207
}
214-
215-
auto& descriptor = descriptors[i];
216-
217-
video::IGPUDescriptorSet::SWriteDescriptorSet write;
218-
{
219-
write.dstSet = m_descriptorSet.get();
220-
write.binding = binding;
221-
write.arrayElement = outAddresses[i];
222-
write.count = 1u;
223-
// descriptors could be a const pointer, but the problem is that this pointer in
224-
// SWriteDescriptorSet.info isn't const
225-
// can we change it?
226-
write.info = &descriptor;
227-
}
228-
outDescriptorWrites[i] = write;
229208
}
230209

231210
return unallocatedSize;
232211
}
233212

234213
template<class Clock=typename std::chrono::steady_clock>
235-
inline size_type multi_allocate(
236-
const std::chrono::time_point<Clock>& maxWaitPoint,
237-
uint32_t binding,
238-
size_type count,
239-
video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
240-
video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
241-
value_type* outAddresses
242-
) noexcept
214+
inline size_type multi_allocate(const std::chrono::time_point<Clock>& maxWaitPoint, const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
243215
{
244216
auto debugGuard = stAccessVerifyDebugGuard();
245217

@@ -248,76 +220,60 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
248220
if (range == m_allocatableRanges.end())
249221
return count;
250222

251-
auto& eventHandler = range->second.eventHandler;
252-
253223
// try allocate once
254-
size_type unallocatedSize = try_multi_allocate(binding, count, descriptors, outDescriptorWrites, outAddresses);
224+
size_type unallocatedSize = try_multi_allocate(binding,count,outAddresses);
255225
if (!unallocatedSize)
256226
return 0u;
257227

258228
// then try to wait at least once and allocate
229+
auto& eventHandler = range->second.eventHandler;
230+
core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls(m_totalDeferredFrees);
231+
auto outNulls = nulls.data();
259232
do
260233
{
261-
eventHandler.wait(maxWaitPoint, unallocatedSize);
262-
263-
unallocatedSize = try_multi_allocate(
264-
binding,
265-
unallocatedSize,
266-
&descriptors[count - unallocatedSize],
267-
&outDescriptorWrites[count - unallocatedSize],
268-
&outAddresses[count - unallocatedSize]
269-
);
234+
eventHandler.wait(maxWaitPoint, unallocatedSize, outNulls);
235+
236+
// always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps
237+
unallocatedSize = try_multi_allocate(binding,count,outAddresses);
270238
if (!unallocatedSize)
271-
return 0u;
239+
break;
272240
} while(Clock::now()<maxWaitPoint);
241+
m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls});
273242

274243
return unallocatedSize;
275244
}
276245

277-
inline size_type multi_allocate(
278-
uint32_t binding,
279-
size_type count,
280-
video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
281-
video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
282-
value_type* outAddresses
283-
) noexcept
246+
// default timeout overload
247+
inline size_type multi_allocate(const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
284248
{
285-
auto range = m_allocatableRanges.find(binding);
286-
// Check if this binding has an allocator
287-
if (range == m_allocatableRanges.end())
288-
return count;
289-
290-
return multi_allocate(TimelineEventHandlerBase::default_wait(), binding, count, descriptors, outDescriptorWrites, outAddresses);
249+
// check that the binding is allocatable is done inside anyway
250+
return multi_allocate(TimelineEventHandlerBase::default_wait(), binding, count, outAddresses);
291251
}
292252

293-
inline void multi_deallocate(uint32_t binding, size_type count, const size_type* addr)
253+
// Very explicit low level call you'd need to sync and drop descriptors by yourself
254+
// Returns: the one-past the last `outNullify` write pointer, this allows you to work out how many descriptors were freed
255+
inline void multi_deallocate(IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const size_type* addr)
294256
{
295257
auto debugGuard = stAccessVerifyDebugGuard();
296258

297259
auto allocator = getBindingAllocator(binding);
298-
if (!allocator)
299-
return;
260+
if (allocator)
300261
for (size_type i=0; i<count; i++)
301262
{
302263
if (addr[i]==AddressAllocator::invalid_address)
303264
continue;
304265

305266
allocator->free_addr(addr[i],1);
306-
// TODO: should also write something to the descriptor sets
307-
// basically if nullDescriptor device feature is enabled, you would
308-
// indeed write to the DS, else you'd just drop the refcounted references
309-
//
310-
// this needs to be done as a IGPUDescriptorSet::nullify(const uint32_t binding,
311-
// std::span<uint32_t> indices) function + a virtual nullify_impl
312-
video::IGPUDescriptorSet::SDropDescriptorSet dropDescriptorSet;
313-
dropDescriptorSet.dstSet = m_descriptorSet.get();
314-
dropDescriptorSet.binding = binding;
315-
dropDescriptorSet.arrayElement = i;
316-
dropDescriptorSet.count = 1;
317-
m_logicalDevice->nullifyDescriptors({ &dropDescriptorSet, 1 });
267+
outNullify->dstSet = m_descriptorSet.get();
268+
outNullify->binding = binding;
269+
outNullify->arrayElement = i;
270+
outNullify->count = 1;
271+
outNullify++;
318272
}
273+
return outNullify;
319274
}
320275

276+
// 100% will defer
321277
inline void multi_deallocate(uint32_t binding, const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept
322278
{
323279
auto range = m_allocatableRanges.find(binding);
@@ -327,26 +283,36 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
327283

328284
auto& eventHandler = range->second.eventHandler;
329285
auto debugGuard = stAccessVerifyDebugGuard();
286+
m_totalDeferredFrees += functor.getWorstCaseCount();
330287
eventHandler.latch(futureWait,std::move(functor));
331288
}
332289

290+
// defers based on the conservative estimation if `futureWait` needs to be waited on, if doesn't will call nullify descriiptors internally immediately
333291
inline void multi_deallocate(uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept
334292
{
335293
if (futureWait.semaphore)
336294
multi_deallocate(binding, futureWait, DeferredFreeFunctor(this, binding, count, addr));
337295
else
338-
multi_deallocate(binding, count, addr);
296+
{
297+
core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls(count);
298+
auto actualEnd = multi_deallocate(nulls.data(), binding, count, addr);
299+
m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd});
300+
}
339301
}
302+
340303
//! Returns free events still outstanding
341304
inline uint32_t cull_frees() noexcept
342305
{
343306
auto debugGuard = stAccessVerifyDebugGuard();
344307
uint32_t frees = 0;
308+
core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls(m_totalDeferredFrees);
309+
auto outNulls = nulls.data();
345310
for (uint32_t i = 0; i < m_allocatableRanges.size(); i++)
346311
{
347312
auto& it = m_allocatableRanges[i];
348-
frees += it.eventHandler.poll().eventsLeft;
313+
frees += it.eventHandler.poll(outNulls).eventsLeft;
349314
}
315+
m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls});
350316
return frees;
351317
}
352318
};

0 commit comments

Comments
 (0)