@@ -34,24 +34,27 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
34
34
memcpy (m_addresses->data (), addresses, count * sizeof (value_type));
35
35
}
36
36
37
- // Just does the de-allocation
38
- inline void operator ()()
37
+ // Upper bound on the number of descriptors this functor can free (one per stored address)
38
+ inline auto getWorstCaseCount () const {return m_addresses->size ();}
39
+
40
+ // Just does the de-allocation, note that the parameter is a reference
41
+ inline void operator ()(IGPUDescriptorSet::SDropDescriptorSet* &outNullify)
39
42
{
40
- // isn't assert already debug-only?
41
43
#ifdef _NBL_DEBUG
42
44
assert (m_composed);
43
45
#endif // _NBL_DEBUG
44
- m_composed->multi_deallocate (m_binding, m_addresses->size (), m_addresses->data ());
46
+ outNullify = m_composed->multi_deallocate (outNullify, m_binding, m_addresses->size (), m_addresses->data ());
47
+ m_composed->m_totalDeferredFrees -= getWorstCaseCount ();
45
48
}
46
49
47
50
// Takes count of allocations we want to free up as reference, true is returned if
48
51
// the amount of allocations freed was >= allocationsToFreeUp
49
52
// False is returned if there are more allocations to free up
50
- inline bool operator ()(size_type& allocationsToFreeUp)
53
+ inline bool operator ()(size_type& allocationsToFreeUp, IGPUDescriptorSet::SDropDescriptorSet* &outNullify )
51
54
{
52
- auto prevCount = m_addresses-> size () ;
53
- operator ()();
54
- auto totalFreed = m_addresses-> size () - prevCount ;
55
+ auto prevNullify = outNullify ;
56
+ operator ()(outNullify );
57
+ auto totalFreed = outNullify-prevNullify ;
55
58
56
59
// This does the same logic as bool operator()(size_type&) on
57
60
// CAsyncSingleBufferSubAllocator
@@ -63,7 +66,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
63
66
}
64
67
protected:
65
68
core::smart_refctd_dynamic_array<value_type> m_addresses;
66
- SubAllocatedDescriptorSet* m_composed;
69
+ SubAllocatedDescriptorSet* m_composed; // TODO: shouldn't be called `composed`, maybe `parent` or something
67
70
uint32_t m_binding;
68
71
};
69
72
using EventHandler = MultiTimelineEventHandlerST<DeferredFreeFunctor>;
@@ -93,6 +96,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
93
96
std::map<uint32_t , SubAllocDescriptorSetRange> m_allocatableRanges = {};
94
97
core::smart_refctd_ptr<video::IGPUDescriptorSet> m_descriptorSet;
95
98
core::smart_refctd_ptr<video::ILogicalDevice> m_logicalDevice;
99
+ value_type m_totalDeferredFrees = 0 ;
96
100
97
101
#ifdef _NBL_DEBUG
98
102
std::recursive_mutex stAccessVerfier;
@@ -141,7 +145,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
141
145
m_descriptorSet = std::move (descriptorSet);
142
146
}
143
147
144
- ~SubAllocatedDescriptorSet ()
148
+ inline ~SubAllocatedDescriptorSet ()
145
149
{
146
150
for (uint32_t i = 0 ; i < m_allocatableRanges.size (); i++)
147
151
{
@@ -155,9 +159,9 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
155
159
}
156
160
157
161
// whether that binding index can be sub-allocated
158
- bool isBindingAllocatable (uint32_t binding) { return m_allocatableRanges.find (binding) != m_allocatableRanges.end (); }
162
+ inline bool isBindingAllocatable (uint32_t binding) { return m_allocatableRanges.find (binding) != m_allocatableRanges.end (); }
159
163
160
- AddressAllocator* getBindingAllocator (uint32_t binding)
164
+ inline AddressAllocator* getBindingAllocator (uint32_t binding)
161
165
{
162
166
auto range = m_allocatableRanges.find (binding);
163
167
// Check if this binding has an allocator
@@ -169,36 +173,26 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
169
173
// main methods
170
174
171
175
#ifdef _NBL_DEBUG
172
- std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard ()
176
+ inline std::unique_lock<std::recursive_mutex> stAccessVerifyDebugGuard ()
173
177
{
174
178
std::unique_lock<std::recursive_mutex> tLock (stAccessVerfier,std::try_to_lock_t ());
175
179
assert (tLock.owns_lock ());
176
180
return tLock;
177
181
}
178
182
#else
179
- bool stAccessVerifyDebugGuard () { return false ; }
183
+ inline bool stAccessVerifyDebugGuard () { return false ; }
180
184
#endif
181
185
182
- video::IGPUDescriptorSet* getDescriptorSet () { return m_descriptorSet.get (); }
186
+ inline video::IGPUDescriptorSet* getDescriptorSet () { return m_descriptorSet.get (); }
183
187
184
188
// ! Warning `outAddresses` needs to be primed with `invalid_value` values, otherwise no allocation happens for elements not equal to `invalid_value`
185
- inline size_type try_multi_allocate (
186
- uint32_t binding,
187
- size_type count,
188
- video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
189
- video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
190
- value_type* outAddresses
191
- )
189
+ inline size_type try_multi_allocate (const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
192
190
{
193
191
auto debugGuard = stAccessVerifyDebugGuard ();
194
192
193
+ // we assume you've validated that the binding is allocatable before trying this
195
194
auto allocator = getBindingAllocator (binding);
196
195
197
- std::vector<video::IGPUDescriptorSet::SWriteDescriptorSet> writes;
198
- std::vector<video::IGPUDescriptorSet::SDescriptorInfo> infos;
199
- writes.reserve (count);
200
- infos.reserve (count);
201
-
202
196
size_type unallocatedSize = 0u ;
203
197
for (size_type i=0 ; i<count; i++)
204
198
{
@@ -211,35 +205,13 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
211
205
unallocatedSize = count - i;
212
206
break ;
213
207
}
214
-
215
- auto & descriptor = descriptors[i];
216
-
217
- video::IGPUDescriptorSet::SWriteDescriptorSet write;
218
- {
219
- write.dstSet = m_descriptorSet.get ();
220
- write.binding = binding;
221
- write.arrayElement = outAddresses[i];
222
- write.count = 1u ;
223
- // descriptors could be a const pointer, but the problem is that this pointer in
224
- // SWriteDescriptorSet.info isn't const
225
- // can we change it?
226
- write.info = &descriptor;
227
- }
228
- outDescriptorWrites[i] = write;
229
208
}
230
209
231
210
return unallocatedSize;
232
211
}
233
212
234
213
template <class Clock =typename std::chrono::steady_clock>
235
- inline size_type multi_allocate (
236
- const std::chrono::time_point<Clock>& maxWaitPoint,
237
- uint32_t binding,
238
- size_type count,
239
- video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
240
- video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
241
- value_type* outAddresses
242
- ) noexcept
214
+ inline size_type multi_allocate (const std::chrono::time_point<Clock>& maxWaitPoint, const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
243
215
{
244
216
auto debugGuard = stAccessVerifyDebugGuard ();
245
217
@@ -248,76 +220,60 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
248
220
if (range == m_allocatableRanges.end ())
249
221
return count;
250
222
251
- auto & eventHandler = range->second .eventHandler ;
252
-
253
223
// try allocate once
254
- size_type unallocatedSize = try_multi_allocate (binding, count, descriptors, outDescriptorWrites, outAddresses);
224
+ size_type unallocatedSize = try_multi_allocate (binding,count,outAddresses);
255
225
if (!unallocatedSize)
256
226
return 0u ;
257
227
258
228
// then try to wait at least once and allocate
229
+ auto & eventHandler = range->second .eventHandler ;
230
+ core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls (m_totalDeferredFrees);
231
+ auto outNulls = nulls.data ();
259
232
do
260
233
{
261
- eventHandler.wait (maxWaitPoint, unallocatedSize);
262
-
263
- unallocatedSize = try_multi_allocate (
264
- binding,
265
- unallocatedSize,
266
- &descriptors[count - unallocatedSize],
267
- &outDescriptorWrites[count - unallocatedSize],
268
- &outAddresses[count - unallocatedSize]
269
- );
234
+ eventHandler.wait (maxWaitPoint, unallocatedSize, outNulls);
235
+
236
+ // always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps
237
+ unallocatedSize = try_multi_allocate (binding,count,outAddresses);
270
238
if (!unallocatedSize)
271
- return 0u ;
239
+ break ;
272
240
} while (Clock::now ()<maxWaitPoint);
241
+ m_logicalDevice->nullifyDescriptors ({nulls.data (),outNulls});
273
242
274
243
return unallocatedSize;
275
244
}
276
245
277
- inline size_type multi_allocate (
278
- uint32_t binding,
279
- size_type count,
280
- video::IGPUDescriptorSet::SDescriptorInfo* descriptors,
281
- video::IGPUDescriptorSet::SWriteDescriptorSet* outDescriptorWrites,
282
- value_type* outAddresses
283
- ) noexcept
246
+ // default timeout overload
247
+ inline size_type multi_allocate (const uint32_t binding, const size_type count, value_type* outAddresses) noexcept
284
248
{
285
- auto range = m_allocatableRanges.find (binding);
286
- // Check if this binding has an allocator
287
- if (range == m_allocatableRanges.end ())
288
- return count;
289
-
290
- return multi_allocate (TimelineEventHandlerBase::default_wait (), binding, count, descriptors, outDescriptorWrites, outAddresses);
249
+ // check that the binding is allocatable is done inside anyway
250
+ return multi_allocate (TimelineEventHandlerBase::default_wait (), binding, count, outAddresses);
291
251
}
292
252
293
- inline void multi_deallocate (uint32_t binding, size_type count, const size_type* addr)
253
+ // Very explicit low level call you'd need to sync and drop descriptors by yourself, `outNullify` must have room for up to `count` entries
254
+ // Returns: the one-past the last `outNullify` write pointer, this allows you to work out how many descriptors were freed
255
+ inline IGPUDescriptorSet::SDropDescriptorSet* multi_deallocate (IGPUDescriptorSet::SDropDescriptorSet* outNullify, uint32_t binding, size_type count, const size_type* addr)
294
256
{
295
257
auto debugGuard = stAccessVerifyDebugGuard ();
296
258
297
259
auto allocator = getBindingAllocator (binding);
298
- if (!allocator)
299
- return ;
260
+ if (allocator) // no allocator for this binding: nothing is freed and `outNullify` is returned unchanged
300
261
for (size_type i=0 ; i<count; i++)
301
262
{
302
263
if (addr[i]==AddressAllocator::invalid_address)
303
264
continue ;
304
265
305
266
allocator->free_addr (addr[i],1 );
306
- // TODO: should also write something to the descriptor sets
307
- // basically if nullDescriptor device feature is enabled, you would
308
- // indeed write to the DS, else you'd just drop the refcounted references
309
- //
310
- // this needs to be done as a IGPUDescriptorSet::nullify(const uint32_t binding,
311
- // std::span<uint32_t> indices) function + a virtual nullify_impl
312
- video::IGPUDescriptorSet::SDropDescriptorSet dropDescriptorSet;
313
- dropDescriptorSet.dstSet = m_descriptorSet.get ();
314
- dropDescriptorSet.binding = binding;
315
- dropDescriptorSet.arrayElement = i;
316
- dropDescriptorSet.count = 1 ;
317
- m_logicalDevice->nullifyDescriptors ({ &dropDescriptorSet, 1 });
267
+ outNullify->dstSet = m_descriptorSet.get ();
268
+ outNullify->binding = binding;
269
+ outNullify->arrayElement = addr[i]; // the freed address is the array element to drop, not the loop index
270
+ outNullify->count = 1 ;
271
+ outNullify++;
318
272
}
273
+ return outNullify;
319
274
}
320
275
276
+ // 100% will defer
321
277
inline void multi_deallocate (uint32_t binding, const ISemaphore::SWaitInfo& futureWait, DeferredFreeFunctor&& functor) noexcept
322
278
{
323
279
auto range = m_allocatableRanges.find (binding);
@@ -327,26 +283,36 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
327
283
328
284
auto & eventHandler = range->second .eventHandler ;
329
285
auto debugGuard = stAccessVerifyDebugGuard ();
286
+ m_totalDeferredFrees += functor.getWorstCaseCount ();
330
287
eventHandler.latch (futureWait,std::move (functor));
331
288
}
332
289
290
+ // Defers the free if `futureWait` has a semaphore that needs to be waited on; otherwise deallocates and nullifies the descriptors immediately
333
291
inline void multi_deallocate (uint32_t binding, size_type count, const value_type* addr, const ISemaphore::SWaitInfo& futureWait) noexcept
334
292
{
335
293
if (futureWait.semaphore )
336
294
multi_deallocate (binding, futureWait, DeferredFreeFunctor (this , binding, count, addr));
337
295
else
338
- multi_deallocate (binding, count, addr);
296
+ {
297
+ core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls (count);
298
+ auto actualEnd = multi_deallocate (nulls.data (), binding, count, addr);
299
+ m_logicalDevice->nullifyDescriptors ({nulls.data (),actualEnd});
300
+ }
339
301
}
302
+
340
303
// ! Returns free events still outstanding
341
304
inline uint32_t cull_frees () noexcept
342
305
{
343
306
auto debugGuard = stAccessVerifyDebugGuard ();
344
307
uint32_t frees = 0 ;
308
+ core::vector<IGPUDescriptorSet::SDropDescriptorSet> nulls (m_totalDeferredFrees); // worst case: every deferred free completes during this poll
309
+ auto outNulls = nulls.data ();
345
310
for (auto & entry : m_allocatableRanges) // visit existing bindings only, `operator[]` with a running index would insert empty ranges for missing keys
346
311
{
347
312
auto & it = entry.second ;
348
- frees += it.eventHandler .poll ().eventsLeft ;
313
+ frees += it.eventHandler .poll (outNulls ).eventsLeft ;
349
314
}
315
+ m_logicalDevice->nullifyDescriptors ({nulls.data (),outNulls});
350
316
return frees;
351
317
}
352
318
};
0 commit comments