@@ -400,8 +400,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
400
400
template <typename T> requires nbl::is_any_of_v<T,std::conditional_t <std::is_same_v<BufferType,IGPUBuffer>,uint32_t ,BuildRangeInfo>,BuildRangeInfo>
401
401
inline uint32_t valid (const T& buildRangeInfo) const
402
402
{
403
+ uint32_t retval = trackedBLASes.size ();
403
404
if constexpr (std::is_same_v<T,uint32_t >)
404
- return valid<BuildRangeInfo>({.instanceCount =buildRangeInfo,.instanceByteOffset =0 });
405
+ retval += valid<BuildRangeInfo>({.instanceCount =buildRangeInfo,.instanceByteOffset =0 });
405
406
else
406
407
{
407
408
if (IGPUAccelerationStructure::BuildInfo<BufferType>::invalid (srcAS,dstAS))
@@ -444,8 +445,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
444
445
#endif
445
446
446
447
// destination, scratch and instanceData are required, source is optional
447
- return Base::isUpdate ? 4u :3u ;
448
+ retval += Base::isUpdate ? 4u :3u ;
448
449
}
450
+ return retval;
449
451
}
450
452
451
453
inline core::smart_refctd_ptr<const IReferenceCounted>* fillTracking (core::smart_refctd_ptr<const IReferenceCounted>* oit) const
@@ -457,6 +459,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
457
459
458
460
*(oit++) = core::smart_refctd_ptr<const IReferenceCounted>(instanceData.buffer );
459
461
462
+ for (const auto & blas : trackedBLASes)
463
+ *(oit++) = core::smart_refctd_ptr<const IReferenceCounted>(blas);
464
+
460
465
return oit;
461
466
}
462
467
@@ -470,6 +475,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
470
475
// + an array of `PolymorphicInstance` if our `SCreationParams::flags.hasFlags(MOTION_BIT)`, otherwise
471
476
// + an array of `StaticInstance`
472
477
asset::SBufferBinding<const BufferType> instanceData = {};
478
+ // [optional] Provide info about what BLAS references to hold onto after the build. For performance make sure the list is compact (without repeated elements).
479
+ std::span<const IGPUBottomLevelAccelerationStructure*> trackedBLASes = {};
473
480
};
474
481
using DeviceBuildInfo = BuildInfo<IGPUBuffer>;
475
482
using HostBuildInfo = BuildInfo<asset::ICPUBuffer>;
@@ -545,11 +552,71 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
545
552
using HostPolymorphicInstance = PolymorphicInstance<IGPUBottomLevelAccelerationStructure::host_op_ref_t >;
546
553
static_assert (sizeof (DevicePolymorphicInstance)==sizeof (HostPolymorphicInstance));
547
554
555
+ //
556
+ using build_ver_t = uint32_t ;
557
+ // this gets called when execution is sure to happen 100%, e.g. not during command recording but during submission
558
+ inline build_ver_t registerNextBuildVer ()
559
+ {
560
+ return m_pendingBuildVer++;
561
+ }
562
+ //
563
+ using blas_smart_ptr_t = core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>;
564
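+ // returns the completed build version; if `tracked==nullptr` writes the number of tracked BLASes from `first` onwards into `*count`, otherwise writes up to `*count` tracked BLASes starting at `first` into `tracked` and stores the number written in `*count`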
565
+ inline build_ver_t getTrackedBLASes (uint32_t * count, blas_smart_ptr_t * tracked, const uint32_t first=0 ) const
566
+ {
567
+ if (!count)
568
+ return 0 ;
569
+ // stop multiple threads messing with us
570
+ std::lock_guard lk (m_trackingLock);
571
+ const uint32_t toWrite = std::min<uint32_t >(std::max<uint32_t >(m_trackedBLASes.size (),first)-first,tracked ? (*count):0xffFFffFFu );
572
+ *count = toWrite;
573
+ if (tracked && toWrite)
574
+ {
575
+ auto it = m_trackedBLASes.begin ();
576
+ // cmon its an unordered map, iterator should have operator +=
577
+ for (auto i=0 ; i<first; i++)
578
+ it++;
579
+ for (auto i=0 ; i<toWrite; i++)
580
+ *(tracked++) = *(it++);
581
+ }
582
+ return m_completedBuildVer;
583
+ }
584
+ // Useful if TLAS got built externally as well, returns whether the result was set here (false if this or a later build version was already recorded)
585
+ template <typename Iterator>
586
+ inline bool setTrackedBLASes (const Iterator begin, const Iterator end, const build_ver_t buildVer)
587
+ {
588
+ // stop multiple threads messing with us
589
+ std::lock_guard lk (m_trackingLock);
590
+ // stop out of order callbacks
591
+ if (buildVer<=m_completedBuildVer)
592
+ return false ;
593
+ m_completedBuildVer = buildVer;
594
+ // release already tracked BLASes
595
+ m_trackedBLASes.clear ();
596
+ // sanity check, TODO: this should be an atomic_max on the `m_pendingBuildVer`
597
+ if (m_completedBuildVer>m_pendingBuildVer)
598
+ m_pendingBuildVer = m_completedBuildVer;
599
+ // now fill the contents
600
+ m_trackedBLASes.insert (begin,end);
601
+ return true ;
602
+ }
603
+ // a little utility to make sure nothing from this build version and before gets tracked
604
+ inline bool clearTrackedBLASes (const build_ver_t buildVer)
605
+ {
606
+ return setTrackedBLASes<const blas_smart_ptr_t *>(nullptr ,nullptr ,buildVer);
607
+ }
608
+
548
609
protected:
549
610
inline IGPUTopLevelAccelerationStructure (core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params)
550
- : asset::ITopLevelAccelerationStructure<IGPUAccelerationStructure>(std::move(dev),std::move(params)), m_maxInstanceCount(params.maxInstanceCount) {}
611
+ : asset::ITopLevelAccelerationStructure<IGPUAccelerationStructure>(std::move(dev),std::move(params)),
612
+ m_maxInstanceCount(params.maxInstanceCount),m_trackedBLASes() {}
551
613
552
614
const uint32_t m_maxInstanceCount;
615
+ // TODO: maybe replace with new readers/writers lock
616
+ mutable std::mutex m_trackingLock;
617
+ std::atomic<build_ver_t > m_pendingBuildVer = 0 ;
618
+ build_ver_t m_completedBuildVer = 0 ;
619
+ core::unordered_set<blas_smart_ptr_t > m_trackedBLASes;
553
620
};
554
621
555
622
}
0 commit comments