Skip to content

Commit 4322084

Browse files
author
devsh
committed
So we need a mutex per TLAS, a pending build counter, and a completed build counter (the iteration that's currently being stored).
Next are callbacks upon DeferredOperation and CommandBuffer completion
1 parent cca5e09 commit 4322084

File tree

2 files changed

+66
-1
lines changed

2 files changed

+66
-1
lines changed

include/nbl/video/IGPUAccelerationStructure.h

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
470470
// + an array of `PolymorphicInstance` if our `SCreationParams::flags.hasFlags(MOTION_BIT)`, otherwise
471471
// + an array of `StaticInstance`
472472
asset::SBufferBinding<const BufferType> instanceData = {};
473+
// [optional] Provide info about what BLAS references to hold onto after the build
474+
std::span<const IGPUBottomLevelAccelerationStructure*> trackedBLASes = {};
473475
};
474476
using DeviceBuildInfo = BuildInfo<IGPUBuffer>;
475477
using HostBuildInfo = BuildInfo<asset::ICPUBuffer>;
@@ -545,11 +547,66 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
545547
using HostPolymorphicInstance = PolymorphicInstance<IGPUBottomLevelAccelerationStructure::host_op_ref_t>;
546548
static_assert(sizeof(DevicePolymorphicInstance)==sizeof(HostPolymorphicInstance));
547549

550+
//
551+
using build_ver_t = uint32_t;
552+
// this gets called when execution is sure to happen 100%, e.g. not during command recording but during submission
553+
inline build_ver_t nextBuildVer()
554+
{
555+
return m_pendingBuildVer++;
556+
}
557+
//
558+
using blas_smart_ptr_t = core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>;
559+
// returns number of tracked BLASes if `tracked==nullptr` otherwise writes `*count` tracked BLASes from `first` into `*tracked`
560+
inline build_ver_t getTrackedBLASes(uint32_t* count, blas_smart_ptr_t* tracked, const uint32_t first=0) const
561+
{
562+
if (!count)
563+
return 0;
564+
// stop multiple threads messing with us
565+
std::lock_guard lk(m_trackingLock);
566+
const uint32_t toWrite = std::min<uint32_t>(std::max<uint32_t>(m_trackedBLASes.size(),first)-first,tracked ? (*count):0xffFFffFFu);
567+
*count = toWrite;
568+
if (tracked && toWrite)
569+
{
570+
auto it = m_trackedBLASes.begin();
571+
// cmon its an unordered map, iterator should have operator +=
572+
for (auto i=0; i<first; i++)
573+
it++;
574+
for (auto i=0; i<toWrite; i++)
575+
*(tracked++) = *(it++);
576+
}
577+
return m_completedBuildVer;
578+
}
579+
// Useful if TLAS got built externally as well, returns if there were no later builds that preempted us setting the result here
580+
template<typename Iterator>
581+
inline bool setTrackedBLASes(const Iterator begin, const Iterator end, const build_ver_t buildVer)
582+
{
583+
// stop multiple threads messing with us
584+
std::lock_guard lk(m_trackingLock);
585+
// stop out of order callbacks
586+
if (buildVer<=m_completedBuildVer)
587+
return false;
588+
m_completedBuildVer = buildVer;
589+
// release already tracked BLASes
590+
m_trackedBLASes.clear();
591+
// sanity check, TODO: this should be an atomic_max on the `m_pendingBuildVer`
592+
if (m_completedBuildVer>m_pendingBuildVer)
593+
m_pendingBuildVer = m_completedBuildVer;
594+
// now fill the contents
595+
m_trackedBLASes.insert(begin,end);
596+
return true;
597+
}
598+
548599
protected:
549600
inline IGPUTopLevelAccelerationStructure(core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params)
550-
: asset::ITopLevelAccelerationStructure<IGPUAccelerationStructure>(std::move(dev),std::move(params)), m_maxInstanceCount(params.maxInstanceCount) {}
601+
: asset::ITopLevelAccelerationStructure<IGPUAccelerationStructure>(std::move(dev),std::move(params)),
602+
m_maxInstanceCount(params.maxInstanceCount),m_trackedBLASes() {}
551603

552604
const uint32_t m_maxInstanceCount;
605+
// TODO: maybe replace with new readers/writers lock
606+
mutable std::mutex m_trackingLock;
607+
std::atomic<build_ver_t> m_pendingBuildVer = 0;
608+
build_ver_t m_completedBuildVer = 0;
609+
core::unordered_set<blas_smart_ptr_t> m_trackedBLASes;
553610
};
554611

555612
}

include/nbl/video/IGPUCommandBuffer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
267267
inline bool buildAccelerationStructures(const std::span<const IGPUTopLevelAccelerationStructure::DeviceBuildInfo> infos, const IGPUTopLevelAccelerationStructure::DirectBuildRangeRangeInfos buildRangeInfos)
268268
{
269269
if (buildAccelerationStructures_common(infos,buildRangeInfos))
270+
{
270271
return buildAccelerationStructures_impl(infos,buildRangeInfos);
272+
}
271273
return false;
272274
}
273275
// We don't allow different indirect command addresses due to https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pIndirectDeviceAddresses-03646
@@ -303,7 +305,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
303305
if constexpr(std::is_same_v<AccelerationStructure,IGPUBottomLevelAccelerationStructure>)
304306
return buildAccelerationStructuresIndirect_impl(indirectRangeBuffer,infos,pIndirectOffsets,pIndirectStrides,maxPrimitiveOrInstanceCounts,totalGeometryCount);
305307
else
308+
{
306309
return buildAccelerationStructuresIndirect_impl(indirectRangeBuffer,infos,pIndirectOffsets,pIndirectStrides,maxPrimitiveOrInstanceCounts);
310+
}
307311
}
308312
return false;
309313
}
@@ -862,6 +866,10 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
862866
// created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT
863867
// or IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT.
864868
core::unordered_map<const IGPUDescriptorSet*,uint64_t> m_boundDescriptorSetsRecord;
869+
870+
// BLAS reference sets for TLAS builds, used if the user wants the builds to be tracked
871+
core::vector<core::unordered_set<core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>>> m_TLASToBLASReferenceSets;
872+
865873
const IGPUGraphicsPipeline* m_boundGraphicsPipeline;
866874
const IGPUComputePipeline* m_boundComputePipeline;
867875
const IGPURayTracingPipeline* m_boundRayTracingPipeline;

0 commit comments

Comments
 (0)