Skip to content

Commit b84fae1

Browse files
author
devsh
committed
BLASes referenced during a TLAS build (if provided) are recorded into the buildAccelerationStructures command in the pool.
Now it's just time for callbacks. @Crisspl I also added the feature you wanted: `bool IGPUCommandBuffer::recordReferences(const std::span<const IReferenceCounted*> refs);`
1 parent 4322084 commit b84fae1

File tree

4 files changed

+60
-10
lines changed

4 files changed

+60
-10
lines changed

include/nbl/video/IGPUAccelerationStructure.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -400,8 +400,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
400400
template<typename T> requires nbl::is_any_of_v<T,std::conditional_t<std::is_same_v<BufferType,IGPUBuffer>,uint32_t,BuildRangeInfo>,BuildRangeInfo>
401401
inline uint32_t valid(const T& buildRangeInfo) const
402402
{
403+
uint32_t retval = trackedBLASes.size();
403404
if constexpr (std::is_same_v<T,uint32_t>)
404-
return valid<BuildRangeInfo>({.instanceCount=buildRangeInfo,.instanceByteOffset=0});
405+
retval += valid<BuildRangeInfo>({.instanceCount=buildRangeInfo,.instanceByteOffset=0});
405406
else
406407
{
407408
if (IGPUAccelerationStructure::BuildInfo<BufferType>::invalid(srcAS,dstAS))
@@ -444,8 +445,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
444445
#endif
445446

446447
// destination, scratch and instanceData are required, source is optional
447-
return Base::isUpdate ? 4u:3u;
448+
retval += Base::isUpdate ? 4u:3u;
448449
}
450+
return retval;
449451
}
450452

451453
inline core::smart_refctd_ptr<const IReferenceCounted>* fillTracking(core::smart_refctd_ptr<const IReferenceCounted>* oit) const
@@ -457,6 +459,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
457459

458460
*(oit++) = core::smart_refctd_ptr<const IReferenceCounted>(instanceData.buffer);
459461

462+
for (const auto& blas : trackedBLASes)
463+
*(oit++) = core::smart_refctd_ptr<const IReferenceCounted>(blas);
464+
460465
return oit;
461466
}
462467

@@ -470,7 +475,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
470475
// + an array of `PolymorphicInstance` if our `SCreationParams::flags.hasFlags(MOTION_BIT)`, otherwise
471476
// + an array of `StaticInstance`
472477
asset::SBufferBinding<const BufferType> instanceData = {};
473-
// [optional] Provide info about what BLAS references to hold onto after the build
478+
// [optional] Provide info about what BLAS references to hold onto after the build. For performance make sure the list is compact (without repeated elements).
474479
std::span<const IGPUBottomLevelAccelerationStructure*> trackedBLASes = {};
475480
};
476481
using DeviceBuildInfo = BuildInfo<IGPUBuffer>;
@@ -550,7 +555,7 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
550555
//
551556
using build_ver_t = uint32_t;
552557
// this gets called when execution is 100% sure to happen, i.e. not during command recording but during submission
553-
inline build_ver_t nextBuildVer()
558+
inline build_ver_t registerNextBuildVer()
554559
{
555560
return m_pendingBuildVer++;
556561
}
@@ -595,6 +600,11 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
595600
m_trackedBLASes.insert(begin,end);
596601
return true;
597602
}
603+
// a little utility to make sure nothing from this build version and before gets tracked
604+
inline bool clearTrackedBLASes(const build_ver_t buildVer)
605+
{
606+
return setTrackedBLASes<const blas_smart_ptr_t*>(nullptr,nullptr,buildVer);
607+
}
598608

599609
protected:
600610
inline IGPUTopLevelAccelerationStructure(core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params)

include/nbl/video/IGPUCommandBuffer.h

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
267267
inline bool buildAccelerationStructures(const std::span<const IGPUTopLevelAccelerationStructure::DeviceBuildInfo> infos, const IGPUTopLevelAccelerationStructure::DirectBuildRangeRangeInfos buildRangeInfos)
268268
{
269269
if (buildAccelerationStructures_common(infos,buildRangeInfos))
270-
{
271270
return buildAccelerationStructures_impl(infos,buildRangeInfos);
272-
}
273271
return false;
274272
}
275273
// We don't allow different indirect command addresses due to https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkCmdBuildAccelerationStructuresIndirectKHR-pIndirectDeviceAddresses-03646
@@ -305,9 +303,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
305303
if constexpr(std::is_same_v<AccelerationStructure,IGPUBottomLevelAccelerationStructure>)
306304
return buildAccelerationStructuresIndirect_impl(indirectRangeBuffer,infos,pIndirectOffsets,pIndirectStrides,maxPrimitiveOrInstanceCounts,totalGeometryCount);
307305
else
308-
{
309306
return buildAccelerationStructuresIndirect_impl(indirectRangeBuffer,infos,pIndirectOffsets,pIndirectStrides,maxPrimitiveOrInstanceCounts);
310-
}
311307
}
312308
return false;
313309
}
@@ -540,6 +536,9 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
540536
//! Secondary CommandBuffer execute
541537
bool executeCommands(const uint32_t count, IGPUCommandBuffer* const* const cmdbufs);
542538

539+
// in case you want the command buffer to hold onto things as long as it's not RESET
540+
bool recordReferences(const std::span<const IReferenceCounted*> refs);
541+
543542
virtual bool insertDebugMarker(const char* name, const core::vector4df_SIMD& color = core::vector4df_SIMD(1.0, 1.0, 1.0, 1.0)) = 0;
544543
virtual bool beginDebugMarker(const char* name, const core::vector4df_SIMD& color = core::vector4df_SIMD(1.0, 1.0, 1.0, 1.0)) = 0;
545544
virtual bool endDebugMarker() = 0;
@@ -712,6 +711,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
712711
m_state = STATE::INITIAL;
713712

714713
m_boundDescriptorSetsRecord.clear();
714+
m_TLASToBLASReferenceSets.clear();
715715
m_boundGraphicsPipeline= nullptr;
716716
m_boundComputePipeline= nullptr;
717717
m_boundRayTracingPipeline= nullptr;
@@ -729,6 +729,7 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
729729
{
730730
deleteCommandList();
731731
m_boundDescriptorSetsRecord.clear();
732+
m_TLASToBLASReferenceSets.clear();
732733
m_boundGraphicsPipeline= nullptr;
733734
m_boundComputePipeline= nullptr;
734735
m_boundRayTracingPipeline= nullptr;
@@ -862,13 +863,16 @@ class NBL_API2 IGPUCommandBuffer : public IBackendObject
862863
template<typename IndirectCommand> requires nbl::is_any_of_v<IndirectCommand, hlsl::DrawArraysIndirectCommand_t, hlsl::DrawElementsIndirectCommand_t>
863864
bool invalidDrawIndirectCount(const asset::SBufferBinding<const IGPUBuffer>& indirectBinding, const asset::SBufferBinding<const IGPUBuffer>& countBinding, const uint32_t maxDrawCount, const uint32_t stride);
864865

866+
865867
// This bound descriptor set record doesn't include the descriptor sets whose layout has _any_ one of its bindings
866868
// created with IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT
867869
// or IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT.
868870
core::unordered_map<const IGPUDescriptorSet*,uint64_t> m_boundDescriptorSetsRecord;
869871

870-
// If the user wants the builds to be tracking
871-
core::vector<core::unordered_set<core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>>> m_TLASToBLASReferenceSets;
872+
// Used if the user wants the builds to be tracked, i.e. to make the TLAS remember the BLASes that have been built into it.
873+
// NOTE: We know that a TLAS may be rebuilt multiple times per frame on purpose and not only the final BLASes need to be kept alive till submission finishes.
874+
// However, the Command Pool already tracks resources referenced in the Build Infos, so we only need pointers into those records.
875+
core::unordered_map<const IGPUTopLevelAccelerationStructure*,const IGPUTopLevelAccelerationStructure::blas_smart_ptr_t*> m_TLASToBLASReferenceSets;
872876

873877
const IGPUGraphicsPipeline* m_boundGraphicsPipeline;
874878
const IGPUComputePipeline* m_boundComputePipeline;

include/nbl/video/IGPUCommandPool.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,7 @@ class IGPUCommandPool : public IBackendObject
140140
class CBlitImageCmd;
141141
class CCopyImageToBufferCmd;
142142
class CExecuteCommandsCmd;
143+
class CCustomReferenceCmd;
143144
class CWaitEventsCmd;
144145
class CCopyImageCmd;
145146
class CResolveImageCmd;
@@ -686,6 +687,17 @@ class IGPUCommandPool::CExecuteCommandsCmd final : public IVariableSizeCommand<C
686687
}
687688
};
688689

690+
class IGPUCommandPool::CCustomReferenceCmd final : public IVariableSizeCommand<CCustomReferenceCmd>
691+
{
692+
public:
693+
CCustomReferenceCmd(const uint32_t count) : IVariableSizeCommand<CCustomReferenceCmd>(count) {}
694+
695+
static uint32_t calc_resources(const uint32_t count)
696+
{
697+
return count;
698+
}
699+
};
700+
689701
class IGPUCommandPool::CWaitEventsCmd final : public IVariableSizeCommand<CWaitEventsCmd>
690702
{
691703
public:

src/nbl/video/IGPUCommandBuffer.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,7 @@ uint32_t IGPUCommandBuffer::buildAccelerationStructures_common(const std::span<c
816816

817817
if (indirectBuffer)
818818
{
819+
// TODO: maybe hoist the check
819820
if (!features.accelerationStructureIndirectBuild)
820821
{
821822
NBL_LOG_ERROR("'accelerationStructureIndirectBuild' feature not enabled!");
@@ -835,7 +836,12 @@ uint32_t IGPUCommandBuffer::buildAccelerationStructures_common(const std::span<c
835836
if (indirectBuffer)
836837
*(oit++) = core::smart_refctd_ptr<const IGPUBuffer>(indirectBuffer);
837838
for (const auto& info : infos)
839+
{
838840
oit = info.fillTracking(oit);
841+
// we still need to clear the BLAS tracking list if the TLAS has nothing to track
842+
if constexpr (std::is_same_v<DeviceBuildInfo,IGPUTopLevelAccelerationStructure::DeviceBuildInfo>)
843+
m_TLASToBLASReferenceSets[info.dstAS] = info.trackedBLASes.empty() ? nullptr:reinterpret_cast<const IGPUTopLevelAccelerationStructure::blas_smart_ptr_t*>(oit-info.trackedBLASes.size());
844+
}
839845

840846
return totalGeometries;
841847
}
@@ -2066,4 +2072,22 @@ bool IGPUCommandBuffer::executeCommands(const uint32_t count, IGPUCommandBuffer*
20662072
return executeCommands_impl(count,cmdbufs);
20672073
}
20682074

2075+
bool IGPUCommandBuffer::recordReferences(const std::span<const IReferenceCounted*> refs)
2076+
{
2077+
if (!checkStateBeforeRecording(queue_flags_t::COMPUTE_BIT|queue_flags_t::GRAPHICS_BIT|queue_flags_t::TRANSFER_BIT|queue_flags_t::SPARSE_BINDING_BIT))
2078+
return false;
2079+
2080+
auto cmd = m_cmdpool->m_commandListPool.emplace<IGPUCommandPool::CCustomReferenceCmd>(m_commandList,refs.size());
2081+
if (!cmd)
2082+
{
2083+
NBL_LOG_ERROR("out of host memory!");
2084+
return false;
2085+
}
2086+
auto oit = cmd->getVariableCountResources();
2087+
for (const auto& ref : refs)
2088+
*(oit++) = core::smart_refctd_ptr<const core::IReferenceCounted>(ref);
2089+
2090+
return true;
2091+
}
2092+
20692093
}

0 commit comments

Comments
 (0)