Skip to content

Add sub-allocated descriptor sets #657

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
cc54740
Add sub-allocated descriptor set header
deprilula28 Feb 19, 2024
d10059f
Add some reusable binding API
deprilula28 Feb 19, 2024
347ff63
Work on using descriptor set layout directly
deprilula28 Feb 20, 2024
e1282c7
Remove out old bindings
deprilula28 Feb 20, 2024
28611be
Use pool address allocator
deprilula28 Feb 21, 2024
6c6046c
Use map
deprilula28 Feb 21, 2024
190067a
PR reviews
deprilula28 Feb 21, 2024
289e424
PR reviews
deprilula28 Feb 21, 2024
e0e91ff
Work on deferred freeing the descriptors
deprilula28 Feb 26, 2024
68582ea
Work on having descriptor set match with its sub allocator
deprilula28 Feb 26, 2024
d716848
PR reviews
deprilula28 Feb 27, 2024
4fd4b8f
Fix example
deprilula28 Feb 27, 2024
58c4e90
Add writing of descriptors on the allocate method
deprilula28 Feb 27, 2024
41b9a5b
Work on try allocate and timings
deprilula28 Feb 27, 2024
d608e78
Add PR comments
deprilula28 Feb 28, 2024
b326ca7
Work on nullifying descriptors
deprilula28 Feb 29, 2024
894a47c
Keep descriptor writes outside the allocate function
deprilula28 Mar 4, 2024
59c65ae
Include exporting of allocate descriptor writes instead of using them
deprilula28 Mar 4, 2024
1228f5f
Merge branch 'vulkan_1_3' into suballocdescriptorset
deprilula28 Mar 4, 2024
54250a6
Update examples submodule
deprilula28 Mar 4, 2024
2c35289
Update SubAllocatedDescriptorSet.h
devshgraphicsprogramming Mar 5, 2024
a2e2be4
Forgot that the nullification needs to be done between event-wait and…
devshgraphicsprogramming Mar 5, 2024
bc5b22d
PR review and fix compilation errors
deprilula28 Mar 8, 2024
912ed7a
PR reviews & nullifying descriptors
deprilula28 Mar 11, 2024
89e6440
Fix multi timeline functionality
deprilula28 Mar 12, 2024
ede586f
Update example
deprilula28 Mar 12, 2024
5531903
Implement depletion of sub alloc descriptor set
deprilula28 Mar 12, 2024
79c3a23
Fix API for nullify
deprilula28 Mar 13, 2024
6b5630d
Fix tabs & spaces
deprilula28 Mar 13, 2024
aca4c74
More PR reviews
deprilula28 Mar 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions include/nbl/video/IGPUDescriptorSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ class IGPUDescriptorSet : public asset::IDescriptorSet<const IGPUDescriptorSetLa
inline IDescriptorPool* getPool() const { return m_pool.get(); }
inline bool isZombie() const { return (m_pool.get() == nullptr); }

// why is this private? nothing it uses is private
// small utility
inline asset::IDescriptor::E_TYPE getBindingType(const uint32_t binding) const
{
for (auto t=0u; t<static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); t++)
{
const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
const auto& bindingRedirect = getLayout()->getDescriptorRedirect(type);
if (bindingRedirect.getStorageOffset(redirect_t::binding_number_t{binding}).data!=redirect_t::Invalid)
return type;
}
return asset::IDescriptor::E_TYPE::ET_COUNT;
}

protected:
IGPUDescriptorSet(core::smart_refctd_ptr<const IGPUDescriptorSetLayout>&& _layout, core::smart_refctd_ptr<IDescriptorPool>&& pool, IDescriptorPool::SStorageOffsets&& offsets);
virtual ~IGPUDescriptorSet();
Expand All @@ -69,7 +83,7 @@ class IGPUDescriptorSet : public asset::IDescriptorSet<const IGPUDescriptorSetLa
void processWrite(const IGPUDescriptorSet::SWriteDescriptorSet& write, const asset::IDescriptor::E_TYPE type);
bool validateCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy) const;
void processCopy(const IGPUDescriptorSet::SCopyDescriptorSet& copy);
void dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop, const asset::IDescriptor::E_TYPE type);
void dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop);

using redirect_t = IGPUDescriptorSetLayout::CBindingRedirect;
// This assumes that descriptors of a particular type in the set will always be contiguous in pool's storage memory, regardless of which binding in the set they belong to.
Expand All @@ -85,18 +99,6 @@ class IGPUDescriptorSet : public asset::IDescriptorSet<const IGPUDescriptorSetLa

return descriptors+localOffset;
}
// small utility
inline asset::IDescriptor::E_TYPE getBindingType(const uint32_t binding) const
{
for (auto t=0u; t<static_cast<uint32_t>(asset::IDescriptor::E_TYPE::ET_COUNT); t++)
{
const auto type = static_cast<asset::IDescriptor::E_TYPE>(t);
const auto& bindingRedirect = getLayout()->getDescriptorRedirect(type);
if (bindingRedirect.getStorageOffset(redirect_t::binding_number_t{binding}).data!=redirect_t::Invalid)
return type;
}
return asset::IDescriptor::E_TYPE::ET_COUNT;
}

inline core::smart_refctd_ptr<IGPUSampler>* getMutableSamplers(const uint32_t binding) const
{
Expand Down
4 changes: 2 additions & 2 deletions include/nbl/video/ILogicalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
}

// should this be joined together with the existing updateDescriptorSets?
bool nullifyDescriptors(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors, asset::IDescriptor::E_TYPE descriptorType);
bool nullifyDescriptors(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors);

//! Renderpasses and Framebuffers
core::smart_refctd_ptr<IGPURenderpass> createRenderpass(const IGPURenderpass::SCreationParams& params);
Expand Down Expand Up @@ -853,7 +853,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe

// Drops refcounted references of the descriptors in these indices for the descriptor lifetime tracking
// If the nullDescriptor device feature is enabled, this would also write a null descriptor to the descriptor set
virtual void nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) = 0;
virtual void nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors) = 0;

virtual core::smart_refctd_ptr<IGPURenderpass> createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) = 0;
virtual core::smart_refctd_ptr<IGPUFramebuffer> createFramebuffer_impl(IGPUFramebuffer::SCreationParams&& params) = 0;
Expand Down
7 changes: 3 additions & 4 deletions include/nbl/video/alloc/SubAllocatedDescriptorSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
nulls.resize(m_totalDeferredFrees);
auto outNulls = nulls.data();
eventHandler->wait(maxWaitPoint, unallocatedSize, outNulls);
m_logicalDevice->nullifyDescriptors({ nulls.data(),outNulls }, range->second.descriptorType);
m_logicalDevice->nullifyDescriptors({ nulls.data(),outNulls });

// always call with the same parameters, otherwise this turns into a mess with the non invalid_address gaps
unallocatedSize = try_multi_allocate(binding,count,outAddresses);
Expand Down Expand Up @@ -352,7 +352,7 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
auto actualEnd = multi_deallocate(nulls.data(), binding, count, addr);
// This is checked to be valid above
auto range = m_allocatableRanges.find(binding);
m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd}, range->second.descriptorType);
m_logicalDevice->nullifyDescriptors({nulls.data(),actualEnd});
}
}

Expand All @@ -367,9 +367,8 @@ class SubAllocatedDescriptorSet : public core::IReferenceCounted
{
auto& it = m_allocatableRanges[i];
frees += it.eventHandler->poll(outNulls).eventsLeft;
// TODO: this could be optimized to be put outside the loop
m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls}, it.descriptorType);
}
m_logicalDevice->nullifyDescriptors({nulls.data(),outNulls});
return frees;
}
};
Expand Down
64 changes: 35 additions & 29 deletions src/nbl/video/CVulkanLogicalDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,12 +635,15 @@ core::smart_refctd_ptr<IDescriptorPool> CVulkanLogicalDevice::createDescriptorPo
return nullptr;
}

// a lot of empirical research went into defining this constant
constexpr uint32_t MaxDescriptorSetAsWrites = 69u;

void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params)
{
// Each pNext member of any structure (including this one) in the pNext chain must be either NULL or a pointer to a valid instance of
// VkWriteDescriptorSetAccelerationStructureKHR, VkWriteDescriptorSetAccelerationStructureNV, or VkWriteDescriptorSetInlineUniformBlockEXT
core::vector<VkWriteDescriptorSet> vk_writeDescriptorSets(params.writes.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr});
core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});
core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(MaxDescriptorSetAsWrites,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});

core::vector<VkDescriptorBufferInfo> vk_bufferInfos(params.bufferCount);
core::vector<VkDescriptorImageInfo> vk_imageInfos(params.imageCount);
Expand Down Expand Up @@ -727,40 +730,41 @@ void CVulkanLogicalDevice::updateDescriptorSets_impl(const SUpdateDescriptorSets
m_devf.vk.vkUpdateDescriptorSets(m_vkdev,vk_writeDescriptorSets.size(),vk_writeDescriptorSets.data(),vk_copyDescriptorSets.size(),vk_copyDescriptorSets.data());
}

void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> drops, asset::IDescriptor::E_TYPE descriptorType)
void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> drops)
{
if (getEnabledFeatures().nullDescriptor)
{
return
return;
}

core::vector<VkWriteDescriptorSet> vk_writeDescriptorSets(drops.size(),{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,nullptr});
core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(69u,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});
core::vector<VkWriteDescriptorSetAccelerationStructureKHR> vk_writeDescriptorSetAS(MaxDescriptorSetAsWrites,{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,nullptr});

size_t maxSize = 0;
for (auto i = 0; i < drops.size(); i++)
{
const auto& write = drops[i];
maxSize = core::max(maxSize, write.count);
}
size_t descriptorSize;
switch (asset::IDescriptor::GetTypeCategory(descriptorType))
{
case asset::IDescriptor::EC_BUFFER:
auto descriptorType = write.dstSet->getBindingType(write.binding);
size_t descriptorSize;
switch (asset::IDescriptor::GetTypeCategory(descriptorType))
{
case asset::IDescriptor::EC_BUFFER:
descriptorSize = sizeof(VkDescriptorBufferInfo);
break;
case asset::IDescriptor::EC_IMAGE:
case asset::IDescriptor::EC_IMAGE:
descriptorSize = sizeof(VkDescriptorImageInfo);
break;
case asset::IDescriptor::EC_BUFFER_VIEW:
case asset::IDescriptor::EC_BUFFER_VIEW:
descriptorSize = sizeof(VkBufferView);
break;
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
descriptorSize = sizeof(VkAccelerationStructureKHR);
break;
}
maxSize = core::max(maxSize, write.count * descriptorSize);
}

core::vector<uint8_t> nullDescriptors(maxSize * descriptorSize, 0u);
core::vector<uint8_t> nullDescriptors(maxSize, 0u);

{
auto outWrite = vk_writeDescriptorSets.data();
Expand All @@ -769,6 +773,7 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span<const IGPUDes
for (auto i=0; i<drops.size(); i++)
{
const auto& write = drops[i];
auto descriptorType = write.dstSet->getBindingType(write.binding);

outWrite->dstSet = static_cast<const CVulkanDescriptorSet*>(write.dstSet)->getInternalObject();
outWrite->dstBinding = write.binding;
Expand All @@ -777,21 +782,22 @@ void CVulkanLogicalDevice::nullifyDescriptors_impl(const std::span<const IGPUDes
outWrite->descriptorCount = write.count;
switch (asset::IDescriptor::GetTypeCategory(descriptorType))
{
case asset::IDescriptor::EC_BUFFER:
outWrite->pBufferInfo = reinterpret_cast<VkDescriptorBufferInfo*>(nullDescriptors.data());
break;
case asset::IDescriptor::EC_IMAGE:
outWrite->pImageInfo = reinterpret_cast<VkDescriptorImageInfo*>(nullDescriptors.data());
break;
case asset::IDescriptor::EC_BUFFER_VIEW:
outWrite->pTexelBufferView = reinterpret_cast<VkBufferView*>(nullDescriptors.data());
break;
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
outWriteAS->accelerationStructureCount = write.count;
outWriteAS->pAccelerationStructures = reinterpret_cast<VkAccelerationStructureKHR*>(nullDescriptors.data());
break;
default:
assert(!"Invalid code path.");
case asset::IDescriptor::EC_BUFFER:
outWrite->pBufferInfo = reinterpret_cast<VkDescriptorBufferInfo*>(nullDescriptors.data());
break;
case asset::IDescriptor::EC_IMAGE:
outWrite->pImageInfo = reinterpret_cast<VkDescriptorImageInfo*>(nullDescriptors.data());
break;
case asset::IDescriptor::EC_BUFFER_VIEW:
outWrite->pTexelBufferView = reinterpret_cast<VkBufferView*>(nullDescriptors.data());
break;
case asset::IDescriptor::EC_ACCELERATION_STRUCTURE:
outWriteAS->accelerationStructureCount = write.count;
outWriteAS->pAccelerationStructures = reinterpret_cast<VkAccelerationStructureKHR*>(nullDescriptors.data());
outWrite->pNext = outWriteAS++;
break;
default:
assert(!"Invalid code path.");
}
outWrite++;
}
Expand Down
2 changes: 1 addition & 1 deletion src/nbl/video/CVulkanLogicalDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ class CVulkanLogicalDevice final : public ILogicalDevice
// descriptor sets
core::smart_refctd_ptr<IDescriptorPool> createDescriptorPool_impl(const IDescriptorPool::SCreateInfo& createInfo) override;
void updateDescriptorSets_impl(const SUpdateDescriptorSetsParams& params) override;
void nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors, asset::IDescriptor::E_TYPE descriptorType) override;
void nullifyDescriptors_impl(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors) override;

// renderpasses and framebuffers
core::smart_refctd_ptr<IGPURenderpass> createRenderpass_impl(const IGPURenderpass::SCreationParams& params, IGPURenderpass::SCreationParamValidationResult&& validation) override;
Expand Down
6 changes: 4 additions & 2 deletions src/nbl/video/IGPUDescriptorSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,13 @@ void IGPUDescriptorSet::processWrite(const IGPUDescriptorSet::SWriteDescriptorSe
incrementVersion();
}

void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop, const asset::IDescriptor::E_TYPE type)
void IGPUDescriptorSet::dropDescriptors(const IGPUDescriptorSet::SDropDescriptorSet& drop)
{
assert(drop.dstSet == this);

auto* dstDescriptors = drop.dstSet->getDescriptors(type, drop.binding);
const auto descriptorType = getBindingType(drop.binding);

auto* dstDescriptors = drop.dstSet->getDescriptors(descriptorType, drop.binding);
auto* dstSamplers = drop.dstSet->getMutableSamplers(drop.binding);

if (dstDescriptors)
Expand Down
13 changes: 10 additions & 3 deletions src/nbl/video/ILogicalDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -444,18 +444,25 @@ bool ILogicalDevice::updateDescriptorSets(const std::span<const IGPUDescriptorSe
return true;
}

bool ILogicalDevice::nullifyDescriptors(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors, asset::IDescriptor::E_TYPE descriptorType)
bool ILogicalDevice::nullifyDescriptors(const std::span<const IGPUDescriptorSet::SDropDescriptorSet> dropDescriptors)
{
for (const auto& drop : dropDescriptors)
{
auto ds = drop.dstSet;
if (!ds || !ds->wasCreatedBy(this))
return false;
// (no binding)
if (ds->getBindingType(drop.binding) == asset::IDescriptor::E_TYPE::ET_COUNT)
return false;
}

ds->dropDescriptors(drop, descriptorType);
for (const auto& drop : dropDescriptors)
{
auto ds = drop.dstSet;
ds->dropDescriptors(drop);
}

nullifyDescriptors_impl(dropDescriptors, descriptorType);
nullifyDescriptors_impl(dropDescriptors);
return true;
}

Expand Down