-
Notifications
You must be signed in to change notification settings - Fork 13
Sub-allocated descriptor sets #95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 3 commits
c64f295
5005a4c
f18077b
8dee363
0b805e0
461a7da
5a94b7e
fd3a31f
2d9181d
04ca9e2
ffb014e
9b6764f
7906d1c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Pull in the shared example helpers from common.cmake.
# OPTIONAL stops include() from raising its own error when the file is missing, which
# leaves RES set to NOTFOUND and makes the project-specific diagnostic below reachable.
include(common OPTIONAL RESULT_VARIABLE RES)
if(NOT RES)
    message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Create the executable target for this example (helper provided by the project's CMake modules).
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# Optionally embed everything under app_resources/ into the executable.
if(NBL_EMBED_BUILTIN_RESOURCES)
    set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
    set(RESOURCE_DIR "app_resources")

    # Absolute locations of the resource search root and of the generated sources/headers.
    get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
    get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
    get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

    # Collect every file below the resource directory, as paths relative to that directory,
    # and register each one in the RESOURCES_TO_EMBED list.
    file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*")
    foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
        LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
    endforeach()

    # Create the resource target under the nbl::this_example::builtin namespace ...
    ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")

    # ... and link it into the example executable.
    LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
Comment on lines
+1
to
+24
Review comment: I'd make this example 1x or 2x, using 2x for basic utility/extension tests now.
Review comment: number 27 is up for grabs. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"enableParallelBuild": true, | ||
"threadsPerBuildProcess" : 2, | ||
"isExecuted": false, | ||
"scriptPath": "", | ||
"cmake": { | ||
"configurations": [ "Release", "Debug", "RelWithDebInfo" ], | ||
"buildModes": [], | ||
"requiredOptions": [] | ||
}, | ||
"profiles": [ | ||
{ | ||
"backend": "vulkan", // should be none | ||
"platform": "windows", | ||
"buildModes": [], | ||
"runConfiguration": "Release", // we also need to run in Debug and RWDI because this is a foundational example | ||
"gpuArchitectures": [] | ||
} | ||
], | ||
"dependencies": [], | ||
"data": [ | ||
{ | ||
"dependencies": [], | ||
"command": [""], | ||
"outputs": [] | ||
} | ||
] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,190 @@ | ||
// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. | ||
// This file is part of the "Nabla Engine". | ||
// For conditions of distribution and use, see copyright notice in nabla.h | ||
|
||
|
||
#include "nbl/video/surface/CSurfaceVulkan.h" | ||
deprilula28 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#include "nbl/video/alloc/SubAllocatedDescriptorSet.h" | ||
|
||
#include "../common/BasicMultiQueueApplication.hpp" | ||
#include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp" | ||
|
||
using namespace nbl; | ||
using namespace core; | ||
using namespace system; | ||
using namespace ui; | ||
using namespace asset; | ||
using namespace video; | ||
|
||
#include "nbl/builtin/hlsl/bit.hlsl" | ||
deprilula28 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// This example exercises nbl::video::SubAllocatedDescriptorSet: ranges of descriptors are sub-allocated from one large binding,
// every other range is handed back, and further ranges are then allocated in smaller pieces while each returned offset is logged.
// The work loop afterwards only submits empty command buffers which signal a timeline semaphore.
class SubAllocatedDescriptorSetApp final : public examples::MonoDeviceApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication
{
        using device_base_t = examples::MonoDeviceApplication;
        using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication;

        // The pool cache is just a formalized way of round-robining command pools and resetting + reusing them after their most recent submit signals finished.
        // It's a little more ergonomic to use if you don't have a 1:1 mapping between frames and pools.
        smart_refctd_ptr<nbl::video::ICommandPoolCache> m_poolCache;

        // NOTE(review): never assigned in this class; `onAppInitialized` keeps its allocator in a local variable instead.
        smart_refctd_ptr<nbl::video::SubAllocatedDescriptorSet> m_subAllocDescriptorSet;

        // One timeline semaphore is signalled by every submit, with the incremented `m_iteration` as the value.
        smart_refctd_ptr<ISemaphore> m_timeline;
        uint64_t m_iteration = 0;
        // The work loop stops once this many submits have been made.
        constexpr static inline uint64_t MaxIterations = 200;

        // Parameters forwarded to the SubAllocatedDescriptorSet constructor.
        constexpr static inline uint32_t MaxDescriptorSetAllocationAlignment = 64u*1024u; // larger alignments are not expected to ever be needed
        constexpr static inline uint32_t MinDescriptorSetAllocationSize = 1u;

    public:
        // Yay thanks to multiple inheritance we cannot forward ctors anymore
        SubAllocatedDescriptorSetApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) :
            system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {}

        // All of the sub-allocator testing happens here; the work loop only performs empty submits afterwards.
        // Returns false if either base class fails to initialize, true otherwise.
        bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
        {
            using nbl::video::IGPUDescriptorSetLayout;

            // Remember to call the base class initialization!
            // Both bases take the system by rvalue reference, so the first one gets its own reference:
            // forwarding `system` with std::move twice would hand the second call a pointer the first may have moved from.
            if (!device_base_t::onAppInitialized(smart_refctd_ptr<ISystem>(system)))
                return false;
            if (!asset_base_t::onAppInitialized(std::move(system)))
                return false;

            // Number of command pools handed to the pool cache.
            constexpr auto MaxConcurrency = 64;

            // The pools get reset and reused instead of being thrown away, hence no transient flag
            m_poolCache = ICommandPoolCache::create(core::smart_refctd_ptr(m_device),getComputeQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE,MaxConcurrency);

            // In contrast to fences, we just need one semaphore to rule all submits
            m_timeline = m_device->createSemaphore(m_iteration);

            // Descriptor set sub allocator: a single binding of 65536 storage images which may be updated after bind,
            // updated while unused but pending, and left partially bound.
            // TODO(review): a reviewer asked for `const` and designated initializers here if possible.
            video::IGPUDescriptorSetLayout::SBinding bindings[1];
            {
                bindings[0].binding = 0;
                bindings[0].count = 65536u;
                bindings[0].createFlags = core::bitflag(IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT)
                    | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT
                    | IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT;
                bindings[0].type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE;
                bindings[0].stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE;
            }

            // TODO: I don't think these are needed for sub allocated descriptor sets (alignment isn't needed, and min size is 1)
            auto subAllocatedDescriptorSet = core::make_smart_refctd_ptr<nbl::video::SubAllocatedDescriptorSet>(
                bindings, MaxDescriptorSetAllocationAlignment, MinDescriptorSetAllocationSize
            );

            constexpr uint32_t AllocationCount = 512u;
            constexpr uint32_t FirstRoundSize = 4u;
            constexpr uint32_t SecondRoundSize = 2u;
            const auto InvalidAddress = core::GeneralpurposeAddressAllocator<uint32_t>::invalid_address;

            // Requests `AllocationCount` ranges of `rangeSize` descriptors each from binding 0, logs every returned offset
            // and asserts (debug builds only) that none of the requests was left at the invalid address.
            auto allocateAndReport = [&](const uint32_t rangeSize) -> std::vector<uint32_t>
            {
                std::vector<uint32_t> offsets(AllocationCount, InvalidAddress);
                std::vector<uint32_t> sizes(AllocationCount, rangeSize);

                subAllocatedDescriptorSet->multi_allocate(0, offsets.size(), offsets.data(), sizes.data());

                for (uint32_t i = 0; i < offsets.size(); i++)
                {
                    m_logger->log("allocation[%u]: %u", system::ILogger::ELL_INFO, i, offsets[i]);
                    assert(offsets[i] != InvalidAddress);
                }
                return offsets;
            };

            // Round one: 512 ranges of 4 descriptors
            const std::vector<uint32_t> firstRound = allocateAndReport(FirstRoundSize);

            // Hand back every other range of the first round
            {
                std::vector<uint32_t> freedOffsets, freedSizes;
                freedOffsets.reserve(AllocationCount / 2u);
                freedSizes.reserve(AllocationCount / 2u);
                for (uint32_t i = 0; i < AllocationCount; i += 2)
                {
                    freedOffsets.push_back(firstRound[i]);
                    freedSizes.push_back(FirstRoundSize);
                }
                subAllocatedDescriptorSet->multi_deallocate(0, freedOffsets.size(), freedOffsets.data(), freedSizes.data());
            }
            m_logger->log("Freed some allocations", system::ILogger::ELL_INFO);

            // Round two: 512 ranges of 2 descriptors, requested after the frees above
            allocateAndReport(SecondRoundSize);

            return true;
        }

        // Keep looping until `MaxIterations` submits have been made
        // (a bounded work loop, maybe just for the sake of future WASM so we don't timeout a Browser Tab with an unresponsive script)
        bool keepRunning() override { return m_iteration<MaxIterations; }

        // One iteration: acquire a command pool, record an (currently empty) one-time-submit command buffer
        // and submit it to the compute queue, signalling the timeline semaphore with the incremented iteration count.
        void workLoopBody() override
        {
            IQueue* const queue = getComputeQueue();

            // Spin until the cache hands us a valid pool index
            // NOTE(review): nothing in this function hands the pool back to the cache; presumably it has to be released
            // (latched on the timeline value signalled below), otherwise this loop may never exit once all pools are taken - confirm against ICommandPoolCache.
            uint32_t poolIx;
            do
            {
                poolIx = m_poolCache->acquirePool();
            } while (poolIx==ICommandPoolCache::invalid_index);

            smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
            {
                m_poolCache->getPool(poolIx)->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(m_logger));
                // the command buffer is begun as a one time submit, a fresh one gets created every iteration
                cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);

                // COMMAND RECORDING (nothing gets recorded yet)

                // only read by the assert, hence unused when NDEBUG is defined
                [[maybe_unused]] const auto result = cmdbuf->end();
                assert(result);
            }

            // The semaphore is signalled with the post-increment iteration count
            ++m_iteration;
            {
                const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo =
                {
                    .cmdbuf = cmdbuf.get()
                };
                const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo =
                {
                    .semaphore = m_timeline.get(),
                    .value = m_iteration,
                    .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT
                };
                // No semaphore is waited on: the submit only signals the timeline.
                // If a submit ever needed to see the memory accesses of a previous one, a semaphore wait could be added here.
                const IQueue::SSubmitInfo submitInfo = {
                    .waitSemaphores = {},
                    .commandBuffers = {&cmdbufInfo,1},
                    .signalSemaphores = {&signalInfo,1}
                };

                queue->startCapture();
                // only read by the assert, hence unused when NDEBUG is defined
                [[maybe_unused]] const auto statusCode = queue->submit({ &submitInfo,1 });
                queue->endCapture();
                assert(statusCode == IQueue::RESULT::SUCCESS);
            }
        }
};
|
||
NBL_MAIN_FUNC(SubAllocatedDescriptorSetApp) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import org.DevshGraphicsProgramming.Agent | ||
import org.DevshGraphicsProgramming.BuilderInfo | ||
import org.DevshGraphicsProgramming.IBuilder | ||
|
||
/**
 * CI builder for this example: only the build step does any work,
 * the prepare, test and install steps unconditionally report success.
 */
class CStreamingAndBufferDeviceAddressBuilder extends IBuilder
{
    /** Passes the agent and the builder info straight through to the IBuilder constructor. */
    public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info)
    {
        super(_agent, _info)
    }

    /** Nothing to prepare; always succeeds. */
    @Override
    public boolean prepare(Map axisMapping)
    {
        true
    }

    /**
     * Runs `cmake --build` for this target, using the configuration and build type found in the axis mapping.
     * Always returns true; the outcome of the executed command is not inspected here.
     */
    @Override
    public boolean build(Map axisMapping)
    {
        IBuilder.CONFIGURATION config = axisMapping["CONFIGURATION"]
        IBuilder.BUILD_TYPE buildType = axisMapping["BUILD_TYPE"]

        final buildDirName = getNameOfBuildDirectory(buildType)
        final configName = getNameOfConfig(config)

        final command = "cmake --build ${info.rootProjectPath}/${buildDirName}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${configName} -j12 -v"
        agent.execute(command)

        true
    }

    /** No tests are run; always succeeds. */
    @Override
    public boolean test(Map axisMapping)
    {
        true
    }

    /** Nothing is installed; always succeeds. */
    @Override
    public boolean install(Map axisMapping)
    {
        true
    }
}
|
||
// Script-level factory: builds the builder for this example from the given agent and info.
def create(Agent _agent, _info)
{
    new CStreamingAndBufferDeviceAddressBuilder(_agent, _info)
}
|
||
return this |
Uh oh!
There was an error while loading. Please reload this page.