Skip to content

Commit aa08fed

Browse files
committed
IUtilities constructor to static create
1 parent 51ffe19 commit aa08fed

File tree

1 file changed

+74
-35
lines changed

1 file changed

+74
-35
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 74 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -19,75 +19,102 @@ namespace nbl::video
1919

2020
class NBL_API2 IUtilities : public core::IReferenceCounted
2121
{
22-
protected:
22+
protected:
2323
constexpr static inline uint32_t maxStreamingBufferAllocationAlignment = 64u*1024u; // if you need larger alignments then you're not right in the head
2424
constexpr static inline uint32_t minStreamingBufferAllocationSize = 1024u;
2525
constexpr static inline uint32_t OptimalCoalescedInvocationXferSize = sizeof(uint32_t);
2626

27-
uint32_t m_allocationAlignment = 0u;
28-
uint32_t m_allocationAlignmentForBufferImageCopy = 0u;
27+
IUtilities( core::smart_refctd_ptr<ILogicalDevice>&& device,
28+
nbl::system::logger_opt_smart_ptr&& logger,
29+
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> >&& defaultUploadBuffer,
30+
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> >&& defaultDownloadBuffer,
31+
uint32_t allocationAlignment,
32+
uint32_t allocationAlignmentForBufferImageCopy)
33+
: m_device(std::move(device))
34+
, m_logger(nbl::system::logger_opt_smart_ptr(logger))
35+
, m_defaultUploadBuffer(std::move(defaultUploadBuffer))
36+
, m_defaultDownloadBuffer(std::move(defaultDownloadBuffer))
37+
, m_allocationAlignment(allocationAlignment)
38+
, m_allocationAlignmentForBufferImageCopy(allocationAlignmentForBufferImageCopy)
39+
{
40+
m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
41+
m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
42+
}
43+
44+
IUtilities() = delete;
2945

30-
nbl::system::logger_opt_smart_ptr m_logger;
46+
public:
3147

32-
public:
33-
IUtilities(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger=nullptr, const uint32_t downstreamSize=0x4000000u, const uint32_t upstreamSize=0x4000000u)
34-
: m_device(core::smart_refctd_ptr(device)), m_logger(nbl::system::logger_opt_smart_ptr(logger))
48+
static core::smart_refctd_ptr<IUtilities> create(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger = nullptr, const uint32_t downstreamSize = 0x4000000u, const uint32_t upstreamSize = 0x4000000u)
3549
{
36-
auto physicalDevice = m_device->getPhysicalDevice();
50+
auto physicalDevice = device->getPhysicalDevice();
3751
const auto& limits = physicalDevice->getLimits();
38-
52+
3953
auto queueFamProps = physicalDevice->getQueueFamilyProperties();
4054
uint32_t minImageTransferGranularityVolume = 1u; // minImageTransferGranularity.width * height * depth
41-
55+
4256
for (auto& qf : queueFamProps)
4357
{
4458
uint32_t volume = qf.minImageTransferGranularity.width*qf.minImageTransferGranularity.height*qf.minImageTransferGranularity.depth;
4559
if(minImageTransferGranularityVolume<volume)
4660
minImageTransferGranularityVolume = volume;
4761
}
4862

49-
// host-mapped device memory needs to have this alignment in flush/invalidate calls, therefore this is the streaming buffer's "allocationAlignment".
50-
m_allocationAlignment = limits.nonCoherentAtomSize;
51-
m_allocationAlignmentForBufferImageCopy = core::max<uint32_t>(limits.optimalBufferCopyOffsetAlignment,m_allocationAlignment);
52-
63+
const uint32_t allocationAlignment = limits.nonCoherentAtomSize;
64+
const uint32_t allocationAlignmentForBufferImageCopy = core::max<uint32_t>(limits.optimalBufferCopyOffsetAlignment,allocationAlignment);
65+
5366
const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations * OptimalCoalescedInvocationXferSize;
5467
const uint32_t maxImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R64G64B64A64_SFLOAT).getBlockByteSize() * minImageTransferGranularityVolume;
5568
const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R8_UINT).getBlockByteSize();
5669
const uint32_t maxOptimalTransferAtom = core::max(bufferOptimalTransferAtom,maxImageOptimalTransferAtom);
5770
const uint32_t minOptimalTransferAtom = core::min(bufferOptimalTransferAtom,minImageOptimalTransferAtom);
5871

5972
// allocationAlignment <= minBlockSize <= minOptimalTransferAtom <= maxOptimalTransferAtom
60-
assert(m_allocationAlignment <= minStreamingBufferAllocationSize);
61-
assert(m_allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize);
62-
63-
assert(minStreamingBufferAllocationSize <= minOptimalTransferAtom);
64-
assert(minOptimalTransferAtom <= maxOptimalTransferAtom);
65-
66-
assert(minStreamingBufferAllocationSize % m_allocationAlignment == 0u);
67-
assert(minStreamingBufferAllocationSize % m_allocationAlignmentForBufferImageCopy == 0u);
73+
74+
const bool transferConstaintsSatisfied =
75+
(allocationAlignment <= minStreamingBufferAllocationSize) &&
76+
(allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize) &&
77+
(minStreamingBufferAllocationSize <= minOptimalTransferAtom) &&
78+
(minOptimalTransferAtom <= maxOptimalTransferAtom) &&
79+
(minStreamingBufferAllocationSize % allocationAlignment == 0u) &&
80+
(minStreamingBufferAllocationSize % allocationAlignmentForBufferImageCopy == 0u);
81+
82+
if (!transferConstaintsSatisfied)
83+
return nullptr;
6884

69-
const auto& enabledFeatures = m_device->getEnabledFeatures();
85+
const auto& enabledFeatures = device->getEnabledFeatures();
7086

7187
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
7288
auto commonUsages = core::bitflag(IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
7389
if (enabledFeatures.accelerationStructure)
7490
commonUsages |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT;
7591

7692
core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags(IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT);
93+
94+
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > defaultUploadBuffer = nullptr;
95+
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > defaultDownloadBuffer = nullptr;
7796

97+
// Try Create Download Buffer
7898
{
7999
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
80100
streamingBufferCreationParams.size = downstreamSize;
81101
// GPU write to RAM usages
82102
streamingBufferCreationParams.usage = commonUsages|IGPUBuffer::EUF_TRANSFER_DST_BIT;
83103
if (enabledFeatures.conditionalRendering)
84104
streamingBufferCreationParams.usage |= IGPUBuffer::EUF_CONDITIONAL_RENDERING_BIT_EXT;
85-
auto buffer = m_device->createBuffer(std::move(streamingBufferCreationParams));
105+
auto buffer = device->createBuffer(std::move(streamingBufferCreationParams));
86106
auto reqs = buffer->getMemoryReqs();
87107
reqs.memoryTypeBits &= physicalDevice->getDownStreamingMemoryTypeBits();
88108

89-
auto memOffset = m_device->allocate(reqs, buffer.get(), allocateFlags);
90-
auto mem = memOffset.memory;
109+
auto deviceMemAllocation = device->allocate(reqs, buffer.get(), allocateFlags);
110+
111+
if (!deviceMemAllocation.isValid())
112+
{
113+
// allocation failed
114+
return nullptr;
115+
}
116+
117+
auto mem = deviceMemAllocation.memory;
91118

92119
core::bitflag<IDeviceMemoryAllocation::E_MAPPING_CPU_ACCESS_FLAGS> access(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS);
93120
const auto memProps = mem->getMemoryPropertyFlags();
@@ -98,9 +125,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
98125
assert(access.value);
99126
mem->map({0ull,reqs.size},access);
100127

101-
m_defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
102-
m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
128+
defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
103129
}
130+
// Try Create Upload Buffer
104131
{
105132
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
106133
streamingBufferCreationParams.size = upstreamSize;
@@ -109,13 +136,19 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
109136
streamingBufferCreationParams.usage |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT;
110137
if (enabledFeatures.rayTracingPipeline)
111138
streamingBufferCreationParams.usage |= IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT;
112-
auto buffer = m_device->createBuffer(std::move(streamingBufferCreationParams));
139+
auto buffer = device->createBuffer(std::move(streamingBufferCreationParams));
113140

114141
auto reqs = buffer->getMemoryReqs();
115142
reqs.memoryTypeBits &= physicalDevice->getUpStreamingMemoryTypeBits();
116-
auto memOffset = m_device->allocate(reqs, buffer.get(), allocateFlags);
143+
auto deviceMemAllocation = device->allocate(reqs, buffer.get(), allocateFlags);
144+
145+
if (!deviceMemAllocation.isValid())
146+
{
147+
// allocation failed
148+
return nullptr;
149+
}
117150

118-
auto mem = memOffset.memory;
151+
auto mem = deviceMemAllocation.memory;
119152
core::bitflag<IDeviceMemoryAllocation::E_MAPPING_CPU_ACCESS_FLAGS> access(IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS);
120153
const auto memProps = mem->getMemoryPropertyFlags();
121154
if (memProps.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT))
@@ -125,16 +158,19 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
125158
assert(access.value);
126159
mem->map({0ull,reqs.size},access);
127160

128-
m_defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,upstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
129-
m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
161+
defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,upstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
130162
}
163+
131164
#if 0 // TODO: port
132165
m_propertyPoolHandler = core::make_smart_refctd_ptr<CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));
133166
// smaller workgroups fill occupancy gaps better, especially on new Nvidia GPUs, but we don't want too small workgroups on mobile
134167
// TODO: investigate whether we need to clamp against 256u instead of 128u on mobile
135168
const auto scan_workgroup_size = core::max(core::roundDownToPoT(limits.maxWorkgroupSize[0]) >> 1u, 128u);
136169
m_scanner = core::make_smart_refctd_ptr<CScanner>(core::smart_refctd_ptr(m_device), scan_workgroup_size);
137170
#endif
171+
172+
return core::smart_refctd_ptr<IUtilities>(new IUtilities(std::move(device), std::move(logger), std::move(defaultUploadBuffer), std::move(defaultDownloadBuffer), allocationAlignment, allocationAlignmentForBufferImageCopy), core::dont_grab);
173+
138174
}
139175

140176
inline ~IUtilities()
@@ -762,12 +798,15 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
762798
return retval;
763799
}
764800

765-
766801
core::smart_refctd_ptr<ILogicalDevice> m_device;
802+
nbl::system::logger_opt_smart_ptr m_logger;
767803

768804
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultDownloadBuffer;
769805
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultUploadBuffer;
770-
806+
807+
uint32_t m_allocationAlignment = 0u;
808+
uint32_t m_allocationAlignmentForBufferImageCopy = 0u;
809+
771810
#if 0 // TODO: port
772811
core::smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
773812
core::smart_refctd_ptr<CScanner> m_scanner;

0 commit comments

Comments
 (0)