@@ -19,75 +19,102 @@ namespace nbl::video
19
19
20
20
class NBL_API2 IUtilities : public core::IReferenceCounted
21
21
{
22
- protected:
22
+ protected:
23
23
constexpr static inline uint32_t maxStreamingBufferAllocationAlignment = 64u *1024u ; // if you need larger alignments then you're not right in the head
24
24
constexpr static inline uint32_t minStreamingBufferAllocationSize = 1024u ;
25
25
constexpr static inline uint32_t OptimalCoalescedInvocationXferSize = sizeof (uint32_t );
26
26
27
- uint32_t m_allocationAlignment = 0u ;
28
- uint32_t m_allocationAlignmentForBufferImageCopy = 0u ;
27
+ IUtilities ( core::smart_refctd_ptr<ILogicalDevice>&& device,
28
+ nbl::system::logger_opt_smart_ptr&& logger,
29
+ core::smart_refctd_ptr<StreamingTransientDataBufferMT<> >&& defaultUploadBuffer,
30
+ core::smart_refctd_ptr<StreamingTransientDataBufferMT<> >&& defaultDownloadBuffer,
31
+ uint32_t allocationAlignment,
32
+ uint32_t allocationAlignmentForBufferImageCopy)
33
+ : m_device(std::move(device))
34
+ , m_logger(nbl::system::logger_opt_smart_ptr(logger))
35
+ , m_defaultUploadBuffer(std::move(defaultUploadBuffer))
36
+ , m_defaultDownloadBuffer(std::move(defaultDownloadBuffer))
37
+ , m_allocationAlignment(allocationAlignment)
38
+ , m_allocationAlignmentForBufferImageCopy(allocationAlignmentForBufferImageCopy)
39
+ {
40
+ m_defaultDownloadBuffer->getBuffer ()->setObjectDebugName ((" Default Download Buffer of Utilities " +std::to_string (ptrdiff_t (this ))).c_str ());
41
+ m_defaultUploadBuffer->getBuffer ()->setObjectDebugName ((" Default Upload Buffer of Utilities " +std::to_string (ptrdiff_t (this ))).c_str ());
42
+ }
43
+
44
+ IUtilities () = delete ;
29
45
30
- nbl::system::logger_opt_smart_ptr m_logger;
46
+ public:
31
47
32
- public:
33
- IUtilities (core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger=nullptr , const uint32_t downstreamSize=0x4000000u , const uint32_t upstreamSize=0x4000000u )
34
- : m_device(core::smart_refctd_ptr(device)), m_logger(nbl::system::logger_opt_smart_ptr(logger))
48
+ static core::smart_refctd_ptr<IUtilities> create (core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger = nullptr , const uint32_t downstreamSize = 0x4000000u , const uint32_t upstreamSize = 0x4000000u )
35
49
{
36
- auto physicalDevice = m_device ->getPhysicalDevice ();
50
+ auto physicalDevice = device ->getPhysicalDevice ();
37
51
const auto & limits = physicalDevice->getLimits ();
38
-
52
+
39
53
auto queueFamProps = physicalDevice->getQueueFamilyProperties ();
40
54
uint32_t minImageTransferGranularityVolume = 1u ; // minImageTransferGranularity.width * height * depth
41
-
55
+
42
56
for (auto & qf : queueFamProps)
43
57
{
44
58
uint32_t volume = qf.minImageTransferGranularity .width *qf.minImageTransferGranularity .height *qf.minImageTransferGranularity .depth ;
45
59
if (minImageTransferGranularityVolume<volume)
46
60
minImageTransferGranularityVolume = volume;
47
61
}
48
62
49
- // host-mapped device memory needs to have this alignment in flush/invalidate calls, therefore this is the streaming buffer's "allocationAlignment".
50
- m_allocationAlignment = limits.nonCoherentAtomSize ;
51
- m_allocationAlignmentForBufferImageCopy = core::max<uint32_t >(limits.optimalBufferCopyOffsetAlignment ,m_allocationAlignment);
52
-
63
+ const uint32_t allocationAlignment = limits.nonCoherentAtomSize ;
64
+ const uint32_t allocationAlignmentForBufferImageCopy = core::max<uint32_t >(limits.optimalBufferCopyOffsetAlignment ,allocationAlignment);
65
+
53
66
const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations * OptimalCoalescedInvocationXferSize;
54
67
const uint32_t maxImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo (asset::EF_R64G64B64A64_SFLOAT).getBlockByteSize () * minImageTransferGranularityVolume;
55
68
const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo (asset::EF_R8_UINT).getBlockByteSize ();
56
69
const uint32_t maxOptimalTransferAtom = core::max (bufferOptimalTransferAtom,maxImageOptimalTransferAtom);
57
70
const uint32_t minOptimalTransferAtom = core::min (bufferOptimalTransferAtom,minImageOptimalTransferAtom);
58
71
59
72
// allocationAlignment <= minBlockSize <= minOptimalTransferAtom <= maxOptimalTransferAtom
60
- assert (m_allocationAlignment <= minStreamingBufferAllocationSize);
61
- assert (m_allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize);
62
-
63
- assert (minStreamingBufferAllocationSize <= minOptimalTransferAtom);
64
- assert (minOptimalTransferAtom <= maxOptimalTransferAtom);
65
-
66
- assert (minStreamingBufferAllocationSize % m_allocationAlignment == 0u );
67
- assert (minStreamingBufferAllocationSize % m_allocationAlignmentForBufferImageCopy == 0u );
73
+
74
+ const bool transferConstaintsSatisfied =
75
+ (allocationAlignment <= minStreamingBufferAllocationSize) &&
76
+ (allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize) &&
77
+ (minStreamingBufferAllocationSize <= minOptimalTransferAtom) &&
78
+ (minOptimalTransferAtom <= maxOptimalTransferAtom) &&
79
+ (minStreamingBufferAllocationSize % allocationAlignment == 0u ) &&
80
+ (minStreamingBufferAllocationSize % allocationAlignmentForBufferImageCopy == 0u );
81
+
82
+ if (!transferConstaintsSatisfied)
83
+ return nullptr ;
68
84
69
- const auto & enabledFeatures = m_device ->getEnabledFeatures ();
85
+ const auto & enabledFeatures = device ->getEnabledFeatures ();
70
86
71
87
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
72
88
auto commonUsages = core::bitflag (IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
73
89
if (enabledFeatures.accelerationStructure )
74
90
commonUsages |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT;
75
91
76
92
core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags (IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT);
93
+
94
+ core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > defaultUploadBuffer = nullptr ;
95
+ core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > defaultDownloadBuffer = nullptr ;
77
96
97
+ // Try Create Download Buffer
78
98
{
79
99
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
80
100
streamingBufferCreationParams.size = downstreamSize;
81
101
// GPU write to RAM usages
82
102
streamingBufferCreationParams.usage = commonUsages|IGPUBuffer::EUF_TRANSFER_DST_BIT;
83
103
if (enabledFeatures.conditionalRendering )
84
104
streamingBufferCreationParams.usage |= IGPUBuffer::EUF_CONDITIONAL_RENDERING_BIT_EXT;
85
- auto buffer = m_device ->createBuffer (std::move (streamingBufferCreationParams));
105
+ auto buffer = device ->createBuffer (std::move (streamingBufferCreationParams));
86
106
auto reqs = buffer->getMemoryReqs ();
87
107
reqs.memoryTypeBits &= physicalDevice->getDownStreamingMemoryTypeBits ();
88
108
89
- auto memOffset = m_device->allocate (reqs, buffer.get (), allocateFlags);
90
- auto mem = memOffset.memory ;
109
+ auto deviceMemAllocation = device->allocate (reqs, buffer.get (), allocateFlags);
110
+
111
+ if (!deviceMemAllocation.isValid ())
112
+ {
113
+ // allocation failed
114
+ return nullptr ;
115
+ }
116
+
117
+ auto mem = deviceMemAllocation.memory ;
91
118
92
119
core::bitflag<IDeviceMemoryAllocation::E_MAPPING_CPU_ACCESS_FLAGS> access (IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS);
93
120
const auto memProps = mem->getMemoryPropertyFlags ();
@@ -98,9 +125,9 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
98
125
assert (access.value );
99
126
mem->map ({0ull ,reqs.size },access);
100
127
101
- m_defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull ,downstreamSize,std::move (buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
102
- m_defaultDownloadBuffer->getBuffer ()->setObjectDebugName ((" Default Download Buffer of Utilities " +std::to_string (ptrdiff_t (this ))).c_str ());
128
+ defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull ,downstreamSize,std::move (buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
103
129
}
130
+ // Try Create Upload Buffer
104
131
{
105
132
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
106
133
streamingBufferCreationParams.size = upstreamSize;
@@ -109,13 +136,19 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
109
136
streamingBufferCreationParams.usage |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT;
110
137
if (enabledFeatures.rayTracingPipeline )
111
138
streamingBufferCreationParams.usage |= IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT;
112
- auto buffer = m_device ->createBuffer (std::move (streamingBufferCreationParams));
139
+ auto buffer = device ->createBuffer (std::move (streamingBufferCreationParams));
113
140
114
141
auto reqs = buffer->getMemoryReqs ();
115
142
reqs.memoryTypeBits &= physicalDevice->getUpStreamingMemoryTypeBits ();
116
- auto memOffset = m_device->allocate (reqs, buffer.get (), allocateFlags);
143
+ auto deviceMemAllocation = device->allocate (reqs, buffer.get (), allocateFlags);
144
+
145
+ if (!deviceMemAllocation.isValid ())
146
+ {
147
+ // allocation failed
148
+ return nullptr ;
149
+ }
117
150
118
- auto mem = memOffset .memory ;
151
+ auto mem = deviceMemAllocation .memory ;
119
152
core::bitflag<IDeviceMemoryAllocation::E_MAPPING_CPU_ACCESS_FLAGS> access (IDeviceMemoryAllocation::EMCAF_NO_MAPPING_ACCESS);
120
153
const auto memProps = mem->getMemoryPropertyFlags ();
121
154
if (memProps.hasFlags (IDeviceMemoryAllocation::EMPF_HOST_READABLE_BIT))
@@ -125,16 +158,19 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
125
158
assert (access.value );
126
159
mem->map ({0ull ,reqs.size },access);
127
160
128
- m_defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull ,upstreamSize,std::move (buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
129
- m_defaultUploadBuffer->getBuffer ()->setObjectDebugName ((" Default Upload Buffer of Utilities " +std::to_string (ptrdiff_t (this ))).c_str ());
161
+ defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull ,upstreamSize,std::move (buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
130
162
}
163
+
131
164
#if 0 // TODO: port
132
165
m_propertyPoolHandler = core::make_smart_refctd_ptr<CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));
133
166
// smaller workgroups fill occupancy gaps better, especially on new Nvidia GPUs, but we don't want too small workgroups on mobile
134
167
// TODO: investigate whether we need to clamp against 256u instead of 128u on mobile
135
168
const auto scan_workgroup_size = core::max(core::roundDownToPoT(limits.maxWorkgroupSize[0]) >> 1u, 128u);
136
169
m_scanner = core::make_smart_refctd_ptr<CScanner>(core::smart_refctd_ptr(m_device), scan_workgroup_size);
137
170
#endif
171
+
172
+ return core::smart_refctd_ptr<IUtilities>(new IUtilities (std::move (device), std::move (logger), std::move (defaultUploadBuffer), std::move (defaultDownloadBuffer), allocationAlignment, allocationAlignmentForBufferImageCopy), core::dont_grab);
173
+
138
174
}
139
175
140
176
inline ~IUtilities ()
@@ -762,12 +798,15 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
762
798
return retval;
763
799
}
764
800
765
-
766
801
core::smart_refctd_ptr<ILogicalDevice> m_device;
802
+ nbl::system::logger_opt_smart_ptr m_logger;
767
803
768
804
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultDownloadBuffer;
769
805
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultUploadBuffer;
770
-
806
+
807
+ uint32_t m_allocationAlignment = 0u ;
808
+ uint32_t m_allocationAlignmentForBufferImageCopy = 0u ;
809
+
771
810
#if 0 // TODO: port
772
811
core::smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
773
812
core::smart_refctd_ptr<CScanner> m_scanner;
0 commit comments