+ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
+ // This file is part of the "Nabla Engine".
+ // For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _NBL_VIDEO_I_UTILITIES_H_INCLUDED_
#define _NBL_VIDEO_I_UTILITIES_H_INCLUDED_

namespace nbl::video
{

- #if 0 // TODO: port
class NBL_API2 IUtilities : public core::IReferenceCounted
{
protected:
@@ -29,57 +31,53 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
nbl::system::logger_opt_smart_ptr m_logger;

public:
- IUtilities(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger = nullptr, const uint32_t downstreamSize = 0x4000000u, const uint32_t upstreamSize = 0x4000000u)
-     : m_device(std::move(device))
-     , m_logger(std::move(logger))
+ IUtilities(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger=nullptr, const uint32_t downstreamSize=0x4000000u, const uint32_t upstreamSize=0x4000000u)
+     : m_device(std::move(device)), m_logger(std::move(logger))
{
auto physicalDevice = m_device->getPhysicalDevice();
const auto& limits = physicalDevice->getLimits();

auto queueFamProps = physicalDevice->getQueueFamilyProperties();
uint32_t minImageTransferGranularityVolume = 1u; // minImageTransferGranularity.width * height * depth
- for (uint32_t i = 0; i < queueFamProps.size(); i++)
+ for (auto& qf : queueFamProps)
{
- uint32_t volume = queueFamProps[i].minImageTransferGranularity.width * queueFamProps[i].minImageTransferGranularity.height * queueFamProps[i].minImageTransferGranularity.depth;
- if (minImageTransferGranularityVolume < volume)
+ uint32_t volume = qf.minImageTransferGranularity.width * qf.minImageTransferGranularity.height * qf.minImageTransferGranularity.depth;
+ if (minImageTransferGranularityVolume < volume)
minImageTransferGranularityVolume = volume;
}
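// Note: despite the "min" in its name, the loop above keeps the worst-case (largest)
// granularity volume across all queue families. Illustrative numbers (not from any
// real device): a transfer-only family reporting minImageTransferGranularity=(8,8,1)
// yields volume 64, and the image transfer atoms below are sized for that worst case.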

// host-mapped device memory needs to have this alignment in flush/invalidate calls, therefore this is the streaming buffer's "allocationAlignment".
- m_allocationAlignment = static_cast<uint32_t>(limits.nonCoherentAtomSize);
- m_allocationAlignmentForBufferImageCopy = core::max(static_cast<uint32_t>(limits.optimalBufferCopyOffsetAlignment), m_allocationAlignment);
+ m_allocationAlignment = limits.nonCoherentAtomSize;
+ m_allocationAlignmentForBufferImageCopy = core::max<uint32_t>(limits.optimalBufferCopyOffsetAlignment, m_allocationAlignment);

- const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations*sizeof(uint32_t);
+ constexpr uint32_t OptimalCoalescedInvocationXferSize = sizeof(uint32_t);
+ const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations * OptimalCoalescedInvocationXferSize;
const uint32_t maxImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R64G64B64A64_SFLOAT).getBlockByteSize() * minImageTransferGranularityVolume;
- const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R8_UINT).getBlockByteSize();;
- const uint32_t maxOptimalTransferAtom = core::max(bufferOptimalTransferAtom, maxImageOptimalTransferAtom);
- const uint32_t minOptimalTransferAtom = core::min(bufferOptimalTransferAtom, minImageOptimalTransferAtom);
+ const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R8_UINT).getBlockByteSize();
+ const uint32_t maxOptimalTransferAtom = core::max(bufferOptimalTransferAtom, maxImageOptimalTransferAtom);
+ const uint32_t minOptimalTransferAtom = core::min(bufferOptimalTransferAtom, minImageOptimalTransferAtom);

// allocationAlignment <= minBlockSize <= minOptimalTransferAtom <= maxOptimalTransferAtom <= stagingBufferSize/4
assert(m_allocationAlignment <= minStreamingBufferAllocationSize);
assert(m_allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize);

assert(minStreamingBufferAllocationSize <= minOptimalTransferAtom);

- assert(maxOptimalTransferAtom * 4u <= upstreamSize);
- assert(maxOptimalTransferAtom * 4u <= downstreamSize);
+ assert(maxOptimalTransferAtom*OptimalCoalescedInvocationXferSize <= upstreamSize);
+ assert(maxOptimalTransferAtom*OptimalCoalescedInvocationXferSize <= downstreamSize);

assert(minStreamingBufferAllocationSize % m_allocationAlignment == 0u);
assert(minStreamingBufferAllocationSize % m_allocationAlignmentForBufferImageCopy == 0u);
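// Illustrative instantiation of the invariant chain above (made-up limits):
// nonCoherentAtomSize=256, optimalBufferCopyOffsetAlignment=4, maxResidentInvocations=4096
//   => m_allocationAlignment = 256, m_allocationAlignmentForBufferImageCopy = 256,
//      bufferOptimalTransferAtom = 4096*4 = 16 KiB,
//      maxImageOptimalTransferAtom = 4096*32*granularityVolume bytes
//      (32 = block byte size of EF_R64G64B64A64_SFLOAT),
// so the default 0x4000000u (64 MiB) staging buffers comfortably satisfy
// maxOptimalTransferAtom*4 <= size for the small granularity volumes seen in practice.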

const auto& enabledFeatures = m_device->getEnabledFeatures();

IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
- auto commonUsages = core::bitflag(IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
- if (enabledFeatures.bufferDeviceAddress)
- commonUsages |= IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
+ auto commonUsages = core::bitflag(IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
if (enabledFeatures.accelerationStructure)
commonUsages |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT;

- core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags(IDeviceMemoryAllocation::EMAF_NONE);
- if (enabledFeatures.bufferDeviceAddress)
- allocateFlags |= IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT;
+ core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags(IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT);
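// Our reading of the change above (an assumption, not stated in this diff): the
// bufferDeviceAddress feature is now treated as always-enabled by the engine, so
// EUF_SHADER_DEVICE_ADDRESS_BIT and EMAF_DEVICE_ADDRESS_BIT no longer sit behind
// feature checks.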
{
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
@@ -102,8 +100,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
if (memProps.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT))
access |= IDeviceMemoryAllocation::EMCAF_WRITE;
assert(access.value);
- IDeviceMemoryAllocation::MappedMemoryRange memoryRange = {mem.get(),0ull,mem->getAllocationSize()};
- m_device->mapMemory(memoryRange, access);
+ mem->map({0ull,reqs.size},access);

m_defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
@@ -130,23 +127,22 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
if (memProps.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT))
access |= IDeviceMemoryAllocation::EMCAF_WRITE;
assert(access.value);
- IDeviceMemoryAllocation::MappedMemoryRange memoryRange = {mem.get(),0ull,mem->getAllocationSize()};
- m_device->mapMemory(memoryRange, access);
+ mem->map({0ull,reqs.size},access);

m_defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,upstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
}
+ #if 0 // TODO: port
m_propertyPoolHandler = core::make_smart_refctd_ptr<CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));
// smaller workgroups fill occupancy gaps better, especially on new Nvidia GPUs, but we don't want too small workgroups on mobile
// TODO: investigate whether we need to clamp against 256u instead of 128u on mobile
const auto scan_workgroup_size = core::max(core::roundDownToPoT(limits.maxWorkgroupSize[0]) >> 1u, 128u);
m_scanner = core::make_smart_refctd_ptr<CScanner>(core::smart_refctd_ptr(m_device), scan_workgroup_size);
+ #endif
}

- ~IUtilities()
+ inline ~IUtilities()
{
- m_device->unmapMemory(m_defaultDownloadBuffer->getBuffer()->getBoundMemory());
- m_device->unmapMemory(m_defaultUploadBuffer->getBuffer()->getBoundMemory());
}
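// The destructor body is now empty: with the `mem->map(...)` call used in the
// constructor, the mapping lives as long as the IDeviceMemoryAllocation itself, so
// the explicit unmapMemory calls removed above are no longer required (our reading
// of the new API, based on this diff).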

//!
@@ -162,6 +158,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
return m_defaultDownloadBuffer.get();
}

+ #if 0 // TODO: port
//!
virtual CPropertyPoolHandler* getDefaultPropertyPoolHandler() const
{
@@ -173,7 +170,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
{
return m_scanner.get();
}
-
+ #endif

//! This function provides some guards against streamingBuffer fragmentation or allocation failure
static uint32_t getAllocationSizeForStreamingBuffer(const size_t size, const uint64_t alignment, uint32_t maxFreeBlock, const uint32_t optimalTransferAtom)
{
@@ -198,6 +195,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
return allocationSize;
}
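// Illustrative call (our sketch; the values and variable names are assumed, mirroring
// how an upload path might use the guard):
//   const uint32_t maxFreeBlock = m_defaultUploadBuffer->max_size();
//   const uint32_t allocationSize = getAllocationSizeForStreamingBuffer(
//       10u<<20u,               // 10 MiB requested
//       m_allocationAlignment,  // nonCoherentAtomSize-derived alignment
//       maxFreeBlock,           // largest free block currently in the streaming buffer
//       bufferOptimalTransferAtom);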

+ #if 0 // TODO: port
//! WARNING: This function blocks the CPU and stalls the GPU!
inline core::smart_refctd_ptr<IGPUBuffer> createFilledDeviceLocalBufferOnDedMem(IQueue* queue, IGPUBuffer::SCreationParams&& params, const void* data)
{
@@ -396,6 +394,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted

// pipelineBarrierAutoSubmit?
+ #endif

// --------------
// downloadBufferRangeViaStagingBuffer
@@ -406,9 +405,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
struct default_data_consumption_callback_t
{
- default_data_consumption_callback_t(void* dstPtr) :
- m_dstPtr(dstPtr)
- {}
+ default_data_consumption_callback_t(void* dstPtr) : m_dstPtr(dstPtr) {}

inline void operator()(const size_t dstOffset, const void* srcPtr, const size_t size)
{
@@ -444,8 +441,8 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
if (m_downstreamingBuffer->needsManualFlushOrInvalidate())
{
const auto nonCoherentAtomSize = device->getPhysicalDevice()->getLimits().nonCoherentAtomSize;
- auto flushRange = AlignedMappedMemoryRange(m_downstreamingBuffer->getBuffer()->getBoundMemory(), m_copyRange.offset, m_copyRange.length, nonCoherentAtomSize);
- device->invalidateMappedMemoryRanges(1u, &flushRange);
+ auto flushRange = AlignedMappedMemoryRange(m_downstreamingBuffer->getBuffer()->getBoundMemory().memory, m_copyRange.offset, m_copyRange.length, nonCoherentAtomSize);
+ device->invalidateMappedMemoryRanges(1u, &flushRange);
}
// Call the function
const uint8_t* copySrc = reinterpret_cast<uint8_t*>(m_downstreamingBuffer->getBufferPointer()) + m_copyRange.offset;
@@ -459,7 +456,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
StreamingTransientDataBufferMT<>* m_downstreamingBuffer;
const size_t m_dstOffset;
};
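// Illustrative alternative consumer (our sketch, not part of the engine API): any
// functor exposing `operator()(const size_t,const void*,const size_t)` can stand in
// for default_data_consumption_callback_t, e.g. one that appends into a growable
// vector instead of writing through a raw pointer:
struct example_vector_consumption_callback_t
{
    inline void operator()(const size_t dstOffset, const void* srcPtr, const size_t size)
    {
        if (m_out->size()<dstOffset+size)
            m_out->resize(dstOffset+size); // grow to fit chunks arriving out of order
        memcpy(m_out->data()+dstOffset,srcPtr,size);
    }
    core::vector<uint8_t>* m_out;
};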
-
+ #if 0 // TODO: port
//! Calls the callback to copy the data to a destination Offset
//! * IMPORTANT: To make the copies ready, IUtility::getDefaultDownStreamingBuffer()->cull_frees() should be called after the `submissionFence` is signaled.
//! If the allocation from staging memory fails due to large image size or fragmentation then this function may need to submit the command buffer via the `submissionQueue` and then signal the fence.
@@ -742,20 +739,21 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, IGPUImage::LAYOUT currentDstImageLayout, const core::SRange<const asset::IImage::SBufferCopy>& regions,
IQueue* submissionQueue, const IQueue::SSubmitInfo& submitInfo = {}
);
+ #endif

- protected:
-
+ protected:
// The application must round down the start of the range to the nearest multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize,
// and round the end of the range up to the nearest multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize.
- static IDeviceMemoryAllocation::MappedMemoryRange AlignedMappedMemoryRange(IDeviceMemoryAllocation* mem, const size_t& off, const size_t& len, size_t nonCoherentAtomSize)
+ static ILogicalDevice::MappedMemoryRange AlignedMappedMemoryRange(IDeviceMemoryAllocation* mem, const size_t& off, const size_t& len, size_t nonCoherentAtomSize)
{
- IDeviceMemoryAllocation::MappedMemoryRange range = {};
+ ILogicalDevice::MappedMemoryRange range = {};
range.memory = mem;
range.offset = core::alignDown(off, nonCoherentAtomSize);
range.length = core::min(core::alignUp(len, nonCoherentAtomSize), mem->getAllocationSize());
return range;
}
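// Worked example (atom size assumed 64): off=128, len=60 maps to
// range.offset = alignDown(128,64) = 128 and range.length = alignUp(60,64) = 64,
// i.e. the invalidated window [128,192) covers the dirty bytes [128,188).
// Callers here pass atom-aligned offsets (streaming-buffer allocations are aligned
// to nonCoherentAtomSize, per the constructor); an unaligned `off` would also need
// the rounded-down start folded into the length.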
+ #if 0 // TODO: port
//! Internal tool used to patch command buffers in submit info.
class CSubmitInfoPatcher
{
@@ -820,16 +818,18 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
core::vector<IGPUCommandBuffer*> m_allCommandBuffers;
core::smart_refctd_ptr<IGPUCommandBuffer> m_newCommandBuffer; // if necessary, we need to hold a reference to it.
};
-
+ #endif
core::smart_refctd_ptr<ILogicalDevice> m_device;

core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultDownloadBuffer;
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultUploadBuffer;

+ #if 0 // TODO: port
core::smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
core::smart_refctd_ptr<CScanner> m_scanner;
- };
#endif
+ };
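// Hedged usage sketch (our example, not part of this header): constructing the
// utility with the default 64 MiB upload/download staging buffers, given an
// already-created `device` and `logger` (names assumed):
//
//   auto utilities = core::make_smart_refctd_ptr<video::IUtilities>(
//       core::smart_refctd_ptr(device), core::smart_refctd_ptr(logger));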
+
class ImageRegionIterator
{
public: