Skip to content

Commit 7c19d7f

Browse files
bring back bits of IUtilities needed for ex 05
1 parent b9637ae commit 7c19d7f

File tree

2 files changed

+42
-42
lines changed

2 files changed

+42
-42
lines changed

include/nbl/video/utilities/IUtilities.h

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
2+
// This file is part of the "Nabla Engine".
3+
// For conditions of distribution and use, see copyright notice in nabla.h
14
#ifndef _NBL_VIDEO_I_UTILITIES_H_INCLUDED_
25
#define _NBL_VIDEO_I_UTILITIES_H_INCLUDED_
36

@@ -16,7 +19,6 @@
1619
namespace nbl::video
1720
{
1821

19-
#if 0 // TODO: port
2022
class NBL_API2 IUtilities : public core::IReferenceCounted
2123
{
2224
protected:
@@ -29,57 +31,53 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
2931
nbl::system::logger_opt_smart_ptr m_logger;
3032

3133
public:
32-
IUtilities(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger = nullptr, const uint32_t downstreamSize = 0x4000000u, const uint32_t upstreamSize = 0x4000000u)
33-
: m_device(std::move(device))
34-
, m_logger(std::move(logger))
34+
IUtilities(core::smart_refctd_ptr<ILogicalDevice>&& device, nbl::system::logger_opt_smart_ptr&& logger=nullptr, const uint32_t downstreamSize=0x4000000u, const uint32_t upstreamSize=0x4000000u)
35+
: m_device(std::move(device)), m_logger(std::move(logger))
3536
{
3637
auto physicalDevice = m_device->getPhysicalDevice();
3738
const auto& limits = physicalDevice->getLimits();
3839

3940
auto queueFamProps = physicalDevice->getQueueFamilyProperties();
4041
uint32_t minImageTransferGranularityVolume = 1u; // minImageTransferGranularity.width * height * depth
4142

42-
for (uint32_t i = 0; i < queueFamProps.size(); i++)
43+
for (auto& qf : queueFamProps)
4344
{
44-
uint32_t volume = queueFamProps[i].minImageTransferGranularity.width * queueFamProps[i].minImageTransferGranularity.height * queueFamProps[i].minImageTransferGranularity.depth;
45-
if(minImageTransferGranularityVolume < volume)
45+
uint32_t volume = qf.minImageTransferGranularity.width*qf.minImageTransferGranularity.height*qf.minImageTransferGranularity.depth;
46+
if(minImageTransferGranularityVolume<volume)
4647
minImageTransferGranularityVolume = volume;
4748
}
4849

4950
// host-mapped device memory needs to have this alignment in flush/invalidate calls, therefore this is the streaming buffer's "allocationAlignment".
50-
m_allocationAlignment = static_cast<uint32_t>(limits.nonCoherentAtomSize);
51-
m_allocationAlignmentForBufferImageCopy = core::max(static_cast<uint32_t>(limits.optimalBufferCopyOffsetAlignment), m_allocationAlignment);
51+
m_allocationAlignment = limits.nonCoherentAtomSize;
52+
m_allocationAlignmentForBufferImageCopy = core::max<uint32_t>(limits.optimalBufferCopyOffsetAlignment,m_allocationAlignment);
5253

53-
const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations*sizeof(uint32_t);
54+
constexpr uint32_t OptimalCoalescedInvocationXferSize = sizeof(uint32_t);
55+
const uint32_t bufferOptimalTransferAtom = limits.maxResidentInvocations * OptimalCoalescedInvocationXferSize;
5456
const uint32_t maxImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R64G64B64A64_SFLOAT).getBlockByteSize() * minImageTransferGranularityVolume;
55-
const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R8_UINT).getBlockByteSize();;
56-
const uint32_t maxOptimalTransferAtom = core::max(bufferOptimalTransferAtom, maxImageOptimalTransferAtom);
57-
const uint32_t minOptimalTransferAtom = core::min(bufferOptimalTransferAtom, minImageOptimalTransferAtom);
57+
const uint32_t minImageOptimalTransferAtom = limits.maxResidentInvocations * asset::TexelBlockInfo(asset::EF_R8_UINT).getBlockByteSize();
58+
const uint32_t maxOptimalTransferAtom = core::max(bufferOptimalTransferAtom,maxImageOptimalTransferAtom);
59+
const uint32_t minOptimalTransferAtom = core::min(bufferOptimalTransferAtom,minImageOptimalTransferAtom);
5860

5961
// allocationAlignment <= minBlockSize <= minOptimalTransferAtom <= maxOptimalTransferAtom <= stagingBufferSize/4
6062
assert(m_allocationAlignment <= minStreamingBufferAllocationSize);
6163
assert(m_allocationAlignmentForBufferImageCopy <= minStreamingBufferAllocationSize);
6264

6365
assert(minStreamingBufferAllocationSize <= minOptimalTransferAtom);
6466

65-
assert(maxOptimalTransferAtom * 4u <= upstreamSize);
66-
assert(maxOptimalTransferAtom * 4u <= downstreamSize);
67+
assert(maxOptimalTransferAtom*OptimalCoalescedInvocationXferSize <= upstreamSize);
68+
assert(maxOptimalTransferAtom*OptimalCoalescedInvocationXferSize <= downstreamSize);
6769

6870
assert(minStreamingBufferAllocationSize % m_allocationAlignment == 0u);
6971
assert(minStreamingBufferAllocationSize % m_allocationAlignmentForBufferImageCopy == 0u);
7072

7173
const auto& enabledFeatures = m_device->getEnabledFeatures();
7274

7375
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
74-
auto commonUsages = core::bitflag(IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT;
75-
if(enabledFeatures.bufferDeviceAddress)
76-
commonUsages |= IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
76+
auto commonUsages = core::bitflag(IGPUBuffer::EUF_STORAGE_TEXEL_BUFFER_BIT)|IGPUBuffer::EUF_STORAGE_BUFFER_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT;
7777
if (enabledFeatures.accelerationStructure)
7878
commonUsages |= IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT;
7979

80-
core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags(IDeviceMemoryAllocation::EMAF_NONE);
81-
if(enabledFeatures.bufferDeviceAddress)
82-
allocateFlags |= IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT;
80+
core::bitflag<IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS> allocateFlags(IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT);
8381

8482
{
8583
IGPUBuffer::SCreationParams streamingBufferCreationParams = {};
@@ -102,8 +100,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
102100
if (memProps.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT))
103101
access |= IDeviceMemoryAllocation::EMCAF_WRITE;
104102
assert(access.value);
105-
IDeviceMemoryAllocation::MappedMemoryRange memoryRange = {mem.get(),0ull,mem->getAllocationSize()};
106-
m_device->mapMemory(memoryRange, access);
103+
mem->map({0ull,reqs.size},access);
107104

108105
m_defaultDownloadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,downstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
109106
m_defaultDownloadBuffer->getBuffer()->setObjectDebugName(("Default Download Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
@@ -130,23 +127,22 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
130127
if (memProps.hasFlags(IDeviceMemoryAllocation::EMPF_HOST_WRITABLE_BIT))
131128
access |= IDeviceMemoryAllocation::EMCAF_WRITE;
132129
assert(access.value);
133-
IDeviceMemoryAllocation::MappedMemoryRange memoryRange = {mem.get(),0ull,mem->getAllocationSize()};
134-
m_device->mapMemory(memoryRange, access);
130+
mem->map({0ull,reqs.size},access);
135131

136132
m_defaultUploadBuffer = core::make_smart_refctd_ptr<StreamingTransientDataBufferMT<>>(asset::SBufferRange<video::IGPUBuffer>{0ull,upstreamSize,std::move(buffer)},maxStreamingBufferAllocationAlignment,minStreamingBufferAllocationSize);
137133
m_defaultUploadBuffer->getBuffer()->setObjectDebugName(("Default Upload Buffer of Utilities "+std::to_string(ptrdiff_t(this))).c_str());
138134
}
135+
#if 0 // TODO: port
139136
m_propertyPoolHandler = core::make_smart_refctd_ptr<CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));
140137
// smaller workgroups fill occupancy gaps better, especially on new Nvidia GPUs, but we don't want too small workgroups on mobile
141138
// TODO: investigate whether we need to clamp against 256u instead of 128u on mobile
142139
const auto scan_workgroup_size = core::max(core::roundDownToPoT(limits.maxWorkgroupSize[0]) >> 1u, 128u);
143140
m_scanner = core::make_smart_refctd_ptr<CScanner>(core::smart_refctd_ptr(m_device), scan_workgroup_size);
141+
#endif
144142
}
145143

146-
~IUtilities()
144+
inline ~IUtilities()
147145
{
148-
m_device->unmapMemory(m_defaultDownloadBuffer->getBuffer()->getBoundMemory());
149-
m_device->unmapMemory(m_defaultUploadBuffer->getBuffer()->getBoundMemory());
150146
}
151147

152148
//!
@@ -162,6 +158,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
162158
return m_defaultDownloadBuffer.get();
163159
}
164160

161+
#if 0 // TODO: port
165162
//!
166163
virtual CPropertyPoolHandler* getDefaultPropertyPoolHandler() const
167164
{
@@ -173,7 +170,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
173170
{
174171
return m_scanner.get();
175172
}
176-
173+
#endif
177174
//! This function provides some guards against streamingBuffer fragmentation or allocation failure
178175
static uint32_t getAllocationSizeForStreamingBuffer(const size_t size, const uint64_t alignment, uint32_t maxFreeBlock, const uint32_t optimalTransferAtom)
179176
{
@@ -198,6 +195,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
198195
return allocationSize;
199196
}
200197

198+
#if 0 // TODO: port
201199
//! WARNING: This function blocks the CPU and stalls the GPU!
202200
inline core::smart_refctd_ptr<IGPUBuffer> createFilledDeviceLocalBufferOnDedMem(IQueue* queue, IGPUBuffer::SCreationParams&& params, const void* data)
203201
{
@@ -396,6 +394,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
396394

397395

398396
// pipelineBarrierAutoSubmit?
397+
#endif
399398

400399
// --------------
401400
// downloadBufferRangeViaStagingBuffer
@@ -406,9 +405,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
406405

407406
struct default_data_consumption_callback_t
408407
{
409-
default_data_consumption_callback_t(void* dstPtr) :
410-
m_dstPtr(dstPtr)
411-
{}
408+
default_data_consumption_callback_t(void* dstPtr) : m_dstPtr(dstPtr) {}
412409

413410
inline void operator()(const size_t dstOffset, const void* srcPtr, const size_t size)
414411
{
@@ -444,8 +441,8 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
444441
if (m_downstreamingBuffer->needsManualFlushOrInvalidate())
445442
{
446443
const auto nonCoherentAtomSize = device->getPhysicalDevice()->getLimits().nonCoherentAtomSize;
447-
auto flushRange = AlignedMappedMemoryRange(m_downstreamingBuffer->getBuffer()->getBoundMemory(), m_copyRange.offset, m_copyRange.length, nonCoherentAtomSize);
448-
device->invalidateMappedMemoryRanges(1u, &flushRange);
444+
auto flushRange = AlignedMappedMemoryRange(m_downstreamingBuffer->getBuffer()->getBoundMemory().memory,m_copyRange.offset,m_copyRange.length,nonCoherentAtomSize);
445+
device->invalidateMappedMemoryRanges(1u,&flushRange);
449446
}
450447
// Call the function
451448
const uint8_t* copySrc = reinterpret_cast<uint8_t*>(m_downstreamingBuffer->getBufferPointer()) + m_copyRange.offset;
@@ -459,7 +456,7 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
459456
StreamingTransientDataBufferMT<>* m_downstreamingBuffer;
460457
const size_t m_dstOffset;
461458
};
462-
459+
#if 0 // TODO: port
463460
//! Calls the callback to copy the data to a destination Offset
464461
//! * IMPORTANT: To make the copies ready, IUtilities::getDefaultDownStreamingBuffer()->cull_frees() should be called after the `submissionFence` is signaled.
465462
//! If the allocation from staging memory fails due to large image size or fragmentation, then this function may need to submit the command buffer via the `submissionQueue` and then signal the fence.
@@ -742,20 +739,21 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
742739
asset::ICPUBuffer const* srcBuffer, asset::E_FORMAT srcFormat, video::IGPUImage* dstImage, IGPUImage::LAYOUT currentDstImageLayout, const core::SRange<const asset::IImage::SBufferCopy>& regions,
743740
IQueue* submissionQueue, const IQueue::SSubmitInfo& submitInfo = {}
744741
);
742+
#endif
745743

746-
protected:
747-
744+
protected:
748745
// The application must round down the start of the range to the nearest multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize,
749746
// and round the end of the range up to the nearest multiple of VkPhysicalDeviceLimits::nonCoherentAtomSize.
750-
static IDeviceMemoryAllocation::MappedMemoryRange AlignedMappedMemoryRange(IDeviceMemoryAllocation* mem, const size_t& off, const size_t& len, size_t nonCoherentAtomSize)
747+
static ILogicalDevice::MappedMemoryRange AlignedMappedMemoryRange(IDeviceMemoryAllocation* mem, const size_t& off, const size_t& len, size_t nonCoherentAtomSize)
751748
{
752-
IDeviceMemoryAllocation::MappedMemoryRange range = {};
749+
ILogicalDevice::MappedMemoryRange range = {};
753750
range.memory = mem;
754751
range.offset = core::alignDown(off, nonCoherentAtomSize);
755752
range.length = core::min(core::alignUp(len, nonCoherentAtomSize), mem->getAllocationSize());
756753
return range;
757754
}
758755

756+
#if 0 // TODO: port
759757
//! Internal tool used to patch command buffers in submit info.
760758
class CSubmitInfoPatcher
761759
{
@@ -820,16 +818,18 @@ class NBL_API2 IUtilities : public core::IReferenceCounted
820818
core::vector<IGPUCommandBuffer*> m_allCommandBuffers;
821819
core::smart_refctd_ptr<IGPUCommandBuffer> m_newCommandBuffer; // if necessary, then need to hold reference to.
822820
};
823-
821+
#endif
824822
core::smart_refctd_ptr<ILogicalDevice> m_device;
825823

826824
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultDownloadBuffer;
827825
core::smart_refctd_ptr<StreamingTransientDataBufferMT<> > m_defaultUploadBuffer;
828826

827+
#if 0 // TODO: port
829828
core::smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
830829
core::smart_refctd_ptr<CScanner> m_scanner;
831-
};
832830
#endif
831+
};
832+
833833
class ImageRegionIterator
834834
{
835835
public:

0 commit comments

Comments
 (0)