Skip to content

Commit 7ed9c5b

Browse files
author
devsh
committed
add a util function to compute blit, and update examples_tests (pipeline barriers written)
1 parent 76faa78 commit 7ed9c5b

File tree

3 files changed

+19
-201
lines changed

3 files changed

+19
-201
lines changed

examples_tests

include/nbl/video/utilities/CComputeBlit.h

Lines changed: 9 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -134,10 +134,15 @@ class CComputeBlit : public core::IReferenceCounted
134134
// the absolute minimum needed to store a single pixel of a worst case format (precise, all 4 channels)
135135
constexpr auto singlePixelStorage = 4*sizeof(hlsl::float32_t);
136136
constexpr auto ratio = singlePixelStorage/sizeof(uint16_t);
137-
const auto paddedAlphaBinCount = core::min(core::roundUp(baseBucketCount,workgroupSize),workgroupSize*ratio);
137+
// atomicAdd gets performed on MSB or LSB of a single DWORD
138+
const auto paddedAlphaBinCount = core::min(core::roundUp<uint16_t>(baseBucketCount,workgroupSize*2),workgroupSize*ratio);
138139
return paddedAlphaBinCount*layersToBlit;
139140
}
140-
141+
142+
static inline uint32_t getNormalizationByteSize(const uint16_t workgroupSize, const asset::E_FORMAT intermediateAlpha, const uint32_t layersToBlit)
143+
{
144+
return getAlphaBinCount(workgroupSize,intermediateAlpha,layersToBlit)*sizeof(uint16_t)+sizeof(uint32_t)+sizeof(uint32_t);
145+
}
141146
#if 0
142147

143148
//! Returns the number of output texels produced by one workgroup, deciding factor is `m_availableSharedMemory`.
@@ -337,19 +342,14 @@ class CComputeBlit : public core::IReferenceCounted
337342
{
338343
dispatch_info_t dispatchInfo;
339344
buildAlphaTestDispatchInfo(dispatchInfo, inImageExtent, inImageType, layersToBlit);
340-
341-
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, alphaTestPipeline->getLayout(), 0u, 1u, &alphaTestDS);
342-
cmdbuf->bindComputePipeline(alphaTestPipeline);
345+
// bind omitted
343346
dispatchHelper(cmdbuf, alphaTestPipeline->getLayout(), pushConstants, dispatchInfo);
344347
}
345348

346349
{
347350
dispatch_info_t dispatchInfo;
348351
buildBlitDispatchInfo<BlitUtilities>(dispatchInfo, inImageExtent, outImageExtent, inImageFormat, inImageType, kernels, workgroupSize, layersToBlit);
349-
350-
video::IGPUDescriptorSet* ds_raw[] = { blitDS, blitWeightsDS };
351-
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, blitPipeline->getLayout(), 0, 2, ds_raw);
352-
cmdbuf->bindComputePipeline(blitPipeline);
352+
// bind omitted
353353
dispatchHelper(cmdbuf, blitPipeline->getLayout(), pushConstants, dispatchInfo);
354354
}
355355

@@ -359,39 +359,6 @@ class CComputeBlit : public core::IReferenceCounted
359359
dispatch_info_t dispatchInfo;
360360
buildNormalizationDispatchInfo(dispatchInfo, outImageExtent, inImageType, layersToBlit);
361361

362-
assert(coverageAdjustmentScratchBuffer);
363-
IGPUCommandBuffer::SPipelineBarrierDependencyInfo depInfo;
364-
// Memory dependency to ensure the alpha test pass has finished writing to alphaTestCounterBuffer
365-
video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t alphaTestBarrier = {};
366-
alphaTestBarrier.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
367-
alphaTestBarrier.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
368-
alphaTestBarrier.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
369-
alphaTestBarrier.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
370-
alphaTestBarrier.range.buffer = coverageAdjustmentScratchBuffer;
371-
alphaTestBarrier.range.size = coverageAdjustmentScratchBuffer->getSize();
372-
alphaTestBarrier.range.offset = 0;
373-
374-
// Memory dependency to ensure that the previous compute pass has finished writing to the output image,
375-
// also transitions the layout of said image: GENERAL -> SHADER_READ_ONLY_OPTIMAL
376-
video::IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t readyForNorm = {};
377-
readyForNorm.barrier.dep.srcStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
378-
readyForNorm.barrier.dep.srcAccessMask = asset::ACCESS_FLAGS::SHADER_WRITE_BITS;
379-
readyForNorm.barrier.dep.dstStageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT;
380-
readyForNorm.barrier.dep.dstAccessMask = asset::ACCESS_FLAGS::SHADER_READ_BITS;
381-
readyForNorm.oldLayout = video::IGPUImage::LAYOUT::GENERAL;
382-
readyForNorm.newLayout = video::IGPUImage::LAYOUT::READ_ONLY_OPTIMAL;
383-
readyForNorm.image = normalizationInImage.get();
384-
readyForNorm.subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT;
385-
readyForNorm.subresourceRange.levelCount = 1u;
386-
readyForNorm.subresourceRange.layerCount = normalizationInImage->getCreationParameters().arrayLayers;
387-
388-
depInfo.bufBarriers = { &alphaTestBarrier, &alphaTestBarrier + 1 };
389-
depInfo.imgBarriers = { &readyForNorm, &readyForNorm + 1 };
390-
391-
cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, depInfo);
392-
393-
cmdbuf->bindDescriptorSets(asset::EPBP_COMPUTE, normalizationPipeline->getLayout(), 0u, 1u, &normalizationDS);
394-
cmdbuf->bindComputePipeline(normalizationPipeline);
395362
dispatchHelper(cmdbuf, normalizationPipeline->getLayout(), pushConstants, dispatchInfo);
396363
}
397364
}

src/nbl/video/utilities/CComputeBlit.cpp

Lines changed: 9 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ using namespace nbl::hlsl;
5050
5151
struct ConstevalParameters
5252
{
53-
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = )===" << retval.workgroupSize << R"===(;
54-
using kernel_weight_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.kernelWeights,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
55-
using input_sampler_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.samplers,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
56-
using input_image_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.inputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
57-
using output_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.outputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
58-
NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedMemoryDWORDs = )===" << (sharedMemoryPerInvocation* retval.workgroupSize)/sizeof(uint32_t) << R"===(;
53+
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkGroupSize = )===" << retval.workgroupSize << R"===(;
54+
using kernel_weight_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.kernelWeights,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
55+
using input_sampler_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.samplers,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
56+
using input_image_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.inputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
57+
using output_binding_t = )===" << layout->getBindingInfoForHLSL({.binding=info.outputs,.requiredStages=IShader::E_SHADER_STAGE::ESS_COMPUTE}) << R"===(;
58+
NBL_CONSTEXPR_STATIC_INLINE uint32_t SharedMemoryDWORDs = )===" << (sharedMemoryPerInvocation* retval.workgroupSize)/sizeof(uint32_t) << R"===(;
5959
};
6060
)===";
6161
return tmp.str();
@@ -135,30 +135,7 @@ core::smart_refctd_ptr<video::IGPUShader> createBlitSpecializedShader(
135135
const uint32_t smemFloatCount = m_availableSharedMemory / (sizeof(float) * outChannelCount);
136136
const uint32_t blitDimCount = static_cast<uint32_t>(imageType) + 1;
137137

138-
139-
std::ostringstream shaderSourceStream;
140-
shaderSourceStream
141-
<< "#include \"nbl/builtin/hlsl/blit/common.hlsl\"\n"
142-
"#include \"nbl/builtin/hlsl/blit/parameters.hlsl\"\n"
143-
"#include \"nbl/builtin/hlsl/blit/compute_blit.hlsl\"\n";
144-
145-
shaderSourceStream
146-
<< "typedef nbl::hlsl::blit::consteval_parameters_t<" << workgroupSize << ", 1, 1, " << smemFloatCount << ", "
147-
<< outChannelCount << ", " << blitDimCount << ", " << paddedAlphaBinCount << "> ceval_params_t;\n";
148-
149-
shaderSourceStream
150-
<< "[[vk::combinedImageSampler]] [[vk::binding(0, 0)]]\n"
151-
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::combined_sampler_t inCS;\n"
152-
"[[vk::combinedImageSampler]] [[vk::binding(0, 0)]]\n"
153-
"SamplerState inSamp;\n"
154-
155-
"[[vk::image_format(\""<< formatQualifier << "\")]]\n"
156-
"[[vk::binding(1, 0)]]\n"
157-
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::image_t outImg;\n"
158-
159-
"[[vk::binding(0, 1)]] Buffer<float32_t4> kernelWeights;\n"
160-
"[[vk::push_constant]] nbl::hlsl::blit::parameters_t params;"
161-
"groupshared float32_t sMem[" << m_availableSharedMemory / sizeof(float) << "];\n";
138+
.......
162139

163140
if (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
164141
{
@@ -184,58 +161,6 @@ core::smart_refctd_ptr<video::IGPUShader> createBlitSpecializedShader(
184161
" InCSAccessor inCSA; OutImgAccessor outImgA; KernelWeightsAccessor kwA; HistogramAccessor hA; SharedAccessor sA;\n"
185162
" blit.execute(inCSA, outImgA, kwA, hA, sA, workGroupID, localInvocationIndex);\n"
186163
"}\n";
187-
188-
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_SHADER_STAGE::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlit::createBlitSpecializedShader");
189-
auto gpuShader = m_device->createShader(std::move(cpuShader.get()));
190-
191-
return gpuShader;
192-
}
193-
194-
template <typename BlitUtilities>
195-
core::smart_refctd_ptr<video::IGPUComputePipeline> getBlitPipeline(
196-
const asset::E_FORMAT outFormat,
197-
const asset::IImage::E_TYPE imageType,
198-
const core::vectorSIMDu32& inExtent,
199-
const core::vectorSIMDu32& outExtent,
200-
const asset::IBlitUtilities::E_ALPHA_SEMANTIC alphaSemantic,
201-
const typename BlitUtilities::convolution_kernels_t& kernels,
202-
const uint32_t workgroupSize = 256,
203-
const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
204-
{
205-
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(core::vectorSIMDu32(workgroupSize, 1, 1, 1), alphaBinCount);
206-
207-
const SBlitCacheKey key =
208-
{
209-
.wgSize = workgroupSize,
210-
.imageType = imageType,
211-
.alphaBinCount = paddedAlphaBinCount,
212-
.outFormat = outFormat,
213-
.smemSize = m_availableSharedMemory,
214-
.coverageAdjustment = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE)
215-
};
216-
217-
if (m_blitPipelines.find(key) == m_blitPipelines.end())
218-
{
219-
const auto blitType = (alphaSemantic == asset::IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) ? EBT_COVERAGE_ADJUSTMENT : EBT_REGULAR;
220-
221-
auto specShader = createBlitSpecializedShader<BlitUtilities>(
222-
outFormat,
223-
imageType,
224-
inExtent,
225-
outExtent,
226-
alphaSemantic,
227-
kernels,
228-
workgroupSize,
229-
paddedAlphaBinCount);
230-
231-
IGPUComputePipeline::SCreationParams creationParams;
232-
creationParams.shader.shader = specShader.get();
233-
creationParams.shader.entryPoint = "main";
234-
creationParams.layout = m_blitPipelineLayout[blitType].get();
235-
m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_blitPipelines[key]);
236-
}
237-
238-
return m_blitPipelines[key];
239164
}
240165

241166
core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializedShader(const asset::IImage::E_TYPE imageType, const uint32_t alphaBinCount)
@@ -244,21 +169,7 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializ
244169
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
245170
const uint32_t blitDimCount = static_cast<uint32_t>(imageType) + 1;
246171

247-
std::ostringstream shaderSourceStream;
248-
249-
shaderSourceStream
250-
<< "#include \"nbl/builtin/hlsl/blit/common.hlsl\"\n"
251-
"#include \"nbl/builtin/hlsl/blit/parameters.hlsl\"\n"
252-
"#include \"nbl/builtin/hlsl/blit/alpha_test.hlsl\"\n"
253-
254-
"typedef nbl::hlsl::blit::consteval_parameters_t<" << workgroupDims.x << ", " << workgroupDims.y << ", " << workgroupDims.z << ", "
255-
"0, 0, " << blitDimCount << ", " << paddedAlphaBinCount << "> ceval_params_t;\n"
256-
257-
"[[vk::binding(0, 0)]]\n"
258-
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::combined_sampler_t inCS;\n"
259-
260-
"[[vk::binding(2 , 0)]] RWStructuredBuffer<uint32_t> statsBuff;\n"
261-
"[[vk::push_constant]] nbl::hlsl::blit::parameters_t params;"
172+
........
262173

263174
"struct PassedPixelsAccessor { void atomicAdd(uint32_t wgID, uint32_t v) { InterlockedAdd(statsBuff[wgID * (ceval_params_t::AlphaBinCount + 1) + ceval_params_t::AlphaBinCount], v); } };\n"
264175
"struct InCSAccessor { float32_t4 get(int32_t3 c, uint32_t l) { return inCS[nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::getIndexCoord<int32_t>(c, l)]; } };\n"
@@ -269,29 +180,6 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createAlphaTestSpecializ
269180
" InCSAccessor inCSA;PassedPixelsAccessor ppA;\n"
270181
" nbl::hlsl::blit::alpha_test(ppA, inCSA, params.inputDims, params.referenceAlpha, globalInvocationID, workGroupID);\n"
271182
"}\n";
272-
273-
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSLGLSL::createAlphaTestSpecializedShader");
274-
}
275-
276-
core::smart_refctd_ptr<video::IGPUComputePipeline> getAlphaTestPipeline(const uint32_t alphaBinCount, const asset::IImage::E_TYPE imageType)
277-
{
278-
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
279-
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
280-
281-
assert(paddedAlphaBinCount >= asset::IBlitUtilities::MinAlphaBinCount);
282-
const auto pipelineIndex = (paddedAlphaBinCount / asset::IBlitUtilities::MinAlphaBinCount) - 1;
283-
284-
if (m_alphaTestPipelines[pipelineIndex][imageType])
285-
return m_alphaTestPipelines[pipelineIndex][imageType];
286-
287-
auto specShader = createAlphaTestSpecializedShader(imageType, paddedAlphaBinCount);
288-
IGPUComputePipeline::SCreationParams creationParams;
289-
creationParams.shader.shader = specShader.get();
290-
creationParams.shader.entryPoint = "main";
291-
creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
292-
assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_alphaTestPipelines[pipelineIndex][imageType]));
293-
294-
return m_alphaTestPipelines[pipelineIndex][imageType];
295183
}
296184

297185
// @param `outFormat` dictates encoding.
@@ -301,22 +189,7 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
301189
const auto paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
302190
const uint32_t blitDimCount = static_cast<uint32_t>(imageType) + 1;
303191

304-
std::ostringstream shaderSourceStream;
305-
306-
shaderSourceStream
307-
<< "#include \"nbl/builtin/hlsl/blit/common.hlsl\"\n"
308-
"#include \"nbl/builtin/hlsl/blit/parameters.hlsl\"\n"
309-
"#include \"nbl/builtin/hlsl/blit/normalization.hlsl\"\n"
310-
311-
"typedef nbl::hlsl::blit::consteval_parameters_t<" << workgroupDims.x << ", " << workgroupDims.y << ", " << workgroupDims.z << ", "
312-
"0, 0, " << blitDimCount << ", " << paddedAlphaBinCount << "> ceval_params_t;\n"
313-
314-
"[[vk::binding(0, 0)]]\n"
315-
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::combined_sampler_t inCS;\n"
316-
317-
"[[vk::image_format(\"unknown\")]]\n"
318-
"[[vk::binding(1, 0)]]\n"
319-
"nbl::hlsl::blit::impl::dim_to_image_properties<ceval_params_t::BlitDimCount>::image_t outImg;\n"
192+
....
320193

321194
"[[vk::binding(2 , 0)]] RWStructuredBuffer<uint32_t> statsBuff;\n"
322195
"[[vk::push_constant]] nbl::hlsl::blit::parameters_t params;"
@@ -335,27 +208,5 @@ core::smart_refctd_ptr<video::IGPUShader> CComputeBlit::createNormalizationSpeci
335208
" InCSAccessor inCSA; OutImgAccessor outImgA; HistogramAccessor hA; PassedPixelsAccessor ppA; SharedAccessor sA;\n"
336209
" blit.execute(inCSA, outImgA, hA, ppA, sA, workGroupID, globalInvocationID, localInvocationIndex);\n"
337210
"}\n";
338-
339-
auto cpuShader = core::make_smart_refctd_ptr<asset::ICPUShader>(shaderSourceStream.str().c_str(), IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, IGPUShader::E_CONTENT_TYPE::ECT_HLSL, "CComputeBlitGLSL::createNormalizationSpecializedShader");
340-
}
341-
342-
core::smart_refctd_ptr<video::IGPUComputePipeline> getNormalizationPipeline(const asset::IImage::E_TYPE imageType, const asset::E_FORMAT outFormat,
343-
const uint32_t alphaBinCount = asset::IBlitUtilities::DefaultAlphaBinCount)
344-
{
345-
const auto workgroupDims = getDefaultWorkgroupDims(imageType);
346-
const uint32_t paddedAlphaBinCount = getPaddedAlphaBinCount(workgroupDims, alphaBinCount);
347-
const SNormalizationCacheKey key = { imageType, paddedAlphaBinCount, outFormat };
348-
349-
if (m_normalizationPipelines.find(key) == m_normalizationPipelines.end())
350-
{
351-
auto specShader = createNormalizationSpecializedShader(imageType, outFormat, paddedAlphaBinCount);
352-
IGPUComputePipeline::SCreationParams creationParams;
353-
creationParams.shader.shader = specShader.get();
354-
creationParams.shader.entryPoint = "main";
355-
creationParams.layout = m_blitPipelineLayout[EBT_COVERAGE_ADJUSTMENT].get();
356-
assert(m_device->createComputePipelines(nullptr, { &creationParams, &creationParams + 1 }, &m_normalizationPipelines[key]));
357-
}
358-
359-
return m_normalizationPipelines[key];
360211
}
361212
#endif

0 commit comments

Comments
 (0)