From c1c00d5c49a5526c0cbe1119ca6f36e0667de58f Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 13:44:28 +0430 Subject: [PATCH 01/16] Calling normalization.finalize in filters --- include/nbl/asset/filters/CBlitImageFilter.h | 3 +++ include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/nbl/asset/filters/CBlitImageFilter.h b/include/nbl/asset/filters/CBlitImageFilter.h index 16954ef5bb..bf57a1d46b 100644 --- a/include/nbl/asset/filters/CBlitImageFilter.h +++ b/include/nbl/asset/filters/CBlitImageFilter.h @@ -561,7 +561,10 @@ class CBlitImageFilter : public CImageFilternormalization.finalize(); storeToImage(core::rational<>(inv_cvg_num,inv_cvg_den),axis,outOffsetLayer); + } }; // filter in X-axis filterAxis(IImage::ET_1D,kernelX); diff --git a/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h b/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h index cc0251a7fb..479640f7c3 100644 --- a/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h +++ b/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h @@ -56,9 +56,9 @@ class CSwizzleAndConvertImageFilterBase : public CSwizzleableAndDitherableFilter static inline void normalizationPrepass(E_FORMAT rInFormat, const ExecutionPolicy& policy, state_type* state, const core::vectorSIMDu32& blockDims) { if constexpr (!std::is_void_v) - { + { assert(kInFormat==EF_UNKNOWN || rInFormat==EF_UNKNOWN); - state->normalization.initialize(); + state->normalization.initialize(); auto perOutputRegion = [policy,&blockDims,&state,rInFormat](const CMatchedSizeInOutImageFilterCommon::CommonExecuteData& commonExecuteData, CBasicImageFilterCommon::clip_region_functor_t& clip) -> bool { auto normalizePrepass = [&commonExecuteData,&blockDims,&state,rInFormat](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos) @@ -84,6 +84,7 @@ class CSwizzleAndConvertImageFilterBase : public CSwizzleableAndDitherableFilter return true; }; CMatchedSizeInOutImageFilterCommon::commonExecute(state,perOutputRegion); + state->normalization.finalize(); } } }; From 80748108c5354ba4d7c1e2f21f6a0364540bc313 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 13:46:21 +0430 Subject: [PATCH 02/16] Fixed normalization states compile error --- include/nbl/asset/filters/NormalizationStates.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/filters/NormalizationStates.h b/include/nbl/asset/filters/NormalizationStates.h index 3b5be06b62..badcdccbb8 100644 --- a/include/nbl/asset/filters/NormalizationStates.h +++ b/include/nbl/asset/filters/NormalizationStates.h @@ -183,7 +183,7 @@ class CDerivativeMapNormalizationState : public impl::CDerivativeMapNormalizatio static_assert(std::is_floating_point_v, "Integer encode types not supported yet!"); if constexpr (isotropic) { - const float isotropicMax = core::max(core::max(maxAbsPerChannel[0],maxAbsPerChannel[1]),core::max(maxAbsPerChannel[2],maxAbsPerChannel[3])); + float isotropicMax = core::max(core::max(maxAbsPerChannel[0].load(),maxAbsPerChannel[1].load()),core::max(maxAbsPerChannel[2].load(),maxAbsPerChannel[3].load())); for (auto i=0u; i<4u; i++) maxAbsPerChannel[i] = isotropicMax; } From 92a10cd047e56282be1afd4df44ae898959f4da1 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 13:50:29 +0430 Subject: [PATCH 03/16] Readme Improve --- examples_tests/22.RaytracedAO/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests/22.RaytracedAO/README.md b/examples_tests/22.RaytracedAO/README.md index 6f52403ae3..f2848fbe4b 100644 --- a/examples_tests/22.RaytracedAO/README.md +++ b/examples_tests/22.RaytracedAO/README.md @@ -58,7 +58,7 @@ You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more d | tonemapper | Tonemapper Settings for Denoiser | string | "ACES=0.4,0.8" | cropOffsetX, cropOffsetY | Used to control the offset for cropping cubemap renders (instead of highQualityEdges) | int | 0 | | cropWidth, cropHeight | Used to control the size for cropping cubemap renders (instead of highQualityEdges) | int | width-cropOffsetX, height-cropOffsetY -| envmapRegularizationFactor | if RIS is enabled then paths will be guided towards envmap based on this regularization factor.

1.0 is based on product of envmap and bxdf
0.0 is based only on bxdf

But 1.0 is never a valid value to use.
Valid Range is [0.2, 0.8] | float | 0.0 | +| envmapRegularizationFactor | Fractional blend between guiding paths based on just the BxDF (0.0) or the product of the BxDF and the Environment Map (1.0)
Valid parameter ranges are between 0.0 and 0.8 as guiding fully by the product produces extreme fireflies from indirect light or local lights. | float | 0.5 | ### Example of a sensor using all new properties described above. ```xml From 69255e9c901c26a09853a945badbbbfacb782031 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 14:04:45 +0430 Subject: [PATCH 04/16] remove the multiplication by SAMPLING_STRATEGY_COUNT because we resample --- examples_tests/22.RaytracedAO/Renderer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests/22.RaytracedAO/Renderer.cpp b/examples_tests/22.RaytracedAO/Renderer.cpp index a801618268..7e62cd7d8a 100644 --- a/examples_tests/22.RaytracedAO/Renderer.cpp +++ b/examples_tests/22.RaytracedAO/Renderer.cpp @@ -1162,7 +1162,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e uint32_t _maxRaysPerDispatch = 0u; auto setRayBufferSizes = [renderPixelCount,this,&_maxRaysPerDispatch,&raygenBufferSize,&intersectionBufferSize](uint32_t sampleMultiplier) -> void { - m_staticViewData.samplesPerPixelPerDispatch = SAMPLING_STRATEGY_COUNT*sampleMultiplier; + m_staticViewData.samplesPerPixelPerDispatch = sampleMultiplier; const size_t minimumSampleCountPerDispatch = static_cast(renderPixelCount)*getSamplesPerPixelPerDispatch(); _maxRaysPerDispatch = static_cast(minimumSampleCountPerDispatch); From 0970ed4f36102f357a50f8077db4d44d9e7ca77c Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 15:54:30 +0430 Subject: [PATCH 05/16] force alpha=1 in luma dot product --- examples_tests/22.RaytracedAO/lumaMipMapGen.comp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples_tests/22.RaytracedAO/lumaMipMapGen.comp b/examples_tests/22.RaytracedAO/lumaMipMapGen.comp index 3f09179614..1d0f9193ad 100644 --- a/examples_tests/22.RaytracedAO/lumaMipMapGen.comp +++ b/examples_tests/22.RaytracedAO/lumaMipMapGen.comp @@ -35,8 +35,8 @@ void main() if(pc.data.calcLuma > 0) { float sinTheta = sin(nbl_glsl_PI * (float(pixelCoord.y + 0.5f) / envMapSize.y)); - vec4 envMapSample = texelFetch(envMap, pixelCoord, 0); - float luma = dot(pc.data.luminanceScales, envMapSample); + vec3 envMapSample = texelFetch(envMap, pixelCoord, 0).rgb; + float luma = dot(pc.data.luminanceScales, vec4(envMapSample, 1.0f)); if(pc.data.sinFactor > 0) luma *= sinTheta; imageStore(srcLuminance, pixelCoord, vec4(luma)); From fb1517c83b5557c34b44b197eda5ec280d932200 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 15:54:45 +0430 Subject: [PATCH 06/16] default regfactor changed to 0.5f --- include/nbl/ext/MitsubaLoader/CElementFilm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h index f28d893f20..2a1ff38d14 100644 --- a/include/nbl/ext/MitsubaLoader/CElementFilm.h +++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h @@ -141,7 +141,7 @@ class CElementFilm : public IElement float denoiserBloomIntensity = 0.0f; _NBL_STATIC_INLINE_CONSTEXPR size_t MaxTonemapperArgsLen = 128; char denoiserTonemapperArgs[MaxTonemapperArgsLen+1] = {0}; - float envmapRegularizationFactor = 0.0f; // 1.0f means based envmap luminance, 0.0f means uniform + float envmapRegularizationFactor = 0.5f; // 1.0f means based envmap luminance, 0.0f means uniform }; From 6a27109951db0131397c8751b1e93e0534894d09 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 15:56:16 +0430 Subject: [PATCH 07/16] Use single image view for luminance (except mipmap gen which we need different image views for each mipmap level) --- examples_tests/22.RaytracedAO/Renderer.cpp | 60 ++++++++++--------- examples_tests/22.RaytracedAO/Renderer.h | 3 +- examples_tests/22.RaytracedAO/genWarpMap.comp | 12 ++-- examples_tests/22.RaytracedAO/main.cpp | 2 +- .../22.RaytracedAO/raytraceCommon.glsl | 10 ++-- 5 files changed, 45 insertions(+), 42 deletions(-) diff --git a/examples_tests/22.RaytracedAO/Renderer.cpp b/examples_tests/22.RaytracedAO/Renderer.cpp index 7e62cd7d8a..160c1e0469 100644 --- a/examples_tests/22.RaytracedAO/Renderer.cpp +++ b/examples_tests/22.RaytracedAO/Renderer.cpp @@ -830,7 +830,7 @@ void Renderer::finalizeScene(Renderer::InitializationData& initData) } } -core::smart_refctd_ptr Renderer::createTexture(uint32_t width, uint32_t height, E_FORMAT format, uint32_t layers) +core::smart_refctd_ptr Renderer::createTexture(uint32_t width, uint32_t height, E_FORMAT format, uint32_t mipLevels, uint32_t layers) { const auto real_layers = layers ? layers:1u; @@ -839,7 +839,7 @@ core::smart_refctd_ptr Renderer::createTexture(uint32_t width, ui imgparams.arrayLayers = real_layers; imgparams.flags = static_cast(0); imgparams.format = format; - imgparams.mipLevels = 1u; + imgparams.mipLevels = mipLevels; imgparams.samples = IImage::ESCF_1_BIT; imgparams.type = IImage::ET_2D; @@ -852,14 +852,14 @@ core::smart_refctd_ptr Renderer::createTexture(uint32_t width, ui viewparams.subresourceRange.baseArrayLayer = 0u; viewparams.subresourceRange.layerCount = real_layers; viewparams.subresourceRange.baseMipLevel = 0u; - viewparams.subresourceRange.levelCount = 1u; + viewparams.subresourceRange.levelCount = mipLevels; return m_driver->createGPUImageView(std::move(viewparams)); } core::smart_refctd_ptr Renderer::createScreenSizedTexture(E_FORMAT format, uint32_t layers) { - return createTexture(m_staticViewData.imageDimensions.x, m_staticViewData.imageDimensions.y, format, layers); + return createTexture(m_staticViewData.imageDimensions.x, m_staticViewData.imageDimensions.y, format, 1u, layers); } core::smart_refctd_ptr Renderer::SampleSequence::createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount) @@ -1310,18 +1310,15 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; } - IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfos[MipCountLuminance]; + IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {}; // luminance mip maps { ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_BORDER, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, 0u, false, ECO_ALWAYS }; auto sampler = m_driver->createGPUSampler(samplerParams); - for(uint32_t i = 0; i < MipCountLuminance; ++i) - { - luminanceDescriptorInfos[i].desc = m_luminanceMipMaps[i]; - luminanceDescriptorInfos[i].image.sampler = sampler; - luminanceDescriptorInfos[i].image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - } + luminanceDescriptorInfo.desc = m_luminanceBaseImageView; + luminanceDescriptorInfo.image.sampler = sampler; + luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; } createEmptyInteropBufferAndSetUpInfo(infos+3,m_rayBuffer[0],raygenBufferSize); @@ -1343,13 +1340,13 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e EDT_COMBINED_IMAGE_SAMPLER, }); - // Set last write which is a descriptor array + // Set last write writes[9].binding = 9u; writes[9].arrayElement = 0u; - writes[9].count = MipCountLuminance; + writes[9].count = 1u; writes[9].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; writes[9].dstSet = m_commonRaytracingDS[0].get(); - writes[9].info = luminanceDescriptorInfos; + writes[9].info = &luminanceDescriptorInfo; m_driver->updateDescriptorSets(descriptorUpdateCount,writes,0u,nullptr); // set up second DS @@ -1956,20 +1953,28 @@ bool Renderer::traceBounce(uint32_t& raycount) void Renderer::initWarpingResources() { - for(uint32_t i = 0; i < MipCountLuminance; ++i) { - const uint32_t resolution = 0x1u<<(MipCountLuminance - 1 - i); + const uint32_t resolution = 0x1u<<(MipCountLuminance - 1); const uint32_t width = std::max(resolution, 1u); const uint32_t height = std::max(resolution/2u, 1u); - m_luminanceMipMaps[i] = createTexture(width, height, EF_R32_SFLOAT); - assert(m_luminanceMipMaps[i]); + m_luminanceBaseImageView = createTexture(width, height, EF_R32_SFLOAT, MipCountLuminance); + assert(m_luminanceBaseImageView); + + m_luminanceMipMaps[0] = m_luminanceBaseImageView; + for(uint32_t i = 1; i < MipCountLuminance; ++i) + { + IGPUImageView::SCreationParams viewCreateParams = m_luminanceBaseImageView->getCreationParameters(); + viewCreateParams.subresourceRange.baseMipLevel = i; + viewCreateParams.subresourceRange.levelCount = 1u; + + m_luminanceMipMaps[i] = m_driver->createGPUImageView(std::move(viewCreateParams)); + } } { const uint32_t resolution = 0x1u<<(MipCountEnvmap-1); // same size as envmap const uint32_t width = std::max(resolution, 1u); const uint32_t height = std::max(resolution/2u, 1u); - // m_warpMap = createTexture(width, height, EF_R16G16_SFLOAT); m_warpMap = createTexture(width, height, EF_R32G32_SFLOAT); } @@ -2098,13 +2103,10 @@ void Renderer::initWarpingResources() m_warpDS = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_warpDSLayout)); - IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfos[MipCountLuminance] = {}; - for(uint32_t i = 0; i < MipCountLuminance; ++i) - { - luminanceDescriptorInfos[i].desc = m_luminanceMipMaps[i]; - luminanceDescriptorInfos[i].image.sampler = nullptr; - luminanceDescriptorInfos[i].image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - } + IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {}; + luminanceDescriptorInfo.desc = m_luminanceBaseImageView; + luminanceDescriptorInfo.image.sampler = nullptr; + luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; warpMapDescriptorInfo.desc = m_warpMap; @@ -2114,10 +2116,10 @@ void Renderer::initWarpingResources() IGPUDescriptorSet::SWriteDescriptorSet writes[2u]; writes[0].binding = 0u; writes[0].arrayElement = 0u; - writes[0].count = MipCountLuminance; + writes[0].count = 1u; writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; writes[0].dstSet = m_warpDS.get(); - writes[0].info = luminanceDescriptorInfos; + writes[0].info = &luminanceDescriptorInfo; writes[1].binding = 1u; writes[1].arrayElement = 0u; @@ -2288,7 +2290,7 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); m_driver->dispatch(workGroups[0],workGroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); + COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT); } // Calc Mipmaps diff --git a/examples_tests/22.RaytracedAO/Renderer.h b/examples_tests/22.RaytracedAO/Renderer.h index 2c98f18b28..518d75d213 100644 --- a/examples_tests/22.RaytracedAO/Renderer.h +++ b/examples_tests/22.RaytracedAO/Renderer.h @@ -129,7 +129,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac void finalizeScene(InitializationData& initData); // - nbl::core::smart_refctd_ptr createTexture(uint32_t width, uint32_t height, nbl::asset::E_FORMAT format, uint32_t layers=0u); + nbl::core::smart_refctd_ptr createTexture(uint32_t width, uint32_t height, nbl::asset::E_FORMAT format, uint32_t mipLevels=1u, uint32_t layers=0u); nbl::core::smart_refctd_ptr createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers=0u); // @@ -249,6 +249,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac // Shader and Resources for Generating Luminance MipMaps from EnvMap static constexpr uint32_t MipCountLuminance = MipCountEnvmap; + nbl::core::smart_refctd_ptr m_luminanceBaseImageView; nbl::core::smart_refctd_ptr m_luminanceMipMaps[MipCountLuminance]; uint32_t m_lumaWorkGroups[2]; nbl::core::smart_refctd_ptr m_lumaDSLayout; diff --git a/examples_tests/22.RaytracedAO/genWarpMap.comp b/examples_tests/22.RaytracedAO/genWarpMap.comp index 12c685f43e..a3b9e98dd2 100644 --- a/examples_tests/22.RaytracedAO/genWarpMap.comp +++ b/examples_tests/22.RaytracedAO/genWarpMap.comp @@ -5,7 +5,7 @@ layout(local_size_x = WARP_MAP_GEN_WORKGROUP_DIM, local_size_y = WARP_MAP_GEN_WORKGROUP_DIM) in; -layout(set = 0, binding = 0) uniform sampler2D luminance[MAX_LUMINANCE_LEVELS]; +layout(set = 0, binding = 0) uniform sampler2D luminance; layout(set = 0, binding = 1, rg32f) uniform image2D warpMap; layout(push_constant) uniform PushConstants @@ -30,13 +30,13 @@ void main() // (skip 0 which is 1x1 and useless in warping) for(int i = int(pc.data.lumaMipCount - 1); i >= 1; --i) { - ivec2 luminanceMipSize = textureSize(luminance[i], 0).xy; + ivec2 luminanceMipSize = textureSize(luminance, i).xy; vec4 values = vec4(0); - values[0] = texelFetch(luminance[i], p + ivec2(0, 1), 0).r; - values[1] = texelFetch(luminance[i], p + ivec2(1, 1), 0).r; - values[2] = texelFetch(luminance[i], p + ivec2(1, 0), 0).r; - values[3] = texelFetch(luminance[i], p + ivec2(0, 0), 0).r; + values[0] = texelFetch(luminance, p + ivec2(0, 1), i).r; + values[1] = texelFetch(luminance, p + ivec2(1, 1), i).r; + values[2] = texelFetch(luminance, p + ivec2(1, 0), i).r; + values[3] = texelFetch(luminance, p + ivec2(0, 0), i).r; float wy_0 = 0.0f; float wy_1 = 0.0f; diff --git a/examples_tests/22.RaytracedAO/main.cpp b/examples_tests/22.RaytracedAO/main.cpp index 6f2cc89522..678b0db264 100644 --- a/examples_tests/22.RaytracedAO/main.cpp +++ b/examples_tests/22.RaytracedAO/main.cpp @@ -414,7 +414,7 @@ int main(int argc, char** argv) mainSensorData.denoiserInfo.bloomIntensity = film.denoiserBloomIntensity; mainSensorData.denoiserInfo.tonemapperArgs = std::string(film.denoiserTonemapperArgs); mainSensorData.fileFormat = film.fileFormat; - mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.2f, 0.8f); + mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.0f, 0.8f); mainSensorData.outputFilePath = std::filesystem::path(film.outputFilePath); if(!isFileExtensionCompatibleWithFormat(mainSensorData.outputFilePath.extension().string(), mainSensorData.fileFormat)) { diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index b20161f7b9..3a1eb03e56 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -50,7 +50,7 @@ layout(set = 2, binding = 6, r32ui) restrict uniform uimage2DArray normalAOV; // environment emitter layout(set = 2, binding = 7) uniform sampler2D envMap; layout(set = 2, binding = 8) uniform sampler2D warpMap; -layout(set = 2, binding = 9) uniform sampler2D luminance[MAX_LUMINANCE_LEVELS]; +layout(set = 2, binding = 9) uniform sampler2D luminance; void clear_raycount() { @@ -272,13 +272,13 @@ vec3 nbl_glsl_unormSphericalToCartesian(in vec2 uv, out float sinTheta) // return regularized pdf of sample float Envmap_regularized_deferred_pdf(in vec3 rayDirection) { - const ivec2 luminanceMapSize = textureSize(luminance[0], 0); - uint lastLuminanceMip = uint(log2(luminanceMapSize.x)); // TODO: later turn into push constant + const ivec2 luminanceMapSize = textureSize(luminance, 0); + int lastLuminanceMip = int(log2(luminanceMapSize.x)); // TODO: later turn into push constant const vec2 envmapUV = nbl_glsl_sampling_generateUVCoordFromDirection(rayDirection); float sinTheta = length(rayDirection.zx); - float sumLum = texelFetch(luminance[lastLuminanceMip], ivec2(0), 0).r; - float lum = textureLod(luminance[0], envmapUV, 0).r; + float sumLum = texelFetch(luminance, ivec2(0), lastLuminanceMip).r; + float lum = textureLod(luminance, envmapUV, 0).r; float bigfactor = float(luminanceMapSize.x*luminanceMapSize.y)/sumLum; return bigfactor*(lum/(sinTheta*2.0f*nbl_glsl_PI*nbl_glsl_PI)); } From 830d284b07cd384c1e96a6ac0fbe8cfdfa56c2e4 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 16:39:30 +0430 Subject: [PATCH 08/16] BugFix Allocation of DownloadBuffer --- examples_tests/22.RaytracedAO/Renderer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples_tests/22.RaytracedAO/Renderer.cpp b/examples_tests/22.RaytracedAO/Renderer.cpp index 160c1e0469..d5b8f94e5a 100644 --- a/examples_tests/22.RaytracedAO/Renderer.cpp +++ b/examples_tests/22.RaytracedAO/Renderer.cpp @@ -2221,7 +2221,7 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) } IImage::SBufferCopy copyRegion = {}; - copyRegion.bufferOffset = 0u; + copyRegion.bufferOffset = address; copyRegion.bufferRowLength = 0u; copyRegion.bufferImageHeight = 0u; //copyRegion.imageSubresource.aspectMask = wait for Vulkan; @@ -2264,6 +2264,8 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) variance = avg_x2 - avg_x * avg_x; // V[x] = E[X^2]-E[X]^2 std::cout << "Final Luminance Variance = " << variance << std::endl; + + downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr); } float regularizationFactor = envMapRegularizationFactor*(1.0f-1.0f/(1.0f+variance)); From 425562be93086983a0fc55e1ccf4580f2bd57c51 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 17:08:54 +0430 Subject: [PATCH 09/16] cleanup #ifdef ONLY_BXDF_SAMPLING --- .../22.RaytracedAO/raytraceCommon.glsl | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index 3a1eb03e56..582825c52c 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -7,9 +7,6 @@ #include -// #define ONLY_BXDF_SAMPLING -// #define ONLY_ENV_SAMPLING - layout(push_constant, row_major) uniform PushConstants { RaytraceShaderCommonData_t cummon; @@ -352,6 +349,11 @@ nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray( // (1) BXDF Sample and Weight nbl_glsl_LightSample bxdfSample; nbl_glsl_MC_quot_pdf_aov_t bxdfCosThroughput = nbl_glsl_MC_runGenerateAndRemainderStream(precomp,gcs,rnps,rand[0],bxdfSample); + + nbl_glsl_LightSample outSample; + nbl_glsl_MC_quot_pdf_aov_t result; + +#ifndef ONLY_BXDF_SAMPLING float bxdfWeight = 0; float p_bxdf_bxdf = bxdfCosThroughput.pdf; // BxDF PDF evaluated with BxDF sample (returned from @@ -407,10 +409,7 @@ nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray( } const float bxdfChoiceProb = w_bxdf/w_sum; -#endif - - nbl_glsl_LightSample outSample; - nbl_glsl_MC_quot_pdf_aov_t result; +#endif // ifdef TRADE_REGISTERS_FOR_IEEE754_ACCURACY float rcpChoiceProb; float w_star_over_p_env = w_sum; @@ -429,13 +428,11 @@ nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray( result.quotient *= w_star_over_p_env; result.pdf /= w_star_over_p_env; +#endif // ifndef ONLY_BXDF_SAMPLING #ifdef ONLY_BXDF_SAMPLING outSample = bxdfSample; result = bxdfCosThroughput; -#elif defined(ONLY_ENV_SAMPLING) - outSample = envmapSample; - result = envmapSampleThroughput; #endif // russian roulette From ab79146bfcdfad09f6c4707be3093e95965a30de Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 17:10:41 +0430 Subject: [PATCH 10/16] eachStrategyStride divide by SAMPLING_STRATEGY_COUNT --- examples_tests/22.RaytracedAO/raytraceCommon.glsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index 582825c52c..c340a3d0af 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -326,7 +326,7 @@ mat2x3 rand6d(in uvec3 scramble_key, in int _sample, int depth) --depth; // int offset = int(_sample)*SAMPLE_SEQUENCE_STRIDE+depth; - int eachStrategyStride = SAMPLE_SEQUENCE_STRIDE/2; // get this from cpp side? + int eachStrategyStride = SAMPLE_SEQUENCE_STRIDE/SAMPLING_STRATEGY_COUNT; const nbl_glsl_sampling_quantized3D quant1 = texelFetch(quantizedSampleSequence, offset).xy; const nbl_glsl_sampling_quantized3D quant2 = texelFetch(quantizedSampleSequence, offset + eachStrategyStride).xy; From 30323e84300437eebb1771250edb927720ed4344 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 17:41:15 +0430 Subject: [PATCH 11/16] two scramble_keys --- .../22.RaytracedAO/raytraceCommon.glsl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index c340a3d0af..0a0ba77049 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -319,7 +319,7 @@ void Envmap_generateRegularizedSample_and_pdf(out float pdf, out nbl_glsl_LightS } #include -mat2x3 rand6d(in uvec3 scramble_key, in int _sample, int depth) +mat2x3 rand6d(in uvec3 scramble_keys[2], in int _sample, int depth) { mat2x3 retVal; // decrement depth because first vertex is rasterized and picked with a different sample sequence @@ -330,21 +330,21 @@ mat2x3 rand6d(in uvec3 scramble_key, in int _sample, int depth) const nbl_glsl_sampling_quantized3D quant1 = texelFetch(quantizedSampleSequence, offset).xy; const nbl_glsl_sampling_quantized3D quant2 = texelFetch(quantizedSampleSequence, offset + eachStrategyStride).xy; - retVal[0] = nbl_glsl_sampling_decodeSample3Dimensions(quant1,scramble_key); - retVal[1] = nbl_glsl_sampling_decodeSample3Dimensions(quant2,scramble_key); + retVal[0] = nbl_glsl_sampling_decodeSample3Dimensions(quant1,scramble_keys[0]); + retVal[1] = nbl_glsl_sampling_decodeSample3Dimensions(quant2,scramble_keys[1]); return retVal; } nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray( out vec3 direction, - in uvec3 scramble_key, + in uvec3 scramble_keys[2], in uint sampleID, in uint depth, in nbl_glsl_MC_precomputed_t precomp, in nbl_glsl_MC_instr_stream_t gcs, in nbl_glsl_MC_instr_stream_t rnps ) { - mat2x3 rand = rand6d(scramble_key,int(sampleID),int(depth)); + mat2x3 rand = rand6d(scramble_keys,int(sampleID),int(depth)); // (1) BXDF Sample and Weight nbl_glsl_LightSample bxdfSample; @@ -476,12 +476,16 @@ void generate_next_rays( vec3 nextThroughput[MAX_RAYS_GENERATED]; float nextAoVThroughputScale[MAX_RAYS_GENERATED]; { - const uvec3 scramble_key = uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)); + const uvec3 scramble_keys[2] = { + uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)), + uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)) + }; + for (uint i=0u; i Date: Thu, 28 Apr 2022 17:50:51 +0430 Subject: [PATCH 12/16] rename glsl envmap sampling function --- examples_tests/22.RaytracedAO/raytraceCommon.glsl | 4 ++-- include/nbl/builtin/glsl/sampling/envmap.glsl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index 0a0ba77049..0e8e8d5b6f 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -271,7 +271,7 @@ float Envmap_regularized_deferred_pdf(in vec3 rayDirection) { const ivec2 luminanceMapSize = textureSize(luminance, 0); int lastLuminanceMip = int(log2(luminanceMapSize.x)); // TODO: later turn into push constant - const vec2 envmapUV = nbl_glsl_sampling_generateUVCoordFromDirection(rayDirection); + const vec2 envmapUV = nbl_glsl_sampling_envmap_generateUVCoordFromDirection(rayDirection); float sinTheta = length(rayDirection.zx); float sumLum = texelFetch(luminance, ivec2(0), lastLuminanceMip).r; @@ -550,7 +550,7 @@ struct Contribution void Contribution_initMiss(out Contribution contrib, in float aovThroughputScale) { - vec2 uv = nbl_glsl_sampling_generateUVCoordFromDirection(-normalizedV); + vec2 uv = nbl_glsl_sampling_envmap_generateUVCoordFromDirection(-normalizedV); // funny little trick borrowed from things like Progressive Photon Mapping const float bias = 0.0625f*(1.f-aovThroughputScale)*pow(pc.cummon.rcpFramesDispatched,0.08f); contrib.albedo = contrib.color = textureGrad(envMap, uv, vec2(bias*0.5,0.f), vec2(0.f,bias)).rgb; diff --git a/include/nbl/builtin/glsl/sampling/envmap.glsl b/include/nbl/builtin/glsl/sampling/envmap.glsl index 8aea359be7..69709e3ec0 100644 --- a/include/nbl/builtin/glsl/sampling/envmap.glsl +++ b/include/nbl/builtin/glsl/sampling/envmap.glsl @@ -3,7 +3,7 @@ #include -vec2 nbl_glsl_sampling_generateUVCoordFromDirection(vec3 v) +vec2 nbl_glsl_sampling_envmap_generateUVCoordFromDirection(vec3 v) { vec2 uv = vec2(atan(v.z, v.x), acos(v.y)); uv.x *= nbl_glsl_RECIPROCAL_PI*0.5; From b733cec00780b9509fda87b7b4c902d17237c3c9 Mon Sep 17 00:00:00 2001 From: Erfan Date: Thu, 28 Apr 2022 17:54:10 +0430 Subject: [PATCH 13/16] warn instead of assert --- examples_tests/22.RaytracedAO/main.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples_tests/22.RaytracedAO/main.cpp b/examples_tests/22.RaytracedAO/main.cpp index 678b0db264..1f17d9fb4d 100644 --- a/examples_tests/22.RaytracedAO/main.cpp +++ b/examples_tests/22.RaytracedAO/main.cpp @@ -621,7 +621,11 @@ int main(int argc, char** argv) { mainSensorData.width = film.cropWidth; mainSensorData.height = film.cropHeight; - assert(film.cropOffsetX == 0 && film.cropOffsetY == 0); + + if(film.cropOffsetX != 0 || film.cropOffsetY != 0) + { + std::cout << "[WARN] CropOffsets are non-zero. cropping is not supported for non cubemap renders." << std::endl; + } mainSensorData.staticCamera = smgr->addCameraSceneNode(nullptr); auto& staticCamera = mainSensorData.staticCamera; From ff1c6a10c5ad4dfbb587ce88e4b26d9566fd8c13 Mon Sep 17 00:00:00 2001 From: Erfan Date: Fri, 29 Apr 2022 16:53:17 +0430 Subject: [PATCH 14/16] 1 step towards seperation of envmap sampling as an extension --- examples_tests/22.RaytracedAO/Renderer.cpp | 151 ++++++++++++------ examples_tests/22.RaytracedAO/Renderer.h | 27 +++- .../22.RaytracedAO/raytraceCommon.glsl | 1 - .../gen_luma_mipmap.comp | 9 +- .../EnvmapImportanceSampling/gen_warpmap.comp | 9 +- .../EnvmapImportanceSampling/parameters.glsl | 11 +- src/nbl/builtin/CMakeLists.txt | 3 + 7 files changed, 143 insertions(+), 68 deletions(-) rename examples_tests/22.RaytracedAO/lumaMipMapGen.comp => include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp (90%) rename examples_tests/22.RaytracedAO/genWarpMap.comp => include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp (94%) rename examples_tests/22.RaytracedAO/warpCommon.h => include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl (50%) diff --git a/examples_tests/22.RaytracedAO/Renderer.cpp b/examples_tests/22.RaytracedAO/Renderer.cpp index d5b8f94e5a..0e4f5b8cdc 100644 --- a/examples_tests/22.RaytracedAO/Renderer.cpp +++ b/examples_tests/22.RaytracedAO/Renderer.cpp @@ -129,8 +129,6 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I bindings[7].type = asset::EDT_COMBINED_IMAGE_SAMPLER; bindings[8].type = asset::EDT_COMBINED_IMAGE_SAMPLER; bindings[9].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[9].count = MipCountLuminance; - m_commonRaytracingDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+raytracingCommonDescriptorCount); } @@ -789,7 +787,7 @@ void Renderer::initSceneNonAreaLights(Renderer::InitializationData& initData) // TODO: better filter and GPU accelerated m_finalEnvmap->regenerateMipMapLevels(); - initWarpingResources(); + m_computeWarpMapInfo = initWarpingResources(m_finalEnvmap); } void Renderer::finalizeScene(Renderer::InitializationData& initData) @@ -1142,7 +1140,7 @@ void Renderer::deinitSceneResources() void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float envMapRegularizationFactor) { - bool enableRIS = computeWarpMap(envMapRegularizationFactor); + bool enableRIS = computeWarpMap(envMapRegularizationFactor, m_computeWarpMapInfo); m_staticViewData.imageDimensions = {width, height}; m_rcpPixelSize = { 2.f/float(m_staticViewData.imageDimensions.x),-2.f/float(m_staticViewData.imageDimensions.y) }; @@ -1951,12 +1949,27 @@ bool Renderer::traceBounce(uint32_t& raycount) return true; } -void Renderer::initWarpingResources() +void getEnvmapResolutionFromMipLevel(uint32_t level, uint32_t& outWidth, uint32_t& outHeight) +{ + const uint32_t resolution = 0x1u<<(level); + outWidth = std::max(resolution, 1u); + outHeight = std::max(resolution/2u, 1u); +} + +Renderer::ComputeWarpMapInfo Renderer::initWarpingResources(core::smart_refctd_ptr envmap, uint32_t lumaMipMapGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension) { + const uint32_t MipCountEnvMap = envmap->getCreationParameters().subresourceRange.levelCount; + const uint32_t MipCountLuminance = MipCountEnvMap; + + ComputeWarpMapInfo ret = {}; + ret.lumaMipMapGenWorkgroupDimension = lumaMipMapGenWorkgroupDimension; + ret.warpMapGenWorkgroupDimension = warpMapGenWorkgroupDimension; + ret.MipCountLuminance = MipCountLuminance; + ret.MipCountEnvmap = MipCountEnvMap; + { - const uint32_t resolution = 0x1u<<(MipCountLuminance - 1); - const uint32_t width = std::max(resolution, 1u); - const uint32_t height = std::max(resolution/2u, 1u); + uint32_t width, height = 0u; + getEnvmapResolutionFromMipLevel(MipCountLuminance - 1, width, height); m_luminanceBaseImageView = createTexture(width, height, EF_R32_SFLOAT, MipCountLuminance); assert(m_luminanceBaseImageView); @@ -1972,9 +1985,8 @@ void Renderer::initWarpingResources() } { - const uint32_t resolution = 0x1u<<(MipCountEnvmap-1); // same size as envmap - const uint32_t width = std::max(resolution, 1u); - const uint32_t height = std::max(resolution/2u, 1u); + uint32_t width, height = 0u; + getEnvmapResolutionFromMipLevel(MipCountEnvmap - 1, width, height); m_warpMap = createTexture(width, height, EF_R32G32_SFLOAT); } @@ -2021,7 +2033,7 @@ void Renderer::initWarpingResources() lumaSamplerParams.CompareEnable = false; auto lumaSampler = m_driver->createGPUSampler(lumaSamplerParams); - core::smart_refctd_ptr samplers[MipCountLuminance]; + core::smart_refctd_ptr samplers[MaxMipCountLuminance]; for(uint32_t i = 0u; i < MipCountLuminance; ++i) samplers[i] = lumaSampler; @@ -2056,7 +2068,7 @@ void Renderer::initWarpingResources() const uint32_t dst = i + 1; IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {}; - envMapDescriptorInfo.desc = m_finalEnvmap; + envMapDescriptorInfo.desc = envmap; envMapDescriptorInfo.image.sampler = nullptr; envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; @@ -2133,31 +2145,82 @@ void Renderer::initWarpingResources() } { - m_lumaGPUShader = gpuSpecializedShaderFromFile(m_assetManager, m_driver, "../lumaMipMapGen.comp"); - assert(m_lumaGPUShader); + + const char* sourceFmt = +R"===(#version 430 core + +#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u +#define WARP_MAP_GEN_WORKGROUP_DIM %u +#define MAX_LUMINANCE_LEVELS %u + +#include "%s" + +)==="; + + { + const size_t extraSize = 3u*8u+128u; + auto lumaShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); + snprintf( + reinterpret_cast(lumaShader->getPointer()),lumaShader->getSize(), sourceFmt, + lumaMipMapGenWorkgroupDimension, + warpMapGenWorkgroupDimension, + MipCountLuminance, + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp" + ); + + auto cpuSpecializedShader = core::make_smart_refctd_ptr( + core::make_smart_refctd_ptr(std::move(lumaShader),ICPUShader::buffer_contains_glsl), + ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} + ); + + auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); + + m_lumaGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); + assert(m_lumaGPUShader); + } m_lumaPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_lumaPipelineLayout), core::smart_refctd_ptr(m_lumaGPUShader)); assert(m_lumaPipeline); - m_warpGPUShader = gpuSpecializedShaderFromFile(m_assetManager, m_driver, "../genWarpMap.comp"); - assert(m_warpGPUShader); + { + const size_t extraSize = 3u*8u+128u; + auto warpGenShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); + snprintf( + reinterpret_cast(warpGenShader->getPointer()),warpGenShader->getSize(), sourceFmt, + lumaMipMapGenWorkgroupDimension, + warpMapGenWorkgroupDimension, + MipCountLuminance, + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp" + ); + + auto cpuSpecializedShader = core::make_smart_refctd_ptr( + core::make_smart_refctd_ptr(std::move(warpGenShader),ICPUShader::buffer_contains_glsl), + ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} + ); + + auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); + + m_warpGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); + assert(m_warpGPUShader); + } m_warpPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_warpPipelineLayout), core::smart_refctd_ptr(m_warpGPUShader)); assert(m_warpPipeline); } + return ret; } void Renderer::deinitWarpingResources() { m_lumaPipeline = nullptr; m_lumaGPUShader = nullptr; - for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i) + for(uint32_t i = 0u; i < MaxMipCountLuminance - 1; ++i) m_lumaDS[i] = nullptr; m_lumaPipelineLayout = nullptr; m_lumaDSLayout = nullptr; - for(uint32_t i = 0; i < MipCountLuminance; ++i) + for(uint32_t i = 0; i < MaxMipCountLuminance; ++i) m_luminanceMipMaps[i] = nullptr; m_warpPipeline = nullptr; @@ -2168,7 +2231,7 @@ void Renderer::deinitWarpingResources() m_warpMap = nullptr; } -bool Renderer::computeWarpMap(float envMapRegularizationFactor) +bool Renderer::computeWarpMap(float envMapRegularizationFactor, ComputeWarpMapInfo info) { bool enableRIS = false; @@ -2184,13 +2247,13 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) pcData.sinFactor = 0; m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr); - const uint32_t resolution = 0x1u<<(MipCountLuminance - 1); - const uint32_t sourceMipWidth = std::max(resolution, 1u); - const uint32_t sourceMipHeight = std::max(resolution/2u, 1u); + + uint32_t sourceMipWidth, sourceMipHeight = 0u; + getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1, sourceMipWidth, sourceMipHeight); uint32_t workGroups[2] = { - (sourceMipWidth-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u, - (sourceMipHeight-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u + (sourceMipWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, + (sourceMipHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u }; m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); @@ -2201,9 +2264,8 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) // Download Luma Image and caclulate Variance and new Regularization Factor float variance = 0.0f; { - const uint32_t resolution = 0x1u<<(MipCountLuminance - 1); - const uint32_t width = std::max(resolution, 1u); - const uint32_t height = std::max(resolution/2u, 1u); + uint32_t width, height = 0u; + getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1, width, height); const uint32_t colorBufferBytesize = width * height * asset::getTexelOrBlockBytesize(EF_R32_SFLOAT); @@ -2280,14 +2342,13 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) pcData.sinFactor = 1; m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr); - - const uint32_t resolution = 0x1u<<(MipCountLuminance - 1); - const uint32_t sourceMipWidth = std::max(resolution, 1u); - const uint32_t sourceMipHeight = std::max(resolution/2u, 1u); + + uint32_t sourceMipWidth, sourceMipHeight = 0u; + getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1, sourceMipWidth, sourceMipHeight); uint32_t workGroups[2] = { - (sourceMipWidth-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u, - (sourceMipHeight-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u + (sourceMipWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, + (sourceMipHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u }; m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); @@ -2296,17 +2357,16 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) } // Calc Mipmaps - for(uint32_t s = 0; s < MipCountLuminance - 1; ++s) + for(uint32_t s = 0; s < info.MipCountLuminance - 1; ++s) { m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[s].get(),nullptr); - const uint32_t resolution = 0x1u<<(MipCountLuminance - 1 - s); - const uint32_t sourceMipWidth = std::max(resolution, 1u); - const uint32_t sourceMipHeight = std::max(resolution/2u, 1u); + uint32_t sourceMipWidth, sourceMipHeight = 0u; + getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1 - s, sourceMipWidth, sourceMipHeight); uint32_t workGroups[2] = { - (sourceMipWidth-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u, - (sourceMipHeight-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u + (sourceMipWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, + (sourceMipHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u }; pcData.calcLuma = 0; @@ -2320,17 +2380,16 @@ bool Renderer::computeWarpMap(float envMapRegularizationFactor) m_driver->bindComputePipeline(m_warpPipeline.get()); WarpMapGenShaderData_t warpPcData = {}; - warpPcData.lumaMipCount = MipCountLuminance; + warpPcData.lumaMipCount = info.MipCountLuminance; m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr); - const uint32_t resolution = 0x1u<<(MipCountEnvmap-1); - const uint32_t warpMapWidth = std::max(resolution, 1u); - const uint32_t warpMapHeight = std::max(resolution/2u, 1u); + uint32_t warpMapWidth, warpMapHeight = 0u; + getEnvmapResolutionFromMipLevel(info.MipCountEnvmap - 1, warpMapWidth, warpMapHeight); uint32_t workGroups[2] = { - (warpMapWidth-1u)/WARP_MAP_GEN_WORKGROUP_DIM+1u, - (warpMapHeight-1u)/WARP_MAP_GEN_WORKGROUP_DIM+1u + (warpMapWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, + (warpMapHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u }; m_driver->pushConstants(m_warpPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(warpPcData),&warpPcData); diff --git a/examples_tests/22.RaytracedAO/Renderer.h b/examples_tests/22.RaytracedAO/Renderer.h index 518d75d213..171fd0c879 100644 --- a/examples_tests/22.RaytracedAO/Renderer.h +++ b/examples_tests/22.RaytracedAO/Renderer.h @@ -23,7 +23,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac public: #include "rasterizationCommon.h" #include "raytraceCommon.h" - #include "warpCommon.h" + #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl" #ifdef __cplusplus #undef uint #undef vec4 @@ -248,12 +248,23 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr blendEnvMeshBuffer; // Shader and Resources for Generating Luminance MipMaps from EnvMap - static constexpr uint32_t MipCountLuminance = MipCountEnvmap; + struct ComputeWarpMapInfo + { + uint32_t MipCountEnvmap; + uint32_t MipCountLuminance; + uint32_t lumaMipMapGenWorkgroupDimension; + uint32_t warpMapGenWorkgroupDimension; + }; + + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + nbl::core::smart_refctd_ptr m_luminanceBaseImageView; - nbl::core::smart_refctd_ptr m_luminanceMipMaps[MipCountLuminance]; + nbl::core::smart_refctd_ptr m_luminanceMipMaps[MaxMipCountLuminance]; uint32_t m_lumaWorkGroups[2]; nbl::core::smart_refctd_ptr m_lumaDSLayout; - nbl::core::smart_refctd_ptr m_lumaDS[MipCountLuminance - 1]; + nbl::core::smart_refctd_ptr m_lumaDS[MaxMipCountLuminance - 1]; nbl::core::smart_refctd_ptr m_lumaPipelineLayout; nbl::core::smart_refctd_ptr m_lumaGPUShader; nbl::core::smart_refctd_ptr m_lumaPipeline; @@ -267,12 +278,16 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_warpGPUShader; nbl::core::smart_refctd_ptr m_warpPipeline; - void initWarpingResources(); + ComputeWarpMapInfo initWarpingResources( + nbl::core::smart_refctd_ptr envmap, + uint32_t lumaMipMapGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension, + uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension); void deinitWarpingResources(); // returns if RIS should be enabled based on variance calculations - bool computeWarpMap(float envMapRegularizationFactor); + bool computeWarpMap(float envMapRegularizationFactor, ComputeWarpMapInfo info); + ComputeWarpMapInfo m_computeWarpMapInfo; std::future compileShadersFuture; }; diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index 0a0ba77049..e29b571ad7 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -2,7 +2,6 @@ #define _RAYTRACE_COMMON_GLSL_INCLUDED_ #include "virtualGeometry.glsl" -#include "warpCommon.h" #include #include diff --git a/examples_tests/22.RaytracedAO/lumaMipMapGen.comp b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp similarity index 90% rename from examples_tests/22.RaytracedAO/lumaMipMapGen.comp rename to include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp index 1d0f9193ad..cbe3571f5d 100644 --- a/examples_tests/22.RaytracedAO/lumaMipMapGen.comp +++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp @@ -1,6 +1,7 @@ -#version 430 core +#ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_LUMA_INCLUDED_ +#define _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_LUMA_INCLUDED_ -#include "warpCommon.h" +#include #include layout(local_size_x = LUMA_MIP_MAP_GEN_WORKGROUP_DIM, local_size_y = LUMA_MIP_MAP_GEN_WORKGROUP_DIM) in; @@ -54,4 +55,6 @@ void main() } } } -} \ No newline at end of file +} + +#endif \ No newline at end of file diff --git a/examples_tests/22.RaytracedAO/genWarpMap.comp b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp similarity index 94% rename from examples_tests/22.RaytracedAO/genWarpMap.comp rename to include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp index a3b9e98dd2..6bd7a7c5ea 100644 --- a/examples_tests/22.RaytracedAO/genWarpMap.comp +++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp @@ -1,6 +1,7 @@ -#version 430 core +#ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_WARPMAP_INCLUDED_ +#define _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_WARPMAP_INCLUDED_ -#include "warpCommon.h" +#include #include layout(local_size_x = WARP_MAP_GEN_WORKGROUP_DIM, local_size_y = WARP_MAP_GEN_WORKGROUP_DIM) in; @@ -128,4 +129,6 @@ void main() */ } -} \ No newline at end of file +} + +#endif \ No newline at end of file diff --git a/examples_tests/22.RaytracedAO/warpCommon.h b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl similarity index 50% rename from examples_tests/22.RaytracedAO/warpCommon.h rename to include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl index a04dbe03b3..4af40f76df 100644 --- a/examples_tests/22.RaytracedAO/warpCommon.h +++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl @@ -1,12 +1,5 @@ -#ifndef _WARP_COMMON_H_INCLUDED_ -#define _WARP_COMMON_H_INCLUDED_ - -#include "common.h" - -#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM 16 -#define WARP_MAP_GEN_WORKGROUP_DIM 16 - -#define MAX_LUMINANCE_LEVELS 13 +#ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_PARAMETERS_STRUCT_INCLUDED_ +#define _NBL_GLSL_EXT_ENVMAP_SAMPLING_PARAMETERS_STRUCT_INCLUDED_ struct LumaMipMapGenShaderData_t { diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index be6a25dd42..b204878a51 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -143,6 +143,9 @@ set(nbl_resources_to_embed "nbl/builtin/glsl/workgroup/vote.glsl" # ext shouldn't be built into the engine, but there's no harm including some non-dynamic GLSL source to make life easier #"nbl/builtin/glsl/ext/.glsl" + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp" + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp" + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl" "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" "nbl/builtin/glsl/ext/FFT/fft.glsl" "nbl/builtin/glsl/ext/FFT/parameters_struct.glsl" From bc63b65ff367114ab7c2f37e28535d7c6f60ac87 Mon Sep 17 00:00:00 2001 From: Erfan Date: Fri, 29 Apr 2022 18:16:57 +0430 Subject: [PATCH 15/16] EnvmapImportanceSampling Extension Header+Impl --- examples_tests/22.RaytracedAO/CMakeLists.txt | 1 + examples_tests/22.RaytracedAO/Renderer.cpp | 469 +---------------- examples_tests/22.RaytracedAO/Renderer.h | 42 +- .../EnvmapImportanceSampling/parameters.glsl | 19 + .../EnvmapImportanceSampling.h | 84 +++ .../EnvmapImportanceSampling.cpp | 493 ++++++++++++++++++ 6 files changed, 610 insertions(+), 498 deletions(-) create mode 100644 include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h create mode 100644 src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp diff --git a/examples_tests/22.RaytracedAO/CMakeLists.txt b/examples_tests/22.RaytracedAO/CMakeLists.txt index b3e2b275ed..c8c4e1e7f7 100644 --- a/examples_tests/22.RaytracedAO/CMakeLists.txt +++ b/examples_tests/22.RaytracedAO/CMakeLists.txt @@ -31,6 +31,7 @@ endif() set(EXTRA_SOURCES ../../src/nbl/ext/DebugDraw/CDraw3DLine.cpp + ../../src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp Renderer.cpp CommandLineHandler.cpp ) diff --git a/examples_tests/22.RaytracedAO/Renderer.cpp b/examples_tests/22.RaytracedAO/Renderer.cpp index 0e4f5b8cdc..ea4adcace5 100644 --- a/examples_tests/22.RaytracedAO/Renderer.cpp +++ b/examples_tests/22.RaytracedAO/Renderer.cpp @@ -53,7 +53,8 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_framesDispatched(0u), m_rcpPixelSize{0.f,0.f}, m_staticViewData{{0u,0u},0u,0u}, m_raytraceCommonData{core::matrix4SIMD(), vec3(),0.f,0u,0u,0u,0.f}, m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),1.f,0u,0u,0u},m_cullWorkGroups(0u), - m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr) + m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr), + m_envMapImportanceSampling(_driver) { // TODO: reimplement m_useDenoiser = false; @@ -787,7 +788,7 @@ void Renderer::initSceneNonAreaLights(Renderer::InitializationData& initData) // TODO: better filter and GPU accelerated m_finalEnvmap->regenerateMipMapLevels(); - m_computeWarpMapInfo = initWarpingResources(m_finalEnvmap); + m_envMapImportanceSampling.initResources(m_finalEnvmap); } void Renderer::finalizeScene(Renderer::InitializationData& initData) @@ -1118,7 +1119,7 @@ void Renderer::deinitSceneResources() m_sceneBound = core::aabbox3df(FLT_MAX, FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX); m_finalEnvmap = nullptr; - deinitWarpingResources(); + m_envMapImportanceSampling.deinitResources(); m_staticViewData = {{0u,0u},0u,0u}; auto rr = m_rrManager->getRadeonRaysAPI(); @@ -1140,7 +1141,7 @@ void Renderer::deinitSceneResources() void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float envMapRegularizationFactor) { - bool enableRIS = computeWarpMap(envMapRegularizationFactor, m_computeWarpMapInfo); + bool enableRIS = m_envMapImportanceSampling.computeWarpMap(envMapRegularizationFactor); m_staticViewData.imageDimensions = {width, height}; m_rcpPixelSize = { 2.f/float(m_staticViewData.imageDimensions.x),-2.f/float(m_staticViewData.imageDimensions.y) }; @@ -1281,7 +1282,10 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e constexpr uint32_t MaxDescritorUpdates = 10u; IGPUDescriptorSet::SDescriptorInfo infos[MaxDescritorUpdates]; IGPUDescriptorSet::SWriteDescriptorSet writes[MaxDescritorUpdates]; - + + auto warpMap = m_envMapImportanceSampling.getWarpMapImageView(); + auto lumaMap = m_envMapImportanceSampling.getLuminanceImageView(); + // set up m_commonRaytracingDS core::smart_refctd_ptr _staticViewDataBuffer; size_t staticViewDataBufferSize=0u; @@ -1302,7 +1306,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e } // warpmap { - setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(m_warpMap)); + setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(m_envMapImportanceSampling.m_warpMap)); ISampler::SParams samplerParams = { ISampler::ETC_REPEAT, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; infos[8].image.sampler = m_driver->createGPUSampler(samplerParams); infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; @@ -1314,7 +1318,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_BORDER, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, 0u, false, ECO_ALWAYS }; auto sampler = m_driver->createGPUSampler(samplerParams); - luminanceDescriptorInfo.desc = m_luminanceBaseImageView; + luminanceDescriptorInfo.desc = m_envMapImportanceSampling.m_luminanceBaseImageView; luminanceDescriptorInfo.image.sampler = sampler; luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; } @@ -1949,457 +1953,6 @@ bool Renderer::traceBounce(uint32_t& raycount) return true; } -void getEnvmapResolutionFromMipLevel(uint32_t level, uint32_t& outWidth, uint32_t& outHeight) -{ - const uint32_t resolution = 0x1u<<(level); - outWidth = std::max(resolution, 1u); - outHeight = std::max(resolution/2u, 1u); -} - -Renderer::ComputeWarpMapInfo Renderer::initWarpingResources(core::smart_refctd_ptr envmap, uint32_t lumaMipMapGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension) -{ - const uint32_t MipCountEnvMap = envmap->getCreationParameters().subresourceRange.levelCount; - const uint32_t MipCountLuminance = MipCountEnvMap; - - ComputeWarpMapInfo ret = {}; - ret.lumaMipMapGenWorkgroupDimension = lumaMipMapGenWorkgroupDimension; - ret.warpMapGenWorkgroupDimension = warpMapGenWorkgroupDimension; - ret.MipCountLuminance = MipCountLuminance; - ret.MipCountEnvmap = MipCountEnvMap; - - { - uint32_t width, height = 0u; - getEnvmapResolutionFromMipLevel(MipCountLuminance - 1, width, height); - m_luminanceBaseImageView = createTexture(width, height, EF_R32_SFLOAT, MipCountLuminance); - assert(m_luminanceBaseImageView); - - m_luminanceMipMaps[0] = m_luminanceBaseImageView; - for(uint32_t i = 1; i < MipCountLuminance; ++i) - { - IGPUImageView::SCreationParams viewCreateParams = m_luminanceBaseImageView->getCreationParameters(); - viewCreateParams.subresourceRange.baseMipLevel = i; - viewCreateParams.subresourceRange.levelCount = 1u; - - m_luminanceMipMaps[i] = m_driver->createGPUImageView(std::move(viewCreateParams)); - } - } - - { - uint32_t width, height = 0u; - getEnvmapResolutionFromMipLevel(MipCountEnvmap - 1, width, height); - m_warpMap = createTexture(width, height, EF_R32G32_SFLOAT); - } - - ISampler::SParams samplerParams; - samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE; - samplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST; - samplerParams.MipmapMode = ISampler::ESMM_NEAREST; - samplerParams.AnisotropicFilter = 0u; - samplerParams.CompareEnable = false; - auto nearestSampler = m_driver->createGPUSampler(samplerParams); - - // Create DescriptorLayout - { - { - constexpr auto lumaDescriptorCount = 3u; - IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = 1u; - bindings[0].samplers = &nearestSampler; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_IMAGE; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - bindings[2].binding = 2u; - bindings[2].type = asset::EDT_STORAGE_IMAGE; - bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[2].count = 1u; - - m_lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount); - } - - { - - ISampler::SParams lumaSamplerParams; - lumaSamplerParams.TextureWrapU = lumaSamplerParams.TextureWrapV = lumaSamplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER; - lumaSamplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; - lumaSamplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST; - lumaSamplerParams.MipmapMode = ISampler::ESMM_NEAREST; - lumaSamplerParams.AnisotropicFilter = 0u; - lumaSamplerParams.CompareEnable = false; - auto lumaSampler = m_driver->createGPUSampler(lumaSamplerParams); - - core::smart_refctd_ptr samplers[MaxMipCountLuminance]; - for(uint32_t i = 0u; i < MipCountLuminance; ++i) - samplers[i] = lumaSampler; - - constexpr auto warpDescriptorCount = 2u; - IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount]; - bindings[0].binding = 0u; - bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; - bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[0].count = MipCountLuminance; - bindings[0].samplers = samplers; - - bindings[1].binding = 1u; - bindings[1].type = asset::EDT_STORAGE_IMAGE; - bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; - bindings[1].count = 1u; - - m_warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount); - } - } - - { - { - SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(LumaMipMapGenShaderData_t)}; - m_lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_lumaDSLayout)); - - for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i) - m_lumaDS[i] = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_lumaDSLayout)); - - for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i) - { - const uint32_t src = i; - const uint32_t dst = i + 1; - - IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {}; - envMapDescriptorInfo.desc = envmap; - envMapDescriptorInfo.image.sampler = nullptr; - envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo srcMipDescriptorInfo = {}; - srcMipDescriptorInfo.desc = m_luminanceMipMaps[src]; - srcMipDescriptorInfo.image.sampler = nullptr; - srcMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; - - IGPUDescriptorSet::SDescriptorInfo dstMipDescriptorInfo = {}; - dstMipDescriptorInfo.desc = m_luminanceMipMaps[dst]; - dstMipDescriptorInfo.image.sampler = nullptr; - dstMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[3u]; - writes[0].binding = 0u; - writes[0].arrayElement = 0u; - writes[0].count = 1u; - writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; - writes[0].dstSet = m_lumaDS[i].get(); - writes[0].info = &envMapDescriptorInfo; - - writes[1].binding = 1u; - writes[1].arrayElement = 0u; - writes[1].count = 1u; - writes[1].descriptorType = EDT_STORAGE_IMAGE; - writes[1].dstSet = m_lumaDS[i].get(); - writes[1].info = &srcMipDescriptorInfo; - - writes[2].binding = 2u; - writes[2].arrayElement = 0u; - writes[2].count = 1u; - writes[2].descriptorType = EDT_STORAGE_IMAGE; - writes[2].dstSet = m_lumaDS[i].get(); - writes[2].info = &dstMipDescriptorInfo; - - m_driver->updateDescriptorSets(3u,writes,0u,nullptr); - } - } - - { - - SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(WarpMapGenShaderData_t)}; - m_warpPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_warpDSLayout)); - - m_warpDS = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_warpDSLayout)); - - IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {}; - luminanceDescriptorInfo.desc = m_luminanceBaseImageView; - luminanceDescriptorInfo.image.sampler = nullptr; - luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; - - IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; - warpMapDescriptorInfo.desc = m_warpMap; - warpMapDescriptorInfo.image.sampler = nullptr; - warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; - - IGPUDescriptorSet::SWriteDescriptorSet writes[2u]; - writes[0].binding = 0u; - writes[0].arrayElement = 0u; - writes[0].count = 1u; - writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; - writes[0].dstSet = m_warpDS.get(); - writes[0].info = &luminanceDescriptorInfo; - - writes[1].binding = 1u; - writes[1].arrayElement = 0u; - writes[1].count = 1u; - writes[1].descriptorType = EDT_STORAGE_IMAGE; - writes[1].dstSet = m_warpDS.get(); - writes[1].info = &warpMapDescriptorInfo; - - m_driver->updateDescriptorSets(2u,writes,0u,nullptr); - } - } - - { - - const char* sourceFmt = -R"===(#version 430 core - -#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u -#define WARP_MAP_GEN_WORKGROUP_DIM %u -#define MAX_LUMINANCE_LEVELS %u - -#include "%s" - -)==="; - - { - const size_t extraSize = 3u*8u+128u; - auto lumaShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); - snprintf( - reinterpret_cast(lumaShader->getPointer()),lumaShader->getSize(), sourceFmt, - lumaMipMapGenWorkgroupDimension, - warpMapGenWorkgroupDimension, - MipCountLuminance, - "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp" - ); - - auto cpuSpecializedShader = core::make_smart_refctd_ptr( - core::make_smart_refctd_ptr(std::move(lumaShader),ICPUShader::buffer_contains_glsl), - ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} - ); - - auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); - - m_lumaGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); - assert(m_lumaGPUShader); - } - - m_lumaPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_lumaPipelineLayout), core::smart_refctd_ptr(m_lumaGPUShader)); - assert(m_lumaPipeline); - - { - const size_t extraSize = 3u*8u+128u; - auto warpGenShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); - snprintf( - reinterpret_cast(warpGenShader->getPointer()),warpGenShader->getSize(), sourceFmt, - lumaMipMapGenWorkgroupDimension, - warpMapGenWorkgroupDimension, - MipCountLuminance, - "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp" - ); - - auto cpuSpecializedShader = core::make_smart_refctd_ptr( - core::make_smart_refctd_ptr(std::move(warpGenShader),ICPUShader::buffer_contains_glsl), - ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} - ); - - auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); - - m_warpGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); - assert(m_warpGPUShader); - } - - m_warpPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_warpPipelineLayout), core::smart_refctd_ptr(m_warpGPUShader)); - assert(m_warpPipeline); - } - - return ret; -} - -void Renderer::deinitWarpingResources() -{ - m_lumaPipeline = nullptr; - m_lumaGPUShader = nullptr; - for(uint32_t i = 0u; i < MaxMipCountLuminance - 1; ++i) - m_lumaDS[i] = nullptr; - m_lumaPipelineLayout = nullptr; - m_lumaDSLayout = nullptr; - - for(uint32_t i = 0; i < MaxMipCountLuminance; ++i) - m_luminanceMipMaps[i] = nullptr; - - m_warpPipeline = nullptr; - m_warpGPUShader = nullptr; - m_warpDS = nullptr; - m_warpPipelineLayout = nullptr; - m_warpDSLayout = nullptr; - m_warpMap = nullptr; -} - -bool Renderer::computeWarpMap(float envMapRegularizationFactor, ComputeWarpMapInfo info) -{ - bool enableRIS = false; - - LumaMipMapGenShaderData_t pcData = {}; - const nbl::core::vectorSIMDf lumaScales = nbl::core::vectorSIMDf(0.2126729f, 0.7151522f, 0.0721750f, 1.0f); - - m_driver->bindComputePipeline(m_lumaPipeline.get()); - - // Calc Luma without Sin Factor - { - pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * envMapRegularizationFactor, lumaScales[1] * envMapRegularizationFactor, lumaScales[2] * envMapRegularizationFactor, (1.0f-envMapRegularizationFactor)); - pcData.calcLuma = 1; - pcData.sinFactor = 0; - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr); - - - uint32_t sourceMipWidth, sourceMipHeight = 0u; - getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1, sourceMipWidth, sourceMipHeight); - - uint32_t workGroups[2] = { - (sourceMipWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, - (sourceMipHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u - }; - - m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); - m_driver->dispatch(workGroups[0],workGroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); - } - - // Download Luma Image and caclulate Variance and new Regularization Factor - float variance = 0.0f; - { - uint32_t width, height = 0u; - getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1, width, height); - - const uint32_t colorBufferBytesize = width * height * asset::getTexelOrBlockBytesize(EF_R32_SFLOAT); - - auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); - - constexpr uint64_t timeoutInNanoSeconds = 300000000000u; - const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); - - uint32_t address = std::remove_pointer::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation! - const uint32_t alignment = 4096u; // common page size - auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint, 1u, &address, &colorBufferBytesize, &alignment); - if (unallocatedSize) - { - os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR); - } - - IImage::SBufferCopy copyRegion = {}; - copyRegion.bufferOffset = address; - copyRegion.bufferRowLength = 0u; - copyRegion.bufferImageHeight = 0u; - //copyRegion.imageSubresource.aspectMask = wait for Vulkan; - copyRegion.imageSubresource.mipLevel = 0u; - copyRegion.imageSubresource.baseArrayLayer = 0u; - copyRegion.imageSubresource.layerCount = 1u; - copyRegion.imageOffset = { 0u,0u,0u }; - copyRegion.imageExtent = { width, height, 1u }; - - auto luminanceGPUImage = m_luminanceMipMaps[0].get()->getCreationParameters().image.get(); - m_driver->copyImageToBuffer(luminanceGPUImage, downloadStagingArea->getBuffer(), 1, ©Region); - - auto downloadFence = m_driver->placeFence(true); - - auto* data = reinterpret_cast(downloadStagingArea->getBufferPointer()) + address; - - // wait for download fence and then invalidate the CPU cache - { - auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); - if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED||result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) - { - os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR); - downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr); - } - if (downloadStagingArea->needsManualFlushOrInvalidate()) - m_driver->invalidateMappedMemoryRanges({{downloadStagingArea->getBuffer()->getBoundMemory(),address,colorBufferBytesize}}); - } - - float* fltData = reinterpret_cast(data); - float avg_x2 = 0.0f; - float avg_x = 0.0f; - for(uint32_t i = 0; i < width * height; ++i) - { - const float x = fltData[i]; - const float x2 = x*x; - const float n = float(i + 1); - avg_x = avg_x + (x-avg_x)/(n); - avg_x2 = avg_x2 + (x2-avg_x2)/(n); - } - - variance = avg_x2 - avg_x * avg_x; // V[x] = E[X^2]-E[X]^2 - std::cout << "Final Luminance Variance = " << variance << std::endl; - - downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr); - } - - float regularizationFactor = envMapRegularizationFactor*(1.0f-1.0f/(1.0f+variance)); - std::cout << "New Regularization Factor based on Variance = " << regularizationFactor << std::endl; - constexpr float varianceThreshold = 0.001f; - enableRIS = (variance >= varianceThreshold); - - // Calc Luma again with Sin Factor and new Regularization Factor - { - pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * regularizationFactor, lumaScales[1] * regularizationFactor, lumaScales[2] * regularizationFactor, (1.0f-regularizationFactor)); - pcData.calcLuma = 1; - pcData.sinFactor = 1; - - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr); - - uint32_t sourceMipWidth, sourceMipHeight = 0u; - getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1, sourceMipWidth, sourceMipHeight); - - uint32_t workGroups[2] = { - (sourceMipWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, - (sourceMipHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u - }; - - m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); - m_driver->dispatch(workGroups[0],workGroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT); - } - - // Calc Mipmaps - for(uint32_t s = 0; s < info.MipCountLuminance - 1; ++s) - { - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[s].get(),nullptr); - - uint32_t sourceMipWidth, sourceMipHeight = 0u; - getEnvmapResolutionFromMipLevel(info.MipCountLuminance - 1 - s, sourceMipWidth, sourceMipHeight); - - uint32_t workGroups[2] = { - (sourceMipWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, - (sourceMipHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u - }; - - pcData.calcLuma = 0; - m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); - m_driver->dispatch(workGroups[0],workGroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); - } - - // Generate WarpMap - { - m_driver->bindComputePipeline(m_warpPipeline.get()); - - WarpMapGenShaderData_t warpPcData = {}; - warpPcData.lumaMipCount = info.MipCountLuminance; - - m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr); - - uint32_t warpMapWidth, warpMapHeight = 0u; - getEnvmapResolutionFromMipLevel(info.MipCountEnvmap - 1, warpMapWidth, warpMapHeight); - - uint32_t workGroups[2] = { - (warpMapWidth-1u)/info.lumaMipMapGenWorkgroupDimension+1u, - (warpMapHeight-1u)/info.lumaMipMapGenWorkgroupDimension+1u - }; - - m_driver->pushConstants(m_warpPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(warpPcData),&warpPcData); - m_driver->dispatch(workGroups[0],workGroups[1],1); - COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); - } - - return enableRIS; -} - const float Renderer::AntiAliasingSequence[Renderer::AntiAliasingSequenceLength][2] = { {0.229027962000000, 0.100901043000000}, diff --git a/examples_tests/22.RaytracedAO/Renderer.h b/examples_tests/22.RaytracedAO/Renderer.h index 171fd0c879..202c2d9f0a 100644 --- a/examples_tests/22.RaytracedAO/Renderer.h +++ b/examples_tests/22.RaytracedAO/Renderer.h @@ -7,6 +7,7 @@ #undef PI #include "nbl/ext/MitsubaLoader/CMitsubaLoader.h" +#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h" #include @@ -246,48 +247,9 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr blendEnvPipeline; nbl::core::smart_refctd_ptr blendEnvDescriptorSet; nbl::core::smart_refctd_ptr blendEnvMeshBuffer; - - // Shader and Resources for Generating Luminance MipMaps from EnvMap - struct ComputeWarpMapInfo - { - uint32_t MipCountEnvmap; - uint32_t MipCountLuminance; - uint32_t lumaMipMapGenWorkgroupDimension; - uint32_t warpMapGenWorkgroupDimension; - }; - static constexpr uint32_t MaxMipCountLuminance = 13u; - static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; - static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; - - nbl::core::smart_refctd_ptr m_luminanceBaseImageView; - nbl::core::smart_refctd_ptr m_luminanceMipMaps[MaxMipCountLuminance]; - uint32_t m_lumaWorkGroups[2]; - nbl::core::smart_refctd_ptr m_lumaDSLayout; - nbl::core::smart_refctd_ptr m_lumaDS[MaxMipCountLuminance - 1]; - nbl::core::smart_refctd_ptr m_lumaPipelineLayout; - nbl::core::smart_refctd_ptr m_lumaGPUShader; - nbl::core::smart_refctd_ptr m_lumaPipeline; - - // Shader and Resources for EnvironmentalMap Sample Warping - nbl::core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance - - nbl::core::smart_refctd_ptr m_warpDSLayout; - nbl::core::smart_refctd_ptr m_warpDS; - nbl::core::smart_refctd_ptr m_warpPipelineLayout; - nbl::core::smart_refctd_ptr m_warpGPUShader; - nbl::core::smart_refctd_ptr m_warpPipeline; - - ComputeWarpMapInfo initWarpingResources( - nbl::core::smart_refctd_ptr envmap, - uint32_t lumaMipMapGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension, - uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension); - void deinitWarpingResources(); - - // returns if RIS should be enabled based on variance calculations - bool computeWarpMap(float envMapRegularizationFactor, ComputeWarpMapInfo info); + nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling; - ComputeWarpMapInfo m_computeWarpMapInfo; std::future compileShadersFuture; }; diff --git a/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl index 4af40f76df..3e3b1c89ef 100644 --- a/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl +++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl @@ -1,6 +1,25 @@ #ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_PARAMETERS_STRUCT_INCLUDED_ #define _NBL_GLSL_EXT_ENVMAP_SAMPLING_PARAMETERS_STRUCT_INCLUDED_ +#ifdef __cplusplus + #define uint uint32_t + struct uvec2 + { + uint x,y; + }; + struct vec2 + { + float x,y; + }; + struct vec3 + { + float x,y,z; + }; + #define vec4 nbl::core::vectorSIMDf + #define mat4 nbl::core::matrix4SIMD + #define mat4x3 nbl::core::matrix3x4SIMD +#endif + struct LumaMipMapGenShaderData_t { vec4 luminanceScales; diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h new file mode 100644 index 0000000000..24c09998c8 --- /dev/null +++ b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h @@ -0,0 +1,84 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ +#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_ + +#include "nabla.h" +#include "nbl/video/IGPUShader.h" +#include "nbl/asset/ICPUShader.h" + +namespace nbl +{ +namespace ext +{ +namespace EnvmapImportanceSampling +{ + +#include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl" +#undef uint +#undef vec4 +#undef mat4 +#undef mat4x3 + +class EnvmapImportanceSampling final : public core::IReferenceCounted +{ + public: + EnvmapImportanceSampling(nbl::video::IVideoDriver* _driver); + + ~EnvmapImportanceSampling() {} + + static constexpr uint32_t MaxMipCountLuminance = 13u; + static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u; + static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u; + + void initResources( + nbl::core::smart_refctd_ptr envmap, + uint32_t lumaMipMapGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension, + uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension); + + void deinitResources(); + + // returns if RIS should be enabled based on variance calculations + bool computeWarpMap(float envMapRegularizationFactor); + + nbl::core::smart_refctd_ptr getLuminanceImageView() { return m_luminanceBaseImageView; } + nbl::core::smart_refctd_ptr getWarpMapImageView() { return m_warpMap; } + + nbl::core::smart_refctd_ptr m_luminanceBaseImageView; + nbl::core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance + private: + + nbl::core::smart_refctd_ptr createTexture(uint32_t width, uint32_t height, nbl::asset::E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 1u); + + uint32_t m_mipCountEnvmap; + uint32_t m_mipCountLuminance; + uint32_t m_lumaMipMapGenWorkgroupDimension; + uint32_t m_warpMapGenWorkgroupDimension; + + nbl::core::smart_refctd_ptr m_luminanceMipMaps[MaxMipCountLuminance]; + uint32_t m_lumaWorkGroups[2]; + nbl::core::smart_refctd_ptr m_lumaDSLayout; + nbl::core::smart_refctd_ptr m_lumaDS[MaxMipCountLuminance - 1]; + nbl::core::smart_refctd_ptr m_lumaPipelineLayout; + nbl::core::smart_refctd_ptr m_lumaGPUShader; + nbl::core::smart_refctd_ptr m_lumaPipeline; + + // Shader and Resources for EnvironmentalMap Sample Warping + + nbl::core::smart_refctd_ptr m_warpDSLayout; + nbl::core::smart_refctd_ptr m_warpDS; + nbl::core::smart_refctd_ptr m_warpPipelineLayout; + nbl::core::smart_refctd_ptr m_warpGPUShader; + nbl::core::smart_refctd_ptr m_warpPipeline; + + nbl::video::IVideoDriver* m_driver; +}; + + +} +} +} + +#endif diff --git a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp new file mode 100644 index 0000000000..bf2fa7dc45 --- /dev/null +++ b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp @@ -0,0 +1,493 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h" + +#include + +using namespace nbl; +using namespace nbl::asset; +using namespace nbl::video; +using namespace ext::EnvmapImportanceSampling; + +EnvmapImportanceSampling::EnvmapImportanceSampling(IVideoDriver* _driver) : m_driver(_driver) +{ +} + +void getEnvmapResolutionFromMipLevel(uint32_t level, uint32_t& outWidth, uint32_t& outHeight) +{ + const uint32_t resolution = 0x1u<<(level); + outWidth = std::max(resolution, 1u); + outHeight = std::max(resolution/2u, 1u); +} + +core::smart_refctd_ptr EnvmapImportanceSampling::createTexture(uint32_t width, uint32_t height, E_FORMAT format, uint32_t mipLevels, uint32_t layers) +{ + const auto real_layers = layers ? layers:1u; + + IGPUImage::SCreationParams imgparams; + imgparams.extent = {width, height, 1u}; + imgparams.arrayLayers = real_layers; + imgparams.flags = static_cast(0); + imgparams.format = format; + imgparams.mipLevels = mipLevels; + imgparams.samples = IImage::ESCF_1_BIT; + imgparams.type = IImage::ET_2D; + + IGPUImageView::SCreationParams viewparams; + viewparams.flags = static_cast(0); + viewparams.format = format; + viewparams.image = m_driver->createDeviceLocalGPUImageOnDedMem(std::move(imgparams)); + viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D; + viewparams.subresourceRange.aspectMask = static_cast(0); + viewparams.subresourceRange.baseArrayLayer = 0u; + viewparams.subresourceRange.layerCount = real_layers; + viewparams.subresourceRange.baseMipLevel = 0u; + viewparams.subresourceRange.levelCount = mipLevels; + + return m_driver->createGPUImageView(std::move(viewparams)); +} + +void EnvmapImportanceSampling::initResources(core::smart_refctd_ptr envmap, uint32_t lumaMipMapGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension) +{ + const uint32_t MipCountEnvMap = envmap->getCreationParameters().subresourceRange.levelCount; + const uint32_t MipCountLuminance = MipCountEnvMap; + + m_lumaMipMapGenWorkgroupDimension = lumaMipMapGenWorkgroupDimension; + m_warpMapGenWorkgroupDimension = warpMapGenWorkgroupDimension; + m_mipCountLuminance = MipCountLuminance; + m_mipCountEnvmap = MipCountEnvMap; + + { + uint32_t width, height = 0u; + getEnvmapResolutionFromMipLevel(MipCountLuminance - 1, width, height); + m_luminanceBaseImageView = createTexture(width, height, EF_R32_SFLOAT, MipCountLuminance); + assert(m_luminanceBaseImageView); + + m_luminanceMipMaps[0] = m_luminanceBaseImageView; + for(uint32_t i = 1; i < MipCountLuminance; ++i) + { + IGPUImageView::SCreationParams viewCreateParams = m_luminanceBaseImageView->getCreationParameters(); + viewCreateParams.subresourceRange.baseMipLevel = i; + viewCreateParams.subresourceRange.levelCount = 1u; + + m_luminanceMipMaps[i] = m_driver->createGPUImageView(std::move(viewCreateParams)); + } + } + + { + uint32_t width, height = 0u; + getEnvmapResolutionFromMipLevel(m_mipCountEnvmap - 1, width, height); + m_warpMap = createTexture(width, height, EF_R32G32_SFLOAT); + } + + ISampler::SParams samplerParams; + samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE; + samplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST; + samplerParams.MipmapMode = ISampler::ESMM_NEAREST; + samplerParams.AnisotropicFilter = 0u; + samplerParams.CompareEnable = false; + auto nearestSampler = m_driver->createGPUSampler(samplerParams); + + // Create DescriptorLayout + { + { + constexpr auto lumaDescriptorCount = 3u; + IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount]; + bindings[0].binding = 0u; + bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; + bindings[0].count = 1u; + bindings[0].samplers = &nearestSampler; + + bindings[1].binding = 1u; + bindings[1].type = asset::EDT_STORAGE_IMAGE; + bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; + bindings[1].count = 1u; + + bindings[2].binding = 2u; + bindings[2].type = asset::EDT_STORAGE_IMAGE; + bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE; + bindings[2].count = 1u; + + m_lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount); + } + + { + + ISampler::SParams lumaSamplerParams; + lumaSamplerParams.TextureWrapU = lumaSamplerParams.TextureWrapV = lumaSamplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER; + lumaSamplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK; + lumaSamplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST; + lumaSamplerParams.MipmapMode = ISampler::ESMM_NEAREST; + lumaSamplerParams.AnisotropicFilter = 0u; + lumaSamplerParams.CompareEnable = false; + auto lumaSampler = m_driver->createGPUSampler(lumaSamplerParams); + + core::smart_refctd_ptr samplers[MaxMipCountLuminance]; + for(uint32_t i = 0u; i < MipCountLuminance; ++i) + samplers[i] = lumaSampler; + + constexpr auto warpDescriptorCount = 2u; + IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount]; + bindings[0].binding = 0u; + bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE; + bindings[0].count = MipCountLuminance; + bindings[0].samplers = samplers; + + bindings[1].binding = 1u; + bindings[1].type = asset::EDT_STORAGE_IMAGE; + bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE; + bindings[1].count = 1u; + + m_warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount); + } + } + + { + { + SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(LumaMipMapGenShaderData_t)}; + m_lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_lumaDSLayout)); + + for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i) + m_lumaDS[i] = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_lumaDSLayout)); + + for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i) + { + const uint32_t src = i; + const uint32_t dst = i + 1; + + IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {}; + envMapDescriptorInfo.desc = envmap; + envMapDescriptorInfo.image.sampler = nullptr; + envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo srcMipDescriptorInfo = {}; + srcMipDescriptorInfo.desc = m_luminanceMipMaps[src]; + srcMipDescriptorInfo.image.sampler = nullptr; + srcMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; + + IGPUDescriptorSet::SDescriptorInfo dstMipDescriptorInfo = {}; + dstMipDescriptorInfo.desc = m_luminanceMipMaps[dst]; + dstMipDescriptorInfo.image.sampler = nullptr; + dstMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; + + IGPUDescriptorSet::SWriteDescriptorSet writes[3u]; + writes[0].binding = 0u; + writes[0].arrayElement = 0u; + writes[0].count = 1u; + writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; + writes[0].dstSet = m_lumaDS[i].get(); + writes[0].info = &envMapDescriptorInfo; + + writes[1].binding = 1u; + writes[1].arrayElement = 0u; + writes[1].count = 1u; + writes[1].descriptorType = EDT_STORAGE_IMAGE; + writes[1].dstSet = m_lumaDS[i].get(); + writes[1].info = &srcMipDescriptorInfo; + + writes[2].binding = 2u; + writes[2].arrayElement = 0u; + writes[2].count = 1u; + writes[2].descriptorType = EDT_STORAGE_IMAGE; + writes[2].dstSet = m_lumaDS[i].get(); + writes[2].info = &dstMipDescriptorInfo; + + m_driver->updateDescriptorSets(3u,writes,0u,nullptr); + } + } + + { + + SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(WarpMapGenShaderData_t)}; + m_warpPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_warpDSLayout)); + + m_warpDS = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_warpDSLayout)); + + IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {}; + luminanceDescriptorInfo.desc = m_luminanceBaseImageView; + luminanceDescriptorInfo.image.sampler = nullptr; + luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {}; + warpMapDescriptorInfo.desc = m_warpMap; + warpMapDescriptorInfo.image.sampler = nullptr; + warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL; + + IGPUDescriptorSet::SWriteDescriptorSet writes[2u]; + writes[0].binding = 0u; + writes[0].arrayElement = 0u; + writes[0].count = 1u; + writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; + writes[0].dstSet = m_warpDS.get(); + writes[0].info = &luminanceDescriptorInfo; + + writes[1].binding = 1u; + writes[1].arrayElement = 0u; + writes[1].count = 1u; + writes[1].descriptorType = EDT_STORAGE_IMAGE; + writes[1].dstSet = m_warpDS.get(); + writes[1].info = &warpMapDescriptorInfo; + + m_driver->updateDescriptorSets(2u,writes,0u,nullptr); + } + } + + { + + const char* sourceFmt = +R"===(#version 430 core + +#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u +#define WARP_MAP_GEN_WORKGROUP_DIM %u +#define MAX_LUMINANCE_LEVELS %u + +#include "%s" + +)==="; + + { + const size_t extraSize = 3u*8u+128u; + auto lumaShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); + snprintf( + reinterpret_cast(lumaShader->getPointer()),lumaShader->getSize(), sourceFmt, + lumaMipMapGenWorkgroupDimension, + warpMapGenWorkgroupDimension, + MipCountLuminance, + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp" + ); + + auto cpuSpecializedShader = core::make_smart_refctd_ptr( + core::make_smart_refctd_ptr(std::move(lumaShader),ICPUShader::buffer_contains_glsl), + ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} + ); + + auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); + + m_lumaGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); + assert(m_lumaGPUShader); + } + + m_lumaPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_lumaPipelineLayout), core::smart_refctd_ptr(m_lumaGPUShader)); + assert(m_lumaPipeline); + + { + const size_t extraSize = 3u*8u+128u; + auto warpGenShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); + snprintf( + reinterpret_cast(warpGenShader->getPointer()),warpGenShader->getSize(), sourceFmt, + lumaMipMapGenWorkgroupDimension, + warpMapGenWorkgroupDimension, + MipCountLuminance, + "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp" + ); + + auto cpuSpecializedShader = core::make_smart_refctd_ptr( + core::make_smart_refctd_ptr(std::move(warpGenShader),ICPUShader::buffer_contains_glsl), + ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} + ); + + auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); + + m_warpGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); + assert(m_warpGPUShader); + } + + m_warpPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_warpPipelineLayout), core::smart_refctd_ptr(m_warpGPUShader)); + assert(m_warpPipeline); + } +} + +void EnvmapImportanceSampling::deinitResources() +{ + m_lumaPipeline = nullptr; + m_lumaGPUShader = nullptr; + for(uint32_t i = 0u; i < MaxMipCountLuminance - 1; ++i) + m_lumaDS[i] = nullptr; + m_lumaPipelineLayout = nullptr; + m_lumaDSLayout = nullptr; + + for(uint32_t i = 0; i < MaxMipCountLuminance; ++i) + m_luminanceMipMaps[i] = nullptr; + + m_warpPipeline = nullptr; + m_warpGPUShader = nullptr; + m_warpDS = nullptr; + m_warpPipelineLayout = nullptr; + m_warpDSLayout = nullptr; + m_warpMap = nullptr; +} + +bool EnvmapImportanceSampling::computeWarpMap(float envMapRegularizationFactor) +{ + bool enableRIS = false; + + LumaMipMapGenShaderData_t pcData = {}; + const nbl::core::vectorSIMDf lumaScales = nbl::core::vectorSIMDf(0.2126729f, 0.7151522f, 0.0721750f, 1.0f); + + m_driver->bindComputePipeline(m_lumaPipeline.get()); + + // Calc Luma without Sin Factor + { + pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * envMapRegularizationFactor, lumaScales[1] * envMapRegularizationFactor, lumaScales[2] * envMapRegularizationFactor, (1.0f-envMapRegularizationFactor)); + pcData.calcLuma = 1; + pcData.sinFactor = 0; + m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr); + + + uint32_t sourceMipWidth, sourceMipHeight = 0u; + getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1, sourceMipWidth, sourceMipHeight); + + uint32_t workGroups[2] = { + (sourceMipWidth-1u)/m_lumaMipMapGenWorkgroupDimension+1u, + (sourceMipHeight-1u)/m_lumaMipMapGenWorkgroupDimension+1u + }; + + m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); + m_driver->dispatch(workGroups[0],workGroups[1],1); + COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); + } + + // Download Luma Image and caclulate Variance and new Regularization Factor + float variance = 0.0f; + { + uint32_t width, height = 0u; + getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1, width, height); + + const uint32_t colorBufferBytesize = width * height * asset::getTexelOrBlockBytesize(EF_R32_SFLOAT); + + auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer(); + + constexpr uint64_t timeoutInNanoSeconds = 300000000000u; + const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds); + + uint32_t address = std::remove_pointer::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation! + const uint32_t alignment = 4096u; // common page size + auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint, 1u, &address, &colorBufferBytesize, &alignment); + if (unallocatedSize) + { + os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR); + } + + IImage::SBufferCopy copyRegion = {}; + copyRegion.bufferOffset = address; + copyRegion.bufferRowLength = 0u; + copyRegion.bufferImageHeight = 0u; + //copyRegion.imageSubresource.aspectMask = wait for Vulkan; + copyRegion.imageSubresource.mipLevel = 0u; + copyRegion.imageSubresource.baseArrayLayer = 0u; + copyRegion.imageSubresource.layerCount = 1u; + copyRegion.imageOffset = { 0u,0u,0u }; + copyRegion.imageExtent = { width, height, 1u }; + + auto luminanceGPUImage = m_luminanceMipMaps[0].get()->getCreationParameters().image.get(); + m_driver->copyImageToBuffer(luminanceGPUImage, downloadStagingArea->getBuffer(), 1, ©Region); + + auto downloadFence = m_driver->placeFence(true); + + auto* data = reinterpret_cast(downloadStagingArea->getBufferPointer()) + address; + + // wait for download fence and then invalidate the CPU cache + { + auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true); + if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED||result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL) + { + os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR); + downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr); + } + if (downloadStagingArea->needsManualFlushOrInvalidate()) + m_driver->invalidateMappedMemoryRanges({{downloadStagingArea->getBuffer()->getBoundMemory(),address,colorBufferBytesize}}); + } + + float* fltData = reinterpret_cast(data); + float avg_x2 = 0.0f; + float avg_x = 0.0f; + for(uint32_t i = 0; i < width * height; ++i) + { + const float x = fltData[i]; + const float x2 = x*x; + const float n = float(i + 1); + avg_x = avg_x + (x-avg_x)/(n); + avg_x2 = avg_x2 + (x2-avg_x2)/(n); + } + + variance = avg_x2 - avg_x * avg_x; // V[x] = E[X^2]-E[X]^2 + std::cout << "Final Luminance Variance = " << variance << std::endl; + + downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr); + } + + float regularizationFactor = envMapRegularizationFactor*(1.0f-1.0f/(1.0f+variance)); + std::cout << "New Regularization Factor based on Variance = " << regularizationFactor << std::endl; + constexpr float varianceThreshold = 0.001f; + enableRIS = (variance >= varianceThreshold); + + // Calc Luma again with Sin Factor and new Regularization Factor + { + pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * regularizationFactor, lumaScales[1] * regularizationFactor, lumaScales[2] * regularizationFactor, (1.0f-regularizationFactor)); + pcData.calcLuma = 1; + pcData.sinFactor = 1; + + m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr); + + uint32_t sourceMipWidth, sourceMipHeight = 0u; + getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1, sourceMipWidth, sourceMipHeight); + + uint32_t workGroups[2] = { + (sourceMipWidth-1u)/m_lumaMipMapGenWorkgroupDimension+1u, + (sourceMipHeight-1u)/m_lumaMipMapGenWorkgroupDimension+1u + }; + + m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); + m_driver->dispatch(workGroups[0],workGroups[1],1); + COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT); + } + + // Calc Mipmaps + for(uint32_t s = 0; s < m_mipCountLuminance - 1; ++s) + { + m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[s].get(),nullptr); + + uint32_t sourceMipWidth, sourceMipHeight = 0u; + getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1 - s, sourceMipWidth, sourceMipHeight); + + uint32_t workGroups[2] = { + (sourceMipWidth-1u)/m_lumaMipMapGenWorkgroupDimension+1u, + (sourceMipHeight-1u)/m_lumaMipMapGenWorkgroupDimension+1u + }; + + pcData.calcLuma = 0; + m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData); + m_driver->dispatch(workGroups[0],workGroups[1],1); + COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); + } + + // Generate WarpMap + { + m_driver->bindComputePipeline(m_warpPipeline.get()); + + WarpMapGenShaderData_t warpPcData = {}; + warpPcData.lumaMipCount = m_mipCountLuminance; + + m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr); + + uint32_t warpMapWidth, warpMapHeight = 0u; + getEnvmapResolutionFromMipLevel(m_mipCountEnvmap - 1, warpMapWidth, warpMapHeight); + + uint32_t workGroups[2] = { + (warpMapWidth-1u)/m_warpMapGenWorkgroupDimension+1u, + (warpMapHeight-1u)/m_warpMapGenWorkgroupDimension+1u + }; + + m_driver->pushConstants(m_warpPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(warpPcData),&warpPcData); + m_driver->dispatch(workGroups[0],workGroups[1],1); + COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT); + } + + return enableRIS; +} + + From c084ec7f0005c287c37a3412f048571e36d4174d Mon Sep 17 00:00:00 2001 From: Erfan Date: Fri, 29 Apr 2022 19:06:27 +0430 Subject: [PATCH 16/16] function from envmap sampling rename --- examples_tests/22.RaytracedAO/raytraceCommon.glsl | 11 +---------- include/nbl/builtin/glsl/sampling/envmap.glsl | 9 +++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl index 29a5e5b41e..cb6f33b95e 100644 --- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl +++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl @@ -256,15 +256,6 @@ vec3 load_normal_and_prefetch_textures( return geomNormal; } -vec3 nbl_glsl_unormSphericalToCartesian(in vec2 uv, out float sinTheta) -{ - vec3 dir; - nbl_glsl_sincos((uv.x-0.5)*2.f*nbl_glsl_PI,dir.y,dir.x); - nbl_glsl_sincos(uv.y*nbl_glsl_PI,sinTheta,dir.z); - dir.xy *= sinTheta; - return dir; -} - // return regularized pdf of sample float Envmap_regularized_deferred_pdf(in vec3 rayDirection) { @@ -305,7 +296,7 @@ void Envmap_generateRegularizedSample_and_pdf(out float pdf, out nbl_glsl_LightS const vec2 uv = yDiff*interpolant.y+yVals[0]; float sinTheta; - const vec3 L = nbl_glsl_unormSphericalToCartesian(uv, sinTheta); + const vec3 L = nbl_glsl_sampling_envmap_generateDirectionFromUVCoord(uv, sinTheta); lightSample = nbl_glsl_createLightSample(L, interaction); const float detInterpolJacobian = determinant(mat2( diff --git a/include/nbl/builtin/glsl/sampling/envmap.glsl b/include/nbl/builtin/glsl/sampling/envmap.glsl index 69709e3ec0..6f68673761 100644 --- a/include/nbl/builtin/glsl/sampling/envmap.glsl +++ b/include/nbl/builtin/glsl/sampling/envmap.glsl @@ -12,4 +12,13 @@ vec2 nbl_glsl_sampling_envmap_generateUVCoordFromDirection(vec3 v) return uv; } +vec3 nbl_glsl_sampling_envmap_generateDirectionFromUVCoord(in vec2 uv, out float sinTheta) +{ + vec3 dir; + nbl_glsl_sincos((uv.x-0.5)*2.f*nbl_glsl_PI,dir.y,dir.x); + nbl_glsl_sincos(uv.y*nbl_glsl_PI,sinTheta,dir.z); + dir.xy *= sinTheta; + return dir; +} + #endif