Skip to content

Commit 9a233c7

Browse files
author
devsh
committed
fix preload sizes
1 parent 997f6f3 commit 9a233c7

File tree

2 files changed

+7
-7
lines changed

2 files changed

+7
-7
lines changed

include/nbl/video/utilities/CComputeBlit.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class CComputeBlit : public core::IReferenceCounted
159159
return computePerWorkGroup(sharedMemorySize,minSupport,maxSupport,type,inExtent,outExtent,halfPrecision);
160160
}
161161
NBL_API2 static hlsl::blit::SPerWorkgroup computePerWorkGroup(
162-
const uint16_t sharedMemorySize, const hlsl::float32_t3 minSupportInOutput, const hlsl::float32_t3 maxSupportInOutput, const IGPUImage::E_TYPE type,
162+
const uint16_t sharedMemorySize, const hlsl::float32_t3 minSupportInInput, const hlsl::float32_t3 maxSupportInInput, const IGPUImage::E_TYPE type,
163163
const hlsl::uint16_t3 inExtent, const hlsl::uint16_t3 outExtent, const bool halfPrecision=false
164164
);
165165

src/nbl/video/utilities/CComputeBlit.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ struct ConstevalParameters
117117
}
118118

119119
SPerWorkgroup CComputeBlit::computePerWorkGroup(
120-
const uint16_t sharedMemorySize, const float32_t3 minSupportInOutput, const float32_t3 maxSupportInOutput, const IGPUImage::E_TYPE type,
120+
const uint16_t sharedMemorySize, const float32_t3 minSupportInInput, const float32_t3 maxSupportInInput, const IGPUImage::E_TYPE type,
121121
const uint16_t3 inExtent, const uint16_t3 outExtent, const bool halfPrecision
122122
)
123123
{
@@ -126,16 +126,16 @@ SPerWorkgroup CComputeBlit::computePerWorkGroup(
126126

127127
const auto Dims = static_cast<uint8_t>(type)+1;
128128
const auto scale = float32_t3(inExtent)/float32_t3(outExtent);
129-
const auto supportWidthInOutput = maxSupportInOutput-minSupportInOutput;
129+
const auto supportWidthInInput = maxSupportInInput-minSupportInInput;
130130

131131
IGPUImage::E_TYPE minDimAxes[3] = { IGPUImage::ET_1D, IGPUImage::ET_2D, IGPUImage::ET_3D };
132132
using namespace nbl::hlsl;
133133
for (uint16_t3 output(1,1,1); true;)
134134
{
135135
// now try and grow our support
136-
const auto combinedSupportInOutput = supportWidthInOutput+float32_t3(output-uint16_t3(1,1,1));
136+
const auto combinedSupportInInput = supportWidthInInput+float32_t3(output-uint16_t3(1,1,1))*scale;
137137
// note that this is deliberately floor()+1 and not ceil()
138-
uint32_t3 preload = uint32_t3(hlsl::floor(combinedSupportInOutput*scale))+uint32_t3(1,1,1);
138+
uint32_t3 preload = uint32_t3(hlsl::floor(combinedSupportInInput))+uint32_t3(1,1,1);
139139
// Set the unused dimensions to 1 to avoid weird behaviours with scaled kernels
140140
for (auto a=Dims; a<3; a++)
141141
preload[a] = 1;
@@ -162,9 +162,9 @@ SPerWorkgroup CComputeBlit::computePerWorkGroup(
162162

163163
// we want to fix the dimension that's the smallest, so that we increase the volume of the support by the smallest increment and stay close to a cube shape
164164
{
165-
std::sort(minDimAxes,minDimAxes+Dims,[output](const IGPUImage::E_TYPE a, const IGPUImage::E_TYPE b)->bool
165+
std::sort(minDimAxes,minDimAxes+Dims,[preload](const IGPUImage::E_TYPE a, const IGPUImage::E_TYPE b)->bool
166166
{
167-
return output[a]<output[b];
167+
return preload[a]<preload[b];
168168
}
169169
);
170170
// grow along smallest axis, but skip if already grown to output size

0 commit comments

Comments
 (0)