@@ -12,6 +12,7 @@ namespace hlsl
12
12
namespace prefix_sum_blur
13
13
{
14
14
15
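+ // Requires an *inclusive* prefix sum, i.e. element `i` of the accessor holds the sum of input elements `0..i` (so the element at the last index is the total over the whole line)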
15
16
template<typename PrefixSumAccessor, typename T>
16
17
struct BoxSampler
17
18
{
@@ -20,7 +21,6 @@ struct BoxSampler
20
21
PrefixSumAccessor prefixSumAccessor;
21
22
uint16_t wrapMode;
22
23
uint16_t linearSize;
23
- T normalizationFactor;
24
24
25
25
T operator ()(float32_t ix, float32_t radius, float32_t borderColor)
26
26
{
@@ -33,7 +33,8 @@ struct BoxSampler
33
33
const int32_t leftFlIdx = (int32_t)floor (leftIdx);
34
34
const int32_t leftClIdx = (int32_t)ceil (leftIdx);
35
35
36
- assert (linearSize > 1 );
36
+ assert (linearSize > 1 && radius >= 0 );
37
+ assert (borderColor >= 0 && borderColor <= 1 );
37
38
38
39
T result = 0 ;
39
40
if (rightClIdx < linearSize)
@@ -45,10 +46,15 @@ struct BoxSampler
45
46
switch (wrapMode) {
46
47
case ETC_REPEAT:
47
48
{
49
+ const uint32_t flooredMod = rightFlIdx % linearSize;
50
+ const uint32_t ceiledMod = rightClIdx % linearSize;
48
51
const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
49
- const T floored = prefixSumAccessor.template get<T, uint32_t>(rightFlIdx % linearSize) + last;
50
- const T ceiled = prefixSumAccessor.template get<T, uint32_t>(rightClIdx % linearSize) + last;
51
- result += lerp (floored, ceiled, alpha);
52
+ const T periodicOffset = (T (rightFlIdx) / linearSize) * last;
53
+ const T floored = prefixSumAccessor.template get<T, uint32_t>(flooredMod);
54
+ T ceiled = prefixSumAccessor.template get<T, uint32_t>(ceiledMod);
55
+ if (flooredMod == lastIdx && ceiledMod == 0 )
56
+ ceiled += last;
57
+ result += lerp (floored, ceiled, alpha) + periodicOffset;
52
58
break ;
53
59
}
54
60
case ETC_CLAMP_TO_BORDER:
@@ -114,10 +120,15 @@ struct BoxSampler
114
120
switch (wrapMode) {
115
121
case ETC_REPEAT:
116
122
{
123
+ const uint32_t flooredMod = (linearSize + leftFlIdx) % linearSize;
124
+ const uint32_t ceiledMod = (linearSize + leftClIdx) % linearSize;
117
125
const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
118
- const T floored = prefixSumAccessor.template get<T, uint32_t>((lastIdx + leftFlIdx) % linearSize) + floor (T (leftFlIdx) / linearSize) * last;
119
- const T ceiled = prefixSumAccessor.template get<T, uint32_t>((lastIdx + leftClIdx) % linearSize) + floor (T (leftClIdx) / linearSize) * last;
120
- result -= lerp (floored, ceiled, alpha);
126
+ const T periodicOffset = (T (linearSize + leftClIdx) / T (linearSize)) * last;
127
+ const T floored = prefixSumAccessor.template get<T, uint32_t>(flooredMod);
128
+ T ceiled = prefixSumAccessor.template get<T, uint32_t>(ceiledMod);
129
+ if (flooredMod == lastIdx && ceiledMod == 0 )
130
+ ceiled += last;
131
+ result -= lerp (floored, ceiled, alpha) - periodicOffset;
121
132
break ;
122
133
}
123
134
case ETC_CLAMP_TO_BORDER:
@@ -127,36 +138,36 @@ struct BoxSampler
127
138
}
128
139
case ETC_CLAMP_TO_EDGE:
129
140
{
130
- result -= (1 - abs (leftIdx) ) * prefixSumAccessor.template get<T, uint32_t>(0 );
141
+ result -= (leftIdx + 1 ) * prefixSumAccessor.template get<T, uint32_t>(0 );
131
142
break ;
132
143
}
133
144
case ETC_MIRROR:
134
145
{
135
146
const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
136
147
T floored, ceiled;
137
148
138
- if (abs (leftFlIdx + 1 ) % (2 * linearSize) == 0 )
139
- floored = -(abs (leftFlIdx + 1 ) / linearSize) * last;
149
+ if (abs (leftFlIdx) % (2 * linearSize) == 0 )
150
+ floored = -(abs (leftFlIdx) / linearSize) * last;
140
151
else
141
152
{
142
- const uint32_t period = uint32_t (ceil (float32_t (abs (leftFlIdx + 1 )) / linearSize));
153
+ const uint32_t period = uint32_t (ceil (float32_t (abs (leftFlIdx)) / linearSize));
143
154
if ((period & 0x1u) == 1 )
144
- floored = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftFlIdx + 1 ) - 1 ) % linearSize);
155
+ floored = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftFlIdx) - 1 ) % linearSize);
145
156
else
146
- floored = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(( leftFlIdx + 1 ) % linearSize - 1 ));
157
+ floored = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(leftFlIdx % linearSize - 1 ));
147
158
}
148
159
149
160
if (leftClIdx == 0 ) // Special case, wouldn't be possible for `floored` above
150
161
ceiled = 0 ;
151
- else if (abs (leftClIdx + 1 ) % (2 * linearSize) == 0 )
152
- ceiled = -(abs (leftClIdx + 1 ) / linearSize) * last;
162
+ else if (abs (leftClIdx) % (2 * linearSize) == 0 )
163
+ ceiled = -(abs (leftClIdx) / linearSize) * last;
153
164
else
154
165
{
155
- const uint32_t period = uint32_t (ceil (float32_t (abs (leftClIdx + 1 )) / linearSize));
166
+ const uint32_t period = uint32_t (ceil (float32_t (abs (leftClIdx)) / linearSize));
156
167
if ((period & 0x1u) == 1 )
157
- ceiled = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftClIdx + 1 ) - 1 ) % linearSize);
168
+ ceiled = -(period - 1 ) * last - prefixSumAccessor.template get<T, uint32_t>((abs (leftClIdx) - 1 ) % linearSize);
158
169
else
159
- ceiled = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(( leftClIdx + 1 ) % linearSize - 1 ));
170
+ ceiled = -(period - 1 ) * last - (last - prefixSumAccessor.template get<T, uint32_t>(leftClIdx % linearSize - 1 ));
160
171
}
161
172
162
173
result -= lerp (floored, ceiled, alpha);
@@ -166,13 +177,13 @@ struct BoxSampler
166
177
{
167
178
const T last = prefixSumAccessor.template get<T, uint32_t>(lastIdx);
168
179
const T lastMinusOne = prefixSumAccessor.template get<T, uint32_t>(lastIdx - 1 );
169
- result -= (1 - abs (leftIdx) ) * (last - lastMinusOne);
180
+ result -= (leftIdx + 1 ) * (last - lastMinusOne);
170
181
break ;
171
182
}
172
183
}
173
184
}
174
185
175
- return result * normalizationFactor ;
186
+ return result / ( 2 * radius + 1 ) ;
176
187
}
177
188
};
178
189
0 commit comments