Skip to content

Commit 0be1ef5

Browse files
committed
fix padding issues with pooling for Vivado, Vitis, Catapult
1 parent dd32d3b commit 0be1ef5

File tree

3 files changed

+52
-64
lines changed

3 files changed

+52
-64
lines changed

hls4ml/templates/catapult/nnet_utils/nnet_pooling.h

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -107,22 +107,20 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
107107
// TODO partition the arrays according to the reuse factor
108108
const int limit = pool_op_limit_1d<CONFIG_T>();
109109
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
110-
// Add any necessary padding
111-
unsigned padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
112-
if (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
113-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
114-
}
110+
// Add padding and reduce input width to area covered by pooling function
111+
static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
112+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
115113

116114
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
117115
// Loop over input image x in steps of stride
118-
for (int ii = 0; ii < padded_width; ii += CONFIG_T::stride_width) {
116+
for (int ii = 0; ii < restricted_padded_width; ii += CONFIG_T::stride_width) {
119117
data_T pool[CONFIG_T::pool_width];
120118
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
121119
// Keep track of number of pixels in image vs padding region
122120
unsigned img_overlap = 0;
123121
// Loop over pool window x
124122
for (int jj = 0; jj < CONFIG_T::stride_width; jj++) {
125-
if (ii + jj < CONFIG_T::pad_left || ii + jj >= (padded_width - CONFIG_T::pad_right)) {
123+
if (ii + jj < CONFIG_T::pad_left || ii + jj >= (full_padded_width - CONFIG_T::pad_right)) {
126124
// Add padding
127125
pool[jj] = pad_val<data_T, CONFIG_T::pool_op>();
128126
if (CONFIG_T::count_pad) {
@@ -211,19 +209,17 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
211209
// TODO partition the arrays according to the reuse factor
212210
const int limit = pool_op_limit<CONFIG_T>();
213211
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
214-
// Add any necessary padding
215-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
216-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
217-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
218-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
219-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
220-
}
212+
// Compute the padded input size, then truncate width and height to whole multiples of the stride (the area covered by pooling)
213+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
214+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
215+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
216+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
221217

222218
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
223219
// Loop over input image y in steps of stride
224-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
220+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
225221
// Loop over input image x in steps of stride
226-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
222+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
227223
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
228224
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
229225
// Keep track of number of pixels in image vs padding region
@@ -232,8 +228,8 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
232228
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
233229
// Loop over pool window x
234230
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
235-
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
236-
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
231+
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
232+
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
237233
// Add padding
238234
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
239235
if (CONFIG_T::count_pad) {
@@ -275,19 +271,17 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
275271
// TODO partition the arrays according to the reuse factor
276272
const int limit = pool_op_limit<CONFIG_T>();
277273
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
278-
// Add any necessary padding
279-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
280-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
281-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
282-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
283-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
284-
}
274+
// Compute the padded input size, then truncate width and height to whole multiples of the stride (the area covered by pooling)
275+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
276+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
277+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
278+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
285279

286280
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
287281
// Loop over input image y in steps of stride
288-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
282+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
289283
// Loop over input image x in steps of stride
290-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
284+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
291285
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
292286
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
293287
// Keep track of number of pixels in image vs padding region
@@ -296,8 +290,8 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
296290
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
297291
// Loop over pool window x
298292
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
299-
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
300-
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
293+
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
294+
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
301295
// Add padding
302296
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
303297
if (CONFIG_T::count_pad) {

hls4ml/templates/vitis/nnet_utils/nnet_pooling.h

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -89,14 +89,13 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
8989
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
9090
// Add any necessary padding
9191

92-
unsigned padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
93-
if (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
94-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
95-
}
92+
// Add padding and reduce input width to area covered by pooling function
93+
static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
94+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
9695

9796
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
9897
// Loop over input image x in steps of stride
99-
for (int ii = 0; ii < padded_width; ii += CONFIG_T::stride_width) {
98+
for (int ii = 0; ii < restricted_padded_width; ii += CONFIG_T::stride_width) {
10099
unsigned overlap_pixel = 0;
101100
data_T pool[CONFIG_T::pool_width];
102101
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
@@ -179,18 +178,17 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
179178
const int limit = pool_op_limit<CONFIG_T>();
180179
#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
181180
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
182-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
183-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
184-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
185-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
186-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
187-
}
181+
// Compute the padded input size, then truncate width and height to whole multiples of the stride (the area covered by pooling)
182+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
183+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
184+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
185+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
188186

189187
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
190188
// Loop over input image y in steps of stride
191-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
189+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
192190
// Loop over input image x in steps of stride
193-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
191+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
194192
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
195193
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
196194

@@ -234,19 +232,17 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
234232
const int limit = pool_op_limit<CONFIG_T>();
235233
#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
236234
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
237-
// Add any necessary padding
238-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
239-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
240-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
241-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
242-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
243-
}
235+
// Compute the padded input size, then truncate width and height to whole multiples of the stride (the area covered by pooling)
236+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
237+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
238+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
239+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
244240

245241
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
246242
// Loop over input image y in steps of stride
247-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
243+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
248244
// Loop over input image x in steps of stride
249-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
245+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
250246
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
251247
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
252248
// Keep track of number of pixels in image vs padding region
@@ -255,8 +251,8 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
255251
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
256252
// Loop over pool window x
257253
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
258-
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
259-
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
254+
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
255+
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
260256
// Add padding
261257
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
262258
if (CONFIG_T::count_pad)

hls4ml/templates/vivado/nnet_utils/nnet_pooling.h

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,13 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
8787
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
8888
// Add any necessary padding
8989

90-
unsigned padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
91-
if (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
92-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
93-
}
90+
// Add padding and reduce input width to area covered by pooling function
91+
static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
92+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
9493

9594
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
9695
// Loop over input image x in steps of stride
97-
for (int ii = 0; ii < padded_width; ii += CONFIG_T::stride_width) {
96+
for (int ii = 0; ii < restricted_padded_width; ii += CONFIG_T::stride_width) {
9897
unsigned overlap_pixel = 0;
9998
data_T pool[CONFIG_T::pool_width];
10099
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
@@ -176,12 +175,11 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
176175
const int limit = pool_op_limit<CONFIG_T>();
177176
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
178177

179-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
180-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
181-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
182-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
183-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
184-
}
178+
// Compute the padded input size, then truncate width and height to whole multiples of the stride (the area covered by pooling)
179+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
180+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
181+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
182+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
185183

186184
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
187185

0 commit comments

Comments
 (0)