Skip to content

Commit 1520518

Browse files
authored
Merge pull request #1056 from fastmachinelearning/vitis_vivado_pooling
Update pooling logic for Vivado, Vitis, and Catapult backends
2 parents 2cb6fe1 + 938eb5e commit 1520518

File tree

3 files changed

+71
-79
lines changed

3 files changed

+71
-79
lines changed

hls4ml/templates/catapult/nnet_utils/nnet_pooling.h

Lines changed: 23 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -107,22 +107,20 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
107107
// TODO partition the arrays according to the reuse factor
108108
const int limit = pool_op_limit_1d<CONFIG_T>();
109109
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
110-
// Add any necessary padding
111-
unsigned padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
112-
if (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
113-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
114-
}
110+
// Add padding and reduce input width to area covered by pooling function
111+
static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
112+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
115113

116114
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
117115
// Loop over input image x in steps of stride
118-
for (int ii = 0; ii < padded_width; ii += CONFIG_T::stride_width) {
116+
for (int ii = 0; ii < restricted_padded_width; ii += CONFIG_T::stride_width) {
119117
data_T pool[CONFIG_T::pool_width];
120118
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
121119
// Keep track of number of pixels in image vs padding region
122120
unsigned img_overlap = 0;
123121
// Loop over pool window x
124122
for (int jj = 0; jj < CONFIG_T::stride_width; jj++) {
125-
if (ii + jj < CONFIG_T::pad_left || ii + jj >= (padded_width - CONFIG_T::pad_right)) {
123+
if (ii + jj < CONFIG_T::pad_left || ii + jj >= (full_padded_width - CONFIG_T::pad_right)) {
126124
// Add padding
127125
pool[jj] = pad_val<data_T, CONFIG_T::pool_op>();
128126
if (CONFIG_T::count_pad) {
@@ -211,19 +209,17 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
211209
// TODO partition the arrays according to the reuse factor
212210
const int limit = pool_op_limit<CONFIG_T>();
213211
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
214-
// Add any necessary padding
215-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
216-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
217-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
218-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
219-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
220-
}
212+
// Add padding and reduce input width and height to the area covered by the pooling function
213+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
214+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
215+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
216+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
221217

222218
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
223219
// Loop over input image y in steps of stride
224-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
220+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
225221
// Loop over input image x in steps of stride
226-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
222+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
227223
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
228224
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
229225
// Keep track of number of pixels in image vs padding region
@@ -232,8 +228,8 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
232228
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
233229
// Loop over pool window x
234230
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
235-
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
236-
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
231+
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
232+
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
237233
// Add padding
238234
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
239235
if (CONFIG_T::count_pad) {
@@ -275,19 +271,17 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
275271
// TODO partition the arrays according to the reuse factor
276272
const int limit = pool_op_limit<CONFIG_T>();
277273
#pragma HLS ALLOCATION function instances=CONFIG_T::pool_op limit=limit
278-
// Add any necessary padding
279-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
280-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
281-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
282-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
283-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
284-
}
274+
// Add padding and reduce input width and height to the area covered by the pooling function
275+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
276+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
277+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
278+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
285279

286280
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
287281
// Loop over input image y in steps of stride
288-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
282+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
289283
// Loop over input image x in steps of stride
290-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
284+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
291285
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
292286
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
293287
// Keep track of number of pixels in image vs padding region
@@ -296,8 +290,8 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
296290
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
297291
// Loop over pool window x
298292
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
299-
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
300-
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
293+
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
294+
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
301295
// Add padding
302296
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
303297
if (CONFIG_T::count_pad) {

hls4ml/templates/vitis/nnet_utils/nnet_pooling.h

Lines changed: 28 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -70,6 +70,7 @@ struct pooling1d_config {
7070
static const unsigned n_out = (n_in - pool_width) / stride_width + 1;
7171
static const unsigned pad_left = 0;
7272
static const unsigned pad_right = 0;
73+
static const bool count_pad = false;
7374
// Pooling function
7475
static const Pool_Op pool_op = Max;
7576
};
@@ -88,14 +89,13 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
8889
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
8990
// Add any necessary padding
9091

91-
unsigned padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
92-
if (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
93-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
94-
}
92+
// Add padding and reduce input width to area covered by pooling function
93+
static constexpr int full_padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
94+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
9595

9696
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
9797
// Loop over input image x in steps of stride
98-
for (int ii = 0; ii < padded_width; ii += CONFIG_T::stride_width) {
98+
for (int ii = 0; ii < restricted_padded_width; ii += CONFIG_T::stride_width) {
9999
unsigned overlap_pixel = 0;
100100
data_T pool[CONFIG_T::pool_width];
101101
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
@@ -130,6 +130,7 @@ void global_pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T r
130130

131131
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
132132
data_T pool[CONFIG_T::n_in];
133+
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
133134
for (int jj = 0; jj < CONFIG_T::n_in; jj++) {
134135
pool[jj] = data[jj * CONFIG_T::n_filt + ff];
135136
}
@@ -154,6 +155,7 @@ struct pooling2d_config {
154155
static const unsigned pad_bottom = 0;
155156
static const unsigned pad_left = 0;
156157
static const unsigned pad_right = 0;
158+
static const bool count_pad = false;
157159
// Pooling function
158160
static const Pool_Op pool_op = Max;
159161
// Reuse factor
@@ -176,18 +178,17 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
176178
const int limit = pool_op_limit<CONFIG_T>();
177179
#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
178180
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
179-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
180-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
181-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
182-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
183-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
184-
}
181+
// Add padding and reduce input width and height to the area covered by the pooling function
182+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
183+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
184+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
185+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
185186

186187
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
187188
// Loop over input image y in steps of stride
188-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
189+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
189190
// Loop over input image x in steps of stride
190-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
191+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
191192
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
192193
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
193194

@@ -231,34 +232,35 @@ void pooling2d_cf(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
231232
const int limit = pool_op_limit<CONFIG_T>();
232233
#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
233234
CONFIG_T::pool_op, typename CONFIG_T::accum_t> limit=limit
234-
// Add any necessary padding
235-
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
236-
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
237-
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
238-
padded_height -= padded_height - (padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height);
239-
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
240-
}
235+
// Add padding and reduce input width and height to the area covered by the pooling function
236+
static constexpr int full_padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
237+
static constexpr int full_padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
238+
static constexpr int restricted_padded_width = full_padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width;
239+
static constexpr int restricted_padded_height = full_padded_height / CONFIG_T::stride_height * CONFIG_T::stride_height;
241240

242241
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
243242
// Loop over input image y in steps of stride
244-
for (int ii = 0; ii < padded_height; ii += CONFIG_T::stride_height) {
243+
for (int ii = 0; ii < restricted_padded_height; ii += CONFIG_T::stride_height) {
245244
// Loop over input image x in steps of stride
246-
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
245+
for (int jj = 0; jj < restricted_padded_width; jj += CONFIG_T::stride_width) {
247246
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
247+
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0
248248
// Keep track of number of pixels in image vs padding region
249249
unsigned img_overlap = 0;
250250
// Loop over pool window y
251251
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
252252
// Loop over pool window x
253253
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
254-
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
255-
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
254+
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (full_padded_height - CONFIG_T::pad_bottom) ||
255+
jj + ll < CONFIG_T::pad_left || jj + ll >= (full_padded_width - CONFIG_T::pad_right)) {
256256
// Add padding
257257
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
258+
if (CONFIG_T::count_pad)
259+
img_overlap++;
258260
} else {
259261
pool[kk * CONFIG_T::stride_width + ll] =
260-
data[(ii + kk) * CONFIG_T::in_width + ff * CONFIG_T::in_width * CONFIG_T::in_height + ll +
261-
jj];
262+
data[(ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width +
263+
ff * CONFIG_T::in_width * CONFIG_T::in_height + ll + jj - CONFIG_T::pad_left];
262264
img_overlap++;
263265
}
264266
}

0 commit comments

Comments
 (0)