Skip to content

Commit 8092409

Browse files
clean up error with multiple filters and larger kernels
1 parent 91f1c4c commit 8092409

File tree

5 files changed

+12
-9
lines changed

5 files changed

+12
-9
lines changed

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -510,11 +510,9 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
510510

511511
return generated_code
512512

513-
def _compute_conv1d_tr_im2col(self, input_shape, kernel=3, stride=1):
513+
def _compute_conv1d_tr_im2col(self, input_shape, out_w, kernel=3, stride=1):
514514
W, C = input_shape
515515

516-
out_w = W # working with padding in a different way for transpose layers
517-
518516
tr_kernel = (kernel+stride-1)//stride
519517

520518
input_img = np.arange(1, W * C + 1)
@@ -535,10 +533,11 @@ def _compute_conv1d_tr_im2col(self, input_shape, kernel=3, stride=1):
535533
return im_matrix
536534

537535

538-
def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, kernel=3, stride=1):
536+
def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, out_W, kernel=3, stride=1):
539537

540538
im2col_matrix = self._compute_conv1d_tr_im2col(
541539
(in_W, in_C),
540+
out_W,
542541
kernel,
543542
stride,
544543
)

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def _generate_im2col_1d_transpose(self, node):
3838
node.get_attr('n_partitions'),
3939
node.get_input_variable().shape[0],
4040
node.get_input_variable().shape[1],
41+
node.get_attr('num_out'),
4142
kernel=node.get_attr('filt_width'),
4243
stride=node.get_attr('stride_width'),
4344
)

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ def format(self, node):
113113
static const unsigned min_width = {min_width};
114114
static const ap_uint<filt_width> pixels[min_width];
115115
static const unsigned n_partitions = {n_partitions};
116-
static const unsigned n_pixels = in_width / n_partitions;
116+
static const unsigned num_out = {num_out};
117+
static const unsigned n_pixels = num_out / n_partitions;
117118
template<class data_T, class CONFIG_T>
118119
using fill_buffer = nnet::{fill_fn}<data_T, CONFIG_T>;
119120
typedef {accum_t.name} accum_t;

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,15 +196,17 @@ def init_conv1dtranspose(self, layer):
196196
layer.set_attr('strategy', 'latency')
197197

198198
in_width = layer.get_input_variable().shape[0]
199+
num_out = 1 + in_width + (layer.get_output_variable().shape[1] + layer.get_attr('pad_left'))//layer.get_attr('stride_width')
199200
chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
200-
valid_pf = self.get_valid_conv_partition_splits(1, in_width)
201+
valid_pf = self.get_valid_conv_partition_splits(1, num_out)
201202
if chosen_pf not in valid_pf:
202203
closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
203204
print('WARNING: Invalid ParallelizationFactor={} in layer "{}". Using ParallelizationFactor={} instead. Valid ParallelizationFactor(s): {}.'
204205
.format(chosen_pf, layer.name, closest_pf, ','.join(map(str, valid_pf))))
205206
else:
206207
closest_pf = chosen_pf
207-
layer.set_attr('n_partitions', in_width // closest_pf)
208+
layer.set_attr('n_partitions', num_out // closest_pf)
209+
layer.set_attr('num_out', num_out)
208210

209211
layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
210212

hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose_resource.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ void conv_1d_transpose_resource_cl(
4141
for (unsigned i_nc = 0; i_nc < CONFIG_T::n_chan; i_nc++) {
4242
if (i_fw < CONFIG_T::filt_width) {
4343
trfilt_weights[i_sw][
44-
i_fw * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
44+
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw * CONFIG_T::n_chan + i_nc
4545
] = weights[
4646
filt_ind * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
4747
];
4848
}
4949
else {
5050
trfilt_weights[i_sw][
51-
i_fw * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
51+
i_fw * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_nc
5252
] = 0;
5353
}
5454
}

0 commit comments

Comments (0)