Skip to content

Commit 8092409

Browse files
clean up error with multiple filters and larger kernels
1 parent 91f1c4c commit 8092409

File tree

5 files changed

+12
-9
lines changed

5 files changed

+12
-9
lines changed

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -510,11 +510,9 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
510510

511511
return generated_code
512512

513-
def _compute_conv1d_tr_im2col(self, input_shape, kernel=3, stride=1):
513+
def _compute_conv1d_tr_im2col(self, input_shape, out_w, kernel=3, stride=1):
514514
W, C = input_shape
515515

516-
out_w = W # working with padding in a different way for transpose layers
517-
518516
tr_kernel = (kernel+stride-1)//stride
519517

520518
input_img = np.arange(1, W * C + 1)
@@ -535,10 +533,11 @@ def _compute_conv1d_tr_im2col(self, input_shape, kernel=3, stride=1):
535533
return im_matrix
536534

537535

538-
def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, kernel=3, stride=1):
536+
def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, out_W, kernel=3, stride=1):
539537

540538
im2col_matrix = self._compute_conv1d_tr_im2col(
541539
(in_W, in_C),
540+
out_W,
542541
kernel,
543542
stride,
544543
)

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def _generate_im2col_1d_transpose(self, node):
3838
node.get_attr('n_partitions'),
3939
node.get_input_variable().shape[0],
4040
node.get_input_variable().shape[1],
41+
node.get_attr('num_out'),
4142
kernel=node.get_attr('filt_width'),
4243
stride=node.get_attr('stride_width'),
4344
)

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ def format(self, node):
113113
static const unsigned min_width = {min_width};
114114
static const ap_uint<filt_width> pixels[min_width];
115115
static const unsigned n_partitions = {n_partitions};
116-
static const unsigned n_pixels = in_width / n_partitions;
116+
static const unsigned num_out = {num_out};
117+
static const unsigned n_pixels = num_out / n_partitions;
117118
template<class data_T, class CONFIG_T>
118119
using fill_buffer = nnet::{fill_fn}<data_T, CONFIG_T>;
119120
typedef {accum_t.name} accum_t;

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,15 +196,17 @@ def init_conv1dtranspose(self, layer):
196196
layer.set_attr('strategy', 'latency')
197197

198198
in_width = layer.get_input_variable().shape[0]
199+
num_out = 1 + in_width + (layer.get_output_variable().shape[1] + layer.get_attr('pad_left'))//layer.get_attr('stride_width')
199200
chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
200-
valid_pf = self.get_valid_conv_partition_splits(1, in_width)
201+
valid_pf = self.get_valid_conv_partition_splits(1, num_out)
201202
if chosen_pf not in valid_pf:
202203
closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
203204
print('WARNING: Invalid ParallelizationFactor={} in layer "{}". Using ParallelizationFactor={} instead. Valid ParallelizationFactor(s): {}.'
204205
.format(chosen_pf, layer.name, closest_pf, ','.join(map(str, valid_pf))))
205206
else:
206207
closest_pf = chosen_pf
207-
layer.set_attr('n_partitions', in_width // closest_pf)
208+
layer.set_attr('n_partitions', num_out // closest_pf)
209+
layer.set_attr('num_out', num_out)
208210

209211
layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
210212

hls4ml/templates/vivado/nnet_utils/nnet_conv1dtranspose_resource.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ void conv_1d_transpose_resource_cl(
4141
for (unsigned i_nc = 0; i_nc < CONFIG_T::n_chan; i_nc++) {
4242
if (i_fw < CONFIG_T::filt_width) {
4343
trfilt_weights[i_sw][
44-
i_fw * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
44+
i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_fw * CONFIG_T::n_chan + i_nc
4545
] = weights[
4646
filt_ind * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
4747
];
4848
}
4949
else {
5050
trfilt_weights[i_sw][
51-
i_fw * CONFIG_T::n_filt * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan + i_nc
51+
i_fw * CONFIG_T::n_chan + i_nf * CONFIG_T::n_chan * CONFIG_T::trfilt_width + i_nc
5252
] = 0;
5353
}
5454
}

0 commit comments

Comments (0)