Skip to content

Commit c1ea360

Browse files
Jonathan-Shoemaker
authored and jmduarte committed
attempt to add support for conv1d transpose
add new files for conv1dtranspose resource clean up so that conv code is reached. Still need to get the actual implementation matching keras implement conv1dtranspose super inefficiently (gets correct answer though) try to fix indices to make code work make the c code work for conv1dtranspose reduce weight dimensions to properly reflect transposed kernel size clean up so that transpose filter width is passes around from config fix code such that simple transpose layer gets synthesized move variables out of loops, optimize slightly and add in alternative method of computation to compute by kernel (that option is not optimized as of now) add in conv1d transpose linebuffer format code. seems to work, unsure of if it is optimized yet trying to fix stream behavior get transpose compilation working mostly as expected. weird jump in latency from reuse 1 to 2 still exists initial conv2dtranspose addition. Output is permuted as of now. output in correct order. using large array to buffer output though fix up conv1dtranspose a bit to pad correctly. fix up stream instructions for both 1d and 2d transposes fix allowed reuse factors for transpose layers update to new conv methods for io_parallel. Still some issues with multiple filters as well as some padding issues clean up error with multiple filters and larger kernels optimize conv transpose resource to get it working reasonably well. may still have slight optimization left fix output to conv1d transpose resource add conv2dtranspose io_parallel implementation. Can still be optimized small changeup to data storage in conv1d parallel fix zero padding pass addition for transpose stream layers move transposing of weight matrix to resource_strategy for transpose layers change how stream loads in weights to be like parallel for conv transposes. 
unroll all stride steps completely fix output of 1d transpose parallel to be faster change 1d transpose weight input to be 2-dimensional (passed from python code) change 2d transpose weight input to be 3-dimensional (passed from python code) small changes to transposes Revert "fix nondefault project name handling (#626)". The commit breaks the Vivado Accelerator workflow, and the fix is unclear to me right now. This reverts commit e8f048a. steps towards getting integer inputs to work
1 parent a4b0e0c commit c1ea360

24 files changed

+1808
-177
lines changed

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,22 @@ def get_layer_mult_size(self, layer):
158158
n_out = layer.get_attr('n_out')
159159
return n_in, n_out
160160

161+
if 'Conv1DTranspose' in layer.class_name:
162+
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
163+
// layer.get_attr('stride_width')
164+
n_in = layer.get_attr('n_chan') * trfilt_width
165+
n_out = layer.get_attr('n_filt')
166+
return n_in, n_out
167+
168+
if 'Conv2DTranspose' in layer.class_name:
169+
trfilt_width = (layer.get_attr('filt_width') + layer.get_attr('stride_width') - 1) \
170+
// layer.get_attr('stride_width')
171+
trfilt_height = (layer.get_attr('filt_height') + layer.get_attr('stride_height') - 1) \
172+
// layer.get_attr('stride_height')
173+
n_in = layer.get_attr('n_chan') * trfilt_height * trfilt_width
174+
n_out = layer.get_attr('n_filt')
175+
return n_in, n_out
176+
161177
if 'Conv1D' in layer.class_name:
162178
n_in = layer.get_attr('n_chan') * layer.get_attr('filt_width')
163179
n_out = layer.get_attr('n_filt')
@@ -713,7 +729,67 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
713729
" ) {{\n"
714730
).format(index=layer_idx)
715731
indent = ' '
732+
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
733+
generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
734+
for pixel_idx, arr in enumerate(partition):
735+
buffer_stmts = []
736+
for j, v in enumerate(arr):
737+
if v == 0:
738+
val = '0'
739+
else:
740+
val = 'data[{}]'.format(int(v-1))
741+
buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
742+
generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
743+
generated_code += '\n' + indent * 2 + '}\n'
744+
745+
generated_code += indent + '}\n'
746+
generated_code += '};\n'
747+
748+
return generated_code
749+
750+
def _compute_conv1d_tr_im2col(self, input_shape, out_w, kernel=3, stride=1):
751+
W, C = input_shape
752+
753+
tr_kernel = (kernel+stride-1)//stride
754+
755+
input_img = np.arange(1, W * C + 1)
756+
im_matrix = np.zeros((tr_kernel * C * out_w, ))
757+
758+
index = 0
759+
for i_ow in range(out_w):
760+
for i_kw in range(tr_kernel):
761+
for i_c in range(C):
762+
# input column is just the output column shifted
763+
input_col = i_ow - (tr_kernel-1) + i_kw
764+
if (input_col >= 0 and input_col < W):
765+
im_matrix[index] = input_img[input_col * C + i_c]
766+
else:
767+
im_matrix[index] = 0
768+
index += 1
769+
im_matrix = im_matrix.reshape(out_w, -1)
770+
return im_matrix
771+
772+
773+
def generate_conv1d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, out_W, kernel=3, stride=1):
716774

775+
im2col_matrix = self._compute_conv1d_tr_im2col(
776+
(in_W, in_C),
777+
out_W,
778+
kernel,
779+
stride,
780+
)
781+
782+
generated_code = (
783+
"template<class data_T, typename CONFIG_T>\n"
784+
"class fill_buffer_{index} : public FillConv1DBuffer<data_T, CONFIG_T> {{\n"
785+
" public:\n"
786+
" static void fill_buffer(\n"
787+
" data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n"
788+
" data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
789+
" const unsigned partition\n"
790+
" ) {{\n"
791+
).format(index=layer_idx)
792+
indent = ' '
717793
for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
718794
generated_code += indent * 2 + f'if (partition == {partition_idx:>3}) {{\n'
719795
for pixel_idx, arr in enumerate(partition):
@@ -862,6 +938,91 @@ def generate_conv2d_line_buffer_fn(
862938

863939
return generated_code
864940

941+
def _compute_conv2d_tr_im2col(self, input_shape, out_shape, kernel=(3, 3), stride=(1, 1)):
942+
H, W, C = input_shape
943+
kernel_h, kernel_w = kernel
944+
stride_h, stride_w = stride
945+
out_h, out_w = out_shape
946+
947+
tr_kernel_h = (kernel_h+stride_h-1)//stride_h
948+
tr_kernel_w = (kernel_w+stride_w-1)//stride_w
949+
950+
input_img = np.arange(1, H * W * C + 1)
951+
im_matrix = np.zeros((tr_kernel_h * tr_kernel_w * C * out_h * out_w, ))
952+
953+
index = 0
954+
for i_oh in range(out_h):
955+
for i_ow in range(out_w):
956+
for i_kh in range(tr_kernel_h):
957+
input_row = i_oh - (tr_kernel_h-1) + i_kh
958+
for i_kw in range(tr_kernel_w):
959+
for i_c in range(C):
960+
if (input_row < 0 or input_row >= H):
961+
im_matrix[index] = 0
962+
else:
963+
input_col = i_ow - (tr_kernel_w-1) + i_kw
964+
if (input_col >= 0 and input_col < W):
965+
im_matrix[index] = input_img[input_row * W * C + input_col * C + i_c]
966+
else:
967+
im_matrix[index] = 0
968+
index += 1
969+
970+
im_matrix = im_matrix.reshape(out_h * out_w, -1)
971+
return im_matrix
972+
973+
974+
def generate_conv2d_tr_line_buffer_fn(self, layer_idx, n_partitions, in_H, in_W, in_C, out_H, out_W, kernel=(3, 3), stride=(1, 1)):
    """Generate the C++ ``fill_buffer_{index}`` class implementing the im2col
    line buffer for a Conv2DTranspose layer (io_parallel).

    Args:
        layer_idx: layer index, used to name the generated class.
        n_partitions: number of partitions the output pixels are split into;
            must evenly divide the number of output pixels (np.split requires
            an even split).
        in_H, in_W, in_C: input height, width and channel count.
        out_H, out_W: processed output height and width.
        kernel: kernel size; a scalar is broadcast to both dimensions.
        stride: stride; a scalar is broadcast to both dimensions.

    Returns:
        str: the generated C++ source code.
    """
    if isinstance(kernel, Iterable):
        kernel_height = kernel[0]
        kernel_width = kernel[1]
    else:
        kernel_height = kernel
        kernel_width = kernel

    if isinstance(stride, Iterable):
        stride_height = stride[0]
        stride_width = stride[1]
    else:
        stride_height = stride
        stride_width = stride

    # Bug fix: the output shape is (out_H, out_W). This previously passed
    # (out_W, out_W), which generated a wrong-sized im2col matrix (and thus
    # wrong buffer-fill code) whenever out_H != out_W.
    im2col_matrix = self._compute_conv2d_tr_im2col(
        (in_H, in_W, in_C),
        (out_H, out_W),
        (kernel_height, kernel_width),
        (stride_height, stride_width),
    )

    generated_code = (
        "template<class data_T, typename CONFIG_T>\n"
        "class fill_buffer_{index} : public FillConv2DBuffer<data_T, CONFIG_T> {{\n"
        "   public:\n"
        "    static void fill_buffer(\n"
        "        data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_T::n_chan],\n"
        "        data_T buffer[CONFIG_T::n_pixels][CONFIG_T::trfilt_height * CONFIG_T::trfilt_width * CONFIG_T::n_chan],\n"
        "        const unsigned partition\n"
        "    ) {{\n"
    ).format(index=layer_idx)
    indent = '    '

    # One 'if (partition == k)' branch per partition; each branch assigns
    # every buffer element for the pixels in that partition. A 0 entry in the
    # im2col matrix means the tap is outside the input and is filled with 0;
    # otherwise the 1-based index is converted back to a 0-based data[] index.
    for partition_idx, partition in enumerate(np.split(im2col_matrix, n_partitions)):
        generated_code += indent * 2 + 'if (partition == {:>3}) {{\n'.format(partition_idx)
        for pixel_idx, arr in enumerate(partition):
            buffer_stmts = []
            for j, v in enumerate(arr):
                if v == 0:
                    val = '0'
                else:
                    val = 'data[{}]'.format(int(v - 1))
                buffer_stmts.append('buffer[{}][{}] = {:>10};'.format(pixel_idx, j, val))
            generated_code += indent * 3 + ' '.join(buffer_stmts) + '\n'
        generated_code += '\n' + indent * 2 + '}\n'

    generated_code += indent + '}\n'
    generated_code += '};\n'

    return generated_code
1025+
8651026
@model_optimizer()
8661027
def write_hls(self, model):
8671028
self.writer.write_hls(model)

hls4ml/backends/fpga/fpga_types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,15 @@ def __init__(self, type_converter):
326326

327327
class StaticWeightVariableDefinition(VariableDefinition):
328328
def definition_cpp(self, name_suffix='', as_reference=False):
    """Return the C++ array definition string for this weight variable.

    When ``keep_dims`` is positive, the first ``keep_dims`` axes of the
    shape are kept as separate C array dimensions and all remaining axes
    are flattened into a single trailing dimension. Otherwise the weights
    are emitted as one flat array of ``data_length`` elements.
    """
    if self.keep_dims > 0:
        # Keep the leading axes as individual array dimensions.
        dim_strs = ['[{cur_dim}]'.format(cur_dim=self.shape[d]) for d in range(self.keep_dims)]
        # Collapse every remaining axis into one trailing dimension.
        final_dim = 1
        for extent in self.shape[self.keep_dims:]:
            final_dim *= extent
        dim_strs.append('[{last_dim}]'.format(last_dim=final_dim))
        return '{type} {name}{sizes}'.format(type=self.type.name, name=self.name, sizes=''.join(dim_strs))
    return '{type} {name}[{size}]'.format(type=self.type.name, name=self.name, size=self.data_length)
330339

331340
class StaticWeightVariableConverter(object):

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
from hls4ml.model.optimizer import OptimizerPass
2-
from hls4ml.model.layers import Conv1D, Conv2D
2+
from hls4ml.model.layers import Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose
33
from hls4ml.model.types import Source
44

55
class GenerateConvIm2col(OptimizerPass):
66
''' Generates code for im2col step of 1D/2D convolution '''
77
def match(self, node):
8-
return isinstance(node, (Conv1D, Conv2D)) and \
8+
return isinstance(node, (Conv1D, Conv2D, Conv1DTranspose, Conv2DTranspose)) and \
99
node.model.config.get_config_value('IOType') == 'io_parallel'
1010

1111
def transform(self, model, node):
1212
node_class = node.__class__.__name__
13-
if '1D' in node_class:
13+
if '1DTranspose' in node_class:
14+
self._generate_im2col_1d_transpose(node)
15+
elif '1D' in node_class:
1416
self._generate_im2col_1d(node)
17+
elif '2DTranspose' in node_class:
18+
self._generate_im2col_2d_transpose(node)
1519
elif '2D' in node_class:
1620
self._generate_im2col_2d(node)
1721
else:
@@ -30,6 +34,19 @@ def _generate_im2col_1d(self, node):
3034

3135
node.set_attr('line_buffer_codegen', Source(code_str))
3236

37+
def _generate_im2col_1d_transpose(self, node):
    """Generate the im2col line-buffer code for a Conv1DTranspose node and
    attach it to the node as the 'line_buffer_codegen' attribute."""
    in_shape = node.get_input_variable().shape
    code_str = node.model.config.backend.generate_conv1d_tr_line_buffer_fn(
        node.get_attr('index'),
        node.get_attr('n_partitions'),
        in_shape[0],  # input width
        in_shape[1],  # input channels
        node.get_attr('proc_width'),
        kernel=node.get_attr('filt_width'),
        stride=node.get_attr('stride_width'),
    )

    node.set_attr('line_buffer_codegen', Source(code_str))
49+
3350
def _generate_im2col_2d(self, node):
3451
code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
3552
node.get_attr('index'),
@@ -43,3 +60,18 @@ def _generate_im2col_2d(self, node):
4360
)
4461

4562
node.set_attr('line_buffer_codegen', Source(code_str))
63+
64+
def _generate_im2col_2d_transpose(self, node):
    """Generate the im2col line-buffer code for a Conv2DTranspose node and
    attach it to the node as the 'line_buffer_codegen' attribute."""
    in_shape = node.get_input_variable().shape
    code_str = node.model.config.backend.generate_conv2d_tr_line_buffer_fn(
        node.get_attr('index'),
        node.get_attr('n_partitions'),
        in_shape[0],  # input height
        in_shape[1],  # input width
        in_shape[2],  # input channels
        node.get_attr('proc_height'),
        node.get_attr('proc_width'),
        kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
        stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
    )

    node.set_attr('line_buffer_codegen', Source(code_str))

hls4ml/backends/vivado/passes/conv_same_pad.py

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from hls4ml.model.optimizer import OptimizerPass
2-
from hls4ml.model.layers import Conv1D, SeparableConv1D, Conv2D, SeparableConv2D
2+
from hls4ml.model.layers import Conv1D, SeparableConv1D, Conv2D, SeparableConv2D, Conv1DTranspose, Conv2DTranspose
33

44
class InsertZeroPaddingBeforeConv1D(OptimizerPass):
55
name = 'insert_zero_padding_before_conv1d'
@@ -46,6 +46,53 @@ def transform(self, model, node):
4646

4747
return True
4848

49+
class InsertZeroPaddingBeforeConv1DTranspose(OptimizerPass):
    """Replace 'same' padding on a Conv1DTranspose with an explicit
    ZeroPadding1D layer inserted before it (io_stream only), switching the
    transposed convolution itself to 'valid' padding."""
    name = 'insert_zero_padding_before_conv1dtranspose'

    def match(self, node):
        # Only 'same'-padded transposed convolutions with a non-trivial
        # kernel need an explicit padding layer.
        is_match = isinstance(node, (Conv1DTranspose)) and \
            node.get_attr('padding') == 'same' and \
            node.get_attr('filt_width') != 1
        return is_match

    def transform(self, model, node):
        if model.config.get_config_value('IOType') != 'io_stream':
            return False

        # Geometry of the transposed convolution.
        # (Removed an unused fetch of 'pad_right' -- only 'pad_left' feeds
        # the computation below.)
        pad_left = node.get_attr('pad_left')
        convtr_out_width = node.get_attr('out_width')
        in_width = node.get_attr('in_width')
        stride_width = node.get_attr('stride_width')
        # Width of the stride-collapsed ("transposed") kernel.
        trfilt_width = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
            // node.get_attr('stride_width')

        # Extra columns required on the right so every output pixel is covered.
        add_right = (convtr_out_width + pad_left) // stride_width - (in_width - 1)

        out_width = in_width + add_right + trfilt_width - 1

        attrs = {
            'pad_left': trfilt_width - 1,
            'pad_right': add_right,
            'in_width': in_width,
            'out_width': out_width,
            'n_chan': node.get_attr('n_chan'),
            'data_format': node.get_attr('data_format', 'channels_last')
        }

        # Switch Conv1DTranspose to 'valid' now that padding is explicit.
        # NOTE(review): the original author flagged this as possibly wrong --
        # verify against Keras 'same' semantics.
        node.set_attr('padding', 'valid')
        node.set_attr('in_width', out_width)
        node.set_attr('pad_left', pad_left + (trfilt_width - 1) * stride_width)

        # Insert the new ZeroPadding1D node above the Conv1DTranspose.
        padding_layer = model.make_node('ZeroPadding1D', 'zp1d_' + node.name, attrs, node.inputs.copy())
        padding_layer.get_output_variable().type.precision = node.get_input_variable().type.precision
        model.insert_node(padding_layer)

        return True
95+
4996
class InsertZeroPaddingBeforeConv2D(OptimizerPass):
5097
name = 'insert_zero_padding_before_conv2d'
5198

@@ -100,3 +147,66 @@ def transform(self, model, node):
100147
model.insert_node(padding_layer, before=node)
101148

102149
return True
150+
151+
class InsertZeroPaddingBeforeConv2DTranspose(OptimizerPass):
    """Replace 'same' padding on a Conv2DTranspose with an explicit
    ZeroPadding2D layer inserted before it (io_stream only), switching the
    transposed convolution itself to 'valid' padding."""
    name = 'insert_zero_padding_before_conv2dtranspose'

    def match(self, node):
        # NOTE(review): only filt_width is checked here (mirroring the 1D
        # pass); a layer with filt_width == 1 but filt_height > 1 would be
        # skipped -- confirm whether filt_height should also be tested.
        is_match = isinstance(node, Conv2DTranspose) and \
            node.get_attr('padding') == 'same' and \
            node.get_attr('filt_width') != 1
        return is_match

    def transform(self, model, node):
        if model.config.get_config_value('IOType') != 'io_stream':
            return False

        # Geometry of the transposed convolution.
        # (Removed unused fetches of 'pad_right'/'pad_bottom' -- only
        # 'pad_left'/'pad_top' feed the computation below.)
        pad_left = node.get_attr('pad_left')
        pad_top = node.get_attr('pad_top')
        convtr_out_width = node.get_attr('out_width')
        convtr_out_height = node.get_attr('out_height')
        in_width = node.get_attr('in_width')
        in_height = node.get_attr('in_height')
        stride_width = node.get_attr('stride_width')
        stride_height = node.get_attr('stride_height')
        # Height/width of the stride-collapsed ("transposed") kernel.
        trfilt_width = (node.get_attr('filt_width') + node.get_attr('stride_width') - 1) \
            // node.get_attr('stride_width')
        trfilt_height = (node.get_attr('filt_height') + node.get_attr('stride_height') - 1) \
            // node.get_attr('stride_height')

        # Extra rows/columns needed on the bottom/right so every output
        # pixel is covered.
        add_right = (convtr_out_width + pad_left) // stride_width - (in_width - 1)
        add_bottom = (convtr_out_height + pad_top) // stride_height - (in_height - 1)

        out_width = in_width + add_right + trfilt_width - 1
        out_height = in_height + add_bottom + trfilt_height - 1

        attrs = {
            'pad_left': trfilt_width - 1,
            'pad_right': add_right,
            'pad_top': trfilt_height - 1,
            'pad_bottom': add_bottom,
            'in_width': in_width,
            'in_height': in_height,
            'out_width': out_width,
            'out_height': out_height,
            'n_chan': node.get_attr('n_chan'),
            'data_format': node.get_attr('data_format', 'channels_last')
        }

        # Switch Conv2DTranspose to 'valid' now that padding is explicit.
        # NOTE(review): the original author flagged this as technically
        # untrue -- verify against Keras 'same' semantics.
        node.set_attr('padding', 'valid')
        node.set_attr('in_width', out_width)
        node.set_attr('in_height', out_height)
        node.set_attr('pad_left', pad_left + (trfilt_width - 1) * stride_width)
        node.set_attr('pad_top', pad_top + (trfilt_height - 1) * stride_height)

        # Insert the new ZeroPadding2D node above the Conv2DTranspose.
        padding_layer = model.make_node('ZeroPadding2D', 'zp2d_' + node.name, attrs, node.inputs.copy())
        padding_layer.get_output_variable().type.precision = node.get_input_variable().type.precision
        model.insert_node(padding_layer, before=node)

        return True
212+

0 commit comments

Comments
 (0)