Commit 5299392
Merge branch 'main' into GRUv1
2 parents: 774072f + 4f4b164

24 files changed: +741, -288 lines

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
@@ -30,7 +30,7 @@ repos:
         args: ["--profile", "black", --line-length=125]

 -   repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.2
+    rev: v3.16.0
     hooks:
     -   id: pyupgrade
         args: ["--py36-plus"]
@@ -41,7 +41,7 @@ repos:
     -   id: setup-cfg-fmt

 -   repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+    rev: 7.1.0
     hooks:
     -   id: flake8
         exclude: docs/conf.py

Jenkinsfile

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,7 @@
 pipeline {
     agent {
         docker {
-            image 'vivado-el7:3'
+            image 'vivado-alma9:1'
             args '-v /data/Xilinx:/data/Xilinx'
         }
     }
@@ -14,8 +14,9 @@ pipeline {
         steps {
             dir(path: 'test') {
                 sh '''#!/bin/bash --login
-                conda activate hls4ml-py38
-                pip install tensorflow pyparsing
+                conda activate hls4ml-py310
+                conda install -y jupyterhub pydot graphviz pytest pytest-cov
+                pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/google/qkeras.git pyparsing
                 pip install -U ../ --user
                 ./convert-keras-models.sh -x -f keras-models.txt
                 pip uninstall hls4ml -y'''

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 2 additions & 2 deletions
@@ -685,7 +685,7 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke

     The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
     to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-    the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+    the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
     we need to do this for every convolution layer.

     Args:
@@ -782,7 +782,7 @@ def generate_conv2d_line_buffer_fn(

     The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
     to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-    the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+    the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
     we need to do this for every convolution layer.

     Args:
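For orientation, the "trick" the docstring describes is unrolling the im2col gather at code-generation time. A minimal NumPy sketch of the underlying im2col transformation (an illustration of the concept only, not the HLS code these functions emit):

    import numpy as np

    def im2col_1d(x, kernel=3, stride=1, pad=(1, 1)):
        """x has shape (in_W, in_C); returns (out_W, kernel * in_C)."""
        x = np.pad(x, (pad, (0, 0)))  # zero-pad along the width axis only
        out_w = (x.shape[0] - kernel) // stride + 1
        # Each output position gathers the flattened input window it needs, so the
        # convolution becomes one matrix product:
        #     im2col_1d(x) @ weights.reshape(kernel * in_C, n_filt)
        return np.stack([x[i * stride : i * stride + kernel].ravel() for i in range(out_w)])

Because the gathered indices depend only on the layer parameters (input size, kernel size, stride, padding), the backend can emit them as straight-line code once per layer.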

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 72 additions & 8 deletions
@@ -1,4 +1,4 @@
-from hls4ml.model.layers import Conv1D, Conv2D
+from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
 from hls4ml.model.optimizer import OptimizerPass
 from hls4ml.model.types import Source

@@ -7,16 +7,27 @@ class GenerateConvIm2col(OptimizerPass):
     '''Generates tcode for im2col step of 1D/2d convolution'''

     def match(self, node):
-        return isinstance(node, (Conv1D, Conv2D)) and node.model.config.get_config_value('IOType') == 'io_parallel'
+        return (
+            isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
+            and node.model.config.get_config_value('IOType') == 'io_parallel'
+        )

     def transform(self, model, node):
-        node_class = node.__class__.__name__
-        if '1D' in node_class:
-            self._generate_im2col_1d(node)
-        elif '2D' in node_class:
-            self._generate_im2col_2d(node)
+        node_class = node.class_name
+        if 'Separable' in node_class:
+            if '1D' in node_class:
+                self._generate_separable_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_separable_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
         else:
-            raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
+            if '1D' in node_class:
+                self._generate_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')

     def _generate_im2col_1d(self, node):
         code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
@@ -49,3 +60,56 @@ def _generate_im2col_2d(self, node):
         )

         node.set_attr('line_buffer_codegen', Source(code_str))
+
+    def _generate_separable_im2col_1d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=node.get_attr('filt_width'),
+            stride=node.get_attr('stride_width'),
+            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=1,
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
+
+    def _generate_separable_im2col_2d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
+            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
+            pad=(
+                node.get_attr('pad_top'),
+                node.get_attr('pad_bottom'),
+                node.get_attr('pad_left'),
+                node.get_attr('pad_right'),
+            ),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_output_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(1, 1),
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
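The two generated buffer-fill functions per layer reflect how a separable convolution factorizes: a depthwise pass (one kernel per input channel, the '_dw' buffer) followed by a 1x1 pointwise pass that mixes channels (the '_pw' buffer), which is why the pointwise call above uses the output width and kernel=1. A NumPy sketch of the decomposition (illustrative shapes only; no padding, stride 1):

    import numpy as np

    def separable_conv1d(x, dw_w, pw_w):
        """x: (in_W, in_C); dw_w: (filt_W, in_C); pw_w: (in_C, n_filt)."""
        filt_w = dw_w.shape[0]
        out_w = x.shape[0] - filt_w + 1
        # Depthwise: each channel is convolved with its own kernel.
        dw_out = np.stack([(x[i : i + filt_w] * dw_w).sum(axis=0) for i in range(out_w)])
        # Pointwise: a kernel-size-1 convolution is just a channel-mixing matmul.
        return dw_out @ pw_w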

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 2 additions & 2 deletions
@@ -274,7 +274,7 @@ def init_conv1d(self, layer):
         # - combination - at compile-time, the decision between Winograd and im2col is made
         # - im2col - specifically use im2col
         # - Winograd - use Winograd, if possible
-        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'combination'))
+        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'im2col'))

         layer.set_attr(
             'n_partitions', 1
@@ -305,7 +305,7 @@ def init_conv2d(self, layer):
         # - combination - at compile-time, the decision between Winograd and im2col is made
         # - im2col - specifically use im2col
         # - Winograd - use Winograd, if possible
-        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'combination'))
+        layer.set_attr('implementation', layer.model.config.get_layer_config_value(layer, 'Implementation', 'im2col'))

         layer.set_attr(
             'n_partitions', 1
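With the Quartus default switched from 'combination' to 'im2col', Winograd now has to be requested explicitly through the layer config. A sketch of how a user might opt back in (the layer name 'conv1' and the model file are hypothetical; granularity='name' is what exposes the per-layer keys):

    import hls4ml
    from tensorflow import keras

    model = keras.models.load_model('model.h5')  # hypothetical model file
    config = hls4ml.utils.config_from_keras_model(model, granularity='name')
    # Accepted values, per the comment above: 'im2col', 'Winograd', 'combination'
    config['LayerName']['conv1']['Implementation'] = 'Winograd'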

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 21 additions & 6 deletions
@@ -254,8 +254,8 @@ def __init__(self):
             '{input}, {output}, {d}, {p}, {z}, {b});'
         )

-sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
-sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']
+sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
+sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']


 class SeparableConv1DConfigTemplate(LayerConfigTemplate):
@@ -286,7 +286,10 @@ def format(self, node):
         params['index'] = str(node.index) + '_depthwise'
         params['weight_t'] = node.get_weights('depthwise').type
         params['bias_t'] = node.get_weights('zero_bias').type
-        params['fill_fn'] = 'FillConv1DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_dw'
+        else:
+            params['fill_fn'] = 'FillConv1DBuffer'

         if node.get_attr('unscaled'):
             params['scale_index_type'] = 'scale_index_unscaled'
@@ -317,13 +320,17 @@ def format(self, node):

         params['filt_width'] = 1
         params['stride_width'] = 1
+        params['pad_left'] = params['pad_right'] = 0
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('pointwise').nzeros
         params['index'] = str(node.index) + '_pointwise'
         params['weight_t'] = node.get_weights('pointwise').type
         params['min_width'] = params['in_width']
         params['instructions'] = '0'
-        params['fill_fn'] = 'FillConv1DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_pw'
+        else:
+            params['fill_fn'] = 'FillConv1DBuffer'

         if node.get_attr('unscaled'):
             params['scale_index_type'] = 'scale_index_unscaled'
@@ -402,7 +409,10 @@ def format(self, node):
         params['nzeros'] = node.get_weights('depthwise').nzeros
         params['index'] = str(node.index) + '_depthwise'
         params['weight_t'] = node.get_weights('depthwise').type
-        params['fill_fn'] = 'FillConv2DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_dw'
+        else:
+            params['fill_fn'] = 'FillConv2DBuffer'

         if node.get_attr('unscaled_h'):
             params['scale_index_height_type'] = 'scale_index_unscaled'
@@ -440,14 +450,19 @@ def format(self, node):

         params['filt_height'] = params['filt_width'] = 1
         params['stride_height'] = params['stride_width'] = 1
+        params['pad_left'] = params['pad_right'] = 0
+        params['pad_top'] = params['pad_bottom'] = 0
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('pointwise').nzeros
         params['index'] = str(node.index) + '_pointwise'
         params['weight_t'] = node.get_weights('pointwise').type
         params['min_height'] = params['in_height']
         params['min_width'] = params['in_width']
         params['instructions'] = '0'
-        params['fill_fn'] = 'FillConv2DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_pw'
+        else:
+            params['fill_fn'] = 'FillConv2DBuffer'

         if node.get_attr('unscaled_h'):
             params['scale_index_height_type'] = 'scale_index_unscaled'
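Note how the io_parallel branches wire these templates to the codegen pass above: GenerateConvIm2col passes the layer index plus a '_dw' or '_pw' suffix into the line-buffer generator, and the f-strings here must reproduce exactly the resulting function name. A sketch of the implied naming contract (illustrative, not the actual generator):

    def fill_fn_name(layer_index: int, phase: str) -> str:
        """phase is 'dw' (depthwise) or 'pw' (pointwise)."""
        assert phase in ('dw', 'pw')
        return f'fill_buffer_{layer_index}_{phase}'  # e.g. 'fill_buffer_4_dw'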

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 44 additions & 35 deletions
@@ -18,8 +18,6 @@
     Embedding,
     GarNet,
     GarNetStack,
-    GlobalPooling1D,
-    GlobalPooling2D,
     Layer,
     Pooling1D,
     Pooling2D,
@@ -31,7 +29,6 @@
 from hls4ml.model.optimizer import get_backend_passes, layer_optimizer
 from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, PackedType
 from hls4ml.report import parse_vivado_report
-from hls4ml.utils.fixed_point_utils import ceil_log2


 class VivadoBackend(FPGABackend):
@@ -295,9 +292,20 @@ def init_sepconv1d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')

-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_width = layer.get_output_variable().shape[0]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(1, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}".'
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

         # Set the output type of the depthwise phase
@@ -350,9 +358,21 @@ def init_sepconv2d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')

-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_height = layer.get_output_variable().shape[0]
+        out_width = layer.get_output_variable().shape[1]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}".'
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_height * out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

         # Set the output type of the depthwise phase
@@ -373,42 +393,31 @@ def init_depconv2d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')

-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
-        layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
+        out_height = layer.get_output_variable().shape[0]
+        out_width = layer.get_output_variable().shape[1]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}".'
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_height * out_width // closest_pf)

-    def _set_pooling_accum_t(self, layer, pool_size):
-        extra_bits = ceil_log2(pool_size)
-        accum_t = layer.get_attr('accum_t')
-        accum_t.precision.width += extra_bits * 2
-        if isinstance(accum_t.precision, FixedPrecisionType):
-            accum_t.precision.integer += extra_bits
+        layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

     @layer_optimizer(Pooling1D)
     def init_pooling1d(self, layer):
-        pool_size = layer.get_attr('pool_width')
-        self._set_pooling_accum_t(layer, pool_size)
-
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

     @layer_optimizer(Pooling2D)
     def init_pooling2d(self, layer):
-        pool_size = layer.get_attr('pool_height') * layer.get_attr('pool_width')
-        self._set_pooling_accum_t(layer, pool_size)
-
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())

-    @layer_optimizer(GlobalPooling1D)
-    def init_global_pooling1d(self, layer):
-        pool_size = layer.get_attr('n_in')
-        self._set_pooling_accum_t(layer, pool_size)
-
-    @layer_optimizer(GlobalPooling2D)
-    def init_global_pooling2d(self, layer):
-        pool_size = layer.get_attr('in_height') * layer.get_attr('in_width')
-        self._set_pooling_accum_t(layer, pool_size)
-
     @layer_optimizer(Softmax)
     def init_softmax(self, layer):
         if layer.model.config.get_config_value('IOType') == 'io_parallel':
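The ParallelizationFactor block repeated in the three initializers above enforces one rule: n_partitions must evenly divide the number of output pixels, so an invalid user-chosen factor is snapped to the nearest valid one. A standalone sketch of the assumed semantics (get_valid_conv_partition_splits and get_closest_reuse_factor are the real helpers; this divisor-based model is an approximation for illustration):

    def n_partitions(out_height: int, out_width: int, pf: int) -> int:
        total = out_height * out_width
        valid = [d for d in range(1, total + 1) if total % d == 0]
        if pf not in valid:
            pf = min(valid, key=lambda v: abs(v - pf))  # snap to the closest valid factor
        return total // pf

    assert n_partitions(4, 4, 8) == 2  # pf=8 is valid for 16 output pixels
    assert n_partitions(4, 4, 5) == 4  # pf=5 snaps to a nearby divisor (4)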
