Commit 10f648c

Merge remote-tracking branch 'upstream/main' into hw_opt_p2
2 parents 2ed0865 + 2898ab2 commit 10f648c

File tree: 76 files changed, +2743 / -657 lines changed


.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ generator:
   stage: generate
   image: python:3.8-alpine
   variables:
-    N_TESTS_PER_YAML: 5
+    N_TESTS_PER_YAML: 4
   tags:
     - k8s-default
   before_script:

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
@@ -2,7 +2,7 @@ exclude: (^hls4ml\/templates\/(vivado|quartus)\/(ap_types|ac_types)\/|^test/pyte

 repos:
   - repo: https://github.com/psf/black
-    rev: 24.4.2
+    rev: 24.8.0
     hooks:
       - id: black
         language_version: python3
@@ -30,7 +30,7 @@ repos:
         args: ["--profile", "black", --line-length=125]

   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.2
+    rev: v3.17.0
     hooks:
       - id: pyupgrade
         args: ["--py36-plus"]
@@ -41,7 +41,7 @@ repos:
       - id: setup-cfg-fmt

   - repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+    rev: 7.1.1
     hooks:
       - id: flake8
         exclude: docs/conf.py

Jenkinsfile

Lines changed: 4 additions & 3 deletions
@@ -1,7 +1,7 @@
 pipeline {
     agent {
         docker {
-            image 'vivado-el7:3'
+            image 'vivado-alma9:1'
             args '-v /data/Xilinx:/data/Xilinx'
         }
     }
@@ -14,8 +14,9 @@ pipeline {
         steps {
             dir(path: 'test') {
                 sh '''#!/bin/bash --login
-                conda activate hls4ml-py38
-                pip install tensorflow pyparsing
+                conda activate hls4ml-py310
+                conda install -y jupyterhub pydot graphviz pytest pytest-cov
+                pip install pytest-randomly jupyter onnx>=1.4.0 matplotlib pandas seaborn pydigitalwavetools==1.1 pyyaml tensorflow==2.14 qonnx torch git+https://github.com/google/qkeras.git pyparsing
                 pip install -U ../ --user
                 ./convert-keras-models.sh -x -f keras-models.txt
                 pip uninstall hls4ml -y'''

hls4ml/backends/catapult/passes/pointwise.py

Lines changed: 0 additions & 5 deletions
@@ -1,7 +1,5 @@
 from copy import copy

-import numpy as np
-
 from hls4ml.backends.catapult.passes.convolution_templates import (
     Conv1DConfigTemplate,
     Conv1DFunctionTemplate,
@@ -78,9 +76,6 @@ def match(self, node):
     def transform(self, model, node):
         dim = node.__class__.__name__[-2:]  # '1D' or '2D'
         pw_node = model.make_node('PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy())
-        if len(node.weights['weight'].data.shape) == 2:  # This can happen if we assign weights of Dense layer to 1x1 Conv2D
-            expand_axis = tuple(range(int(dim[0])))
-            pw_node.weights['weight'].data = np.expand_dims(node.weights['weight'].data, axis=expand_axis)
         pw_node.weights['bias'].data = node.weights['bias'].data
         # Set strategy to ensure lowercase string is passed to the template
         if model.config.is_resource_strategy(pw_node):
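The removed branch covered the case where a Dense layer's 2D kernel had been assigned to a 1x1 pointwise convolution. A minimal numpy sketch of what that reshaping did (toy shapes, illustrative only):

import numpy as np

# A Dense kernel of shape (n_in, n_out) assigned to a 1x1 PointwiseConv2D was
# padded with leading unit axes so it matched the conv weight layout.
dense_w = np.ones((16, 8))
dim = '2D'
expand_axis = tuple(range(int(dim[0])))  # int('2') -> axes (0, 1)
conv_w = np.expand_dims(dense_w, axis=expand_axis)
print(conv_w.shape)  # (1, 1, 16, 8)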

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 12 additions & 4 deletions
@@ -55,8 +55,6 @@ def __init__(self, name):
             Dense,
             Conv1D,
             Conv2D,
-            SeparableConv1D,
-            SeparableConv2D,
             Pooling1D,
             Pooling2D,
             GlobalPooling1D,
@@ -79,6 +77,16 @@ def __init__(self, name):
             attrs.append(ConfigurableAttribute('reuse_factor', default=1))
             self.attribute_map[layer] = attrs

+        # seperable is kind of special because it is effectively two layers that will be split
+        for layer in (SeparableConv1D, SeparableConv2D):
+            attrs = self.attribute_map.get(layer, [])
+            attrs.append(TypeAttribute('depthwise_accum'))
+            attrs.append(TypeAttribute('pointwise_accum'))
+            attrs.append(TypeAttribute('depthwise_result'))
+            attrs.append(ConfigurableAttribute('depthwise_reuse_factor', default=1))
+            attrs.append(ConfigurableAttribute('pointwise_reuse_factor', default=1))
+            self.attribute_map[layer] = attrs
+
         act_attrs = self.attribute_map.get(Activation, [])
         act_attrs.append(ConfigurableAttribute('table_size', default=1024))
         act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8)))
@@ -687,7 +695,7 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke

         The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
         to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-        the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+        the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
         we need to do this for every convolution layer.

         Args:
@@ -784,7 +792,7 @@ def generate_conv2d_line_buffer_fn(

         The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
         to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-        the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+        the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
         we need to do this for every convolution layer.

         Args:
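With SeparableConv1D/2D handled in their own attribute block, the depthwise and pointwise halves can be typed and parallelized independently. A hedged sketch of how this might surface in a user-side layer config; the exact config key spellings ('DepthwiseReuseFactor' etc.) and the layer name are assumptions, not confirmed by this commit:

# Hypothetical hls4ml layer config exercising the new per-phase attributes.
config = {
    'Model': {'Precision': 'ap_fixed<16,6>', 'ReuseFactor': 1},
    'LayerName': {
        'sep_conv1': {  # assumed layer name
            'Precision': {
                'depthwise_accum': 'ap_fixed<24,10>',  # new TypeAttribute
                'pointwise_accum': 'ap_fixed<24,10>',  # new TypeAttribute
                'depthwise_result': 'ap_fixed<16,6>',  # new TypeAttribute
            },
            'DepthwiseReuseFactor': 2,  # assumed spelling of depthwise_reuse_factor
            'PointwiseReuseFactor': 4,  # assumed spelling of pointwise_reuse_factor
        }
    },
}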

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 73 additions & 8 deletions
@@ -1,22 +1,34 @@
-from hls4ml.model.layers import Conv1D, Conv2D
+from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
 from hls4ml.model.optimizer import OptimizerPass
 from hls4ml.model.types import Source


 class GenerateConvIm2col(OptimizerPass):
     '''Generates tcode for im2col step of 1D/2d convolution'''

+    # Note, DepthwizeConv1D/2D also matches because it inherits from Conv1D/2D
     def match(self, node):
-        return isinstance(node, (Conv1D, Conv2D)) and node.model.config.get_config_value('IOType') == 'io_parallel'
+        return (
+            isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
+            and node.model.config.get_config_value('IOType') == 'io_parallel'
+        )

     def transform(self, model, node):
-        node_class = node.__class__.__name__
-        if '1D' in node_class:
-            self._generate_im2col_1d(node)
-        elif '2D' in node_class:
-            self._generate_im2col_2d(node)
+        node_class = node.class_name
+        if 'Separable' in node_class:
+            if '1D' in node_class:
+                self._generate_separable_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_separable_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
         else:
-            raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
+            if '1D' in node_class:
+                self._generate_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')

     def _generate_im2col_1d(self, node):
         code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
@@ -49,3 +61,56 @@ def _generate_im2col_2d(self, node):
         )

         node.set_attr('line_buffer_codegen', Source(code_str))
+
+    def _generate_separable_im2col_1d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=node.get_attr('filt_width'),
+            stride=node.get_attr('stride_width'),
+            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=1,
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
+
+    def _generate_separable_im2col_2d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
+            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
+            pad=(
+                node.get_attr('pad_top'),
+                node.get_attr('pad_bottom'),
+                node.get_attr('pad_left'),
+                node.get_attr('pad_right'),
+            ),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_output_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(1, 1),
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
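The two generated functions mirror the structure of a separable convolution: the depthwise im2col slides over the input, while the pointwise stage has kernel size 1 and indexes the depthwise output, which is why the '_pw' call above is built from the output variable's shape. A toy numpy sketch of the decomposition (illustrative only, not the generated HLS code):

import numpy as np

# 1D separable conv: per-channel depthwise filter, then 1x1 channel mixing.
W, C, K, F = 8, 3, 3, 5            # input width, channels, kernel size, filters
x = np.random.rand(W, C)
dw_kernel = np.random.rand(K, C)   # one K-tap filter per input channel
pw_kernel = np.random.rand(C, F)   # 1x1 conv == per-position matmul

out_w = W - K + 1                  # 'valid' padding, stride 1
# Depthwise stage: the window slides over the *input* width.
dw_out = np.stack([(x[i:i + K] * dw_kernel).sum(axis=0) for i in range(out_w)])
# Pointwise stage: operates position-by-position on the *depthwise output*.
pw_out = dw_out @ pw_kernel
print(dw_out.shape, pw_out.shape)  # (6, 3) (6, 5)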
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
+import numpy as np
+
+from hls4ml.backends import Backend
+from hls4ml.backends.template import FunctionCallTemplate
+from hls4ml.model.layers import Layer
+from hls4ml.model.optimizer import OptimizerPass
+from hls4ml.model.optimizer.passes.hgq_proxy_model import FixedPointQuantizer, UnaryLUT
+from hls4ml.model.types import Source
+
+
+def to_apfixed(k, b, i, RND, SAT):
+    u = 'u' if k == 0 else ''
+    return f'ap_{u}fixed<{b},{i},AP_{RND},AP_{SAT}>'
+
+
+def to_acfixed(k, b, i, RND, SAT):
+    k = 'false' if k == 0 else 'true'
+    return f'ac_fixed<{b},{i},{k},AC_{RND},AC_{SAT}>'
+
+
+def generate_mask_fn(
+    name: str, shape: tuple[int, ...], k: np.ndarray, b: np.ndarray, i: np.ndarray, RND: str, SAT: str, backend: str
+) -> str:
+    """Generate heterogenous quantization mask function, ONLY works for IOType=io_parallel"""
+    assert k.shape[0] == b.shape[0] == i.shape[0] == 1
+    assert backend.lower() in ('quartus', 'vivado', 'vitis'), f'Backend {backend} not tested'
+    Ks, Bs, Is = k[0], b[0], i[0]
+    Ks, Bs, Is = np.broadcast_to(Ks, shape), np.broadcast_to(Bs, shape), np.broadcast_to(Is, shape)
+    Ks, Bs, Is = Ks.ravel(), Bs.ravel(), Is.ravel()
+    masks = []
+    to_fixed = to_acfixed if backend.lower() == 'quartus' else to_apfixed
+    for idx, (k, b, i) in enumerate(zip(Ks, Bs, Is)):
+        if b == 0:
+            fn = f'out[{idx}] = 0;'
+        else:
+            fn = f'out[{idx}] = {to_fixed(k, b, i, RND, SAT)}(inp[{idx}]);'
+        masks.append(f'    {fn}')
+    body = "\n".join(masks)
+    mask_fn = f'''
+template<typename input_t, typename output_t>
+void {name}(input_t *inp, output_t *out) {{
+    #pragma HLS INLINE
+
+{body}
+}}
+'''
+    return mask_fn
+
+
+class ProcessFixedPointQuantizerLayer(OptimizerPass):
+    def match(self, node: Layer):
+        return isinstance(node, FixedPointQuantizer)
+
+    def transform(self, model, node: FixedPointQuantizer):
+        if node.fusible:
+            model.remove_node(node, rewire=True)
+            return True
+
+        if model.config.config['IOType'] != 'io_parallel':
+            raise NotImplementedError('Heterogenous quantization for activations is only supported with IOType=io_parallel')
+
+        backend = model.config.config['Backend']
+
+        name = node.name
+
+        assert node.mask_kbi is not None
+        k, b, i = node.mask_kbi
+        RND = node.RND
+        SAT = node.SAT
+        mask_fn: str = generate_mask_fn(name, node.get_input_variable().shape, k, b, i, RND, SAT, backend)
+
+        node.set_attr('mask_fn_codegen', Source(mask_fn))
+
+
+class ProcessFixedPointQuantizerCall(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__(FixedPointQuantizer, include_header=[])
+        self.template = 'nnet::{name}<{input_t}, {output_t}>({input}, {output});'
+
+    def format(self, node):
+        params = self._default_function_params(node)
+
+        return self.template.format(**params)
+
+
+class ProcessUnaryLUTCall(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__(UnaryLUT, include_header=[])
+        self.template = 'nnet::unary_lut<{input_t}, {output_t}, {config}>({input}, {output}, {table});'
+        self.include_header = [
+            'nnet_utils/nnet_activation.h',
+            'nnet_utils/nnet_activation_stream.h',
+        ]
+
+    def format(self, node):
+        params = self._default_function_params(node)
+        node.attributes['result_t'].precision = node.attributes['table_t'].precision
+        params['config'] = f'unary_lut_config{node.index}'
+        params['table'] = node.get_weights('table').name
+
+        return self.template.format(**params)
+
+
+def register_hgq_proxy_model(backend: Backend):
+    backend.register_pass('process_fixed_point_quantizer_layer', ProcessFixedPointQuantizerLayer)
+    backend.register_template(ProcessFixedPointQuantizerCall)
+    backend.register_template(ProcessUnaryLUTCall)
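For a concrete feel of what generate_mask_fn emits, a small sketch assuming the function above is in scope. The k/b/i arrays carry sign bit, total width, and integer bits per element (with a leading broadcast axis of size 1); b == 0 masks an element to zero:

import numpy as np

k = np.array([[1, 1]])  # sign bits
b = np.array([[8, 0]])  # total widths; the second element is masked to 0
i = np.array([[3, 3]])  # integer bits
print(generate_mask_fn('quantizer_0', (2,), k, b, i, 'RND', 'SAT', 'vivado'))
# The generated C++ body (vivado -> ap_fixed types) contains:
#     out[0] = ap_fixed<8,3,AP_RND,AP_SAT>(inp[0]);
#     out[1] = 0;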

hls4ml/backends/quartus/passes/core_templates.py

Lines changed: 2 additions & 1 deletion
@@ -1,6 +1,7 @@
 from hls4ml.backends.backend import get_backend
 from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
 from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax
+from hls4ml.model.optimizer.passes.hgq_proxy_model import UnaryLUT

 # Dense templates

@@ -152,7 +153,7 @@ def format(self, node):

 class ActivationConfigTemplate(LayerConfigTemplate):
     def __init__(self):
-        super().__init__((Activation, ParametrizedActivation, PReLU))
+        super().__init__((Activation, ParametrizedActivation, PReLU, UnaryLUT))
         self.template = activ_config_template

     def format(self, node):
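The tuple passed to super().__init__ is the set of layer classes this config template serves, so adding UnaryLUT routes its config generation through the existing activation template. A simplified stand-in (not the real hls4ml classes) showing just that dispatch:

# Toy illustration of class-based template matching.
class LayerConfigTemplate:
    def __init__(self, layer_classes):
        self.layer_classes = layer_classes  # classes this template handles

    def match(self, node):
        return isinstance(node, self.layer_classes)


class Activation: ...
class UnaryLUT: ...


tmpl = LayerConfigTemplate((Activation, UnaryLUT))
print(tmpl.match(UnaryLUT()))  # True: UnaryLUT now shares the activation config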

hls4ml/backends/quartus/passes/pointwise.py

Lines changed: 0 additions & 5 deletions
@@ -1,7 +1,5 @@
 from copy import copy

-import numpy as np
-
 from hls4ml.backends.fpga.fpga_layers import PointwiseConv1D, PointwiseConv2D
 from hls4ml.backends.quartus.passes.convolution_templates import (
     Conv1DConfigTemplate,
@@ -86,9 +84,6 @@ def transform(self, model, node):
         pw_node = model.make_node(
             'PointwiseConv' + dim, node.name, copy(node.attributes), node.inputs.copy(), outputs=node.outputs.copy()
         )
-        if len(node.weights['weight'].data.shape) == 2:  # This can happen if we assign weights of Dense layer to 1x1 Conv2D
-            expand_axis = tuple(range(int(dim[0])))
-            pw_node.weights['weight'].data = np.expand_dims(node.weights['weight'].data, axis=expand_axis)
         pw_node.weights['bias'].data = node.weights['bias'].data
         model.replace_node(node, pw_node)

hls4ml/backends/quartus/passes/recurrent_templates.py

Lines changed: 10 additions & 1 deletion
@@ -66,6 +66,7 @@
     using activation_recr = nnet::activation::{recurrent_activation}<x_T, y_T, config_T>;

     static const unsigned reuse_factor = {reuse};
+    static const unsigned pytorch_order = {pytorch};
     static const bool store_weights_in_bram = false;
 }};\n'''

@@ -92,6 +93,7 @@ def format(self, node):
         params['config_mult_h'] = f'config{node.index}_h_mult'
         params['act_t'] = '{}_config{}'.format(node.get_attr('activation'), str(node.index) + '_act')
         params['act_recurrent_t'] = '{}_config{}'.format(node.get_attr('recurrent_activation'), str(node.index) + '_rec_act')
+        params['pytorch'] = 'true' if node.get_attr('pytorch', False) else 'false'
         gru_config = self.gru_template.format(**params)

         # Activation is on candidate hidden state, dimensionality (1, n_units)
@@ -256,6 +258,9 @@ def format(self, node):
 }};\n"""

 simple_rnn_function_template = 'nnet::simple_rnn<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+simple_rnn_pytorch_function_template = (
+    'nnet::simple_rnn_pytorch<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+)


 class SimpleRNNConfigTemplate(LayerConfigTemplate):
@@ -301,5 +306,9 @@ def __init__(self):

     def format(self, node):
         params = self._default_function_params(node)
-        params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
+        if node.get_attr('pytorch', False):
+            self.template = simple_rnn_pytorch_function_template
+            params['weights'] = 'w{0}, wr{0}, b{0}, br{0}'.format(str(node.index))
+        else:
+            params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
         return self.template.format(**params)
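The switch selects the simple_rnn_pytorch variant for models converted from PyTorch, whose RNNs carry a separate recurrent bias (bias_hh) that Keras folds away, hence the extra br weight. A short sketch of the two formatted calls, with assumed parameter values:

# Reproduces the two template strings from the diff and formats them.
simple_rnn_function_template = 'nnet::simple_rnn<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
simple_rnn_pytorch_function_template = (
    'nnet::simple_rnn_pytorch<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
)

params = {'input_t': 'input_t', 'output_t': 'result_t', 'config': 'config4', 'input': 'x', 'output': 'h'}
for pytorch in (False, True):
    template = simple_rnn_pytorch_function_template if pytorch else simple_rnn_function_template
    weights = 'w4, wr4, b4, br4' if pytorch else 'w4, wr4, b4'
    print(template.format(**params, weights=weights))
# nnet::simple_rnn<input_t, result_t, config4>(x, h, w4, wr4, b4);
# nnet::simple_rnn_pytorch<input_t, result_t, config4>(x, h, w4, wr4, b4, br4);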
