
Commit 870012d

Merge remote-tracking branch 'upstream/main' into conv_tr_parallel
2 parents: 2156a93 + 563c84c


58 files changed (+3596 / -823 lines)

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ Note: Please delete options that are not relevant.
 
 ## Checklist
 
-- [ ] I have read the [guidelines for contributing](https://github.com/fastmachinelearning/hls4ml/blob/master/CONTRIBUTING.md).
+- [ ] I have read the [guidelines for contributing](https://github.com/fastmachinelearning/hls4ml/blob/main/CONTRIBUTING.md).
 - [ ] I have commented my code, particularly in hard-to-understand areas.
 - [ ] I have made corresponding changes to the documentation.
 - [ ] My changes generate no new warnings.

.github/workflows/build-sphinx.yml

Lines changed: 2 additions & 2 deletions
@@ -2,7 +2,7 @@ name: build-sphinx
 on:
   push:
     branches:
-      - master
+      - main
 
 jobs:
   build:
@@ -30,4 +30,4 @@ jobs:
       with:
         branch: gh-pages
         directory: gh-pages
-        github_token: ${{ secrets.PERSONAL_TOKEN }}
+        github_token: ${{ secrets.PERSONAL_TOKEN }}

hls4ml/backends/fpga/fpga_types.py

Lines changed: 11 additions & 0 deletions
@@ -258,6 +258,13 @@ def definition_cpp(self, name_suffix='', as_reference=False):
         else: # Declaration
             return 'hls::stream<{type}> {name}{suffix}("{name}")'.format(type=self.type.name, name=self.name, suffix=name_suffix)
 
+class QuartusStreamVariableDefinition(VariableDefinition):
+    def definition_cpp(self, name_suffix='', as_reference=False):
+        if as_reference: # Function parameter
+            return 'stream<{type}> &{name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix)
+        else: # Declaration
+            return 'stream<{type}> {name}{suffix}'.format(type=self.type.name, name=self.name, suffix=name_suffix)
+
 class StreamVariableConverter(object):
     def __init__(self, type_converter, prefix, definition_cls):
         self.type_converter = type_converter
@@ -280,6 +287,10 @@ class VivadoStreamVariableConverter(StreamVariableConverter):
     def __init__(self, type_converter):
         super().__init__(type_converter=type_converter, prefix='Vivado', definition_cls=VivadoStreamVariableDefinition)
 
+class QuartusStreamVariableConverter(StreamVariableConverter):
+    def __init__(self, type_converter):
+        super().__init__(type_converter=type_converter, prefix='Quartus', definition_cls=QuartusStreamVariableDefinition)
+
 #endregion
 
 #region InplaceVariable
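For a quick sanity check, the strings the new QuartusStreamVariableDefinition emits can be reproduced by hand; the layer name and type below are hypothetical:

# Minimal sketch (hypothetical layer name/type) of the strings
# QuartusStreamVariableDefinition.definition_cpp() produces.
ref_tpl  = 'stream<{type}> &{name}{suffix}'   # as_reference=True  -> function parameter
decl_tpl = 'stream<{type}> {name}{suffix}'    # as_reference=False -> local declaration
print(ref_tpl.format(type='layer2_t', name='layer2_out', suffix=''))   # stream<layer2_t> &layer2_out
print(decl_tpl.format(type='layer2_t', name='layer2_out', suffix=''))  # stream<layer2_t> layer2_out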

hls4ml/backends/vivado/passes/clone.py renamed to hls4ml/backends/fpga/passes/clone.py

Lines changed: 0 additions & 1 deletion
@@ -3,7 +3,6 @@
 from hls4ml.model.optimizer import OptimizerPass
 
 from hls4ml.model.layers import Layer, register_layer
-from hls4ml.backends import get_backend
 from hls4ml.backends.template import FunctionCallTemplate
 
 class Clone(Layer):

hls4ml/backends/quartus/passes/core_templates.py

Lines changed: 3 additions & 3 deletions
@@ -36,7 +36,7 @@
 
 dense_function_template = 'nnet::dense_{strategy}<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {b});'
 
-dense_include_list = ['nnet_utils/nnet_dense.h', 'nnet_utils/nnet_dense_compressed.h']
+dense_include_list = ['nnet_utils/nnet_dense.h', 'nnet_utils/nnet_dense_compressed.h', 'nnet_utils/nnet_dense_stream.h']
 
 class DenseConfigTemplate(LayerConfigTemplate):
     def __init__(self):
@@ -80,7 +80,7 @@ def format(self, node):
 
 batchnorm_function_template = 'nnet::normalize<{input_t}, {output_t}, {config}>({input}, {output}, {scale}, {bias});'
 
-batchnorm_include_list = ['nnet_utils/nnet_batchnorm.h']
+batchnorm_include_list = ['nnet_utils/nnet_batchnorm.h', 'nnet_utils/nnet_batchnorm_stream.h']
 
 class BatchNormalizationConfigTemplate(LayerConfigTemplate):
     def __init__(self):
@@ -130,7 +130,7 @@ def format(self, node):
 activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
 param_activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {param}, {output});'
 
-activ_include_list = ['nnet_utils/nnet_activation.h']
+activ_include_list = ['nnet_utils/nnet_activation.h', 'nnet_utils/nnet_activation_stream.h']
 
 class ActivationConfigTemplate(LayerConfigTemplate):
     def __init__(self):
Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
+from hls4ml.backends.backend import get_backend
+from hls4ml.model.layers import Concatenate, Dot, Merge
+from hls4ml.backends.template import LayerConfigTemplate, FunctionCallTemplate
+
+# TODO - Very similar to vivado/merge_templates.py - only difference is on line 67: get_backend('vivado').product_type(inp1.type.precision, inp2.type.precision)
+# TODO - Look into ways of sharing similar passes across many backends in a shared folder through inheritance and overriding.
+
+# Merge templates
+merge_config_template = """struct config{index} : nnet::merge_config {{
+    static const unsigned n_elem = {n_elem};
+}};\n"""
+
+merge_function_template = 'nnet::{merge}<{input1_t}, {input2_t}, {output_t}, {config}>({input1}, {input2}, {output});'
+merge_include_list = ['nnet_utils/nnet_merge.h', 'nnet_utils/nnet_merge_stream.h']
+
+class MergeConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(Merge)
+        self.template = merge_config_template
+
+    def format(self, node):
+        params = self._default_config_params(node)
+        params['n_elem'] = node.get_input_variable(node.inputs[0]).size_cpp()
+
+        return self.template.format(**params)
+
+class MergeFunctionTemplate(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__((Merge, Concatenate, Dot), include_header=merge_include_list)
+        self.template = merge_function_template
+
+    def format(self, node):
+        params = {}
+        params['merge'] = node.get_attr('op').lower()
+        params['config'] = 'config{}'.format(node.index)
+        params['input1_t'] = node.get_input_variable(node.inputs[0]).type.name
+        params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+        params['output_t'] = node.get_output_variable().type.name
+        params['input1'] = node.get_input_variable(node.inputs[0]).name
+        params['input2'] = node.get_input_variable(node.inputs[1]).name
+        params['output'] = node.get_output_variable().name
+
+        return self.template.format(**params)
+
+
+# Dot templates
+dot_config_template = """struct config{index} : nnet::dot_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned n_out = {n_out};
+
+    static const unsigned reuse_factor = {reuse};
+
+    typedef {accum_t.name} accum_t;
+
+    template<class x_T, class y_T>
+    using product = nnet::product::{product_type}<x_T, y_T>;
+}};\n"""
+
+class DotConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(Dot)
+        self.template = dot_config_template
+
+    def format(self, node):
+        inp1 = node.get_input_variable(node.inputs[0])
+        inp2 = node.get_input_variable(node.inputs[1])
+        params = self._default_config_params(node)
+        params['n_out'] = 1
+        params['n_in'] = inp1.shape[0]
+        params['product_type'] = get_backend('quartus').product_type(inp1.type.precision, inp2.type.precision)
+
+        return self.template.format(**params)
+
+
+# Concatenate templates
+concat_config_template = """struct config{index} : nnet::concat_config {{
+    static const unsigned n_elem1_0 = {n_elem1_0};
+    static const unsigned n_elem1_1 = {n_elem1_1};
+    static const unsigned n_elem1_2 = {n_elem1_2};
+    static const unsigned n_elem2_0 = {n_elem2_0};
+    static const unsigned n_elem2_1 = {n_elem2_1};
+    static const unsigned n_elem2_2 = {n_elem2_2};
+
+    static const int axis = {axis};
+}};\n"""
+
+class ConcatenateConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(Concatenate)
+        self.template = concat_config_template
+
+    def format(self, node):
+        params = self._default_config_params(node)
+        for i in range(3):
+            params.setdefault('n_elem1_{}'.format(i), 0)
+            params.setdefault('n_elem2_{}'.format(i), 0)
+        inp1 = node.get_input_variable(node.inputs[0])
+        inp2 = node.get_input_variable(node.inputs[1])
+        for i, (s1, s2) in enumerate(zip(inp1.shape, inp2.shape)):
+            params['n_elem1_{}'.format(i)] = s1
+            params['n_elem2_{}'.format(i)] = s2
+
+        return self.template.format(**params)
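As an illustration of how these templates are instantiated, the snippet below formats dot_config_template (the string defined in the file above) with made-up values; the index, widths, precision and product type are all hypothetical:

# Rough sketch (all values hypothetical) of what dot_config_template expands to
# for a Dot layer joining two 16-element inputs.
from types import SimpleNamespace
print(dot_config_template.format(
    index=4, n_in=16, n_out=1, reuse=1,
    accum_t=SimpleNamespace(name='ac_fixed<33,17,true>'),
    product_type='mult'))
# struct config4 : nnet::dot_config {
#     static const unsigned n_in = 16;
#     static const unsigned n_out = 1;
#
#     static const unsigned reuse_factor = 1;
#
#     typedef ac_fixed<33,17,true> accum_t;
#
#     template<class x_T, class y_T>
#     using product = nnet::product::mult<x_T, y_T>;
# };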
Lines changed: 133 additions & 0 deletions
@@ -0,0 +1,133 @@
+from hls4ml.backends.backend import get_backend
+from hls4ml.model.layers import GRU
+from hls4ml.backends.template import LayerConfigTemplate, FunctionCallTemplate
+
+recurrent_include_list = ['nnet_utils/nnet_recurrent.h', 'nnet_utils/nnet_recurrent_stream.h']
+
+# Shared Matrix Multiplication Template (Dense)
+recr_mult_config_template = '''struct config{index}_mult : nnet::dense_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned n_out = {n_out};
+
+    static const unsigned rf_pad = {rfpad};
+    static const unsigned bf_pad = {bfpad};
+    static const unsigned reuse_factor = {reuse};
+    static const unsigned reuse_factor_rounded = reuse_factor + rf_pad;
+    static const unsigned block_factor = DIV_ROUNDUP(n_in*n_out, reuse_factor);
+    static const unsigned block_factor_rounded = block_factor + bf_pad;
+    static const unsigned multiplier_factor = MIN(n_in, reuse_factor);
+    static const unsigned multiplier_limit = DIV_ROUNDUP(n_in*n_out, multiplier_factor);
+    static const unsigned multiplier_scale = multiplier_limit/n_out;
+    typedef {accum_t.name} accum_t;
+    typedef {bias_t.name} bias_t;
+    typedef {weight_t.name} weight_t;
+
+    template<class x_T, class y_T>
+    using product = nnet::product::{product_type}<x_T, y_T>;
+}};\n'''
+
+# Activation Template
+activ_config_template = '''struct {type}_config{index} : nnet::activ_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned table_size = {table_size};
+    static const unsigned io_type = nnet::{iotype};
+    static const unsigned reuse_factor = {reuse};
+}};\n'''
+
+# GRU Template
+gru_config_template = '''struct config{index} : nnet::gru_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned n_out = {n_out};
+    static const unsigned n_units = {n_units};
+    static const unsigned n_timesteps = {n_timesteps};
+    static const unsigned n_outputs = {n_outputs};
+    static const bool return_sequences = {return_sequences};
+
+    typedef {accum_t.name} accum_t;
+    typedef {weight_t.name} weight_t;
+    typedef {bias_t.name} bias_t;
+
+    typedef {config_mult_x} mult_config_x;
+    typedef {config_mult_h} mult_config_h;
+
+    typedef {act_t} ACT_CONFIG_T;
+    template<class x_T, class y_T, class config_T>
+    using activation = nnet::activation::{activation}<x_T, y_T, config_T>;
+
+    typedef {act_recurrent_t} ACT_CONFIG_RECURRENT_T;
+    template<class x_T, class y_T, class config_T>
+    using activation_recr = nnet::activation::{recurrent_activation}<x_T, y_T, config_T>;
+
+    static const unsigned reuse_factor = {reuse};
+    static const bool store_weights_in_bram = false;
+}};\n'''
+
+gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
+
+class GRUConfigTemplate(LayerConfigTemplate):
+    def __init__(self):
+        super().__init__(GRU)
+        self.gru_template = gru_config_template
+        self.act_template = activ_config_template
+        self.recr_act_template = activ_config_template
+        self.mult_x_template = recr_mult_config_template
+        self.mult_h_template = recr_mult_config_template
+
+    def format(self, node):
+        # Input has shape (n_timesteps, inp_dimensionality)
+        # Output / hidden units has shape (1 if !return_sequences else n_timesteps, n_units)
+        params = self._default_config_params(node)
+        params['n_units'] = node.get_attr('n_out')
+        params['n_outputs'] = node.get_attr('n_timesteps') if node.get_attr('return_sequences', False) else '1'
+        params['return_sequences'] = 'true' if node.get_attr('return_sequences', False) else 'false'
+        params['config_mult_x'] = 'config{}_x_mult'.format(node.index)
+        params['config_mult_h'] = 'config{}_h_mult'.format(node.index)
+        params['act_t'] = '{}_config{}'.format(node.get_attr('activation'), str(node.index) + '_act')
+        params['act_recurrent_t'] = '{}_config{}'.format(node.get_attr('recurrent_activation'), str(node.index) + '_rec_act')
+        gru_config = self.gru_template.format(**params)
+
+        # Activation is on the candidate hidden state, dimensionality (1, n_units)
+        act_params = self._default_config_params(node)
+        act_params['type'] = node.get_attr('activation')
+        act_params['n_in'] = node.get_attr('n_out')
+        act_params['index'] = str(node.index) + '_act'
+        act_config = self.act_template.format(**act_params)
+
+        # Recurrent activation is on the reset and update gates (therefore x2), dimensionality (1, n_units)
+        recr_act_params = self._default_config_params(node)
+        recr_act_params['type'] = node.get_attr('recurrent_activation')
+        recr_act_params['n_in'] = str(node.get_attr('n_out')) + ' * 2'
+        recr_act_params['index'] = str(node.index) + '_rec_act'
+        recr_act_config = self.recr_act_template.format(**recr_act_params)
+
+        # Multiplication config for matrix multiplications of type Wx (reset, update and candidate states)
+        mult_params_x = self._default_config_params(node)
+        mult_params_x['n_in'] = node.get_attr('n_in')
+        mult_params_x['n_out'] = str(node.get_attr('n_out')) + ' * 3'
+        mult_params_x['product_type'] = get_backend('quartus').product_type(node.get_input_variable().type.precision, node.get_weights('weight').type.precision)
+        mult_params_x['index'] = str(node.index) + '_x'
+        mult_config_x = self.mult_x_template.format(**mult_params_x)
+
+        # Multiplication config for matrix multiplications of type Wh (reset, update and candidate states)
+        mult_params_h = self._default_config_params(node)
+        mult_params_h['n_in'] = node.get_attr('n_out')
+        mult_params_h['n_out'] = str(node.get_attr('n_out')) + ' * 3'
+        mult_params_h['reuse_factor'] = params['recurrent_reuse_factor']
+        mult_params_h['product_type'] = get_backend('quartus').product_type(node.get_input_variable().type.precision, node.get_weights('recurrent_weight').type.precision)
+        mult_params_h['index'] = str(node.index) + '_h'
+        mult_config_h = self.mult_h_template.format(**mult_params_h)
+
+        return mult_config_x + '\n' + mult_config_h + '\n' + recr_act_config + '\n' + act_config + '\n' + gru_config
+
+class GRUFunctionTemplate(FunctionCallTemplate):
+    def __init__(self):
+        super().__init__(GRU, include_header=recurrent_include_list)
+        self.template = gru_function_template
+
+    def format(self, node):
+        params = self._default_function_params(node)
+        params['w'] = node.get_weights('weight').name
+        params['b'] = node.get_weights('bias').name
+        params['wr'] = node.get_weights('recurrent_weight').name
+        params['br'] = node.get_weights('recurrent_bias').name
+        return self.template.format(**params)
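To make the output of GRUConfigTemplate.format() easier to follow, the sketch below lists the struct names it stitches together for a hypothetical GRU node (index 3, activation='tanh', recurrent_activation='sigmoid'):

# Sketch (hypothetical node attributes) of the config struct names emitted above.
index = 3
emitted = [
    'config{}_x_mult'.format(index),           # dense config for the Wx multiplications
    'config{}_h_mult'.format(index),           # dense config for the Wh multiplications
    'sigmoid_config{}_rec_act'.format(index),  # reset/update gate activation config
    'tanh_config{}_act'.format(index),         # candidate-state activation config
    'config{}'.format(index),                  # top-level nnet::gru_config
]
print('\n'.join(emitted))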
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+import numpy as np
+from hls4ml.model.optimizer import OptimizerPass
+from hls4ml.model.layers import Dense, GRU
+
+class ApplyResourceStrategy(OptimizerPass):
+    ''' Transposes the weights to use the dense_resource matrix multiply routine '''
+    def match(self, node):
+        node_matches = isinstance(node, (Dense, GRU))
+        is_resource_strategy = True  # node.get_attr('strategy', '').lower() == 'resource' ... Quartus only supports resource strategy
+        already_transformed = node.get_attr('_weights_transposed', False) == True
+        return node_matches and is_resource_strategy and not already_transformed
+
+    def transform(self, model, node):
+        if isinstance(node, Dense) and not node.model.config.get_compression(node):
+            rf = node.get_attr('reuse_factor')
+            bf = int((node.attributes['n_in']*node.attributes['n_out'])/rf)
+            bf_rounded = int(pow(2, np.ceil(np.log2(bf))))
+            rf_rounded = int(pow(2, np.ceil(np.log2(rf))))
+
+            node.weights['weight'].data = np.transpose(node.weights['weight'].data).flatten()
+
+            if (node.attributes['n_in']*node.attributes['n_out'] > 2048 and rf_rounded != rf):
+                node.set_attr('rfpad', rf_rounded-rf)
+                node.set_attr('bfpad', bf_rounded-bf)
+
+                temp = np.empty([bf_rounded, rf_rounded])
+                for i in range(rf_rounded):
+                    for j in range(bf_rounded):
+                        if (i < rf and j < bf):
+                            w_index = i + rf * j
+                            temp[j][i] = node.weights['weight'].data[w_index]
+                        else:
+                            temp[j][i] = 0
+                node.weights['weight'].data = temp.flatten()
+                node.weights['weight'].data_length = node.weights['weight'].data.size
+
+        elif isinstance(node, GRU):
+            node.weights['weight'].data = np.transpose(node.weights['weight'].data)
+            node.weights['recurrent_weight'].data = np.transpose(node.weights['recurrent_weight'].data)
+
+        else:
+            raise Exception('Unexpected layer {} with resource strategy'.format(node.class_name))
+
+        node.set_attr('_weights_transposed', True)
+        return False
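The padding applied by this pass rounds the reuse and block factors up to powers of two; a worked example with hypothetical layer sizes:

# Worked example (hypothetical layer sizes) of the power-of-two padding applied above.
import numpy as np
n_in, n_out, rf = 64, 64, 24                      # 64x64 Dense, reuse_factor 24
bf = int((n_in * n_out) / rf)                     # block factor = 170
rf_rounded = int(pow(2, np.ceil(np.log2(rf))))    # 32
bf_rounded = int(pow(2, np.ceil(np.log2(bf))))    # 256
print(rf_rounded - rf, bf_rounded - bf)           # rfpad = 8, bfpad = 86
# Padding kicks in because n_in*n_out = 4096 > 2048 and rf is not a power of two.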

hls4ml/backends/quartus/passes/transform_types.py

Lines changed: 3 additions & 4 deletions
@@ -1,14 +1,14 @@
 
 from hls4ml.model.optimizer import GlobalOptimizerPass
 from hls4ml.model.types import InplaceVariable
-from hls4ml.backends.fpga.fpga_types import ACTypeConverter, QuartusArrayVariableConverter, HLSTypeConverter, QuartusInplaceVariableConverter, QuartusStructMemberVariableConverter, StaticWeightVariableConverter
-
+from hls4ml.backends.fpga.fpga_types import ACTypeConverter, QuartusArrayVariableConverter, HLSTypeConverter, QuartusInplaceVariableConverter, QuartusStreamVariableConverter, QuartusStructMemberVariableConverter, StaticWeightVariableConverter
 
 class TransformTypes(GlobalOptimizerPass):
     def __init__(self):
         self.type_converter = HLSTypeConverter(precision_converter=ACTypeConverter())
         self.array_var_converter = QuartusArrayVariableConverter(type_converter=self.type_converter)
         self.struct_var_converter = QuartusStructMemberVariableConverter(type_converter=self.type_converter)
+        self.stream_var_converter = QuartusStreamVariableConverter(type_converter=self.type_converter)
         self.weight_var_converter = StaticWeightVariableConverter(type_converter=self.type_converter)
         self.inplace_var_converter = QuartusInplaceVariableConverter(type_converter=self.type_converter)
 
@@ -18,9 +18,8 @@ def transform(self, model, node):
         for out_name, var in node.variables.items():
             if isinstance(var, InplaceVariable):
                 new_var = self.inplace_var_converter.convert(var, io_type)
-
             if io_type == 'io_stream':
-                raise Exception('Streaming IO is not supported in Quartus.')
+                new_var = self.stream_var_converter.convert(var)
             elif io_type == 'io_parallel':
                 if node.name in node.model.inputs:
                     new_var = self.struct_var_converter.convert(var, pragma='hls_register', struct_name='inputs')
