Skip to content

Commit 8e8c450

Browse files
Merge remote-tracking branch 'upstream/main' into conv_tr_parallel
2 parents d1d1e3e + 19c541a commit 8e8c450

34 files changed

+993
-494
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
A package for machine learning inference in FPGAs. We create firmware implementations of machine learning algorithms using high level synthesis language (HLS). We translate traditional open-source machine learning package models into HLS that can be configured for your use-case!
1010

11-
**Contact:** hls4ml.help@gmail.com
11+
If you have any questions, comments, or ideas regarding hls4ml or just want to show us how you use hls4ml, don't hesitate to reach us through the [discussions](https://github.com/fastmachinelearning/hls4ml/discussions) tab.
1212

1313
# Documentation & Tutorial
1414

docs/index.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,7 @@ Welcome to hls4ml's documentation!
3535

3636
``hls4ml`` is a Python package for machine learning inference in FPGAs. We create firmware implementations of machine learning algorithms using high level synthesis language (HLS). We translate traditional open-source machine learning package models into HLS that can be configured for your use-case!
3737

38-
The project is currently in development, so please let us know if you are interested, your experiences with the package, and if you would like new features to be added.
39-
40-
Contact: hls4ml.help@gmail.com
38+
The project is currently in development, so please let us know if you are interested, your experiences with the package, and if you would like new features to be added. You can reach us through our GitHub page.
4139

4240

4341
Project Status

hls4ml/backends/backend.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,12 @@ def _init_class_optimizers(self):
2525
return class_optimizers
2626

2727
def _init_file_optimizers(self):
28-
opt_path = os.path.dirname(inspect.getfile(self.__class__)) + '/passes'
29-
module_path = self.__module__[:self.__module__.rfind('.')] + '.passes'
30-
file_optimizers = extract_optimizers_from_path(opt_path, module_path, self)
31-
for base in self.__class__.__bases__:
32-
opt_path = os.path.dirname(inspect.getfile(base)) + '/passes'
33-
module_path = base.__module__[:base.__module__.rfind('.')] + '.passes'
34-
base_optimizers = extract_optimizers_from_path(opt_path, module_path, self)
35-
file_optimizers.update(base_optimizers)
28+
file_optimizers = {}
29+
for cls in [*self.__class__.__bases__, self.__class__]:
30+
opt_path = os.path.dirname(inspect.getfile(cls)) + '/passes'
31+
module_path = cls.__module__[:cls.__module__.rfind('.')] + '.passes'
32+
cls_optimizers = extract_optimizers_from_path(opt_path, module_path, self)
33+
file_optimizers.update(cls_optimizers)
3634
return file_optimizers
3735

3836
def _get_layer_initializers(self):
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import json
2+
3+
from pyDigitalWaveTools.vcd.parser import VcdParser
4+
5+
from hls4ml.model.optimizer.optimizer import ConfigurableOptimizerPass, ModelOptimizerPass
6+
7+
8+
def populate_values(values, name, data, depth):
9+
values.append({'name': name, 'data': [], 'max': 0, 'depth': 0})
10+
get_values = lambda x: int(x[1][1:], 2)
11+
values[-1]['data'] = [get_values(x) for x in data]
12+
values[-1]['max'] = max(values[-1]['data'])
13+
values[-1]['depth'] = int(depth[0][1][1:], 2)
14+
return values
15+
16+
17+
def set_big_fifos(vars_to_profile, profiling_fifo_depth):
18+
for k, v in vars_to_profile.items():
19+
v.pragma = (v.pragma[0], profiling_fifo_depth)
20+
21+
22+
def get_vcd_data(model):
23+
model.write()
24+
model.build(reset=False, csim=True, synth=True, cosim=True, validation=False, export=False, vsynth=False,
25+
fifo_opt=True)
26+
27+
with open(
28+
model.config.get_output_dir() + '/' + model.config.get_project_name() + '_prj' + '/solution1/sim/verilog/fifo_opt.vcd') as vcd_file:
29+
vcd = VcdParser()
30+
vcd.parse(vcd_file)
31+
data = vcd.scope.toJson()
32+
return data
33+
34+
35+
def generate_max_depth_file(model, maxs):
36+
with open(model.config.get_output_dir() + '/max_depth.json', 'w') as f:
37+
json.dump(maxs, f, indent=4)
38+
39+
40+
def set_fifo_depth(model, maxs):
41+
for k, v in model.output_vars.items():
42+
filtered_max = [x['max'] for x in maxs if v.name in x['name']]
43+
if len(filtered_max) == 0:
44+
continue
45+
if len(filtered_max) > 1:
46+
print('WARNING! Check names of FIFOs')
47+
v.pragma = (v.pragma[0], filtered_max[0] + 1)
48+
49+
50+
class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
51+
def __init__(self):
52+
self.values = []
53+
54+
def transform(self, model):
55+
# use `profiling_fifo_depth = 0` to keep the default fifo depth
56+
profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000)
57+
58+
# check for axi-stream or io-stream; if it is neither of the two, exit
59+
if not (model.config.get_config_value('IOType') == 'io_stream'):
60+
raise Exception('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config')
61+
62+
# initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs
63+
# and so they will be profiled
64+
if profiling_fifo_depth:
65+
vars_to_profile = {k: v for k, v in model.output_vars.items() if v != model.get_output_variables()[0] and
66+
v != model.get_input_variables()[0]}
67+
68+
set_big_fifos(vars_to_profile, profiling_fifo_depth)
69+
70+
data = get_vcd_data(model)
71+
72+
if len(data['children']) == 0:
73+
print("FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible. Consider increasing profiling_fifo_depth.")
74+
return False
75+
76+
n_elem = len(data['children'][0]['children'][0]['children'])
77+
for i in range(n_elem):
78+
name = data['children'][0]['children'][0]['children'][i]['name']
79+
data_p = data['children'][0]['children'][0]['children'][i]['children'][0]['data']
80+
depth = data['children'][0]['children'][0]['children'][i]['children'][1]['data']
81+
populate_values(self.values, name, data_p, depth)
82+
83+
maxs = [{'name': i['name'], 'max': i['max'], 'depth': i['depth']} for i in self.values]
84+
85+
generate_max_depth_file(model, maxs)
86+
87+
set_fifo_depth(model, maxs)
88+
89+
print('[hls4ml] - FIFO optimization completed')
90+
return False

hls4ml/backends/vivado/passes/pooling_templates.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
static const unsigned stride_width = {stride_width};
1919
static const nnet::Pool_Op pool_op = nnet::{pool_op};
2020
static const nnet::conv_implementation implementation = nnet::conv_implementation::{implementation};
21-
static const unsigned reuse = {reuse};
21+
static const unsigned reuse_factor = {reuse};
2222
typedef {accum_t.name} accum_t;
2323
}};\n"""
2424

@@ -43,15 +43,15 @@
4343
static const unsigned pad_right = {pad_right};
4444
static const nnet::Pool_Op pool_op = nnet::{pool_op};
4545
static const nnet::conv_implementation implementation = nnet::conv_implementation::{implementation};
46-
static const unsigned reuse = {reuse};
46+
static const unsigned reuse_factor = {reuse};
4747
typedef {accum_t.name} accum_t;
4848
}};\n"""
4949

5050
global_pooling1d_config_template = """struct config{index} : nnet::pooling1d_config {{
5151
static const unsigned n_in = {n_in};
5252
static const unsigned n_filt = {n_filt};
5353
static const nnet::Pool_Op pool_op = nnet::{pool_op};
54-
static const unsigned reuse = {reuse};
54+
static const unsigned reuse_factor = {reuse};
5555
typedef {accum_t.name} accum_t;
5656
}};\n"""
5757

@@ -60,7 +60,7 @@
6060
static const unsigned in_width = {in_width};
6161
static const unsigned n_filt = {n_filt};
6262
static const nnet::Pool_Op pool_op = nnet::{pool_op};
63-
static const unsigned reuse = {reuse};
63+
static const unsigned reuse_factor = {reuse};
6464
typedef {accum_t.name} accum_t;
6565
}};\n"""
6666

hls4ml/backends/vivado/passes/recurrent_templates.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
typedef {bias_t.name} bias_t;
1818
typedef {weight_t.name} weight_t;
1919
typedef ap_{index_t} index_t;
20-
template<class x_T, class y_T, class res_T>
21-
using product = nnet::product::{product_type}<x_T, y_T, res_T>;
20+
template<class x_T, class y_T>
21+
using product = nnet::product::{product_type}<x_T, y_T>;
2222
}};\n"""
2323

2424
#activation templates

hls4ml/backends/vivado/passes/transform_types.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@ def transform(self, model, node):
2020
new_var = self.inplace_var_converter.convert(var, io_type)
2121
if io_type == 'io_stream':
2222
new_var = self.stream_var_converter.convert(var)
23-
elif io_type == 'io_serial':
24-
new_var = self.array_var_converter.convert(var, pragma='stream')
2523
elif io_type == 'io_parallel':
2624
if node.name in node.model.inputs:
2725
new_var = self.array_var_converter.convert(var, pragma='reshape')

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,17 @@ def _register_flows(self):
7373
]
7474
self._writer_flow = register_flow('write', writer_passes, requires=['vivado:ip'], backend=self.name)
7575

76+
fifo_depth_opt_passes = [
77+
'vivado:fifo_depth_optimization'
78+
] + writer_passes # After optimization, a new project will be written
79+
80+
register_flow('fifo_depth_optimization', fifo_depth_opt_passes, requires=[self._writer_flow], backend=self.name)
81+
7682
all_passes = get_backend_passes(self.name)
7783

7884
extras = [
7985
# Ideally this should be empty
80-
opt_pass for opt_pass in all_passes if opt_pass not in initializers + streaming_passes + quantization_passes + optimization_passes + vivado_types + templates + writer_passes
86+
opt_pass for opt_pass in all_passes if opt_pass not in initializers + streaming_passes + quantization_passes + optimization_passes + vivado_types + templates + writer_passes + fifo_depth_opt_passes
8187
]
8288

8389
if len(extras) > 0:
@@ -106,16 +112,16 @@ def create_initial_config(self, part='xcku115-flvb2104-2-i', clock_period=5, io_
106112

107113
return config
108114

109-
def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False):
115+
def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False, fifo_opt=False):
110116
if 'linux' in sys.platform:
111117
found = os.system('command -v vivado_hls > /dev/null')
112118
if found != 0:
113119
raise Exception('Vivado HLS installation not found. Make sure "vivado_hls" is on PATH.')
114120

115121
curr_dir = os.getcwd()
116122
os.chdir(model.config.get_output_dir())
117-
os.system('vivado_hls -f build_prj.tcl "reset={reset} csim={csim} synth={synth} cosim={cosim} validation={validation} export={export} vsynth={vsynth}"'
118-
.format(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth))
123+
os.system('vivado_hls -f build_prj.tcl "reset={reset} csim={csim} synth={synth} cosim={cosim} validation={validation} export={export} vsynth={vsynth} fifo_opt={fifo_opt}"'
124+
.format(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth, fifo_opt=fifo_opt))
119125
os.chdir(curr_dir)
120126

121127
return parse_vivado_report(model.config.get_output_dir())
@@ -338,9 +344,6 @@ def init_lstm(self, layer):
338344
reuse_factor = layer.model.config.get_reuse_factor(layer)
339345
layer.set_attr('recurrent_reuse_factor', reuse_factor)
340346

341-
recurrent_bias = np.zeros(layer.weights['recurrent_weight'].shape[1])
342-
layer.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias)
343-
344347
index_t = IntegerPrecisionType(width=1, signed=False)
345348

346349
if 'table_t' not in layer.attributes:
@@ -364,9 +367,6 @@ def init_gru(self, layer):
364367
reuse_factor = layer.model.config.get_reuse_factor(layer)
365368
layer.set_attr('recurrent_reuse_factor', reuse_factor)
366369

367-
recurrent_bias = np.zeros(layer.weights['recurrent_weight'].shape[1])
368-
layer.add_weights_variable(name='recurrent_bias', var_name='br{index}', data=recurrent_bias)
369-
370370
index_t = IntegerPrecisionType(width=1, signed=False)
371371

372372
if 'table_t' not in layer.attributes:

hls4ml/backends/vivado_accelerator/passes/__init__.py

Whitespace-only changes.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from hls4ml.backends.vivado.passes.fifo_depth_optimization import set_big_fifos, get_vcd_data, populate_values, \
2+
generate_max_depth_file, set_fifo_depth
3+
from hls4ml.model.optimizer.optimizer import ConfigurableOptimizerPass, ModelOptimizerPass
4+
5+
6+
class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
7+
def __init__(self):
8+
self.values = []
9+
10+
def transform(self, model):
11+
# use `profiling_fifo_depth = 0` to keep the default fifo depth
12+
profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000)
13+
14+
# check for axi-stream or io-stream; if it is neither of the two, exit
15+
if not(model.config.get_config_value('IOType') == 'io_stream' or
16+
model.config.get_config_value('AcceleratorConfig')['Interface'] == 'axi_stream' or
17+
model.config.get_config_value('AcceleratorConfig')['Interface'] == 'axi_master'):
18+
raise Exception('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config '
19+
'or `axi_stream` or `axi_master` in `AcceleratorConfig` interface field')
20+
21+
# initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be
22+
# profiled
23+
24+
if profiling_fifo_depth:
25+
set_big_fifos(model.output_vars, profiling_fifo_depth)
26+
27+
data = get_vcd_data(model)
28+
29+
for i in range(1, len(data['children'][0]['children'][0]['children'])):
30+
# wrapper fifos
31+
populate_values(self.values,
32+
data['children'][0]['children'][0]['children'][i]['name'],
33+
data['children'][0]['children'][0]['children'][i]['children'][0]['data'],
34+
data['children'][0]['children'][0]['children'][i]['children'][1]['data'])
35+
36+
n_elem = len(data['children'][0]['children'][0]['children'][0]['children'])
37+
for i in range(n_elem):
38+
name = data['children'][0]['children'][0]['children'][0]['children'][i]['name']
39+
data_p = data['children'][0]['children'][0]['children'][0]['children'][i]['children'][0]['data']
40+
depth = data['children'][0]['children'][0]['children'][0]['children'][i]['children'][1]['data']
41+
populate_values(self.values, name, data_p, depth)
42+
43+
maxs = [{'name': i['name'], 'max': i['max'], 'depth': i['depth']} for i in self.values]
44+
45+
generate_max_depth_file(model, maxs)
46+
47+
set_fifo_depth(model, maxs)
48+
49+
inp = model.get_input_variables()[0]
50+
out = model.get_output_variables()[0]
51+
for x in maxs:
52+
if 'in_local' in x['name']:
53+
inp.pragma = (inp.pragma[0], x['max'] + 1)
54+
elif 'out_local' in x['name']:
55+
out.pragma = (out.pragma[0], x['max'] + 1)
56+
57+
print('[hls4ml] - FIFO optimization completed')
58+
return False

0 commit comments

Comments
 (0)