complete implementation of separable -> dw + pw, untested

jmitrevs · jmitrevs · commit 0925a3dee501 · 2024-06-11T19:27:07.000-05:00
diff --git a/hls4ml/backends/vivado/passes/convolution_templates.py b/hls4ml/backends/vivado/passes/convolution_templates.py
@@ -280,7 +280,7 @@ def format(self, node):
         # Override bias and bias_t since these are zeros in depthwise step of SepConv1D
         params['bias'] = params['zero_bias']
         params['bias_t'] = params['zero_bias_t']
-        params['n_filt'] = params['n_chan']  # In depthwise step n_chan == n_filt
+        params['n_filt'] = params['n_chan'] * node.get_attr('depth_multiplier')  # In depthwise step n_filt == n_chan * depth_multiplier
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('depthwise').nzeros
         params['index'] = str(node.index) + '_depthwise'
diff --git a/hls4ml/converters/keras/convolution.py b/hls4ml/converters/keras/convolution.py
@@ -60,6 +60,9 @@ def parse_conv2d_layer(keras_layer, input_names, input_shapes, data_reader):
 
     layer['bias_data'] = get_weights_data(data_reader, layer['name'], 'bias')
 
+    if 'depth_multiplier' in keras_layer['config']:
+        layer['depth_multiplier'] = keras_layer['config']['depth_multiplier']
+
     if 'filters' in keras_layer['config']:
         layer['n_filt'] = keras_layer['config']['filters']
     else:
diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py
@@ -615,6 +615,44 @@ def replace_node(self, old_node, new_node):
         self.graph = OrderedDict((new_node.name, new_node) if k == old_node.name else (k, v) for k, v in self.graph.items())
         self._update_model_outputs()
 
+    def split_node(self, old_node, new_node1, new_node2):
+        """Replace an existing node in the graph with two nodes in sequence.
+
+        Args:
+            old_node (Layer): The node to replace
+            new_node1 (Layer): The first new node in sequence
+            new_node2 (Layer): The second new node in sequence
+
+        """
+
+        # fmt: off
+        assert len(new_node1.inputs) == len(old_node.inputs), \
+            f'{new_node1.name} and {old_node.name} have different number of inputs'
+        assert len(new_node2.outputs) == len(old_node.outputs), \
+            f'{new_node2.name} and {old_node.name} have different number of outputs'
+        # fmt: on
+
+        repl = {old_name: new_name for old_name, new_name in zip(old_node.outputs, new_node2.outputs)}
+        repl.update({old_name: new_name for old_name, new_name in zip(old_node.inputs, new_node1.inputs)})
+
+        for node in self.graph.values():
+            for i, n in enumerate(node.inputs):
+                if n in repl:
+                    node.inputs[i] = repl[n]
+            for i, n in enumerate(node.outputs):
+                if n in repl:
+                    node.outputs[i] = repl[n]
+
+        new_graph = OrderedDict()
+        for key, value in self.graph.items():
+            if key == old_node.name:
+                new_graph[new_node1.name] = new_node1
+                new_graph[new_node2.name] = new_node2
+            else:
+                new_graph[key] = value
+        self.graph = new_graph
+        self._update_model_outputs()
+
     def _update_model_outputs(self):
         '''Update the model outputs
 
diff --git a/hls4ml/model/layers.py b/hls4ml/model/layers.py
@@ -447,6 +447,7 @@ class SeparableConv1D(Layer):
         Attribute('out_width'),
         Attribute('n_chan'),
         Attribute('n_filt'),
+        Attribute('depth_multiplier', default=1),
         Attribute('filt_width'),
         Attribute('stride_width'),
         Attribute('pad_left'),
@@ -484,12 +485,27 @@ def initialize(self):
 
 
 class DepthwiseConv1D(Conv1D):
+    _expected_attributes = [
+        Attribute('in_width'),
+        Attribute('out_width'),
+        Attribute('n_chan'),
+        Attribute('depth_multiplier', default=1),
+        Attribute('filt_width'),
+        Attribute('stride_width'),
+        Attribute('pad_left'),
+        Attribute('pad_right'),
+        WeightAttribute('depthwise'),
+        WeightAttribute('bias'),
+        TypeAttribute('depthwise'),
+        TypeAttribute('bias'),
+    ]
+
     def initialize(self):
         if self.get_attr('data_format') == 'channels_last':
-            shape = [self.attributes['out_width'], self.attributes['n_chan']]
+            shape = [self.attributes['out_width'], self.attributes['n_chan'] * self.attributes['depth_multiplier']]
             dims = [f'OUT_HEIGHT_{self.index}', f'N_CHAN_{self.index}']
         else:
-            shape = [self.attributes['n_chan'], self.attributes['out_width']]
+            shape = [self.attributes['n_chan'] * self.attributes['depth_multiplier'], self.attributes['out_width']]
             dims = [f'N_CHAN_{self.index}', f'OUT_WIDTH_{self.index}']
         self.add_output_variable(shape, dims)
 
@@ -498,6 +514,7 @@ def initialize(self):
         )
 
         self.add_bias(quantizer=self.get_attr('bias_quantizer'))
+        self.set_attr('n_filt', self.get_attr('n_chan') * self.get_attr('depth_multiplier'))
 
 
 class Conv2D(Layer):
@@ -594,6 +611,7 @@ class SeparableConv2D(Layer):
         Attribute('out_width'),
         Attribute('n_chan'),
         Attribute('n_filt'),
+        Attribute('depth_multiplier', default=1),
         Attribute('filt_height'),
         Attribute('filt_width'),
         Attribute('stride_height'),
@@ -634,12 +652,41 @@ def initialize(self):
 
 
 class DepthwiseConv2D(Conv2D):
+    _expected_attributes = [
+        Attribute('in_height'),
+        Attribute('in_width'),
+        Attribute('out_height'),
+        Attribute('out_width'),
+        Attribute('n_chan'),
+        Attribute('depth_multiplier', default=1),
+        Attribute('filt_height'),
+        Attribute('filt_width'),
+        Attribute('stride_height'),
+        Attribute('stride_width'),
+        Attribute('pad_top'),
+        Attribute('pad_bottom'),
+        Attribute('pad_left'),
+        Attribute('pad_right'),
+        WeightAttribute('weight'),
+        WeightAttribute('bias'),
+        TypeAttribute('weight'),
+        TypeAttribute('bias'),
+    ]
+
     def initialize(self):
         if self.get_attr('data_format') == 'channels_last':
-            shape = [self.attributes['out_height'], self.attributes['out_width'], self.attributes['n_chan']]
+            shape = [
+                self.attributes['out_height'],
+                self.attributes['out_width'],
+                self.attributes['n_chan'] * self.attributes['depth_multiplier'],
+            ]
             dims = [f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
         else:
-            shape = [self.attributes['n_chan'], self.attributes['out_height'], self.attributes['out_width']]
+            shape = [
+                self.attributes['n_chan'] * self.attributes['depth_multiplier'],
+                self.attributes['out_height'],
+                self.attributes['out_width'],
+            ]
             dims = [f'N_CHAN_{self.index}', f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}']
         self.add_output_variable(shape, dims)
 
@@ -648,6 +695,7 @@ def initialize(self):
         )
 
         self.add_bias(quantizer=self.get_attr('bias_quantizer'))
+        self.set_attr('n_filt', self.get_attr('n_chan') * self.get_attr('depth_multiplier'))
 
 
 class Pooling1D(Layer):
diff --git a/hls4ml/model/optimizer/__init__.py b/hls4ml/model/optimizer/__init__.py
@@ -33,6 +33,7 @@
 register_flow(
     'convert',
     [
+        'seperable_to_depthwise_and_conv',  # has to be before precision inference
         'infer_precision_types',
         'channels_last_converter',
         'remove_transpose_before_flatten',
diff --git a/hls4ml/model/optimizer/passes/seperable_to_dw_conv.py b/hls4ml/model/optimizer/passes/seperable_to_dw_conv.py
@@ -0,0 +1,124 @@
+"""
+This optimizer converts a separable convolution to a depthwise followed by a regular convolution.
+For backends with a custom pointwise implementation the regular convolution will subsequently
+be converted to a pointwise convolution by a different optimizer.
+"""
+
+import copy
+
+from hls4ml.model.layers import SeparableConv1D, SeparableConv2D
+from hls4ml.model.optimizer import OptimizerPass
+
+
+class SeperableToDepthwiseAndConv(OptimizerPass):
+    """Convert Separable to DepthwiseConv + Conv (potentially later Pointwise)"""
+
+    _dw_attributes = (
+        'in_width',
+        'out_width',
+        'n_chan',
+        'depth_multiplier',
+        'pad_left',
+        'pad_right',
+        'filt_width',
+        'stride_width',
+        'dilation_width',
+        'in_height',
+        'out_height',
+        'pad_top',
+        'pad_bottom',
+        'filt_height',
+        'stride_height',
+        'dilation_height',
+        'data_format',
+        'depthwise_data',
+        'depthwise_quantizer',
+    )
+
+    _pw_attributes = ('out_width', 'n_filt', 'dilation_width', 'out_height', 'dilation_height', 'data_format', 'use_bias')
+
+    def match(self, node):
+        return isinstance(node, (SeparableConv1D, SeparableConv2D))
+
+    def transform(self, model, node):
+        dim = node.__class__.__name__[-2:]  # '1D' or '2D'
+
+        # get the layer configuration for this node
+        layer_config = model.config.get_layer_config(node)
+
+        # First do depthwise
+        dw_name = f'{node.name}_depthwise'
+
+        # now copy the layer config (so that any configuration that was set gets carried over)
+        dw_layer_config = copy.deepcopy(layer_config)
+
+        if dw_layer_config:
+            dw_precision_cfg = dw_layer_config.setdefault('Precision', {})
+            if 'depthwise' in dw_precision_cfg:
+                dw_precision_cfg['weight'] = dw_precision_cfg['depthwise']
+                del dw_precision_cfg['depthwise']
+            if 'depthwise_accum' in dw_precision_cfg:
+                dw_precision_cfg['accum'] = dw_precision_cfg['depthwise_accum']
+                del dw_precision_cfg['depthwise_accum']
+            if 'depthwise_result' in dw_precision_cfg:
+                dw_precision_cfg['result'] = dw_precision_cfg['depthwise_result']
+                del dw_precision_cfg['depthwise_result']
+            dw_precision_cfg.pop('pointwise', None)
+            dw_precision_cfg.pop('pointwise_accum', None)
+            model.config.set_name_config(dw_name, dw_layer_config)
+            model.config.parse_name_config(dw_name, dw_layer_config)
+
+        # creating the attributes
+        dw_attributes = {k: node.attributes.get(k, None) for k in SeperableToDepthwiseAndConv._dw_attributes}
+
+        dw_attributes['use_bias'] = False
+
+        new_dw = model.make_node('DepthwiseConv' + dim, dw_name, dw_attributes, [node.inputs[0]])
+
+        # Then do convolution
+        pw_name = f'{node.name}_pointwise'
+
+        # now copy the layer config (so that any configuration that was set gets carried over)
+        pw_layer_config = copy.deepcopy(layer_config)
+
+        if pw_layer_config:
+            pw_precision_cfg = pw_layer_config.setdefault('Precision', {})
+            if 'pointwise' in pw_precision_cfg:
+                pw_precision_cfg['weight'] = pw_precision_cfg['pointwise']
+                del pw_precision_cfg['pointwise']
+            if 'pointwise_accum' in pw_precision_cfg:
+                pw_precision_cfg['accum'] = pw_precision_cfg['pointwise_accum']
+                del pw_precision_cfg['pointwise_accum']
+            if 'pointwise_result' in pw_precision_cfg:
+                pw_precision_cfg['result'] = pw_precision_cfg['pointwise_result']
+                del pw_precision_cfg['pointwise_result']
+            pw_precision_cfg.pop('depthwise', None)
+            pw_precision_cfg.pop('depthwise_accum', None)
+            model.config.set_name_config(pw_name, pw_layer_config)
+            model.config.parse_name_config(pw_name, pw_layer_config)
+
+        # creating the attributes
+        pw_attributes = {k: node.attributes.get(k, None) for k in SeperableToDepthwiseAndConv._pw_attributes}
+        pw_attributes['filt_width'] = 1
+        pw_attributes['filt_height'] = 1
+        pw_attributes['stride_width'] = 1
+        pw_attributes['stride_height'] = 1
+        pw_attributes['pad_left'] = 0
+        pw_attributes['pad_right'] = 0
+        pw_attributes['pad_top'] = 0
+        pw_attributes['pad_bottom'] = 0
+        pw_attributes['in_width'] = pw_attributes['out_width']
+        pw_attributes['in_height'] = pw_attributes['out_height']
+        pw_attributes['n_chan'] = node.get_attr('n_chan') * node.get_attr('depth_multiplier')
+        pw_attributes['weight_data'] = node.get_attr('pointwise_data')
+        pw_attributes['weight_quantizer'] = node.get_attr('pointwise_quantizer')
+        pw_attributes['bias_data'] = node.get_attr('bias_data')
+        pw_attributes['bias_quantizer'] = node.get_attr('bias_quantizer')
+
+        # note this is just a regular convolution; another optimizer replaces it with
+        # a special pointwise implementation if one is available
+        new_pw = model.make_node('Conv' + dim, pw_name, pw_attributes, [dw_name])
+
+        model.split_node(node, new_dw, new_pw)
+
+        return True

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,7 @@`
`33`	`33`	`register_flow(`
`34`	`34`	`'convert',`
`35`	`35`	`[`
	`36`	`+ 'seperable_to_depthwise_and_conv', # has to be before precision inference`
`36`	`37`	`'infer_precision_types',`
`37`	`38`	`'channels_last_converter',`
`38`	`39`	`'remove_transpose_before_flatten',`