Add support for the einsum operation to the PyTorch parser (requires #1116) (#1273)

JanFSchulte · web-flow · commit 6cdf842d3b4b · 2025-06-03T20:07:22.000Z
* add einsum support to pytorch parser

* use _validate_einsum_expr to extract output shape
diff --git a/hls4ml/converters/pytorch/core.py b/hls4ml/converters/pytorch/core.py
@@ -1,6 +1,7 @@
 import numpy as np
 
 from hls4ml.converters.pytorch_to_hls import pytorch_handler
+from hls4ml.utils.einsum_utils import _validate_einsum_expr
 
 
 @pytorch_handler('Constant')
@@ -157,3 +158,29 @@ def parse_batchnorm_layer(operation, layer_name, input_names, input_shapes, node
         layer['n_filt'] = input_shapes[0][1]  # Always channel first for Pytorch
 
     return layer, [shape for shape in input_shapes[0]]
+
+
+@pytorch_handler('einsum')
+def parse_einsum_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config):
+    """Parse a two-operand ``torch.einsum`` node into an hls4ml ``Einsum`` layer dict.
+
+    ``node.args[0]`` is the equation string. Returns ``(layer, output_shape)``; the
+    output shape keeps the batch entry of the first input, followed by the einsum result dims.
+    """
+    assert 'einsum' in operation
+
+    if len(input_names) != 2:
+        raise Exception('Only einsum operations with two inputs are supported')
+
+    layer = {'class_name': 'Einsum', 'name': layer_name, 'inputs': input_names}
+
+    # The batch entry is 'None' in the parser; the einsum validator needs real sizes, so use '1' as a dummy
+    # value. Building new tuples leaves the caller's input_shapes untouched without a deep copy.
+    layer['inp0_shape'] = (1, *input_shapes[0][1:])
+    layer['inp1_shape'] = (1, *input_shapes[1][1:])
+
+    layer['equation'], layer['out_shape'] = _validate_einsum_expr(node.args[0], layer['inp0_shape'], layer['inp1_shape'])
+
+    # Report the einsum result shape (not the first input's) to downstream layers, restoring the batch entry.
+    # NOTE(review): assumes the batch axis is the leading output axis, as it is for both inputs.
+    return layer, [input_shapes[0][0], *layer['out_shape'][1:]]
diff --git a/test/pytest/test_pytorch_api.py b/test/pytest/test_pytorch_api.py
@@ -877,3 +877,79 @@ def forward(self, x):
     rtol = 0
     atol = 5.0e-2
     np.testing.assert_allclose(hls_prediction, pytorch_prediction, rtol=rtol, atol=atol)
+
+
+class EinsumOuterProduct(nn.Module):
+    """Per-sample outer product of two batched vectors via ``torch.einsum``."""
+
+    def forward(self, x, y):
+        # 'bi,bj->bij': keep batch index b, pair every i with every j.
+        return torch.einsum('bi,bj->bij', x, y)
+
+
+class EinsumBatchMatMul(nn.Module):
+    """Batched matrix multiplication of two rank-3 tensors via ``torch.einsum``."""
+
+    def forward(self, x, y):
+        # 'bij,bjk->bik': for each batch index b, contract the shared index j.
+        return torch.einsum('bij,bjk->bik', x, y)
+
+
+@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
+@pytest.mark.parametrize('io_type', ['io_parallel'])
+def test_einsum_outer_product(backend, io_type):
+    """Compare the hls4ml conversion of ``EinsumOuterProduct`` with the PyTorch result."""
+    torch_model = EinsumOuterProduct().eval()
+
+    # Batch of 3 samples; feature sizes 4 and 5 match the shapes given to the config below.
+    x_data = np.random.rand(3, 4)
+    y_data = np.random.rand(3, 5)
+    reference = torch_model(torch.Tensor(x_data), torch.Tensor(y_data)).detach().numpy()
+
+    # The leading 'None' in each input shape stands for the batch dimension.
+    hls_config = config_from_pytorch_model(
+        torch_model,
+        [(None, 4), (None, 5)],
+        default_precision='ap_fixed<16,6>',
+        channels_last_conversion="internal",
+        transpose_outputs=False,
+    )
+    project_dir = str(test_root_path / f'hls4mlprj_pytorch_einsum_outer_product_{backend}_{io_type}')
+    hls_model = convert_from_pytorch_model(
+        torch_model, hls_config=hls_config, output_dir=project_dir, backend=backend, io_type=io_type
+    )
+    hls_model.compile()
+
+    # predict() output is reshaped to the PyTorch output shape before comparing.
+    hls_output = np.reshape(hls_model.predict([x_data, y_data]), reference.shape)
+    np.testing.assert_allclose(hls_output, reference, rtol=1e-2, atol=0.01)
+
+
+@pytest.mark.parametrize('backend', ['Vivado', 'Vitis'])
+@pytest.mark.parametrize('io_type', ['io_parallel'])
+def test_einsum_batch_matmul(backend, io_type):
+    """Compare the hls4ml conversion of ``EinsumBatchMatMul`` with the PyTorch result."""
+    torch_model = EinsumBatchMatMul().eval()
+
+    # Batch of 3 samples; per-sample matrices are 2x5 and 5x4, matching the config shapes below.
+    x_data = np.random.rand(3, 2, 5)
+    y_data = np.random.rand(3, 5, 4)
+    reference = torch_model(torch.Tensor(x_data), torch.Tensor(y_data)).detach().numpy()
+
+    # The leading 'None' in each input shape stands for the batch dimension.
+    hls_config = config_from_pytorch_model(
+        torch_model,
+        [(None, 2, 5), (None, 5, 4)],
+        default_precision='ap_fixed<16,6>',
+        channels_last_conversion="internal",
+        transpose_outputs=False,
+    )
+    project_dir = str(test_root_path / f'hls4mlprj_pytorch_einsum_batch_matmul_{backend}_{io_type}')
+    hls_model = convert_from_pytorch_model(
+        torch_model, hls_config=hls_config, output_dir=project_dir, backend=backend, io_type=io_type
+    )
+    hls_model.compile()
+
+    # predict() output is reshaped to the PyTorch output shape before comparing.
+    hls_output = np.reshape(hls_model.predict([x_data, y_data]), reference.shape)
+    np.testing.assert_allclose(hls_output, reference, rtol=1e-2, atol=0.01)