Commit 4f3a60b

Add ConstantArgument support to fx_import (llvm#4244)
This PR fixes the following issue: [fx_importer NotImplementedError: MultiheadAttention layer with NeedWeight = false](llvm#4158).

Before this fix, the importer raised:

Python Error: NotImplementedError: OutputKind.USER_OUTPUT for <class 'torch.export.graph_signature.ConstantArgument'>: ConstantArgument(name='', value=None)

This happens for an exported MultiheadAttention layer with need_weights=False, which means the layer does not return the attention weights; the second output, attn_output_weights, is therefore None.
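A minimal repro sketch of the failing case (the module name and shapes below are illustrative; it assumes torch and the torch_mlir fx helpers are installed):

```python
import torch
from torch_mlir import fx


class AttentionNoWeights(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.attn = torch.nn.MultiheadAttention(
            embed_dim=64, num_heads=1, batch_first=True
        )

    def forward(self, query, key, value):
        # need_weights=False makes the second output (attn_output_weights) None,
        # which torch.export records as a ConstantArgument user output.
        return self.attn(query, key, value, need_weights=False)


x = torch.randn(1, 10, 64)
# Before this commit the import raised NotImplementedError for the ConstantArgument
# output; with it, the None output is imported and returned as a !torch.none constant.
m = fx.export_and_import(
    AttentionNoWeights(), x, x, x, experimental_support_mutation=True
)
print(m)
```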
1 parent: 68011ea · commit: 4f3a60b

File tree: 2 files changed, +284 −17 lines
python/torch_mlir/extras/fx_importer.py

Lines changed: 49 additions & 17 deletions
@@ -630,6 +630,7 @@ def import_program(
             OutputKind,
             TensorArgument,
             SymIntArgument,
+            ConstantArgument,
         )

         sig = prog.graph_signature
@@ -650,24 +651,35 @@ def import_program(
         constant_tensors: Dict[Node, torch.Tensor] = {}
         parameter_bindings: Dict[Node, Tuple[Any, InputInfo]] = {}
         buffer_bindings: Dict[Node, Tuple[Any, InputInfo]] = {}
+        constant_output_values: Dict[int, Any] = {}
+        constant_input_values: Dict[Node, Any] = {}

         # Derive user outputs that we preserve. These will be nodes of the
         # producer for the output.
-        user_outputs: List[Node] = []
+        user_outputs: List[Optional[Node]] = []
         user_output_types: List[IrType] = []
-        for output_spec in sig.output_specs:
+        for i, output_spec in enumerate(sig.output_specs):
             kind = output_spec.kind
             arg = output_spec.arg
             if kind == OutputKind.USER_OUTPUT:
-                if not isinstance(arg, (TensorArgument, SymIntArgument)):
+                if not isinstance(
+                    arg, (TensorArgument, SymIntArgument, ConstantArgument)
+                ):
                     raise NotImplementedError(
                         f"OutputKind.USER_OUTPUT for {type(arg)}: {arg}"
                     )
-                output_producer_node = all_producer_nodes[arg.name]
-                user_outputs.append(output_producer_node)
-                user_output_types.append(
-                    self._cc.node_val_to_type(output_producer_node)
-                )
+                if isinstance(arg, (TensorArgument, SymIntArgument)):
+                    output_producer_node = all_producer_nodes[arg.name]
+                    user_outputs.append(output_producer_node)
+                    user_output_types.append(
+                        self._cc.node_val_to_type(output_producer_node)
+                    )
+                elif isinstance(arg, ConstantArgument):
+                    # Constant Outputs don't have a node so we will only store their values
+                    constant_output_values[i] = arg.value
+                    # Placeholder for constant outputs in the node list
+                    user_outputs.append(None)
+                    user_output_types.append(self._cc.value_info_to_type(arg.value))
             elif kind == OutputKind.BUFFER_MUTATION and isinstance(arg, TensorArgument):
                 mutable_buffer_target_producers[output_spec.target] = arg.name

@@ -678,16 +690,22 @@ def import_program(
             arg = input_spec.arg
             if input_spec.kind == InputKind.USER_INPUT:
                 # Set up user input.
-                if not isinstance(arg, (TensorArgument, SymIntArgument)):
+                if not isinstance(
+                    arg, (TensorArgument, SymIntArgument, ConstantArgument)
+                ):
                     raise NotImplementedError(
                         f"InputKind.USER_INPUT for {type(arg)}: {arg}"
                     )
                 placeholder_node = placeholder_nodes[arg.name]
-                mutable = placeholder_node.name in mutated_user_inputs
-                user_inputs.append(placeholder_node)
-                user_input_types.append(
-                    self._cc.node_val_to_type(placeholder_node, mutable=mutable)
-                )
+                if isinstance(arg, (TensorArgument, SymIntArgument)):
+                    mutable = placeholder_node.name in mutated_user_inputs
+                    user_inputs.append(placeholder_node)
+                    user_input_types.append(
+                        self._cc.node_val_to_type(placeholder_node, mutable=mutable)
+                    )
+                elif isinstance(arg, ConstantArgument):
+                    # Constant argument will be handled separately, they are not mutable and do not need function parameters
+                    constant_input_values[placeholder_node] = arg.value
             elif input_spec.kind == InputKind.CONSTANT_TENSOR and isinstance(
                 arg, TensorArgument
             ):
@@ -778,6 +796,9 @@ def import_program(
         for constant_node, constant_tensor in constant_tensors.items():
             node_importer.import_constant(loc, constant_node, constant_tensor)

+        for constant_node, constant_value in constant_input_values.items():
+            node_importer.import_constant(loc, constant_node, constant_value)
+
         # Bind user inputs to IR values.
         for user_input_node, block_arg_value in zip(user_inputs, entry_block.arguments):
             if user_input_node.name in mutated_user_inputs:
@@ -804,7 +825,10 @@ def import_program(
             skip_placeholders_outputs=True,
             import_symbolic_shape_expressions=import_symbolic_shape_expressions,
         )
-        node_importer.return_node_values(loc, user_outputs)
+
+        # Call the return function that handles both nodes and constant values
+        node_importer.return_node_values(loc, user_outputs, constant_output_values)
+
         self.symbol_table.insert(func_op)
         return func_op

@@ -1419,9 +1443,17 @@ def on_produced(value: Value):

         self._on_node_produced[info.store_producer_node] = on_produced

-    def return_node_values(self, loc, nodes: List[Node]):
+    def return_node_values(self, loc, nodes: List[Node], constants: Dict[int, Any]):
+        # This function returns both node values and constant values
         with loc, InsertionPoint(self._b):
-            operands = [self.resolve_node_value(n) for n in nodes]
+            operands = [
+                (
+                    self.resolve_node_value(n)
+                    if isinstance(n, Node)
+                    else self._import_literal(constants[index])
+                )
+                for index, n in enumerate(nodes)
+            ]
             func_dialect.ReturnOp(operands, loc=loc)

     def import_nodes(

test/python/fx_importer/v2.3/mutation_import.py

Lines changed: 235 additions & 0 deletions
@@ -171,3 +171,238 @@ def forward(self, x):
     )
     print(m)
     m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_single_input_const_argument
+# CHECK: %[[int2:.+]] = torch.constant.int 2
+# CHECK: %[[buffer:.+]] = torch.aten.mul.Scalar %arg0, %[[int2]] : !torch.vtensor<[3,4],f32>, !torch.int -> !torch.vtensor<[3,4],f32>
+# CHECK: return %[[buffer]] : !torch.vtensor<[3,4],f32>
+def test_single_input_const_argument():
+    class SingleConstantInputModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+
+        def forward(self, x, y=2):  # Single constant input
+            return x * y
+
+    m = fx.export_and_import(
+        SingleConstantInputModule(),
+        torch.randn(3, 4),
+        experimental_support_mutation=True,
+    )
+    print(m)
+    m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_single_output_const_argument
+# CHECK: %[[float1:.+]] = torch.constant.float 5.000000e-01
+# CHECK: %[[buffer:.+]] = torch.aten.mul.Scalar %arg0, %[[float1]]
+# CHECK: %[[float2:.+]] = torch.constant.float 5.000000e-01
+# CHECK: return %[[buffer]], %[[float2]] : !torch.vtensor<[3,4],f32>, !torch.float
+def test_single_output_const_argument():
+    class SingleConstantOutputModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.scale = 0.5  # Single constant output
+
+        def forward(self, x):
+            scaled = x * self.scale
+            return scaled, self.scale  # Return tensor + constant
+
+    m = fx.export_and_import(
+        SingleConstantOutputModule(),
+        torch.randn(3, 4),
+        experimental_support_mutation=True,
+    )
+    print(m)
+    m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_multiple_input_const_argument
+# CHECK: %[[float2:.+]] = torch.constant.float 2.000000e+00
+# CHECK: %[[buffer0:.+]] = torch.aten.mul.Scalar %arg0, %[[float2]] : !torch.vtensor<[3,4],f32>, !torch.float -> !torch.vtensor<[3,4],f32>
+# CHECK: %[[float3:.+]] = torch.constant.float 3.000000e+00
+# CHECK: %[[int1:.+]] = torch.constant.int 1
+# CHECK: %[[buffer1:.+]] = torch.aten.add.Scalar %[[buffer0]], %[[float3]], %[[int1]] : !torch.vtensor<[3,4],f32>, !torch.float, !torch.int -> !torch.vtensor<[3,4],f32>
+# CHECK: return %[[buffer1]] : !torch.vtensor<[3,4],f32>
+def test_multiple_input_const_argument():
+    class MultipleConstantInputModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+
+        def forward(
+            self, x, scale=2.0, offset=1.0, multiplier=3
+        ):  # Multiple constant inputs
+            return x * scale + offset * multiplier
+
+    m = fx.export_and_import(
+        MultipleConstantInputModule(),
+        torch.randn(3, 4),
+        experimental_support_mutation=True,
+    )
+    print(m)
+    m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_multiple_output_const_argument
+# CHECK: %[[float5:.+]] = torch.constant.float 5.000000e-01
+# CHECK: %[[buffer:.+]] = torch.aten.mul.Scalar %arg0, %[[float5]] : !torch.vtensor<[3,4],f32>, !torch.float -> !torch.vtensor<[3,4],f32>
+# CHECK: %[[str:.+]] = torch.constant.str "model"
+# CHECK: %[[int42:.+]] = torch.constant.int 42
+# CHECK: %[[true:.+]] = torch.constant.bool true
+# CHECK: %[[none:.+]] = torch.constant.none
+# CHECK: return %[[buffer]], %[[float5]]
+# CHECK-SAME: %[[str]], %[[int42]], %[[true]], %[[none]] : !torch.vtensor<[3,4],f32>, !torch.float, !torch.str, !torch.int, !torch.bool, !torch.none
+def test_multiple_output_const_argument():
+    class MultipleConstantOutputModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.scale = 0.5
+            self.name = "model"
+            self.version = 42
+
+        def forward(self, x):
+            result = x * self.scale
+            # Return tensor + multiple constants
+            return result, self.scale, self.name, self.version, True, None
+
+    m = fx.export_and_import(
+        MultipleConstantOutputModule(),
+        torch.randn(3, 4),
+        experimental_support_mutation=True,
+    )
+    print(m)
+    m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_input_output_const_argument
+# CHECK: %[[float5:.+]] = torch.constant.float 5.000000e-01
+# CHECK: %[[buffer0:.+]] = torch.aten.mul.Scalar %arg0, %[[float5]]
+# CHECK: %[[float2:.+]] = torch.constant.float 2.000000e+00
+# CHECK: %[[buffer1:.+]] = torch.aten.mul.Scalar %[[buffer0]], %[[float2]] : !torch.vtensor<[3,4],f32>, !torch.float -> !torch.vtensor<[3,4],f32>
+# CHECK: %[[float1:.+]] = torch.constant.float 1.000000e+00
+# CHECK: %[[int1:.+]] = torch.constant.int 1
+# CHECK: %[[buffer2:.+]] = torch.aten.add.Scalar %[[buffer1]], %[[float1]], %[[int1]]
+# CHECK: %[[str:.+]] = torch.constant.str "combined_model"
+# CHECK: %[[true:.+]] = torch.constant.bool true
+# CHECK: %[[none:.+]] = torch.constant.none
+# CHECK: return %[[buffer2]], %[[float5]]
+# CHECK-SAME: %[[str]]
+def test_input_output_const_argument():
+    class CombinedConstantModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.base_scale = 0.5
+            self.model_name = "combined_model"
+
+        def forward(self, x, user_scale=2.0, add_bias=True, bias_value=1.0):
+            if add_bias:
+                result = (x * self.base_scale * user_scale) + bias_value
+            else:
+                result = x * self.base_scale * user_scale
+
+            # Return mix of tensors and constants (both output and input)
+            return (
+                result,  # tensor
+                self.base_scale,  # constantArgument output
+                self.model_name,  # constantArgument output
+                user_scale,  # constantArgument input
+                add_bias,  # constantArgument input
+                bias_value,  # constantArgument input
+                None,  # constantArgument literal (output)
+            )
+
+    m = fx.export_and_import(
+        CombinedConstantModule(), torch.randn(3, 4), experimental_support_mutation=True
+    )
+    print(m)
+    m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_const_argument_edge_cases
+# CHECK: func.func @main(%arg0: !torch.vtensor<[3,4],f32>) ->
+# CHECK-SAME: (!torch.vtensor<[3,4],f32>, !torch.float, !torch.int, !torch.str, !torch.bool, !torch.none, !torch.none, !torch.str, !torch.int, !torch.bool)
+# CHECK: %[[float314:.+]] = torch.constant.float 3.140000e+00
+# CHECK: %[[buffer:.+]] = torch.aten.mul.Scalar %arg0, %[[float314]]
+# CHECK: %[[int42:.+]] = torch.constant.int 42
+# CHECK: %[[string1:.+]] = torch.constant.str "test"
+# CHECK: %[[true:.+]] = torch.constant.bool true
+# CHECK: %[[none:.+]] = torch.constant.none
+# CHECK: %[[string2:.+]] = torch.constant.str "default"
+# CHECK: %[[int0:.+]] = torch.constant.int 0
+# CHECK: %[[false:.+]] = torch.constant.bool false
+# CHECK: return %[[buffer]], %[[float314]]
+# CHECK-SAME: %[[int42]], %[[string1]], %[[true]], %[[none]], %[[none]]
+# CHECK-SAME: %[[string2]], %[[int0]], %[[false]]
def test_const_argument_edge_cases():
+    class EdgeCaseConstantModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.float_val = 3.14
+            self.int_val = 42
+            self.str_val = "test"
+            self.bool_val = True
+            self.none_val = None
+
+        def forward(self, x, input_none=None, input_str="default"):
+            result = x * self.float_val
+
+            # Return all different ConstantArgument types
+            return (
+                result,  # tensor
+                self.float_val,  # float output constantArgument
+                self.int_val,  # int output constantArgument
+                self.str_val,  # string output constantArgument
+                self.bool_val,  # bool output constantArgument
+                self.none_val,  # None output constantArgument
+                input_none,  # None input constantArgument
+                input_str,  # string input constantArgument
+                0,  # literal int
+                False,  # literal bool
+            )
+
+    m = fx.export_and_import(
+        EdgeCaseConstantModule(), torch.randn(3, 4), experimental_support_mutation=True
+    )
+    print(m)
+    m.operation.verify()
+
+
+@run
+# CHECK-LABEL: test_const_argument_from_multiheadattention_layer
+# CHECK: func.func @main(%arg0: !torch.vtensor<[1,10,64],f32>, %arg1: !torch.vtensor<[1,10,64],f32>, %arg2: !torch.vtensor<[1,10,64],f32>) ->
+# CHECK-SAME: (!torch.vtensor<[1,10,64],f32>, !torch.none)
+# CHECK: %[[int1:.+]] = torch.constant.int 1
+# CHECK: %[[int0:.+]] = torch.constant.int 0
+# CHECK-DAG: %[[buffer:.+]] = torch.aten.transpose.int %arg0, %[[int1]], %[[int0]] : !torch.vtensor<[1,10,64],f32>, !torch.int, !torch.int -> !torch.vtensor<[10,1,64],f32>
+def test_const_argument_from_multiheadattention_layer():
+    """
+    Test case using actual MultiheadAttention where a constantArgument appears automatically
+    due to returning the attention layer without the weights (need_weights=False)
+    """
+
+    class AttentionLikeConstantModule(torch.nn.Module):
+        def __init__(self):
+            super().__init__()
+            self.attn = torch.nn.MultiheadAttention(
+                embed_dim=64, num_heads=1, dropout=0.1, batch_first=True
+            )
+
+        def forward(self, query, key, value, need_weights=False):
+            return self.attn(query, key, value, need_weights=need_weights)
+
+    m = fx.export_and_import(
+        AttentionLikeConstantModule(),
+        torch.randn(1, 10, 64),  # query
+        torch.randn(1, 10, 64),  # key
+        torch.randn(1, 10, 64),  # value
+        experimental_support_mutation=True,
+    )
+    print(m)
+    m.operation.verify()
