Updated the comparison logic to handle sequences separately (#12251)

Juntian777 · facebook-github-bot · commit 321bc984410d · 2025-07-08T10:34:20.000-07:00
Summary: Pull Request resolved: #12251 Previously, the numerical comparators accepted any two inputs, whether or not they were sequences, and handled a list of tensors by stacking it into a single tensor before comparing. The comparators are now restricted to comparing two tensors at a time; sequences are handled outside the comparators by the new `compare_intermediate_outputs` helper, which compares them element by element and returns a list of results. Reviewed By: Gasoonjia Differential Revision: D77893628
diff --git a/devtools/inspector/_inspector.py b/devtools/inspector/_inspector.py
@@ -42,6 +42,7 @@
 from executorch.devtools.etrecord import ETRecord, parse_etrecord
 from executorch.devtools.inspector._inspector_utils import (
     calculate_time_scale_factor,
+    compare_intermediate_outputs,
     create_debug_handle_to_op_node_mapping,
     DebugHandle,
     display_or_print_df,
@@ -1415,8 +1416,8 @@ def calculate_numeric_gap(self, distance: str = "MSE") -> pd.DataFrame:
                         runtime_debug_handle, runtime_debug_handle_to_op_name
                     ),
                     "runtime_intermediate_output": runtime_intermediate_output,
-                    "gap": comparator.compare(
-                        aot_intermediate_output, runtime_intermediate_output
+                    "gap": compare_intermediate_outputs(
+                        aot_intermediate_output, runtime_intermediate_output, comparator
                     ),
                 }
             )
diff --git a/devtools/inspector/_inspector_utils.py b/devtools/inspector/_inspector_utils.py
@@ -762,32 +762,29 @@ def convert_to_float_tensor(input_data: Any) -> torch.Tensor:
     This function handles the following types of input:
     - Scalar (int or float): Converts to a tensor with a single element.
     - Tensor: Converts to a float64 tensor on CPU.
-    - Sequence of Tensors: Stacks the tensors into a single float64 tensor on CPU.
     The resulting tensor is detached, moved to CPU, and cast to torch.float64.
     Parameters:
-    input_data (Any): The input data to be converted to a tensor. It can be a scalar,
-                      a tensor, or a list of tensors.
+    input_data (Any): The input data to be converted to a tensor. It can be a scalar
+                      or a tensor.
     Returns:
     torch.Tensor: A tensor on CPU with dtype torch.float64.
-    Raises:
-    ValueError: If the input_data cannot be converted to a tensor.
+    Raises AssertionError if the input is a Sequence, and ValueError if it cannot be converted to a tensor.
     """
+    # Sequences are no longer stacked here; callers must compare them element by element
+    assert not isinstance(input_data, Sequence)
     try:
-        # Check if the input is a Sequence of tensors
-        if isinstance(input_data, Sequence):
-            input_tensor = torch.stack([convert_to_float_tensor(a) for a in input_data])
         # Try to convert the input to a tensor
-        else:
-            input_tensor = torch.as_tensor(input_data, dtype=torch.float64)
+        input_tensor = torch.as_tensor(input_data, dtype=torch.float64)
     except Exception as e:
         raise ValueError(
             f"Cannot convert value of type {type(input_data)} to a tensor: {e}"
         )
-    input_tensor = input_tensor.detach().cpu().double()
 
+    input_tensor = input_tensor.detach().cpu().double()
     # Convert NaN to 0.0
     if torch.isnan(input_tensor).any():
         input_tensor = torch.nan_to_num(input_tensor)
+
     return input_tensor
 
 
@@ -837,3 +834,33 @@ def find_op_names(
             result.append(op_name)
 
     return result
+
+
+def compare_intermediate_outputs(a: Any, b: Any, comparator) -> List[float]:
+    """
+    Compare two outputs, handling both sequence and non-sequence cases,
+    and return a list of comparison results.
+    Parameters:
+    a: The first intermediate output to compare.
+    b: The second intermediate output to compare.
+    comparator: A comparator object with a `compare` method.
+    Returns:
+    List[float]: One comparison result per compared pair (a single-element list for non-sequence inputs).
+    Raises:
+    ValueError: If one input is a sequence and the other is not, or if sequences have different lengths.
+    """
+    is_a_sequence = isinstance(a, Sequence)
+    is_b_sequence = isinstance(b, Sequence)
+    if is_a_sequence and is_b_sequence:
+        # Ensure both sequences have the same length
+        if len(a) != len(b):
+            raise ValueError("Sequences must have the same length for comparison.")
+
+        # Compare each element in the sequences and return the list of results
+        return [comparator.compare(x, y) for x, y in zip(a, b)]
+    elif not is_a_sequence and not is_b_sequence:
+        # Compare non-sequence items and return the result in a list
+        return [comparator.compare(a, b)]
+    else:
+        # Raise an error if one is a sequence and the other is not
+        raise ValueError("Both inputs must be sequences or both must be non-sequences.")
diff --git a/devtools/inspector/tests/inspector_test.py b/devtools/inspector/tests/inspector_test.py
@@ -636,22 +636,22 @@ def test_calculate_numeric_gap(self):
             for i, row in df.iterrows():
                 # Dummpy key to get the expected aot/runtime internmediate outputs
                 key = (i,)
-                # aot_intermediate_output should equal aot_intermediate_outputs[h]
+                # aot_intermediate_output should equal aot_intermediate_outputs[key]
                 self.assertTrue(
                     torch.allclose(
                         row["aot_intermediate_output"],
                         aot_intermediate_outputs[key],
                     )
                 )
-                # runtime_intermediate_output should equal runtime_intermediate_outputs[h]
+                # runtime_intermediate_output should equal runtime_intermediate_outputs[key]
                 self.assertTrue(
                     torch.allclose(
                         row["runtime_intermediate_output"],
                         runtime_intermediate_outputs[key],
                     )
                 )
                 # gap should equal 3.0
-                self.assertEqual(row["gap"], 3.0)
+                self.assertEqual(row["gap"][0], 3.0)
 
     def _gen_random_float_list(self) -> List[float]:
         return [random.uniform(0, 10) for _ in range(RAW_DATA_SIZE)]
diff --git a/devtools/inspector/tests/inspector_utils_test.py b/devtools/inspector/tests/inspector_utils_test.py
@@ -29,6 +29,7 @@
     calculate_mse,
     calculate_snr,
     calculate_time_scale_factor,
+    compare_intermediate_outputs,
     convert_to_float_tensor,
     create_debug_handle_to_op_node_mapping,
     EDGE_DIALECT_GRAPH_KEY,
@@ -42,6 +43,7 @@
     NodeFilter,
     TimeScale,
 )
+from executorch.devtools.inspector.numerical_comparator import L1Comparator
 
 
 class TestInspectorUtils(unittest.TestCase):
@@ -420,19 +422,10 @@ def test_convert_input_to_tensor_convertible_inputs(self):
         )
         self.assertEqual(actual_output2.device.type, "cpu")
 
-        # List of tensors -> stacked tensor float32 CPU
+        # List of tensors -> AssertionError
         t_list = [torch.tensor([1, 2]), torch.tensor([2, 3]), torch.tensor([3, 4])]
-        actual_output3 = convert_to_float_tensor(t_list)
-        self.assertIsInstance(actual_output3, torch.Tensor)
-        self.assertEqual(actual_output3.dtype, torch.float64)
-        self.assertEqual(tuple(actual_output3.shape), (3, 2))
-        self.assertTrue(
-            torch.allclose(
-                actual_output3,
-                torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]], dtype=torch.float64),
-            )
-        )
-        self.assertEqual(actual_output3.device.type, "cpu")
+        with self.assertRaises(AssertionError):
+            convert_to_float_tensor(t_list)
 
     def test_convert_input_to_tensor_non_convertible_raises(self):
         class X:
@@ -566,6 +559,24 @@ def test_find_op_names_matching_handles(self):
             find_op_names(debug_handle, debug_handle_to_op_name), ["op1", "op2"]
         )
 
+    def test_compare_intermediate_outputs_sequences(self):
+        a = [1.0, 2.0, 3.0]
+        b = [1.0, 2.5, 3.5]
+        result = compare_intermediate_outputs(a, b, L1Comparator())
+        self.assertEqual(result, [0.0, 0.5, 0.5])
+
+    def test_compare_intermediate_outputs_diff_len_sequences(self):
+        a = [1.0, 2.0]
+        b = [1.0, 2.0, 3.0]
+        with self.assertRaises(ValueError):
+            compare_intermediate_outputs(a, b, L1Comparator())
+
+    def test_compare_intermediate_outputs_sequence_and_non_sequence(self):
+        a = [1.0, 2.0]
+        b = 1.0
+        with self.assertRaises(ValueError):
+            compare_intermediate_outputs(a, b, L1Comparator())
+
 
 def gen_mock_operator_graph_with_expected_map() -> (
     Tuple[OperatorGraph, Dict[int, OperatorNode]]
diff --git a/devtools/inspector/tests/l1_comparator_test.py b/devtools/inspector/tests/l1_comparator_test.py
@@ -47,10 +47,3 @@ def test_2D_tensors(self):
         expected = 14.0
         result = self.l1_comparator.compare(a, b)
         self.assertAlmostEqual(result, expected)
-
-    def test_list_of_tensors(self):
-        a = [torch.tensor([2, 4]), torch.tensor([5, 2])]
-        b = [torch.tensor([1, 2]), torch.tensor([3, 5])]
-        expected = 8.0
-        result = self.l1_comparator.compare(a, b)
-        self.assertAlmostEqual(result, expected)
diff --git a/devtools/inspector/tests/mse_comparator_test.py b/devtools/inspector/tests/mse_comparator_test.py
@@ -47,10 +47,3 @@ def test_2D_tensors(self):
         expected = (9.0 + 49.0 + 9.0 + 36.0) / 4.0
         result = self.mse_comparator.compare(a, b)
         self.assertAlmostEqual(result, expected)
-
-    def test_list_of_tensors(self):
-        a = [torch.tensor([2, 4]), torch.tensor([15, 2])]
-        b = [torch.tensor([1, 2]), torch.tensor([9, 5])]
-        expected = (1.0 + 4.0 + 36.0 + 9.0) / 4.0
-        result = self.mse_comparator.compare(a, b)
-        self.assertAlmostEqual(result, expected)
diff --git a/devtools/inspector/tests/snr_comparator_test.py b/devtools/inspector/tests/snr_comparator_test.py
@@ -50,13 +50,3 @@ def test_2D_tensors(self):
         expected = 10 * math.log10(37.25 / 17.0)
         result = self.snr_comparator.compare(a, b)
         self.assertAlmostEqual(result, expected)
-
-    def test_list_of_tensors(self):
-        # original_power = mean(4, 16, 25, 4]) = 12.25
-        # error = a - b = [1, 2, 2, -3] squared = [1, 4, 4, 9] mean = 18/4 = 4.5
-        # SNR = 10 * log10(37.25/17.0)
-        a = [torch.tensor([2, 4]), torch.tensor([5, 2])]
-        b = [torch.tensor([1, 2]), torch.tensor([3, 5])]
-        expected = 10 * math.log10(12.25 / 4.5)
-        result = self.snr_comparator.compare(a, b)
-        self.assertAlmostEqual(result, expected)