[SPARK-52563][PS] Fix var naming bug in _assert_pandas_almost_equal

petern48 · ksbeyer · commit 1d188fd400de · 2025-07-14T13:40:57.000-07:00
### What changes were proposed in this pull request? Small bug fix in `compare_vals_approx` (the helper nested inside `_assert_pandas_almost_equal`), where the wrong variable names were used. ### Why are the changes needed? The function body referred to `lval` and `rval` instead of its own parameters `val1` and `val2`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests in `python/pyspark/pandas/tests/test_utils.py` for the DataFrame, Series, and Index error paths of `_assert_pandas_almost_equal`. ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#51253 from petern48/pandas_assert_bug. Authored-by: Peter Nguyen <petern0408@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
diff --git a/python/pyspark/pandas/tests/test_utils.py b/python/pyspark/pandas/tests/test_utils.py
@@ -163,6 +163,60 @@ def test_index_error_assert_pandas_equal(self):
             },
         )
 
+    def test_dataframe_error_assert_pandas_almost_equal(self):
+        pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        pdf2 = pd.DataFrame({"a": [1, 3, 3], "b": [4, 5, 6]})  # differs from pdf1 only at a[1]
+
+        with self.assertRaises(PySparkAssertionError) as pe:  # the comparison must raise
+            _assert_pandas_almost_equal(pdf1, pdf2, True)
+
+        self.check_error(  # verify the error class and the exact message parameters
+            exception=pe.exception,
+            errorClass="DIFFERENT_PANDAS_DATAFRAME",
+            messageParameters={
+                "left": pdf1.to_string(),  # full text rendering of the frame
+                "left_dtype": str(pdf1.dtypes),  # per-column dtypes, stringified
+                "right": pdf2.to_string(),
+                "right_dtype": str(pdf2.dtypes),
+            },
+        )
+
+    def test_series_error_assert_pandas_almost_equal(self):
+        series1 = pd.Series([1, 2, 3])
+        series2 = pd.Series([4, 5, 6])  # differs from series1 at every position
+
+        with self.assertRaises(PySparkAssertionError) as pe:  # the comparison must raise
+            _assert_pandas_almost_equal(series1, series2, True)
+
+        self.check_error(  # verify the error class and the exact message parameters
+            exception=pe.exception,
+            errorClass="DIFFERENT_PANDAS_SERIES",
+            messageParameters={
+                "left": series1.to_string(),  # full text rendering of the series
+                "left_dtype": str(series1.dtype),  # single dtype, stringified
+                "right": series2.to_string(),
+                "right_dtype": str(series2.dtype),
+            },
+        )
+
+    def test_index_error_assert_pandas_almost_equal(self):
+        index1 = pd.Index([1, 2, 3])
+        index2 = pd.Index([4, 5, 6])  # differs from index1 at every position
+
+        with self.assertRaises(PySparkAssertionError) as pe:  # the comparison must raise
+            _assert_pandas_almost_equal(index1, index2, True)
+
+        self.check_error(  # verify the error class and the exact message parameters
+            exception=pe.exception,
+            errorClass="DIFFERENT_PANDAS_INDEX",
+            messageParameters={
+                "left": index1,  # the Index object itself, not a string rendering
+                "left_dtype": str(index1.dtype),  # single dtype, stringified
+                "right": index2,
+                "right_dtype": str(index2.dtype),
+            },
+        )
+
     def test_multiindex_error_assert_pandas_almost_equal(self):
         pdf1 = pd.DataFrame({"a": [1, 2], "b": [4, 10]}, index=[0, 1])
         pdf2 = pd.DataFrame({"a": [1, 5, 3], "b": [1, 5, 6]}, index=[0, 1, 3])
diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py
@@ -127,8 +127,8 @@ def _assert_pandas_almost_equal(
 
     def compare_vals_approx(val1, val2):
         # compare vals for approximate equality
-        if isinstance(lval, (float, decimal.Decimal)) or isinstance(rval, (float, decimal.Decimal)):
-            if abs(float(lval) - float(rval)) > (atol + rtol * abs(float(rval))):
+        if isinstance(val1, (float, decimal.Decimal)) or isinstance(val2, (float, decimal.Decimal)):
+            if abs(float(val1) - float(val2)) > (atol + rtol * abs(float(val2))):
                 return False
         elif val1 != val2:
             return False