Skip to content

Commit 1d188fd

Browse files
petern48 and ksbeyer
authored and committed
[SPARK-52563][PS] Fix var naming bug in _assert_pandas_almost_equal
### What changes were proposed in this pull request?
Small bug fix where the wrong variable names were used.

### Why are the changes needed?
The function uses `lval` and `rval` instead of the parameters `val1` and `val2`.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?

### Was this patch authored or co-authored using generative AI tooling?
No

Closes apache#51253 from petern48/pandas_assert_bug.

Authored-by: Peter Nguyen <petern0408@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent 7db6eef commit 1d188fd

File tree

2 files changed

+56
-2
lines changed

2 files changed

+56
-2
lines changed

python/pyspark/pandas/tests/test_utils.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,60 @@ def test_index_error_assert_pandas_equal(self):
163163
},
164164
)
165165

166+
def test_dataframe_error_assert_pandas_almost_equal(self):
    # Two frames that differ only in the second value of column "a";
    # the comparison is expected to fail with DIFFERENT_PANDAS_DATAFRAME.
    left_pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    right_pdf = pd.DataFrame({"a": [1, 3, 3], "b": [4, 5, 6]})

    # NOTE(review): the third positional argument is passed as `True`;
    # confirm which parameter of _assert_pandas_almost_equal it binds to.
    with self.assertRaises(PySparkAssertionError) as raised:
        _assert_pandas_almost_equal(left_pdf, right_pdf, True)

    # The error is expected to carry the rendered frames and their dtypes.
    expected_params = {
        "left": left_pdf.to_string(),
        "left_dtype": str(left_pdf.dtypes),
        "right": right_pdf.to_string(),
        "right_dtype": str(right_pdf.dtypes),
    }
    self.check_error(
        exception=raised.exception,
        errorClass="DIFFERENT_PANDAS_DATAFRAME",
        messageParameters=expected_params,
    )
183+
184+
def test_series_error_assert_pandas_almost_equal(self):
    # Renamed from `test_series_error_assert_pandas_equal`: this test
    # exercises _assert_pandas_almost_equal, and the name now matches its
    # dataframe/index siblings. A later method with the same name as an
    # existing test in the class would silently replace it.
    series1 = pd.Series([1, 2, 3])
    series2 = pd.Series([4, 5, 6])

    # NOTE(review): the third positional argument is passed as `True`;
    # confirm which parameter of _assert_pandas_almost_equal it binds to.
    with self.assertRaises(PySparkAssertionError) as pe:
        _assert_pandas_almost_equal(series1, series2, True)

    # The error is expected to carry the rendered series and their dtypes.
    self.check_error(
        exception=pe.exception,
        errorClass="DIFFERENT_PANDAS_SERIES",
        messageParameters={
            "left": series1.to_string(),
            "left_dtype": str(series1.dtype),
            "right": series2.to_string(),
            "right_dtype": str(series2.dtype),
        },
    )
201+
202+
def test_index_error_assert_pandas_almost_equal(self):
    # Two indexes with no values in common; the comparison is expected to
    # fail with DIFFERENT_PANDAS_INDEX.
    left_index = pd.Index([1, 2, 3])
    right_index = pd.Index([4, 5, 6])

    # NOTE(review): the third positional argument is passed as `True`;
    # confirm which parameter of _assert_pandas_almost_equal it binds to.
    with self.assertRaises(PySparkAssertionError) as raised:
        _assert_pandas_almost_equal(left_index, right_index, True)

    # Unlike the dataframe/series tests, the Index objects themselves
    # (not a string rendering) are passed as the expected "left"/"right".
    expected_params = {
        "left": left_index,
        "left_dtype": str(left_index.dtype),
        "right": right_index,
        "right_dtype": str(right_index.dtype),
    }
    self.check_error(
        exception=raised.exception,
        errorClass="DIFFERENT_PANDAS_INDEX",
        messageParameters=expected_params,
    )
219+
166220
def test_multiindex_error_assert_pandas_almost_equal(self):
167221
pdf1 = pd.DataFrame({"a": [1, 2], "b": [4, 10]}, index=[0, 1])
168222
pdf2 = pd.DataFrame({"a": [1, 5, 3], "b": [1, 5, 6]}, index=[0, 1, 3])

python/pyspark/testing/pandasutils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ def _assert_pandas_almost_equal(
127127

128128
def compare_vals_approx(val1, val2):
129129
# compare vals for approximate equality
130-
if isinstance(lval, (float, decimal.Decimal)) or isinstance(rval, (float, decimal.Decimal)):
131-
if abs(float(lval) - float(rval)) > (atol + rtol * abs(float(rval))):
130+
if isinstance(val1, (float, decimal.Decimal)) or isinstance(val2, (float, decimal.Decimal)):
131+
if abs(float(val1) - float(val2)) > (atol + rtol * abs(float(val2))):
132132
return False
133133
elif val1 != val2:
134134
return False

0 commit comments

Comments
 (0)