From e4aa9b65ce04a88b8996df9ed39971ee7b189b63 Mon Sep 17 00:00:00 2001 From: RITAMIT2023 Date: Mon, 23 Jun 2025 16:38:03 +0530 Subject: [PATCH 1/2] Raise ValueError on mismatched signed/unsigned int merge keys --- pandas/core/reshape/merge.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 34f3e2c626378..b7ef9ac113cfb 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1670,6 +1670,17 @@ def _maybe_coerce_merge_keys(self) -> None: lk = extract_array(lk, extract_numpy=True) rk = extract_array(rk, extract_numpy=True) + # Explicitly disallow merging int64 and uint64 (or vice versa) + if ( + (lk.dtype == np.dtype("int64") and rk.dtype == np.dtype("uint64")) + or (lk.dtype == np.dtype("uint64") and rk.dtype == np.dtype("int64")) + ): + raise ValueError( + f"You are trying to merge on int64 and uint64 columns for key '{name}'. " + "This is not allowed as it can lead to incorrect results. " + "Please cast both columns to the same signedness before merging." 
+ ) + lk_is_cat = isinstance(lk.dtype, CategoricalDtype) rk_is_cat = isinstance(rk.dtype, CategoricalDtype) lk_is_object_or_string = is_object_dtype(lk.dtype) or is_string_dtype( From bc802704251fd03d0dfef647fcb918fc523fb89f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 23 Jun 2025 11:18:33 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/core/reshape/merge.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index b7ef9ac113cfb..9da6c6dec0c72 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1671,9 +1671,8 @@ def _maybe_coerce_merge_keys(self) -> None: rk = extract_array(rk, extract_numpy=True) # Explicitly disallow merging int64 and uint64 (or vice versa) - if ( - (lk.dtype == np.dtype("int64") and rk.dtype == np.dtype("uint64")) - or (lk.dtype == np.dtype("uint64") and rk.dtype == np.dtype("int64")) + if (lk.dtype == np.dtype("int64") and rk.dtype == np.dtype("uint64")) or ( + lk.dtype == np.dtype("uint64") and rk.dtype == np.dtype("int64") ): raise ValueError( f"You are trying to merge on int64 and uint64 columns for key '{name}'. "