Skip to content

Commit c49ed5e

Browse files
committed
Add "force_suffixes" flag to pd.merge
1 parent 09e6754 commit c49ed5e

File tree

3 files changed: +63 -3 lines changed

3 files changed: +63 -3 lines changed

pandas/core/frame.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11046,6 +11046,7 @@ def merge(
1104611046
right_index: bool = False,
1104711047
sort: bool = False,
1104811048
suffixes: Suffixes = ("_x", "_y"),
11049+
force_suffixes: bool = False,
1104911050
copy: bool | lib.NoDefault = lib.no_default,
1105011051
indicator: str | bool = False,
1105111052
validate: MergeValidate | None = None,
@@ -11065,6 +11066,7 @@ def merge(
1106511066
right_index=right_index,
1106611067
sort=sort,
1106711068
suffixes=suffixes,
11069+
force_suffixes=force_suffixes,
1106811070
indicator=indicator,
1106911071
validate=validate,
1107011072
)

pandas/core/reshape/merge.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ def merge(
153153
right_index: bool = False,
154154
sort: bool = False,
155155
suffixes: Suffixes = ("_x", "_y"),
156+
force_suffixes: bool = False,
156157
copy: bool | lib.NoDefault = lib.no_default,
157158
indicator: str | bool = False,
158159
validate: str | None = None,
@@ -395,6 +396,7 @@ def merge(
395396
right_index=right_index,
396397
sort=sort,
397398
suffixes=suffixes,
399+
force_suffixes=force_suffixes,
398400
indicator=indicator,
399401
validate=validate,
400402
)
@@ -411,6 +413,7 @@ def _cross_merge(
411413
right_index: bool = False,
412414
sort: bool = False,
413415
suffixes: Suffixes = ("_x", "_y"),
416+
force_suffixes: bool = False,
414417
indicator: str | bool = False,
415418
validate: str | None = None,
416419
) -> DataFrame:
@@ -447,6 +450,7 @@ def _cross_merge(
447450
right_index=right_index,
448451
sort=sort,
449452
suffixes=suffixes,
453+
force_suffixes=force_suffixes,
450454
indicator=indicator,
451455
validate=validate,
452456
)
@@ -966,6 +970,7 @@ def __init__(
966970
right_index: bool = False,
967971
sort: bool = True,
968972
suffixes: Suffixes = ("_x", "_y"),
973+
force_suffixes: bool = False,
969974
indicator: str | bool = False,
970975
validate: str | None = None,
971976
) -> None:
@@ -978,6 +983,8 @@ def __init__(
978983
self.on = com.maybe_make_list(on)
979984

980985
self.suffixes = suffixes
986+
self.force_suffixes = force_suffixes
987+
981988
self.sort = sort or how == "outer"
982989

983990
self.left_index = left_index
@@ -1088,7 +1095,7 @@ def _reindex_and_concat(
10881095
right = self.right[:]
10891096

10901097
llabels, rlabels = _items_overlap_with_suffix(
1091-
self.left._info_axis, self.right._info_axis, self.suffixes
1098+
self.left._info_axis, self.right._info_axis, self.suffixes, self.force_suffixes
10921099
)
10931100

10941101
if left_indexer is not None and not is_range_indexer(left_indexer, len(left)):
@@ -3007,7 +3014,7 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame:
30073014

30083015

30093016
def _items_overlap_with_suffix(
3010-
left: Index, right: Index, suffixes: Suffixes
3017+
left: Index, right: Index, suffixes: Suffixes, force_suffixes: bool = False
30113018
) -> tuple[Index, Index]:
30123019
"""
30133020
Suffixes type validation.
@@ -3023,7 +3030,11 @@ def _items_overlap_with_suffix(
30233030
"Provide 'suffixes' as a tuple instead."
30243031
)
30253032

3026-
to_rename = left.intersection(right)
3033+
if not force_suffixes:
3034+
to_rename = left.intersection(right)
3035+
else:
3036+
to_rename = left.union(right)
3037+
30273038
if len(to_rename) == 0:
30283039
return left, right
30293040

pandas/tests/reshape/merge/test_merge.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,6 +2369,52 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols):
23692369
tm.assert_frame_equal(result, expected)
23702370

23712371

2372+
@pytest.mark.parametrize("force_suffixes", [False, True])
2373+
def test_merge_suffix_with_force_simple(force_suffixes):
2374+
a = DataFrame({"A": [1, 2, 3, 98], "B": [4, 5, 6, 99], "ALPHABET": ["A", "B", "C", "Z"]})
2375+
b = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "alphabet": ["a", "b", "c"]})
2376+
2377+
if force_suffixes:
2378+
expected = DataFrame([[1, 4, "A", 1, 4, "a"], [2, 5, "B", 2, 5, "b"], [3, 6, "C", 3, 6, "c"]], columns=["A_x", "B_x", "ALPHABET_x", "a_y", "b_y", "alphabet_y"])
2379+
else:
2380+
expected = DataFrame([[1, 4, "A", 1, 4, "a"], [2, 5, "B", 2, 5, "b"], [3, 6, "C", 3, 6, "c"]], columns=["A", "B", "ALPHABET", "a", "b", "alphabet"])
2381+
2382+
result = merge(a, b, left_on=["A", "B"], right_on=["a", "b"],
2383+
force_suffixes=force_suffixes)
2384+
tm.assert_frame_equal(result, expected)
2385+
2386+
@pytest.mark.parametrize(
2387+
"col1, col2, kwargs, expected_cols",
2388+
[
2389+
(0, 0, {"suffixes": ("", "_dup")}, ["0", "0_dup"]),
2390+
(0, 0, {"suffixes": (None, "_dup")}, [0, "0_dup"]),
2391+
(0, 0, {"suffixes": ("_x", "_y")}, ["0_x", "0_y"]),
2392+
(0, 0, {"suffixes": ["_x", "_y"]}, ["0_x", "0_y"]),
2393+
("a", 0, {"suffixes": (None, "_y")}, ["a", "0_y"]),
2394+
(0.0, 0.0, {"suffixes": ("_x", None)}, ["0.0_x", 0.0]),
2395+
("b", "b", {"suffixes": (None, "_y")}, ["b", "b_y"]),
2396+
("a", "a", {"suffixes": ("_x", None)}, ["a_x", "a"]),
2397+
("a", "b", {"suffixes": ("_x", None)}, ["a_x", "b"]),
2398+
("a", "a", {"suffixes": (None, "_x")}, ["a", "a_x"]),
2399+
(0, 0, {"suffixes": ("_a", None)}, ["0_a", 0]),
2400+
("a", "a", {}, ["a_x", "a_y"]),
2401+
(0, 0, {}, ["0_x", "0_y"]),
2402+
],
2403+
)
2404+
def test_merge_suffix_with_force(col1, col2, kwargs, expected_cols):
2405+
# issue: 24782
2406+
a = DataFrame({col1: [1, 2, 3]})
2407+
b = DataFrame({col2: [4, 5, 6]})
2408+
2409+
expected = DataFrame([[1, 4], [2, 5], [3, 6]], columns=expected_cols)
2410+
2411+
result = a.merge(b, left_index=True, right_index=True, force_suffixes=True, **kwargs)
2412+
tm.assert_frame_equal(result, expected)
2413+
2414+
result = merge(a, b, left_index=True, right_index=True, force_suffixes=True, **kwargs)
2415+
tm.assert_frame_equal(result, expected)
2416+
2417+
23722418
@pytest.mark.parametrize(
23732419
"how,expected",
23742420
[
@@ -2577,6 +2623,7 @@ def test_categorical_non_unique_monotonic(n_categories):
25772623
tm.assert_frame_equal(expected, result)
25782624

25792625

2626+
25802627
def test_merge_join_categorical_multiindex():
25812628
# From issue 16627
25822629
a = {

0 commit comments

Comments (0)