Skip to content

Commit 218798c

Browse files
committed
Add "force_suffixes" flag to pd.merge
1 parent 3550556 commit 218798c

File tree

3 files changed

+63
-3
lines changed

3 files changed

+63
-3
lines changed

pandas/core/frame.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11102,6 +11102,7 @@ def merge(
1110211102
right_index: bool = False,
1110311103
sort: bool = False,
1110411104
suffixes: Suffixes = ("_x", "_y"),
11105+
force_suffixes: bool = False,
1110511106
copy: bool | lib.NoDefault = lib.no_default,
1110611107
indicator: str | bool = False,
1110711108
validate: MergeValidate | None = None,
@@ -11121,6 +11122,7 @@ def merge(
1112111122
right_index=right_index,
1112211123
sort=sort,
1112311124
suffixes=suffixes,
11125+
force_suffixes=force_suffixes,
1112411126
indicator=indicator,
1112511127
validate=validate,
1112611128
)

pandas/core/reshape/merge.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def merge(
154154
right_index: bool = False,
155155
sort: bool = False,
156156
suffixes: Suffixes = ("_x", "_y"),
157+
force_suffixes: bool = False,
157158
copy: bool | lib.NoDefault = lib.no_default,
158159
indicator: str | bool = False,
159160
validate: str | None = None,
@@ -396,6 +397,7 @@ def merge(
396397
right_index=right_index,
397398
sort=sort,
398399
suffixes=suffixes,
400+
force_suffixes=force_suffixes,
399401
indicator=indicator,
400402
validate=validate,
401403
)
@@ -412,6 +414,7 @@ def _cross_merge(
412414
right_index: bool = False,
413415
sort: bool = False,
414416
suffixes: Suffixes = ("_x", "_y"),
417+
force_suffixes: bool = False,
415418
indicator: str | bool = False,
416419
validate: str | None = None,
417420
) -> DataFrame:
@@ -448,6 +451,7 @@ def _cross_merge(
448451
right_index=right_index,
449452
sort=sort,
450453
suffixes=suffixes,
454+
force_suffixes=force_suffixes,
451455
indicator=indicator,
452456
validate=validate,
453457
)
@@ -967,6 +971,7 @@ def __init__(
967971
right_index: bool = False,
968972
sort: bool = True,
969973
suffixes: Suffixes = ("_x", "_y"),
974+
force_suffixes: bool = False,
970975
indicator: str | bool = False,
971976
validate: str | None = None,
972977
) -> None:
@@ -979,6 +984,8 @@ def __init__(
979984
self.on = com.maybe_make_list(on)
980985

981986
self.suffixes = suffixes
987+
self.force_suffixes = force_suffixes
988+
982989
self.sort = sort or how == "outer"
983990

984991
self.left_index = left_index
@@ -1089,7 +1096,7 @@ def _reindex_and_concat(
10891096
right = self.right[:]
10901097

10911098
llabels, rlabels = _items_overlap_with_suffix(
1092-
self.left._info_axis, self.right._info_axis, self.suffixes
1099+
self.left._info_axis, self.right._info_axis, self.suffixes, self.force_suffixes
10931100
)
10941101

10951102
if left_indexer is not None and not is_range_indexer(left_indexer, len(left)):
@@ -3013,7 +3020,7 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame:
30133020

30143021

30153022
def _items_overlap_with_suffix(
3016-
left: Index, right: Index, suffixes: Suffixes
3023+
left: Index, right: Index, suffixes: Suffixes, force_suffixes: bool = False
30173024
) -> tuple[Index, Index]:
30183025
"""
30193026
Suffixes type validation.
@@ -3029,7 +3036,11 @@ def _items_overlap_with_suffix(
30293036
"Provide 'suffixes' as a tuple instead."
30303037
)
30313038

3032-
to_rename = left.intersection(right)
3039+
if not force_suffixes:
3040+
to_rename = left.intersection(right)
3041+
else:
3042+
to_rename = left.union(right)
3043+
30333044
if len(to_rename) == 0:
30343045
return left, right
30353046

pandas/tests/reshape/merge/test_merge.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2369,6 +2369,52 @@ def test_merge_suffix(col1, col2, kwargs, expected_cols):
23692369
tm.assert_frame_equal(result, expected)
23702370

23712371

2372+
@pytest.mark.parametrize("force_suffixes", [False, True])
def test_merge_suffix_with_force_simple(force_suffixes):
    """
    Merge two frames that share no column labels, with and without
    ``force_suffixes``.

    With ``force_suffixes=False`` the expected column labels are unchanged;
    with ``force_suffixes=True`` every left label is expected to gain ``_x``
    and every right label ``_y``, even though nothing overlaps.
    """
    left = DataFrame(
        {
            "A": [1, 2, 3, 98],
            "B": [4, 5, 6, 99],
            "ALPHABET": ["A", "B", "C", "Z"],
        }
    )
    right = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "alphabet": ["a", "b", "c"]})

    # The expected rows are the same in both modes; only the labels differ.
    left_labels = ["A", "B", "ALPHABET"]
    right_labels = ["a", "b", "alphabet"]
    if force_suffixes:
        left_labels = [f"{label}_x" for label in left_labels]
        right_labels = [f"{label}_y" for label in right_labels]

    expected = DataFrame(
        [
            [1, 4, "A", 1, 4, "a"],
            [2, 5, "B", 2, 5, "b"],
            [3, 6, "C", 3, 6, "c"],
        ],
        columns=left_labels + right_labels,
    )

    result = merge(
        left,
        right,
        left_on=["A", "B"],
        right_on=["a", "b"],
        force_suffixes=force_suffixes,
    )
    tm.assert_frame_equal(result, expected)
2385+
2386+
@pytest.mark.parametrize(
    "col1, col2, kwargs, expected_cols",
    [
        (0, 0, {"suffixes": ("", "_dup")}, ["0", "0_dup"]),
        (0, 0, {"suffixes": (None, "_dup")}, [0, "0_dup"]),
        (0, 0, {"suffixes": ("_x", "_y")}, ["0_x", "0_y"]),
        (0, 0, {"suffixes": ["_x", "_y"]}, ["0_x", "0_y"]),
        ("a", 0, {"suffixes": (None, "_y")}, ["a", "0_y"]),
        (0.0, 0.0, {"suffixes": ("_x", None)}, ["0.0_x", 0.0]),
        ("b", "b", {"suffixes": (None, "_y")}, ["b", "b_y"]),
        ("a", "a", {"suffixes": ("_x", None)}, ["a_x", "a"]),
        ("a", "b", {"suffixes": ("_x", None)}, ["a_x", "b"]),
        ("a", "a", {"suffixes": (None, "_x")}, ["a", "a_x"]),
        (0, 0, {"suffixes": ("_a", None)}, ["0_a", 0]),
        ("a", "a", {}, ["a_x", "a_y"]),
        (0, 0, {}, ["0_x", "0_y"]),
    ],
)
def test_merge_suffix_with_force(col1, col2, kwargs, expected_cols):
    """
    Index-on-index merge of two single-column frames with
    ``force_suffixes=True``.

    Each case supplies the two column labels, optional ``suffixes`` kwargs
    and the column labels expected in the result. The same expectation is
    checked through both ``DataFrame.merge`` and the top-level ``merge``.
    """
    # issue: 24782
    # NOTE(review): confirm this issue number is the right reference for the
    # force_suffixes behaviour.
    left = DataFrame({col1: [1, 2, 3]})
    right = DataFrame({col2: [4, 5, 6]})

    expected = DataFrame([[1, 4], [2, 5], [3, 6]], columns=expected_cols)

    # Method form.
    via_method = left.merge(
        right, left_index=True, right_index=True, force_suffixes=True, **kwargs
    )
    tm.assert_frame_equal(via_method, expected)

    # Top-level function form.
    via_function = merge(
        left, right, left_index=True, right_index=True, force_suffixes=True, **kwargs
    )
    tm.assert_frame_equal(via_function, expected)
2416+
2417+
23722418
@pytest.mark.parametrize(
23732419
"how,expected",
23742420
[
@@ -2577,6 +2623,7 @@ def test_categorical_non_unique_monotonic(n_categories):
25772623
tm.assert_frame_equal(expected, result)
25782624

25792625

2626+
25802627
def test_merge_join_categorical_multiindex():
25812628
# From issue 16627
25822629
a = {

0 commit comments

Comments
 (0)