diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py index 61cb0943..64774c97 100644 --- a/python/datafusion/dataframe.py +++ b/python/datafusion/dataframe.py @@ -643,6 +643,7 @@ def join( left_on: None = None, right_on: None = None, join_keys: None = None, + keep_duplicate_keys: bool = False, ) -> DataFrame: ... @overload @@ -655,6 +656,7 @@ def join( left_on: str | Sequence[str], right_on: str | Sequence[str], join_keys: tuple[list[str], list[str]] | None = None, + keep_duplicate_keys: bool = False, ) -> DataFrame: ... @overload @@ -667,6 +669,7 @@ def join( join_keys: tuple[list[str], list[str]], left_on: None = None, right_on: None = None, + keep_duplicate_keys: bool = False, ) -> DataFrame: ... def join( @@ -678,6 +681,7 @@ def join( left_on: str | Sequence[str] | None = None, right_on: str | Sequence[str] | None = None, join_keys: tuple[list[str], list[str]] | None = None, + keep_duplicate_keys: bool = False, ) -> DataFrame: """Join this :py:class:`DataFrame` with another :py:class:`DataFrame`. @@ -690,11 +694,23 @@ def join( "right", "full", "semi", "anti". left_on: Join column of the left dataframe. right_on: Join column of the right dataframe. + keep_duplicate_keys: When False, the columns from the right DataFrame + that have identical names in the ``on`` fields to the left DataFrame + will be dropped. join_keys: Tuple of two lists of column names to join on. [Deprecated] Returns: DataFrame after join. """ + if join_keys is not None: + warnings.warn( + "`join_keys` is deprecated, use `on` or `left_on` with `right_on`", + category=DeprecationWarning, + stacklevel=2, + ) + left_on = join_keys[0] + right_on = join_keys[1] + # This check is to prevent breaking API changes where users prior to # DF 43.0.0 would pass the join_keys as a positional argument instead # of a keyword argument. @@ -705,18 +721,10 @@ def join( and isinstance(on[1], list) ): # We know this is safe because we've checked the types - join_keys = on # type: ignore[assignment] + left_on = on[0] + right_on = on[1] on = None - if join_keys is not None: - warnings.warn( - "`join_keys` is deprecated, use `on` or `left_on` with `right_on`", - category=DeprecationWarning, - stacklevel=2, - ) - left_on = join_keys[0] - right_on = join_keys[1] - if on is not None: if left_on is not None or right_on is not None: error_msg = "`left_on` or `right_on` should not provided with `on`" @@ -735,7 +743,9 @@ def join( if isinstance(right_on, str): right_on = [right_on] - return DataFrame(self.df.join(right.df, how, left_on, right_on)) + return DataFrame( + self.df.join(right.df, how, left_on, right_on, keep_duplicate_keys) + ) def join_on( self, diff --git a/python/datafusion/dataframe_formatter.py b/python/datafusion/dataframe_formatter.py index 2323224b..ffafde53 100644 --- a/python/datafusion/dataframe_formatter.py +++ b/python/datafusion/dataframe_formatter.py @@ -368,7 +368,7 @@ def _build_table_container_start(self) -> list[str]: f"max-height: {self.max_height}px; overflow: auto; border: " '1px solid #ccc;">' ) - html.append('