Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/docs/source/reference/pyspark.sql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ Date and Timestamp Functions
timestamp_micros
timestamp_millis
timestamp_seconds
time_diff
to_date
to_time
to_timestamp
Expand Down
7 changes: 7 additions & 0 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3643,6 +3643,13 @@ def timestamp_seconds(col: "ColumnOrName") -> Column:
# Copy the docstring from pysparkfuncs.timestamp_seconds so this function exposes the same text.
timestamp_seconds.__doc__ = pysparkfuncs.timestamp_seconds.__doc__


def time_diff(unit: str, start: "ColumnOrName", end: "ColumnOrName") -> Column:
    # `unit` is a plain Python string, so it is wrapped in a literal column
    # before being handed over together with the two time columns.
    unit_col = lit(unit)
    return _invoke_function_over_columns("time_diff", unit_col, start, end)


# Copy the docstring from pysparkfuncs.time_diff so this function exposes the same text.
time_diff.__doc__ = pysparkfuncs.time_diff.__doc__


def timestamp_millis(col: "ColumnOrName") -> Column:
    # Thin wrapper: forwards `col` unchanged to the "timestamp_millis" function.
    converted = _invoke_function_over_columns("timestamp_millis", col)
    return converted

Expand Down
1 change: 1 addition & 0 deletions python/pyspark/sql/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@
"timestamp_micros",
"timestamp_millis",
"timestamp_seconds",
"time_diff",
"to_date",
"to_time",
"to_timestamp",
Expand Down
57 changes: 57 additions & 0 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12702,6 +12702,63 @@ def timestamp_seconds(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("timestamp_seconds", col)


@_try_remote_functions
def time_diff(unit: str, start: "ColumnOrName", end: "ColumnOrName") -> Column:
    """
    Returns the difference between two times, measured in specified units.

    .. versionadded:: 4.1.0

    Parameters
    ----------
    unit : literal string
        The unit in which the difference is measured. Supported units are: "HOUR", "MINUTE",
        "SECOND", "MILLISECOND", and "MICROSECOND". The unit is case-insensitive.
    start : :class:`~pyspark.sql.Column` or column name
        A starting time.
    end : :class:`~pyspark.sql.Column` or column name
        An ending time.

    Returns
    -------
    :class:`~pyspark.sql.Column`
        The difference between two times, in the specified units.

    See Also
    --------
    :meth:`pyspark.sql.functions.date_diff`
    :meth:`pyspark.sql.functions.timestamp_diff`

    Examples
    --------
    >>> from pyspark.sql import functions as sf
    >>> df = (
    ...     spark.createDataFrame([("20:30:29", "21:30:28")], ["start", "end"])
    ...     .withColumn("start", sf.col("start").cast("time"))
    ...     .withColumn("end", sf.col("end").cast("time"))
    ... )
    >>> df.select('*', sf.time_diff('HOUR', 'start', 'end')).show()
    +--------+--------+---------------------------+
    |   start|     end|time_diff(HOUR, start, end)|
    +--------+--------+---------------------------+
    |20:30:29|21:30:28|                          0|
    +--------+--------+---------------------------+
    >>> df.select('*', sf.time_diff('MINUTE', 'start', 'end')).show()
    +--------+--------+-----------------------------+
    |   start|     end|time_diff(MINUTE, start, end)|
    +--------+--------+-----------------------------+
    |20:30:29|21:30:28|                           59|
    +--------+--------+-----------------------------+
    """
    # Invoke "time_diff" (not "time_trunc") and pass the unit as a literal column,
    # mirroring the Spark Connect implementation of this function.
    return _invoke_function_over_columns("time_diff", lit(_enum_to_value(unit)), start, end)


@_try_remote_functions
def timestamp_millis(col: "ColumnOrName") -> Column:
"""
Expand Down
12 changes: 12 additions & 0 deletions python/pyspark/sql/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,18 @@ def test_rand_functions(self):
rndn2 = df.select("key", F.randn(0)).collect()
self.assertEqual(sorted(rndn1), sorted(rndn2))

def test_time_diff(self):
    """Check that time_diff accepts Column objects and column names alike."""
    # SPARK-5XXXX: test the time_diff function.
    # A single row with two columns: both times must live in the SAME tuple,
    # otherwise `(x)` is just a parenthesised scalar and two one-value rows result.
    df = self.spark.createDataFrame(
        [(datetime.time(20, 30, 29), datetime.time(21, 30, 29))], ["start", "end"]
    )
    # Exactly one hour separates the two times.
    result = 1
    row_from_col = df.select(F.time_diff("hour", df.start, df.end)).first()
    # The difference is an integral count of units, not a time value.
    self.assertIsInstance(row_from_col[0], int)
    self.assertEqual(row_from_col[0], result)
    row_from_name = df.select(F.time_diff("hour", "start", "end")).first()
    self.assertIsInstance(row_from_name[0], int)
    self.assertEqual(row_from_name[0], result)

def test_try_parse_url(self):
df = self.spark.createDataFrame(
[("https://spark.apache.org/path?query=1", "QUERY", "query")],
Expand Down