Commit 5f983b0

drexler-skyyhuang-db authored and committed
[SPARK-52305][PYTHON] Refine the docstring for isnotnull, equal_null, nullif, nullifzero, nvl, nvl2, zeroifnull
### What changes were proposed in this pull request?

Refine the docstring for `isnotnull`, `equal_null`, `nullif`, `nullifzero`, `nvl`, `nvl2`, `zeroifnull`.

### Why are the changes needed?

Improve docs and test coverage.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Doctests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#51016 from drexler-sky/docstring2.

Authored-by: Evan Wu <evan123wu@gmail.com>
Signed-off-by: Ruifeng Zheng <ruifengz@apache.org>
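For a quick feel of the pattern the refined docstrings adopt, here is a rough sketch (not part of the commit; assumes PySpark 4.0+, where `nullifzero`/`zeroifnull` exist) showing the point of the "Column or column name" wording: every one of these functions accepts either a `Column` object or a column-name string.

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(None, 0), (1, 9)], ["a", "b"])

df.select(
    sf.isnotnull(df.a),       # Column object
    sf.equal_null("a", "b"),  # column-name strings
    sf.nullif(df.a, df.b),
    sf.nullifzero("b"),
    sf.nvl("a", "b"),
    sf.nvl2("a", "b", "b"),
    sf.zeroifnull("a"),
).show()
```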
1 parent dc6d9f4 commit 5f983b0

File tree

1 file changed (+124, -36 lines)

python/pyspark/sql/functions/builtin.py

@@ -25365,13 +25365,27 @@ def isnotnull(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(None,), (1,)], ["e"])
-    >>> df.select(isnotnull(df.e).alias('r')).collect()
-    [Row(r=False), Row(r=True)]
+    >>> df.select('*', sf.isnotnull(df.e)).show()
+    +----+---------------+
+    |   e|(e IS NOT NULL)|
+    +----+---------------+
+    |NULL|          false|
+    |   1|           true|
+    +----+---------------+
+
+    >>> df.select('*', sf.isnotnull('e')).show()
+    +----+---------------+
+    |   e|(e IS NOT NULL)|
+    +----+---------------+
+    |NULL|          false|
+    |   1|           true|
+    +----+---------------+
     """
     return _invoke_function_over_columns("isnotnull", col)
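
For context (not part of the diff): `sf.isnotnull` mirrors the `Column.isNotNull` method, so in the same doctest environment (where `spark` is predefined) the result can be had either way:

```python
>>> df = spark.createDataFrame([(None,), (1,)], ["e"])
>>> df.select(df.e.isNotNull()).show()  # same column name and output as sf.isnotnull(df.e)
```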

@@ -25386,14 +25400,28 @@ def equal_null(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col1 : :class:`~pyspark.sql.Column` or str
-    col2 : :class:`~pyspark.sql.Column` or str
+    col1 : :class:`~pyspark.sql.Column` or column name
+    col2 : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> from pyspark.sql import functions as sf
     >>> df = spark.createDataFrame([(None, None,), (1, 9,)], ["a", "b"])
-    >>> df.select(equal_null(df.a, df.b).alias('r')).collect()
-    [Row(r=True), Row(r=False)]
+    >>> df.select('*', sf.equal_null(df.a, df.b)).show()
+    +----+----+----------------+
+    |   a|   b|equal_null(a, b)|
+    +----+----+----------------+
+    |NULL|NULL|            true|
+    |   1|   9|           false|
+    +----+----+----------------+
+
+    >>> df.select('*', sf.equal_null('a', 'b')).show()
+    +----+----+----------------+
+    |   a|   b|equal_null(a, b)|
+    +----+----+----------------+
+    |NULL|NULL|            true|
+    |   1|   9|           false|
+    +----+----+----------------+
     """
     return _invoke_function_over_columns("equal_null", col1, col2)

@@ -25407,14 +25435,28 @@ def nullif(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col1 : :class:`~pyspark.sql.Column` or str
-    col2 : :class:`~pyspark.sql.Column` or str
+    col1 : :class:`~pyspark.sql.Column` or column name
+    col2 : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> import pyspark.sql.functions as sf
     >>> df = spark.createDataFrame([(None, None,), (1, 9,)], ["a", "b"])
-    >>> df.select(nullif(df.a, df.b).alias('r')).collect()
-    [Row(r=None), Row(r=1)]
+    >>> df.select('*', sf.nullif(df.a, df.b)).show()
+    +----+----+------------+
+    |   a|   b|nullif(a, b)|
+    +----+----+------------+
+    |NULL|NULL|        NULL|
+    |   1|   9|           1|
+    +----+----+------------+
+
+    >>> df.select('*', sf.nullif('a', 'b')).show()
+    +----+----+------------+
+    |   a|   b|nullif(a, b)|
+    +----+----+------------+
+    |NULL|NULL|        NULL|
+    |   1|   9|           1|
+    +----+----+------------+
     """
     return _invoke_function_over_columns("nullif", col1, col2)
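
As a reminder of the semantics (not part of the change): `nullif(a, b)` is standard SQL `NULLIF`, i.e. `CASE WHEN a = b THEN NULL ELSE a END`, which can also be spelled with `when`/`otherwise`:

```python
>>> df = spark.createDataFrame([(None, None), (1, 9)], ["a", "b"])
>>> df.select(sf.when(df.a == df.b, None).otherwise(df.a)).show()  # same values as sf.nullif(df.a, df.b)
```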

@@ -25428,18 +25470,27 @@ def nullifzero(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> import pyspark.sql.functions as sf
     >>> df = spark.createDataFrame([(0,), (1,)], ["a"])
-    >>> df.select(nullifzero(df.a).alias("result")).show()
-    +------+
-    |result|
-    +------+
-    |  NULL|
-    |     1|
-    +------+
+    >>> df.select('*', sf.nullifzero(df.a)).show()
+    +---+-------------+
+    |  a|nullifzero(a)|
+    +---+-------------+
+    |  0|         NULL|
+    |  1|            1|
+    +---+-------------+
+
+    >>> df.select('*', sf.nullifzero('a')).show()
+    +---+-------------+
+    |  a|nullifzero(a)|
+    +---+-------------+
+    |  0|         NULL|
+    |  1|            1|
+    +---+-------------+
     """
     return _invoke_function_over_columns("nullifzero", col)
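
Semantically (not part of the diff), `nullifzero(col)` is shorthand for `nullif(col, lit(0))`:

```python
>>> df = spark.createDataFrame([(0,), (1,)], ["a"])
>>> df.select(sf.nullif(df.a, sf.lit(0))).show()  # NULL, 1 -- same as sf.nullifzero(df.a)
```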

@@ -25453,14 +25504,28 @@ def nvl(col1: "ColumnOrName", col2: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col1 : :class:`~pyspark.sql.Column` or str
-    col2 : :class:`~pyspark.sql.Column` or str
+    col1 : :class:`~pyspark.sql.Column` or column name
+    col2 : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> import pyspark.sql.functions as sf
     >>> df = spark.createDataFrame([(None, 8,), (1, 9,)], ["a", "b"])
-    >>> df.select(nvl(df.a, df.b).alias('r')).collect()
-    [Row(r=8), Row(r=1)]
+    >>> df.select('*', sf.nvl(df.a, df.b)).show()
+    +----+---+---------+
+    |   a|  b|nvl(a, b)|
+    +----+---+---------+
+    |NULL|  8|        8|
+    |   1|  9|        1|
+    +----+---+---------+
+
+    >>> df.select('*', sf.nvl('a', 'b')).show()
+    +----+---+---------+
+    |   a|  b|nvl(a, b)|
+    +----+---+---------+
+    |NULL|  8|        8|
+    |   1|  9|        1|
+    +----+---+---------+
     """
     return _invoke_function_over_columns("nvl", col1, col2)
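
For context: `nvl(a, b)` behaves like two-argument `coalesce`, returning `b` only where `a` is NULL:

```python
>>> df = spark.createDataFrame([(None, 8), (1, 9)], ["a", "b"])
>>> df.select(sf.coalesce(df.a, df.b)).show()  # 8, 1 -- same values as sf.nvl(df.a, df.b)
```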

@@ -25474,15 +25539,29 @@ def nvl2(col1: "ColumnOrName", col2: "ColumnOrName", col3: "ColumnOrName") -> Co
 
     Parameters
     ----------
-    col1 : :class:`~pyspark.sql.Column` or str
-    col2 : :class:`~pyspark.sql.Column` or str
-    col3 : :class:`~pyspark.sql.Column` or str
+    col1 : :class:`~pyspark.sql.Column` or column name
+    col2 : :class:`~pyspark.sql.Column` or column name
+    col3 : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> import pyspark.sql.functions as sf
     >>> df = spark.createDataFrame([(None, 8, 6,), (1, 9, 9,)], ["a", "b", "c"])
-    >>> df.select(nvl2(df.a, df.b, df.c).alias('r')).collect()
-    [Row(r=6), Row(r=9)]
+    >>> df.select('*', sf.nvl2(df.a, df.b, df.c)).show()
+    +----+---+---+-------------+
+    |   a|  b|  c|nvl2(a, b, c)|
+    +----+---+---+-------------+
+    |NULL|  8|  6|            6|
+    |   1|  9|  9|            9|
+    +----+---+---+-------------+
+
+    >>> df.select('*', sf.nvl2('a', 'b', 'c')).show()
+    +----+---+---+-------------+
+    |   a|  b|  c|nvl2(a, b, c)|
+    +----+---+---+-------------+
+    |NULL|  8|  6|            6|
+    |   1|  9|  9|            9|
+    +----+---+---+-------------+
     """
     return _invoke_function_over_columns("nvl2", col1, col2, col3)
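
And `nvl2(a, b, c)` picks `b` when `a` is not NULL, else `c`; an equivalent `when`/`otherwise` spelling (illustration only):

```python
>>> df = spark.createDataFrame([(None, 8, 6), (1, 9, 9)], ["a", "b", "c"])
>>> df.select(sf.when(df.a.isNotNull(), df.b).otherwise(df.c)).show()  # 6, 9
```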

@@ -25496,18 +25575,27 @@ def zeroifnull(col: "ColumnOrName") -> Column:
 
     Parameters
     ----------
-    col : :class:`~pyspark.sql.Column` or str
+    col : :class:`~pyspark.sql.Column` or column name
 
     Examples
     --------
+    >>> import pyspark.sql.functions as sf
     >>> df = spark.createDataFrame([(None,), (1,)], ["a"])
-    >>> df.select(zeroifnull(df.a).alias("result")).show()
-    +------+
-    |result|
-    +------+
-    |     0|
-    |     1|
-    +------+
+    >>> df.select('*', sf.zeroifnull(df.a)).show()
+    +----+-------------+
+    |   a|zeroifnull(a)|
+    +----+-------------+
+    |NULL|            0|
+    |   1|            1|
+    +----+-------------+
+
+    >>> df.select('*', sf.zeroifnull('a')).show()
+    +----+-------------+
+    |   a|zeroifnull(a)|
+    +----+-------------+
+    |NULL|            0|
+    |   1|            1|
+    +----+-------------+
     """
     return _invoke_function_over_columns("zeroifnull", col)
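
The description above says the patch was verified with doctests. A minimal sketch of checking one refined docstring locally (an assumption, not the project's official test flow; requires a PySpark build that includes this commit, and `spark` must be supplied in the doctest globals, which the module's own `_test()` hook normally handles):

```python
import doctest
from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

spark = SparkSession.builder.getOrCreate()
# Run the examples embedded in zeroifnull's docstring and print each check.
doctest.run_docstring_examples(sf.zeroifnull, {"spark": spark}, verbose=True)
spark.stop()
```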
