Skip to content

Commit 10c7e3b

Browse files
MaxGekk authored and haoyangeng-db committed
[SPARK-52782][SQL] Return NULL from +/- on datetime with NULL
### What changes were proposed in this pull request? In the PR, I propose to return `NULL` if any datetime argument of `+`/`-` operator is `NULL`. ### Why are the changes needed? To follow the common convention for other operators and functions, and do not confuse Spark SQL users. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the modified tests: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z time.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z date.sql" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes apache#51454 from MaxGekk/subtract-null. Authored-by: Max Gekk <max.gekk@gmail.com> Signed-off-by: Max Gekk <max.gekk@gmail.com>
1 parent a7b49c1 commit 10c7e3b

File tree

16 files changed

+86
-470
lines changed

16 files changed

+86
-470
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/BinaryArithmeticWithDatetimeResolver.scala

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ import org.apache.spark.sql.types.{
4949
AnsiIntervalType,
5050
AnyTimestampTypeExpression,
5151
CalendarIntervalType,
52+
DatetimeType,
5253
DateType,
5354
DayTimeIntervalType,
5455
NullType,
@@ -84,7 +85,11 @@ object BinaryArithmeticWithDatetimeResolver {
8485
case (DateType, CalendarIntervalType) =>
8586
DateAddInterval(l, r, ansiEnabled = mode == EvalMode.ANSI)
8687
case (_: TimeType, _: DayTimeIntervalType) => TimeAddInterval(l, r)
88+
case (_: DatetimeType, _: NullType) =>
89+
a.copy(right = Cast(a.right, DayTimeIntervalType.DEFAULT))
8790
case (_: DayTimeIntervalType, _: TimeType) => TimeAddInterval(r, l)
91+
case (_: NullType, _: DatetimeType) =>
92+
a.copy(left = Cast(a.left, DayTimeIntervalType.DEFAULT))
8893
case (_, CalendarIntervalType | _: DayTimeIntervalType) =>
8994
Cast(TimestampAddInterval(l, r), l.dataType)
9095
case (CalendarIntervalType, DateType) =>
@@ -109,9 +114,9 @@ object BinaryArithmeticWithDatetimeResolver {
109114
case (CalendarIntervalType, CalendarIntervalType) |
110115
(_: DayTimeIntervalType, _: DayTimeIntervalType) =>
111116
s
112-
case (_: NullType, _: AnsiIntervalType) =>
117+
case (_: NullType, _: AnsiIntervalType | _: DatetimeType) =>
113118
s.copy(left = Cast(s.left, s.right.dataType))
114-
case (_: AnsiIntervalType, _: NullType) =>
119+
case (_: AnsiIntervalType | _: DatetimeType, _: NullType) =>
115120
s.copy(right = Cast(s.right, s.left.dataType))
116121
case (DateType, CalendarIntervalType) =>
117122
DatetimeSub(

sql/core/src/test/resources/sql-tests/analyzer-results/datetime-legacy.sql.out

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1595,45 +1595,13 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
15951595
-- !query
15961596
select timestamp'2011-11-11 11:11:11' + null
15971597
-- !query analysis
1598-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
1599-
{
1600-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
1601-
"sqlState" : "42K09",
1602-
"messageParameters" : {
1603-
"left" : "\"TIMESTAMP\"",
1604-
"right" : "\"VOID\"",
1605-
"sqlExpr" : "\"(TIMESTAMP '2011-11-11 11:11:11' + NULL)\""
1606-
},
1607-
"queryContext" : [ {
1608-
"objectType" : "",
1609-
"objectName" : "",
1610-
"startIndex" : 8,
1611-
"stopIndex" : 44,
1612-
"fragment" : "timestamp'2011-11-11 11:11:11' + null"
1613-
} ]
1614-
}
1598+
[Analyzer test output redacted due to nondeterminism]
16151599

16161600

16171601
-- !query
16181602
select null + timestamp'2011-11-11 11:11:11'
16191603
-- !query analysis
1620-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
1621-
{
1622-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
1623-
"sqlState" : "42K09",
1624-
"messageParameters" : {
1625-
"left" : "\"VOID\"",
1626-
"right" : "\"TIMESTAMP\"",
1627-
"sqlExpr" : "\"(NULL + TIMESTAMP '2011-11-11 11:11:11')\""
1628-
},
1629-
"queryContext" : [ {
1630-
"objectType" : "",
1631-
"objectName" : "",
1632-
"startIndex" : 8,
1633-
"stopIndex" : 44,
1634-
"fragment" : "null + timestamp'2011-11-11 11:11:11'"
1635-
} ]
1636-
}
1604+
[Analyzer test output redacted due to nondeterminism]
16371605

16381606

16391607
-- !query

sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/timestamp.sql.out

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -711,45 +711,13 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
711711
-- !query
712712
select timestamp'2011-11-11 11:11:11' + null
713713
-- !query analysis
714-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
715-
{
716-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
717-
"sqlState" : "42K09",
718-
"messageParameters" : {
719-
"left" : "\"TIMESTAMP\"",
720-
"right" : "\"VOID\"",
721-
"sqlExpr" : "\"(TIMESTAMP '2011-11-11 11:11:11' + NULL)\""
722-
},
723-
"queryContext" : [ {
724-
"objectType" : "",
725-
"objectName" : "",
726-
"startIndex" : 8,
727-
"stopIndex" : 44,
728-
"fragment" : "timestamp'2011-11-11 11:11:11' + null"
729-
} ]
730-
}
714+
[Analyzer test output redacted due to nondeterminism]
731715

732716

733717
-- !query
734718
select null + timestamp'2011-11-11 11:11:11'
735719
-- !query analysis
736-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
737-
{
738-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
739-
"sqlState" : "42K09",
740-
"messageParameters" : {
741-
"left" : "\"VOID\"",
742-
"right" : "\"TIMESTAMP\"",
743-
"sqlExpr" : "\"(NULL + TIMESTAMP '2011-11-11 11:11:11')\""
744-
},
745-
"queryContext" : [ {
746-
"objectType" : "",
747-
"objectName" : "",
748-
"startIndex" : 8,
749-
"stopIndex" : 44,
750-
"fragment" : "null + timestamp'2011-11-11 11:11:11'"
751-
} ]
752-
}
720+
[Analyzer test output redacted due to nondeterminism]
753721

754722

755723
-- !query

sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out

Lines changed: 13 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -432,23 +432,15 @@ Project [cast(12:43:33.1234 as time(4)) + INTERVAL '01:04:05.56' HOUR TO SECOND
432432
-- !query
433433
SELECT TIME'08:30' + NULL
434434
-- !query analysis
435-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
436-
{
437-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
438-
"sqlState" : "42K09",
439-
"messageParameters" : {
440-
"left" : "\"TIME(6)\"",
441-
"right" : "\"VOID\"",
442-
"sqlExpr" : "\"(TIME '08:30:00' + NULL)\""
443-
},
444-
"queryContext" : [ {
445-
"objectType" : "",
446-
"objectName" : "",
447-
"startIndex" : 8,
448-
"stopIndex" : 25,
449-
"fragment" : "TIME'08:30' + NULL"
450-
} ]
451-
}
435+
Project [08:30:00 + cast(null as interval day to second) AS TIME '08:30:00' + NULL#x]
436+
+- OneRowRelation
437+
438+
439+
-- !query
440+
SELECT NULL + TIME'08:30'
441+
-- !query analysis
442+
Project [08:30:00 + cast(null as interval day to second) AS TIME '08:30:00' + NULL#x]
443+
+- OneRowRelation
452444

453445

454446
-- !query
@@ -533,45 +525,15 @@ Project [12:30:41 - 10:00:00 AS TIME '12:30:41' - TIME '10:00:00'#x]
533525
-- !query
534526
SELECT TIME'08:30' - NULL
535527
-- !query analysis
536-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
537-
{
538-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
539-
"sqlState" : "42K09",
540-
"messageParameters" : {
541-
"left" : "\"TIME(6)\"",
542-
"right" : "\"VOID\"",
543-
"sqlExpr" : "\"(TIME '08:30:00' - NULL)\""
544-
},
545-
"queryContext" : [ {
546-
"objectType" : "",
547-
"objectName" : "",
548-
"startIndex" : 8,
549-
"stopIndex" : 25,
550-
"fragment" : "TIME'08:30' - NULL"
551-
} ]
552-
}
528+
Project [08:30:00 - cast(null as time(6)) AS TIME '08:30:00' - NULL#x]
529+
+- OneRowRelation
553530

554531

555532
-- !query
556533
SELECT NULL - TIME'10:32'
557534
-- !query analysis
558-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
559-
{
560-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
561-
"sqlState" : "42K09",
562-
"messageParameters" : {
563-
"left" : "\"VOID\"",
564-
"right" : "\"TIME(6)\"",
565-
"sqlExpr" : "\"(NULL - TIME '10:32:00')\""
566-
},
567-
"queryContext" : [ {
568-
"objectType" : "",
569-
"objectName" : "",
570-
"startIndex" : 8,
571-
"stopIndex" : 25,
572-
"fragment" : "NULL - TIME'10:32'"
573-
} ]
574-
}
535+
Project [cast(null as time(6)) - 10:32:00 AS NULL - TIME '10:32:00'#x]
536+
+- OneRowRelation
575537

576538

577539
-- !query

sql/core/src/test/resources/sql-tests/analyzer-results/timestamp.sql.out

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -639,45 +639,13 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
639639
-- !query
640640
select timestamp'2011-11-11 11:11:11' + null
641641
-- !query analysis
642-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
643-
{
644-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
645-
"sqlState" : "42K09",
646-
"messageParameters" : {
647-
"left" : "\"TIMESTAMP\"",
648-
"right" : "\"VOID\"",
649-
"sqlExpr" : "\"(TIMESTAMP '2011-11-11 11:11:11' + NULL)\""
650-
},
651-
"queryContext" : [ {
652-
"objectType" : "",
653-
"objectName" : "",
654-
"startIndex" : 8,
655-
"stopIndex" : 44,
656-
"fragment" : "timestamp'2011-11-11 11:11:11' + null"
657-
} ]
658-
}
642+
[Analyzer test output redacted due to nondeterminism]
659643

660644

661645
-- !query
662646
select null + timestamp'2011-11-11 11:11:11'
663647
-- !query analysis
664-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
665-
{
666-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
667-
"sqlState" : "42K09",
668-
"messageParameters" : {
669-
"left" : "\"VOID\"",
670-
"right" : "\"TIMESTAMP\"",
671-
"sqlExpr" : "\"(NULL + TIMESTAMP '2011-11-11 11:11:11')\""
672-
},
673-
"queryContext" : [ {
674-
"objectType" : "",
675-
"objectName" : "",
676-
"startIndex" : 8,
677-
"stopIndex" : 44,
678-
"fragment" : "null + timestamp'2011-11-11 11:11:11'"
679-
} ]
680-
}
648+
[Analyzer test output redacted due to nondeterminism]
681649

682650

683651
-- !query

sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -567,13 +567,15 @@ Project [(cast(2011-11-11 11:11:11 as timestamp_ntz) - 2011-11-11 11:11:10) AS (
567567
-- !query
568568
select timestamp'2011-11-11 11:11:11' - null
569569
-- !query analysis
570-
[Analyzer test output redacted due to nondeterminism]
570+
Project [(2011-11-11 11:11:11 - cast(null as timestamp_ntz)) AS (TIMESTAMP_NTZ '2011-11-11 11:11:11' - NULL)#x]
571+
+- OneRowRelation
571572

572573

573574
-- !query
574575
select null - timestamp'2011-11-11 11:11:11'
575576
-- !query analysis
576-
[Analyzer test output redacted due to nondeterminism]
577+
Project [(cast(null as timestamp_ntz) - 2011-11-11 11:11:11) AS (NULL - TIMESTAMP_NTZ '2011-11-11 11:11:11')#x]
578+
+- OneRowRelation
577579

578580

579581
-- !query
@@ -653,45 +655,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
653655
-- !query
654656
select timestamp'2011-11-11 11:11:11' + null
655657
-- !query analysis
656-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
657-
{
658-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
659-
"sqlState" : "42K09",
660-
"messageParameters" : {
661-
"left" : "\"TIMESTAMP_NTZ\"",
662-
"right" : "\"VOID\"",
663-
"sqlExpr" : "\"(TIMESTAMP_NTZ '2011-11-11 11:11:11' + NULL)\""
664-
},
665-
"queryContext" : [ {
666-
"objectType" : "",
667-
"objectName" : "",
668-
"startIndex" : 8,
669-
"stopIndex" : 44,
670-
"fragment" : "timestamp'2011-11-11 11:11:11' + null"
671-
} ]
672-
}
658+
Project [cast(2011-11-11 11:11:11 + cast(null as interval day to second) as timestamp_ntz) AS TIMESTAMP_NTZ '2011-11-11 11:11:11' + NULL#x]
659+
+- OneRowRelation
673660

674661

675662
-- !query
676663
select null + timestamp'2011-11-11 11:11:11'
677664
-- !query analysis
678-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
679-
{
680-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
681-
"sqlState" : "42K09",
682-
"messageParameters" : {
683-
"left" : "\"VOID\"",
684-
"right" : "\"TIMESTAMP_NTZ\"",
685-
"sqlExpr" : "\"(NULL + TIMESTAMP_NTZ '2011-11-11 11:11:11')\""
686-
},
687-
"queryContext" : [ {
688-
"objectType" : "",
689-
"objectName" : "",
690-
"startIndex" : 8,
691-
"stopIndex" : 44,
692-
"fragment" : "null + timestamp'2011-11-11 11:11:11'"
693-
} ]
694-
}
665+
Project [cast(2011-11-11 11:11:11 + cast(null as interval day to second) as timestamp_ntz) AS TIMESTAMP_NTZ '2011-11-11 11:11:11' + NULL#x]
666+
+- OneRowRelation
695667

696668

697669
-- !query

sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -601,13 +601,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
601601
-- !query
602602
select timestamp'2011-11-11 11:11:11' - null
603603
-- !query analysis
604-
[Analyzer test output redacted due to nondeterminism]
604+
Project [(2011-11-11 11:11:11 - cast(null as timestamp_ntz)) AS (TIMESTAMP_NTZ '2011-11-11 11:11:11' - NULL)#x]
605+
+- OneRowRelation
605606

606607

607608
-- !query
608609
select null - timestamp'2011-11-11 11:11:11'
609610
-- !query analysis
610-
[Analyzer test output redacted due to nondeterminism]
611+
Project [(cast(null as timestamp_ntz) - 2011-11-11 11:11:11) AS (NULL - TIMESTAMP_NTZ '2011-11-11 11:11:11')#x]
612+
+- OneRowRelation
611613

612614

613615
-- !query
@@ -713,45 +715,15 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
713715
-- !query
714716
select timestamp'2011-11-11 11:11:11' + null
715717
-- !query analysis
716-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
717-
{
718-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
719-
"sqlState" : "42K09",
720-
"messageParameters" : {
721-
"left" : "\"TIMESTAMP_NTZ\"",
722-
"right" : "\"VOID\"",
723-
"sqlExpr" : "\"(TIMESTAMP_NTZ '2011-11-11 11:11:11' + NULL)\""
724-
},
725-
"queryContext" : [ {
726-
"objectType" : "",
727-
"objectName" : "",
728-
"startIndex" : 8,
729-
"stopIndex" : 44,
730-
"fragment" : "timestamp'2011-11-11 11:11:11' + null"
731-
} ]
732-
}
718+
Project [cast(2011-11-11 11:11:11 + cast(null as interval day to second) as timestamp_ntz) AS TIMESTAMP_NTZ '2011-11-11 11:11:11' + NULL#x]
719+
+- OneRowRelation
733720

734721

735722
-- !query
736723
select null + timestamp'2011-11-11 11:11:11'
737724
-- !query analysis
738-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
739-
{
740-
"errorClass" : "DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES",
741-
"sqlState" : "42K09",
742-
"messageParameters" : {
743-
"left" : "\"VOID\"",
744-
"right" : "\"TIMESTAMP_NTZ\"",
745-
"sqlExpr" : "\"(NULL + TIMESTAMP_NTZ '2011-11-11 11:11:11')\""
746-
},
747-
"queryContext" : [ {
748-
"objectType" : "",
749-
"objectName" : "",
750-
"startIndex" : 8,
751-
"stopIndex" : 44,
752-
"fragment" : "null + timestamp'2011-11-11 11:11:11'"
753-
} ]
754-
}
725+
Project [cast(2011-11-11 11:11:11 + cast(null as interval day to second) as timestamp_ntz) AS TIMESTAMP_NTZ '2011-11-11 11:11:11' + NULL#x]
726+
+- OneRowRelation
755727

756728

757729
-- !query

0 commit comments

Comments (0)