Skip to content

[SPARK-52925][SQL] Return correct error message for anchor self references in rCTEs #51619

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ object ResolveWithCTE extends Rule[LogicalPlan] {
cteDefMap.put(cteDef.id, cteDef)
}
cteDef
case cteDef if cteDef.hasSelfReferenceInAnchor =>
// A self-reference inside the anchor term is rejected up front with INVALID_RECURSIVE_CTE.
throw new AnalysisException(
errorClass = "INVALID_RECURSIVE_CTE",
messageParameters = Map.empty)
case cteDef =>
// Multiple self-references are not allowed within one cteDef.
cteDef.child match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.plans.logical

import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, UnresolvedSubqueryColumnAliases}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.trees.TreePattern._
Expand Down Expand Up @@ -130,6 +130,26 @@ case class CTERelationDef(
/**
 * True when the child plan contains a `CTERelationRef` whose first field equals this
 * definition's `id` and whose sixth field is `true` (presumably the "recursive" flag —
 * confirm against the `CTERelationRef` declaration).
 */
lazy val hasSelfReferenceAsCTERef: Boolean = {
  val firstSelfRef = child.collectFirstWithSubqueries {
    case CTERelationRef(this.id, _, _, _, _, true, _) => true
  }
  // The partial function only ever yields `true`, so "found anything" is the answer.
  firstSelfRef.isDefined
}
/**
 * True when the anchor term of this definition contains a `CTERelationRef` pointing back
 * at this definition (same `id`, sixth field `true`).
 *
 * The anchor is taken to be the first child of the `Union` that sits directly under the
 * top-level `SubqueryAlias`, optionally wrapped in `UnresolvedSubqueryColumnAliases`
 * and/or `WithCTE`. Every other plan shape yields `false`.
 *
 * NOTE(review): a `Union` nested under any other wrapper node falls through to the
 * catch-all case and is reported as "no self-reference" — confirm that is intended.
 */
lazy val hasSelfReferenceInAnchor: Boolean = {
  val unionNode: Option[Union] = child match {
    case SubqueryAlias(_, union: Union) =>
      Some(union)
    case SubqueryAlias(_, UnresolvedSubqueryColumnAliases(_, union: Union)) =>
      Some(union)
    case SubqueryAlias(_, WithCTE(union: Union, _)) =>
      Some(union)
    case SubqueryAlias(_, UnresolvedSubqueryColumnAliases(_, WithCTE(union: Union, _))) =>
      Some(union)
    case _ => None
  }
  // `headOption` instead of `head`: a Union without children simply has no anchor to
  // inspect, so the answer is `false` rather than a NoSuchElementException.
  unionNode.flatMap(_.children.headOption).exists { anchor =>
    anchor.collectFirstWithSubqueries {
      case CTERelationRef(this.id, _, _, _, _, true, _) => true
    }.isDefined
  }
}
/**
 * True when the child plan contains a `UnionLoopRef` whose first field equals this
 * definition's `id`.
 */
lazy val hasSelfReferenceAsUnionLoopRef: Boolean = {
  val firstLoopRef = child.collectFirstWithSubqueries {
    case UnionLoopRef(this.id, _, _) => true
  }
  // The partial function only ever yields `true`, so "found anything" is the answer.
  firstLoopRef.isDefined
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -409,20 +409,10 @@ WITH RECURSIVE r(level) AS (
)
SELECT * FROM r
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
org.apache.spark.sql.AnalysisException
{
"errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
"sqlState" : "42703",
"messageParameters" : {
"objectName" : "`level`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 62,
"stopIndex" : 66,
"fragment" : "level"
} ]
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


Expand Down Expand Up @@ -2085,3 +2075,61 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"errorClass" : "INVALID_RECURSIVE_REFERENCE.PLACE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1(n) AS (
SELECT 1 FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query analysis
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1 AS (
SELECT 1 AS n FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query analysis
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1(n) AS (
WITH t2(m) AS (SELECT 1)
SELECT 1 FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query analysis
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1 AS (
WITH t2(m) AS (SELECT 1)
SELECT 1 AS n FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query analysis
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}
Original file line number Diff line number Diff line change
Expand Up @@ -1183,20 +1183,10 @@ org.apache.spark.sql.AnalysisException
WITH RECURSIVE x(n) AS (SELECT n FROM x UNION ALL SELECT 1)
SELECT * FROM x
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
org.apache.spark.sql.AnalysisException
{
"errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
"sqlState" : "42703",
"messageParameters" : {
"objectName" : "`n`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 32,
"stopIndex" : 32,
"fragment" : "n"
} ]
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


Expand Down
26 changes: 26 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
Original file line number Diff line number Diff line change
Expand Up @@ -756,3 +756,29 @@ WITH RECURSIVE t1(n) AS (
(SELECT n + 1 FROM t1 WHERE n < 5 ORDER BY n)
)
SELECT * FROM t1;

WITH RECURSIVE t1(n) AS (
SELECT 1 FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1;

WITH RECURSIVE t1 AS (
SELECT 1 AS n FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1;

WITH RECURSIVE t1(n) AS (
WITH t2(m) AS (SELECT 1)
SELECT 1 FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1;

WITH RECURSIVE t1 AS (
WITH t2(m) AS (SELECT 1)
SELECT 1 AS n FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1;
82 changes: 69 additions & 13 deletions sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -448,20 +448,10 @@ SELECT * FROM r
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
org.apache.spark.sql.AnalysisException
{
"errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
"sqlState" : "42703",
"messageParameters" : {
"objectName" : "`level`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 62,
"stopIndex" : 66,
"fragment" : "level"
} ]
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


Expand Down Expand Up @@ -1875,3 +1865,69 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"errorClass" : "INVALID_RECURSIVE_REFERENCE.PLACE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1(n) AS (
SELECT 1 FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1 AS (
SELECT 1 AS n FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1(n) AS (
WITH t2(m) AS (SELECT 1)
SELECT 1 FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


-- !query
WITH RECURSIVE t1 AS (
WITH t2(m) AS (SELECT 1)
SELECT 1 AS n FROM t1
UNION ALL
SELECT n+1 FROM t1 WHERE n < 5)
SELECT * FROM t1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}
Original file line number Diff line number Diff line change
Expand Up @@ -901,20 +901,10 @@ WITH RECURSIVE x(n) AS (SELECT n FROM x UNION ALL SELECT 1)
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
org.apache.spark.sql.AnalysisException
{
"errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
"sqlState" : "42703",
"messageParameters" : {
"objectName" : "`n`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 32,
"stopIndex" : 32,
"fragment" : "n"
} ]
"errorClass" : "INVALID_RECURSIVE_CTE",
"sqlState" : "42836"
}


Expand Down