
Commit 451a7a6

szehon-ho authored and cloud-fan committed
[SPARK-52236][SQL] Standardize analyze exceptions for default value
### What changes were proposed in this pull request?

Default values are checked for various properties, such as being resolved and being deterministic. Standardize the errors raised for the various statements (CREATE TABLE / REPLACE TABLE / ALTER TABLE ADD COLUMNS / ALTER TABLE ALTER COLUMNS). This PR only fixes the cases that were not standardized before.

### Why are the changes needed?

Cleans up some exceptions and messages. This follows from the review comment at https://github.com/apache/spark/pull/50864/files#r2087361821

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Added tests in DataSourceV2SQLSuite.

### Was this patch authored or co-authored using generative AI tooling?

Closes #50960 from szehon-ho/standard_default_value_exception.

Authored-by: Szehon Ho <szehon.apache@gmail.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent de3d44d commit 451a7a6
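
To make the standardization concrete, here is a sketch of the statements exercised by the tests in this commit and the error each one now raises. It is illustrative only, not part of the diff: it assumes an existing `spark` session and a table provider for which default column values are allowed (the tests enable this via `SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS`); the table name `t` and `USING foo` are placeholders.

```scala
// Illustrative only; assumes an existing SparkSession `spark` and a provider that
// allows column defaults. Comments show the standardized error condition and the
// `statement` message parameter reported after this change.

// Unresolved default value -> INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION
spark.sql("CREATE TABLE t (s INT DEFAULT badvalue) USING foo")    // statement = "CREATE TABLE"
spark.sql("REPLACE TABLE t (s INT DEFAULT badvalue) USING foo")   // statement = "REPLACE TABLE"
spark.sql("ALTER TABLE t ADD COLUMN s INT DEFAULT badvalue")      // statement = "ALTER TABLE ADD COLUMNS"

// Non-deterministic default value -> INVALID_DEFAULT_VALUE.NON_DETERMINISTIC
spark.sql("REPLACE TABLE t (c DOUBLE DEFAULT rand()) USING foo")  // statement = "REPLACE TABLE"
spark.sql("ALTER TABLE t ADD COLUMN c DOUBLE DEFAULT rand()")     // statement = "ALTER TABLE ADD COLUMNS"
```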

6 files changed (+68 -40 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/ColumnDefinition.scala

Lines changed: 5 additions & 26 deletions
@@ -149,6 +149,7 @@ object ColumnDefinition {
       // Do not check anything if the children are not resolved yet.
       case _ if !plan.childrenResolved =>

+      // Wrap errors for default values in a more user-friendly message.
       case cmd: V2CreateTablePlan if cmd.columns.exists(_.defaultValue.isDefined) =>
         val statement = cmd match {
           case _: CreateTable => "CREATE TABLE"
@@ -161,44 +162,22 @@ object ColumnDefinition {
         cmd.columns.foreach { col =>
           col.defaultValue.foreach { default =>
             checkDefaultColumnConflicts(col)
-            validateDefaultValueExpr(default, statement, col.name, col.dataType)
-            if (!default.deterministic) {
-              throw QueryCompilationErrors.defaultValueNonDeterministicError(
-                "CREATE TABLE", col.name, default.originalSQL)
-            }
+            validateDefaultValueExpr(default, statement, col.name, Some(col.dataType))
           }
         }

       case cmd: AddColumns if cmd.columnsToAdd.exists(_.default.isDefined) =>
-        // Wrap analysis errors for default values in a more user-friendly message.
         cmd.columnsToAdd.foreach { c =>
           c.default.foreach { d =>
-            if (!d.resolved) {
-              throw QueryCompilationErrors.defaultValuesUnresolvedExprError(
-                "ALTER TABLE", c.colName, d.originalSQL, null)
-            }
-            validateDefaultValueExpr(d, "ALTER TABLE", c.colName, c.dataType)
-            if (!d.deterministic) {
-              throw QueryCompilationErrors.defaultValueNonDeterministicError(
-                "ALTER TABLE", c.colName, d.originalSQL)
-            }
+            validateDefaultValueExpr(d, "ALTER TABLE ADD COLUMNS", c.colName, Some(c.dataType))
           }
         }

       case cmd: AlterColumns if cmd.specs.exists(_.newDefaultExpression.isDefined) =>
-        // Wrap analysis errors for default values in a more user-friendly message.
         cmd.specs.foreach { c =>
           c.newDefaultExpression.foreach { d =>
-            if (!d.resolved) {
-              throw QueryCompilationErrors.defaultValuesUnresolvedExprError(
-                "ALTER TABLE ALTER COLUMN", c.column.name.quoted, d.originalSQL, null)
-            }
-            validateDefaultValueExpr(d, "ALTER TABLE ALTER COLUMN",
-              c.column.name.quoted, d.dataType)
-            if (!d.deterministic) {
-              throw QueryCompilationErrors.defaultValueNonDeterministicError(
-                "ALTER TABLE ALTER COLUMN", c.column.name.quoted, d.originalSQL)
-            }
+            validateDefaultValueExpr(d, "ALTER TABLE ALTER COLUMN", c.column.name.quoted,
+              None)
           }
         }
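
Net effect of this file: all three command branches now delegate their default-value checks to the same helper, passing the full statement name and the target column type only where it is known. Condensed from the additions above (not new code):

```scala
// CREATE TABLE / REPLACE TABLE: statement name computed from the plan, target type known.
validateDefaultValueExpr(default, statement, col.name, Some(col.dataType))
// ALTER TABLE ADD COLUMNS: target type taken from the new column definition.
validateDefaultValueExpr(d, "ALTER TABLE ADD COLUMNS", c.colName, Some(c.dataType))
// ALTER TABLE ALTER COLUMN: no target type is passed, so the up-cast check is skipped.
validateDefaultValueExpr(d, "ALTER TABLE ALTER COLUMN", c.column.name.quoted, None)
```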

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala

Lines changed: 21 additions & 8 deletions
@@ -583,21 +583,29 @@ object ResolveDefaultColumns extends QueryErrorsBase
       default: DefaultValueExpression,
       statement: String,
       colName: String,
-      targetType: DataType): Unit = {
+      targetTypeOption: Option[DataType]): Unit = {
     if (default.containsPattern(PLAN_EXPRESSION)) {
       throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions(
         statement, colName, default.originalSQL)
     } else if (default.resolved) {
-      val dataType = CharVarcharUtils.replaceCharVarcharWithString(targetType)
-      if (!Cast.canUpCast(default.child.dataType, dataType) &&
-        defaultValueFromWiderTypeLiteral(default.child, dataType, colName).isEmpty) {
-        throw QueryCompilationErrors.defaultValuesDataTypeError(
-          statement, colName, default.originalSQL, targetType, default.child.dataType)
+      targetTypeOption match {
+        case Some(targetType) =>
+          CharVarcharUtils.replaceCharVarcharWithString(targetType)
+          if (!Cast.canUpCast(default.child.dataType, targetType) &&
+            defaultValueFromWiderTypeLiteral(default.child, targetType, colName).isEmpty) {
+            throw QueryCompilationErrors.defaultValuesDataTypeError(
+              statement, colName, default.originalSQL, targetType, default.child.dataType)
+          }
+        case _ => ()
       }
-      // Our analysis check passes here. We do not further inspect whether the
-      // expression is `foldable` here, as the plan is not optimized yet.
+    } else {
+      throw QueryCompilationErrors.defaultValuesUnresolvedExprError(
+        statement, colName, default.originalSQL, null)
     }

+    // Our analysis check passes here. We do not further inspect whether the
+    // expression is `foldable` here, as the plan is not optimized yet.
+
     if (default.references.nonEmpty || default.exists(_.isInstanceOf[VariableReference])) {
       // Ideally we should let the rest of `CheckAnalysis` report errors about why the default
       // expression is unresolved. But we should report a better error here if the default
@@ -606,6 +614,11 @@ object ResolveDefaultColumns extends QueryErrorsBase
       throw QueryCompilationErrors.defaultValueNotConstantError(
         statement, colName, default.originalSQL)
     }
+
+    if (!default.deterministic) {
+      throw QueryCompilationErrors.defaultValueNonDeterministicError(
+        statement, colName, default.originalSQL)
+    }
   }

   /**
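
With the `Option[DataType]` parameter above, the helper now owns all of the standard checks, so call sites only choose the statement name and decide whether a target type is available. A minimal sketch of the calling convention, with hypothetical variable names (`defaultExpr`, `colName`, `colType`):

```scala
// Per the diff above, validateDefaultValueExpr now performs all of the standard checks:
//   defaultValuesMayNotContainSubQueryExpressions - the default contains a subquery
//   defaultValuesUnresolvedExprError              - the default did not resolve
//   defaultValuesDataTypeError                    - resolved but cannot be up-cast (only when a target type is given)
//   defaultValueNotConstantError                  - references columns or session variables
//   defaultValueNonDeterministicError             - the expression is non-deterministic
ResolveDefaultColumns.validateDefaultValueExpr(
  defaultExpr, "CREATE TABLE", colName, Some(colType))     // up-cast check runs against colType
ResolveDefaultColumns.validateDefaultValueExpr(
  defaultExpr, "ALTER TABLE ALTER COLUMN", colName, None)  // up-cast check is skipped
```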

sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala

Lines changed: 1 addition & 1 deletion
@@ -374,7 +374,7 @@ trait AlterTableTests extends SharedSparkSession with QueryErrorsBase {
         },
         condition = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION",
         parameters = Map(
-          "statement" -> "ALTER TABLE",
+          "statement" -> "ALTER TABLE ADD COLUMNS",
           "colName" -> "`s`",
           "defaultValue" -> "badvalue"))

sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -511,7 +511,7 @@ class DataSourceV2DataFrameSuite
           Array(LiteralValue(123, IntegerType), LiteralValue(56, IntegerType)))))

       val alterExecCol2 = executeAndKeepPhysicalPlan[AlterTableExec] {
-        sql(s"ALTER TABLE $tableName ALTER COLUMN salary SET DEFAULT ('r' || 'l')")
+        sql(s"ALTER TABLE $tableName ALTER COLUMN dep SET DEFAULT ('r' || 'l')")
       }
       checkDefaultValue(
         alterExecCol2.changes.collect {
@@ -526,7 +526,7 @@
             LiteralValue(UTF8String.fromString("l"), StringType)))))

       val alterExecCol3 = executeAndKeepPhysicalPlan[AlterTableExec] {
-        sql(s"ALTER TABLE $tableName ALTER COLUMN salary SET DEFAULT CAST(0 AS BOOLEAN)")
+        sql(s"ALTER TABLE $tableName ALTER COLUMN active SET DEFAULT CAST(0 AS BOOLEAN)")
       }
       checkDefaultValue(
         alterExecCol3.changes.collect {

sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala

Lines changed: 38 additions & 2 deletions
@@ -3552,6 +3552,42 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }

+  test("CREATE TABLE with invalid default value") {
+    withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> s"$v2Format, ") {
+      withTable("t") {
+        // The default value fails to analyze.
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(s"create table t(s int default badvalue) using $v2Format")
+          },
+          condition = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION",
+          parameters = Map(
+            "statement" -> "CREATE TABLE",
+            "colName" -> "`s`",
+            "defaultValue" -> "badvalue"))
+      }
+    }
+  }
+
+  test("REPLACE TABLE with invalid default value") {
+    withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> s"$v2Format, ") {
+      withTable("t") {
+        sql(s"create table t(i boolean) using $v2Format")
+
+        // The default value fails to analyze.
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(s"replace table t(s int default badvalue) using $v2Format")
+          },
+          condition = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION",
+          parameters = Map(
+            "statement" -> "REPLACE TABLE",
+            "colName" -> "`s`",
+            "defaultValue" -> "badvalue"))
+      }
+    }
+  }
+
   test("SPARK-52116: Create table with default value which is not deterministic") {
     withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> v2Source) {
       withTable("tab") {
@@ -3576,7 +3612,7 @@
           s"REPLACE TABLE tab (col1 DOUBLE DEFAULT rand()) USING $v2Source")
         assert(exception.getSqlState == "42623")
         assert(exception.errorClass.get == "INVALID_DEFAULT_VALUE.NON_DETERMINISTIC")
-        assert(exception.messageParameters("statement") == "CREATE TABLE")
+        assert(exception.messageParameters("statement") == "REPLACE TABLE")
         assert(exception.messageParameters("colName") == "`col1`")
         assert(exception.messageParameters("defaultValue") == "rand()")
       }
@@ -3593,7 +3629,7 @@
           s"ALTER TABLE tab ADD COLUMN col2 DOUBLE DEFAULT rand()")
         assert(exception.getSqlState == "42623")
         assert(exception.errorClass.get == "INVALID_DEFAULT_VALUE.NON_DETERMINISTIC")
-        assert(exception.messageParameters("statement") == "ALTER TABLE")
+        assert(exception.messageParameters("statement") == "ALTER TABLE ADD COLUMNS")
         assert(exception.messageParameters("colName") == "`col2`")
         assert(exception.messageParameters("defaultValue") == "rand()")
       }

sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -1170,7 +1170,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
       exception = intercept[AnalysisException] {
         sql("create table t(i boolean, s bigint default badvalue) using parquet")
       },
-      condition = "INVALID_DEFAULT_VALUE.NOT_CONSTANT",
+      condition = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION",
      parameters = Map(
        "statement" -> "CREATE TABLE",
        "colName" -> "`s`",
