Skip to content

Commit e757b94

Browse files
ilicmarkodb authored and yhuang-db committed
[SPARK-52313][SQL] Correctly resolve reference data type for Views with default collation
### What changes were proposed in this pull request?

Correctly resolve reference data type for Views with default collation.

`CREATE VIEW v DEFAULT COLLATION UTF8_LCASE AS SELECT 'a' AS c1` will resolve `c1` as an `AttributeReference`, but with a non-collated `dataType`. This is because `ResolveReferences` runs before `ApplyDefaultCollationToStringType` in the rule order. Queries still worked correctly because there is a `Cast` to the collated `dataType` on top of `c1`.

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New test in `DefaultCollationTestSuite.scala`.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#51023 from ilicmarkodb/fix_resolve_ref.

Authored-by: ilicmarkodb <marko.ilic@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent b6f4194 commit e757b94

File tree

2 files changed: +28 -1 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1507,7 +1507,17 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor
15071507
}
15081508
}
15091509

1510-
def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
1510+
def apply(plan: LogicalPlan): LogicalPlan = {
1511+
val collatedPlan =
1512+
if (conf.getConf(SQLConf.RUN_COLLATION_TYPE_CASTS_BEFORE_ALIAS_ASSIGNMENT)) {
1513+
CollationRulesRunner(plan)
1514+
} else {
1515+
plan
1516+
}
1517+
doApply(collatedPlan)
1518+
}
1519+
1520+
def doApply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
15111521
// Don't wait other rules to resolve the child plans of `InsertIntoStatement` as we need
15121522
// to resolve column "DEFAULT" in the child plans so that they must be unresolved.
15131523
case i: InsertIntoStatement => resolveColumnDefaultInCommandInputQuery(i)

sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ package org.apache.spark.sql.collation
1919

2020
import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row}
2121
import org.apache.spark.sql.catalyst.catalog.SessionCatalog.DEFAULT_DATABASE
22+
import org.apache.spark.sql.catalyst.expressions.AttributeReference
23+
import org.apache.spark.sql.catalyst.plans.logical.Project
2224
import org.apache.spark.sql.catalyst.util.CollationFactory
2325
import org.apache.spark.sql.connector.DatasourceV2SQLBase
2426
import org.apache.spark.sql.internal.SQLConf
@@ -431,6 +433,21 @@ abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSessi
431433

432434
class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite {
433435

436+
test("Check AttributeReference dataType from View with default collation") {
437+
withView(testView) {
438+
sql(s"CREATE VIEW $testView DEFAULT COLLATION UTF8_LCASE AS SELECT 'a' AS c1")
439+
440+
val df = sql(s"SELECT * FROM $testView")
441+
val analyzedPlan = df.queryExecution.analyzed
442+
analyzedPlan match {
443+
case Project(Seq(AttributeReference("c1", dataType, _, _)), _) =>
444+
assert(dataType == StringType("UTF8_LCASE"))
445+
case _ =>
446+
assert(false)
447+
}
448+
}
449+
}
450+
434451
test("create/alter view created from a table") {
435452
withTable(testTable) {
436453
sql(s"CREATE TABLE $testTable (c1 STRING, c2 STRING COLLATE UNICODE_CI) USING $dataSource")

0 commit comments

Comments
 (0)