Skip to content

Commit c6e15b2

Browse files
ilicmarkodb authored and yhuang-db committed
[SPARK-52338][SQL] Support for inheriting default collation from schema to View
### What changes were proposed in this pull request?
Support for inheriting default collation from schema to View.

### Why are the changes needed?
New feature.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Added tests to `DefaultCollationTestSuite.scala`.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#51040 from ilicmarkodb/shema_view.

Authored-by: ilicmarkodb <marko.ilic@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 4da70a0 commit c6e15b2

File tree

2 files changed: 227 additions and 50 deletions.

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ApplyDefaultCollationToStringType.scala

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,11 +114,28 @@ object ApplyDefaultCollationToStringType extends Rule[LogicalPlan] {
114114
if tableSpec.collation.isEmpty =>
115115
createTable.copy(tableSpec = tableSpec.copy(
116116
collation = getCollationFromSchemaMetadata(catalog, identifier.namespace())))
117-
case replaceTable@ReplaceTable(
118-
ResolvedIdentifier(catalog: SupportsNamespaces, identifier), _, _, tableSpec: TableSpec, _)
117+
118+
case replaceTable@ReplaceTable(ResolvedIdentifier(
119+
catalog: SupportsNamespaces, identifier), _, _, tableSpec: TableSpec, _)
119120
if tableSpec.collation.isEmpty =>
120121
replaceTable.copy(tableSpec = tableSpec.copy(
121122
collation = getCollationFromSchemaMetadata(catalog, identifier.namespace())))
123+
124+
case createView@CreateView(ResolvedIdentifier(
125+
catalog: SupportsNamespaces, identifier), _, _, _, _, _, _, _, _, _)
126+
if createView.collation.isEmpty =>
127+
createView.copy(
128+
collation = getCollationFromSchemaMetadata(catalog, identifier.namespace()))
129+
130+
// We match against ResolvedPersistentView because temporary views don't have a
131+
// schema/catalog.
132+
case alterViewAs@AlterViewAs(resolvedPersistentView@ResolvedPersistentView(
133+
catalog: SupportsNamespaces, identifier, _), _, _)
134+
if resolvedPersistentView.metadata.collation.isEmpty =>
135+
val newResolvedPersistentView = resolvedPersistentView.copy(
136+
metadata = resolvedPersistentView.metadata.copy(
137+
collation = getCollationFromSchemaMetadata(catalog, identifier.namespace())))
138+
alterViewAs.copy(child = newResolvedPersistentView)
122139
case other =>
123140
other
124141
}

sql/core/src/test/scala/org/apache/spark/sql/collation/DefaultCollationTestSuite.scala

Lines changed: 208 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,24 @@ abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSessi
5252
def testView: String = "test_view"
5353
protected val fullyQualifiedPrefix = s"${CollationFactory.CATALOG}.${CollationFactory.SCHEMA}."
5454

55+
protected val schemaAndObjectCollationPairs =
56+
Seq(
57+
// (schemaDefaultCollation, objectDefaultCollation)
58+
("UTF8_BINARY", None),
59+
("UTF8_LCASE", None),
60+
("UNICODE", None),
61+
("DE", None),
62+
("UTF8_BINARY", Some("UTF8_BINARY")),
63+
("UTF8_BINARY", Some("UTF8_LCASE")),
64+
("UTF8_BINARY", Some("DE")),
65+
("UTF8_LCASE", Some("UTF8_BINARY")),
66+
("UTF8_LCASE", Some("UTF8_LCASE")),
67+
("UTF8_LCASE", Some("DE")),
68+
("DE", Some("UTF8_BINARY")),
69+
("DE", Some("UTF8_LCASE")),
70+
("DE", Some("DE"))
71+
)
72+
5573
def assertTableColumnCollation(
5674
table: String,
5775
column: String,
@@ -144,19 +162,7 @@ abstract class DefaultCollationTestSuite extends QueryTest with SharedSparkSessi
144162
}
145163
}
146164

147-
Seq(
148-
// (schemaDefaultCollation, tableDefaultCollation)
149-
("UTF8_BINARY", None),
150-
("UTF8_LCASE", None),
151-
("UNICODE", None),
152-
("DE", None),
153-
("UTF8_BINARY", Some("UTF8_BINARY")),
154-
("UTF8_BINARY", Some("UTF8_LCASE")),
155-
("UTF8_BINARY", Some("DE")),
156-
("UTF8_LCASE", Some("UTF8_BINARY")),
157-
("UTF8_LCASE", Some("UTF8_LCASE")),
158-
("UTF8_LCASE", Some("DE"))
159-
).foreach {
165+
schemaAndObjectCollationPairs.foreach {
160166
case (schemaDefaultCollation, tableDefaultCollation) =>
161167
test(
162168
s"""CREATE table with schema level collation
@@ -526,28 +532,15 @@ class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite {
526532
}
527533

528534
test("default string producing expressions in view definition") {
529-
val viewDefaultCollation = Seq(
530-
"UTF8_BINARY", "UNICODE"
531-
)
532-
533-
viewDefaultCollation.foreach { collation =>
534-
withView(testTable) {
535-
536-
val columns = defaultStringProducingExpressions.zipWithIndex.map {
537-
case (expr, index) => s"$expr AS c${index + 1}"
538-
}.mkString(", ")
539-
540-
sql(
541-
s"""
542-
|CREATE view $testTable
543-
|DEFAULT COLLATION $collation
544-
|AS SELECT $columns
545-
|""".stripMargin)
546-
547-
(1 to defaultStringProducingExpressions.length).foreach { index =>
548-
assertTableColumnCollation(testTable, s"c$index", collation)
549-
}
550-
}
535+
Seq(
536+
// viewDefaultCollation
537+
"UTF8_BINARY",
538+
"UTF8_LCASE",
539+
"UNICODE",
540+
"DE"
541+
).foreach { viewDefaultCollation =>
542+
testViewWithDefaultStringProducingExpressions(
543+
viewDefaultCollation = Some(viewDefaultCollation))
551544
}
552545
}
553546

@@ -569,6 +562,185 @@ class DefaultCollationTestSuiteV1 extends DefaultCollationTestSuite {
569562
assertTableColumnCollation(testTable, "c4", "UTF8_BINARY")
570563
}
571564
}
565+
566+
// View with schema level collation tests
567+
schemaAndObjectCollationPairs.foreach {
568+
case (schemaDefaultCollation, viewDefaultCollation) =>
569+
test(
570+
s"""CREATE VIEW with schema level collation
571+
| (schema default collation = $schemaDefaultCollation,
572+
| view default collation = $viewDefaultCollation)""".stripMargin) {
573+
testCreateViewWithSchemaLevelCollation(
574+
schemaDefaultCollation, viewDefaultCollation)
575+
}
576+
577+
test(
578+
s"""CREATE OR REPLACE VIEW with schema level collation
579+
| (schema default collation = $schemaDefaultCollation,
580+
| view default collation = $viewDefaultCollation)""".stripMargin) {
581+
testCreateViewWithSchemaLevelCollation(
582+
schemaDefaultCollation, viewDefaultCollation, replaceView = true)
583+
}
584+
585+
test(
586+
s"""ALTER VIEW with schema level collation
587+
| (schema default collation = $schemaDefaultCollation,
588+
| view default collation = $viewDefaultCollation)""".stripMargin) {
589+
testAlterViewWithSchemaLevelCollation(schemaDefaultCollation, viewDefaultCollation)
590+
}
591+
592+
test(
593+
s"""ALTER VIEW after ALTER SCHEMA DEFAULT COLLATION
594+
| (original schema default collation = $schemaDefaultCollation,
595+
| view default collation = $viewDefaultCollation)""".stripMargin) {
596+
testAlterViewWithSchemaLevelCollation(
597+
schemaDefaultCollation, viewDefaultCollation, alterSchemaCollation = true)
598+
}
599+
600+
test(
601+
s"""View with default string producing expressions and schema level collation
602+
| (schema default collation = $schemaDefaultCollation,
603+
| view default collation = $viewDefaultCollation)""".stripMargin) {
604+
withDatabase(testSchema) {
605+
sql(s"CREATE SCHEMA $testSchema DEFAULT COLLATION $schemaDefaultCollation")
606+
sql(s"USE $testSchema")
607+
608+
testViewWithDefaultStringProducingExpressions(
609+
Some(schemaDefaultCollation), viewDefaultCollation)
610+
}
611+
}
612+
}
613+
614+
test("View with UTF8_LCASE default collation from schema level") {
615+
withDatabase(testSchema) {
616+
sql(s"CREATE SCHEMA $testSchema DEFAULT COLLATION UTF8_LCASE")
617+
sql(s"USE $testSchema")
618+
619+
withView(testView) {
620+
sql(s"CREATE VIEW $testView AS SELECT 'a' AS c1 WHERE 'a' = 'A'")
621+
622+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView"), Row(1))
623+
assertTableColumnCollation(testView, "c1", "UTF8_LCASE")
624+
}
625+
}
626+
}
627+
628+
private def testCreateViewWithSchemaLevelCollation(
629+
schemaDefaultCollation: String,
630+
viewDefaultCollation: Option[String] = None,
631+
replaceView: Boolean = false): Unit = {
632+
val (viewDefaultCollationClause, resolvedDefaultCollation) =
633+
if (viewDefaultCollation.isDefined) {
634+
(s"DEFAULT COLLATION ${viewDefaultCollation.get}", viewDefaultCollation.get)
635+
} else {
636+
("", schemaDefaultCollation)
637+
}
638+
val replace = if (replaceView) "OR REPLACE" else ""
639+
640+
withDatabase(testSchema) {
641+
sql(s"CREATE SCHEMA $testSchema DEFAULT COLLATION $schemaDefaultCollation")
642+
sql(s"USE $testSchema")
643+
644+
withView(testView) {
645+
sql(s"CREATE $replace VIEW $testView $viewDefaultCollationClause AS SELECT 'a' AS c1")
646+
647+
assertTableColumnCollation(testView, "c1", resolvedDefaultCollation)
648+
}
649+
650+
withTable(testTable) {
651+
sql(s"CREATE TABLE $testTable (c1 STRING COLLATE UTF8_BINARY, " +
652+
s"c2 STRING COLLATE UTF8_LCASE, c3 STRING COLLATE UNICODE)")
653+
sql(s"INSERT INTO $testTable VALUES ('a', 'b', 'c'), ('A', 'D', 'C')")
654+
655+
withView(testView) {
656+
// scalastyle:off
657+
sql(s"CREATE $replace VIEW $testView $viewDefaultCollationClause AS " +
658+
s"SELECT *, 'd' AS c4 FROM $testTable WHERE c2 = 'B' AND 'ć' != 'č'")
659+
// scalastyle:on
660+
661+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView"), Row(1))
662+
663+
assertTableColumnCollation(testView, "c1", "UTF8_BINARY")
664+
assertTableColumnCollation(testView, "c2", "UTF8_LCASE")
665+
assertTableColumnCollation(testView, "c3", "UNICODE")
666+
assertTableColumnCollation(testView, "c4", resolvedDefaultCollation)
667+
}
668+
}
669+
}
670+
}
671+
672+
private def testAlterViewWithSchemaLevelCollation(
673+
schemaDefaultCollation: String,
674+
viewDefaultCollation: Option[String] = None,
675+
alterSchemaCollation: Boolean = false): Unit = {
676+
val (viewDefaultCollationClause, resolvedDefaultCollation) =
677+
if (viewDefaultCollation.isDefined) {
678+
(s"DEFAULT COLLATION ${viewDefaultCollation.get}", viewDefaultCollation.get)
679+
} else {
680+
("", schemaDefaultCollation)
681+
}
682+
683+
withDatabase(testSchema) {
684+
sql(s"CREATE SCHEMA $testSchema DEFAULT COLLATION $schemaDefaultCollation")
685+
sql(s"USE $testSchema")
686+
687+
withView(testView) {
688+
sql(s"CREATE VIEW $testView $viewDefaultCollationClause AS SELECT 'a' AS c1")
689+
withTable(testTable) {
690+
sql(s"CREATE TABLE $testTable (c1 STRING COLLATE UTF8_BINARY, " +
691+
s"c2 STRING COLLATE UTF8_LCASE, c3 STRING COLLATE UNICODE)")
692+
sql(s"INSERT INTO $testTable VALUES ('a', 'b', 'c'), ('A', 'D', 'C')")
693+
694+
if (alterSchemaCollation) {
695+
// ALTER SCHEMA DEFAULT COLLATION shouldn't change View's default collation
696+
sql(s"ALTER SCHEMA $testSchema DEFAULT COLLATION SR_AI_CI")
697+
}
698+
699+
// scalastyle:off
700+
sql(s"ALTER VIEW $testView " +
701+
s"AS SELECT *, 'd' AS c4 FROM $testTable WHERE c2 = 'B' AND 'ć' != 'č'")
702+
// scalastyle:on
703+
704+
checkAnswer(sql(s"SELECT COUNT(*) FROM $testView"), Row(1))
705+
706+
assertTableColumnCollation(testView, "c1", "UTF8_BINARY")
707+
assertTableColumnCollation(testView, "c2", "UTF8_LCASE")
708+
assertTableColumnCollation(testView, "c3", "UNICODE")
709+
assertTableColumnCollation(testView, "c4", resolvedDefaultCollation)
710+
}
711+
}
712+
}
713+
}
714+
715+
private def testViewWithDefaultStringProducingExpressions(
716+
schemaDefaultCollation: Option[String] = None,
717+
viewDefaultCollation: Option[String] = None): Unit = {
718+
val (viewDefaultCollationClause, resolvedDefaultCollation) =
719+
if (viewDefaultCollation.isDefined) {
720+
(s"DEFAULT COLLATION ${viewDefaultCollation.get}", viewDefaultCollation.get)
721+
} else if (schemaDefaultCollation.isDefined) {
722+
("", schemaDefaultCollation.get)
723+
} else {
724+
("", "UTF8_BINARY")
725+
}
726+
727+
withView(testView) {
728+
val columns = defaultStringProducingExpressions.zipWithIndex.map {
729+
case (expr, index) => s"$expr AS c${index + 1}"
730+
}.mkString(", ")
731+
732+
sql(
733+
s"""
734+
|CREATE view $testView
735+
|$viewDefaultCollationClause
736+
|AS SELECT $columns
737+
|""".stripMargin)
738+
739+
(1 to defaultStringProducingExpressions.length).foreach { index =>
740+
assertTableColumnCollation(testView, s"c$index", resolvedDefaultCollation)
741+
}
742+
}
743+
}
572744
}
573745

574746
class DefaultCollationTestSuiteV2 extends DefaultCollationTestSuite with DatasourceV2SQLBase {
@@ -615,19 +787,7 @@ class DefaultCollationTestSuiteV2 extends DefaultCollationTestSuite with Datasou
615787
}
616788
}
617789

618-
Seq(
619-
// (schemaDefaultCollation, tableDefaultCollation)
620-
("UTF8_BINARY", None),
621-
("UTF8_LCASE", None),
622-
("UNICODE", None),
623-
("DE", None),
624-
("UTF8_BINARY", Some("UTF8_BINARY")),
625-
("UTF8_BINARY", Some("UTF8_LCASE")),
626-
("UTF8_BINARY", Some("DE")),
627-
("UTF8_LCASE", Some("UTF8_BINARY")),
628-
("UTF8_LCASE", Some("UTF8_LCASE")),
629-
("UTF8_LCASE", Some("DE"))
630-
).foreach {
790+
schemaAndObjectCollationPairs.foreach {
631791
case (schemaDefaultCollation, tableDefaultCollation) =>
632792
test(
633793
s"""CREATE OR REPLACE table with schema level collation

0 commit comments

Comments
 (0)