
Commit 12790a4

queryContext addition

1 parent c53c17c

7 files changed: +40 -37 lines changed

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/CoreDataflowNodeProcessor.scala
Lines changed: 2 additions & 4 deletions

@@ -132,8 +132,7 @@ private class FlowResolver(rawGraph: DataflowGraph) extends Logging {
       allInputs = allInputs,
       availableInputs = availableResolvedInputs.values.toList,
       configuration = flowToResolve.sqlConf,
-      currentCatalog = flowToResolve.currentCatalog,
-      currentDatabase = flowToResolve.currentDatabase
+      queryContext = flowToResolve.queryContext
     )
     val result =
       flowFunctionResult match {
@@ -179,8 +178,7 @@ private class FlowResolver(rawGraph: DataflowGraph) extends Logging {
         allInputs = allInputs,
         availableInputs = availableResolvedInputs.values.toList,
         configuration = newSqlConf,
-        currentCatalog = flowToResolve.currentCatalog,
-        currentDatabase = flowToResolve.currentDatabase
+        queryContext = flowToResolve.queryContext
       )
     } else {
       f

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/Flow.scala
Lines changed: 13 additions & 13 deletions

@@ -26,6 +26,13 @@ import org.apache.spark.sql.pipelines.AnalysisWarning
 import org.apache.spark.sql.pipelines.util.InputReadOptions
 import org.apache.spark.sql.types.StructType
 
+/**
+ * Contains the catalog and database context information for query execution.
+ */
+case class QueryContext(
+    currentCatalog: Option[String],
+    currentDatabase: Option[String])
+
 /**
  * A [[Flow]] is a node of data transformation in a dataflow graph. It describes the movement
  * of data into a particular dataset.
@@ -49,11 +56,8 @@ trait Flow extends GraphElement with Logging {
    */
   def once: Boolean = false
 
-  /** The current catalog in the execution context when the query is defined. */
-  def currentCatalog: Option[String]
-
-  /** The current database in the execution context when the query is defined. */
-  def currentDatabase: Option[String]
+  /** The current query context (catalog and database) when the query is defined. */
+  def queryContext: QueryContext
 
   /** The comment associated with this flow */
   def comment: Option[String]
@@ -74,16 +78,14 @@ trait FlowFunction extends Logging {
   * [[DataflowGraph]].
   * @param availableInputs the list of all [[Input]]s available to this flow
   * @param configuration the spark configurations that apply to this flow.
-  * @param currentCatalog The current catalog in execution context when the query is defined.
-  * @param currentDatabase The current database in execution context when the query is defined.
+  * @param queryContext The context of the query being evaluated.
   * @return the inputs actually used, and the [[DataFrame]] expression for the flow
   */
  def call(
      allInputs: Set[TableIdentifier],
      availableInputs: Seq[Input],
      configuration: Map[String, String],
-      currentCatalog: Option[String],
-      currentDatabase: Option[String]
+      queryContext: QueryContext
  ): FlowFunctionResult
 }
 
@@ -127,8 +129,7 @@ case class UnresolvedFlow(
     identifier: TableIdentifier,
     destinationIdentifier: TableIdentifier,
     func: FlowFunction,
-    currentCatalog: Option[String],
-    currentDatabase: Option[String],
+    queryContext: QueryContext,
     sqlConf: Map[String, String],
     comment: Option[String] = None,
     override val once: Boolean,
@@ -147,8 +148,7 @@ trait ResolutionCompletedFlow extends Flow {
   val identifier: TableIdentifier = flow.identifier
   val destinationIdentifier: TableIdentifier = flow.destinationIdentifier
   def func: FlowFunction = flow.func
-  def currentCatalog: Option[String] = flow.currentCatalog
-  def currentDatabase: Option[String] = flow.currentDatabase
+  def queryContext: QueryContext = flow.queryContext
   def comment: Option[String] = flow.comment
   def sqlConf: Map[String, String] = funcResult.sqlConf
   def origin: QueryOrigin = flow.origin
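
The new QueryContext case class is the heart of the commit: one parameter object now travels through FlowFunction.call, UnresolvedFlow, and the resolution traits instead of two parallel Option[String] parameters. A minimal, self-contained sketch of how a caller might construct and consume it (the object name, describe helper, and values below are illustrative, not code from the commit):

    // Illustrative only: mirrors the QueryContext case class added in Flow.scala.
    case class QueryContext(
        currentCatalog: Option[String],
        currentDatabase: Option[String])

    object QueryContextSketch extends App {
      // Hypothetical consumer: reads both fields through the single context value.
      def describe(qc: QueryContext): String =
        s"catalog=${qc.currentCatalog.getOrElse("<unset>")}, " +
          s"database=${qc.currentDatabase.getOrElse("<unset>")}"

      println(describe(QueryContext(Some("spark_catalog"), Some("default"))))
      // prints: catalog=spark_catalog, database=default
    }

Bundling the two values means a future addition to the execution context only touches the case class, not every signature between FlowResolver and FlowAnalysis.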

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysis.scala
Lines changed: 7 additions & 7 deletions

@@ -17,11 +17,13 @@
 
 package org.apache.spark.sql.pipelines.graph
 
+import scala.util.Try
+
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CTESubstitution, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
-import org.apache.spark.sql.classic.{DataFrame, DataStreamReader, Dataset, SparkSession}
+import org.apache.spark.sql.classic.{DataFrame, Dataset, DataStreamReader, SparkSession}
 import org.apache.spark.sql.pipelines.{AnalysisWarning, Language}
 import org.apache.spark.sql.pipelines.graph.GraphIdentifierManager.{ExternalDatasetIdentifier, InternalDatasetIdentifier}
 import org.apache.spark.sql.pipelines.util.{BatchReadOptions, InputReadOptions, StreamingReadOptions}
@@ -33,14 +35,12 @@ object FlowAnalysis {
       allInputs: Set[TableIdentifier],
       availableInputs: Seq[Input],
       confs: Map[String, String],
-      currentCatalog: Option[String],
-      currentDatabase: Option[String]
+      queryContext: QueryContext
   ): FlowFunctionResult = {
     val ctx = FlowAnalysisContext(
       allInputs = allInputs,
       availableInputs = availableInputs,
-      currentCatalog = currentCatalog,
-      currentDatabase = currentDatabase,
+      queryContext = queryContext,
       spark = SparkSession.active
     )
     val df = try {
@@ -51,8 +51,8 @@ object FlowAnalysis {
     }
     FlowFunctionResult(
       requestedInputs = ctx.requestedInputs.toSet,
-      usedBatchInputs = ctx.batchInputs.toSet,
-      usedStreamingInputs = ctx.streamingInputs.toSet,
+      batchInputs = ctx.batchInputs.toSet,
+      streamingInputs = ctx.streamingInputs.toSet,
       usedExternalInputs = ctx.externalInputs.toSet,
       dataFrame = df,
       sqlConf = confs,

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysisContext.scala
Lines changed: 2 additions & 4 deletions

@@ -29,8 +29,7 @@ import org.apache.spark.sql.pipelines.AnalysisWarning
  *
  * @param allInputs Set of identifiers for all [[Input]]s defined in the DataflowGraph.
  * @param availableInputs Inputs available to be referenced with `read` or `readStream`.
- * @param currentCatalog The current catalog in execution context when the query is defined.
- * @param currentDatabase The current schema in execution context when the query is defined.
+ * @param queryContext The context of the query being evaluated.
  * @param requestedInputs A mutable buffer populated with names of all inputs that were
  *                        requested.
  * @param spark the spark session to be used.
@@ -40,8 +39,7 @@ import org.apache.spark.sql.pipelines.AnalysisWarning
 private[pipelines] case class FlowAnalysisContext(
     allInputs: Set[TableIdentifier],
     availableInputs: Seq[Input],
-    currentCatalog: Option[String],
-    currentDatabase: Option[String],
+    queryContext: QueryContext,
     batchInputs: mutable.HashSet[ResolvedInput] = mutable.HashSet.empty,
     streamingInputs: mutable.HashSet[ResolvedInput] = mutable.HashSet.empty,
     requestedInputs: mutable.HashSet[TableIdentifier] = mutable.HashSet.empty,

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/GraphIdentifierManager.scala
Lines changed: 2 additions & 2 deletions

@@ -85,8 +85,8 @@ object GraphIdentifierManager {
     } else {
       val fullyQualifiedInputIdentifier = fullyQualifyIdentifier(
         maybeFullyQualifiedIdentifier = inputIdentifier,
-        currentCatalog = context.currentCatalog,
-        currentDatabase = context.currentDatabase
+        currentCatalog = context.queryContext.currentCatalog,
+        currentDatabase = context.queryContext.currentDatabase
       )
       assertIsFullyQualifiedForRead(identifier = fullyQualifiedInputIdentifier)
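
fullyQualifyIdentifier itself is unchanged here; only the sourcing of its arguments moves behind queryContext. For intuition, a hypothetical sketch of what such qualification does with the context (the helper name and body are assumptions, not this file's implementation; it uses QueryContext from Flow.scala and assumes a Spark version whose catalyst TableIdentifier carries optional catalog and database fields):

    import org.apache.spark.sql.catalyst.TableIdentifier

    // Hypothetical: fill in whichever parts of the identifier the user omitted,
    // using the catalog/database captured in the QueryContext.
    def qualifySketch(id: TableIdentifier, qc: QueryContext): TableIdentifier =
      id.copy(
        catalog = id.catalog.orElse(qc.currentCatalog),
        database = id.database.orElse(qc.currentDatabase))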

sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/graph/ConnectInvalidPipelineSuite.scala
Lines changed: 1 addition & 1 deletion

@@ -374,7 +374,7 @@ class ConnectInvalidPipelineSuite extends PipelineTest {
         .getMessage
         .contains(
           s"View ${fullyQualifiedIdentifier("a", isView = true).quotedString}" +
-            s" is not a streaming view and must be referenced using read."
+            s" is not a batch view and must be referenced using read."
         )
     )
   }

sql/pipelines/src/test/scala/org/apache/spark/sql/pipelines/utils/TestGraphRegistrationContext.scala
Lines changed: 13 additions & 6 deletions

@@ -27,6 +27,7 @@ import org.apache.spark.sql.pipelines.graph.{
   GraphIdentifierManager,
   GraphRegistrationContext,
   PersistedView,
+  QueryContext,
   QueryOrigin,
   Table,
   TemporaryView,
@@ -86,10 +87,12 @@ class TestGraphRegistrationContext(
       identifier = tableIdentifier,
       destinationIdentifier = tableIdentifier,
       func = query.get,
+      queryContext = QueryContext(
+        currentCatalog = catalog.orElse(Some(defaultCatalog)),
+        currentDatabase = database.orElse(Some(defaultDatabase))
+      ),
       sqlConf = sqlConf,
       once = false,
-      currentCatalog = catalog.orElse(Some(defaultCatalog)),
-      currentDatabase = database.orElse(Some(defaultDatabase)),
       comment = comment,
       origin = baseOrigin
     )
@@ -138,10 +141,12 @@ class TestGraphRegistrationContext(
       identifier = viewIdentifier,
       destinationIdentifier = viewIdentifier,
       func = query,
+      queryContext = QueryContext(
+        currentCatalog = catalog.orElse(Some(defaultCatalog)),
+        currentDatabase = database.orElse(Some(defaultDatabase))
+      ),
       sqlConf = sqlConf,
       once = false,
-      currentCatalog = catalog.orElse(Some(defaultCatalog)),
-      currentDatabase = database.orElse(Some(defaultDatabase)),
       comment = comment,
       origin = origin
     )
@@ -165,10 +170,12 @@ class TestGraphRegistrationContext(
       identifier = flowIdentifier,
       destinationIdentifier = flowDestinationIdentifier,
       func = query,
+      queryContext = QueryContext(
+        currentCatalog = catalog.orElse(Some(defaultCatalog)),
+        currentDatabase = database.orElse(Some(defaultDatabase))
+      ),
       sqlConf = Map.empty,
       once = once,
-      currentCatalog = catalog.orElse(Some(defaultCatalog)),
-      currentDatabase = database.orElse(Some(defaultDatabase)),
       comment = None,
       origin = QueryOrigin()
     )
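
All three registration paths above build their QueryContext with the same rule: an explicitly supplied catalog or database wins, and the test context's defaults fill the gaps. A self-contained sketch of that orElse defaulting (the object name and default values are illustrative stand-ins, not the helper's actual fields):

    object DefaultingSketch extends App {
      case class QueryContext(
          currentCatalog: Option[String],
          currentDatabase: Option[String])

      val defaultCatalog = "spark_catalog"  // stand-in for the test context's default
      val defaultDatabase = "default"

      def mkContext(catalog: Option[String], database: Option[String]): QueryContext =
        QueryContext(
          currentCatalog = catalog.orElse(Some(defaultCatalog)),
          currentDatabase = database.orElse(Some(defaultDatabase)))

      // An explicit database overrides the default; the catalog falls back.
      assert(mkContext(None, Some("db1")) == QueryContext(Some("spark_catalog"), Some("db1")))
    }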
