Commit c53c17c

1 parent 1dbc38c commit c53c17c

5 files changed: +26 −57 lines


common/utils/src/main/resources/error/error-conditions.json

Lines changed: 1 addition & 1 deletion
@@ -2033,7 +2033,7 @@
   },
   "INCOMPATIBLE_BATCH_VIEW_READ": {
     "message": [
-      "View <datasetIdentifier> is not a streaming view and must be referenced using read. This check can be disabled by setting Spark conf pipelines.incompatibleViewCheck.enabled = false."
+      "View <datasetIdentifier> is not a batch view and must be referenced using read. This check can be disabled by setting Spark conf pipelines.incompatibleViewCheck.enabled = false."
     ],
     "sqlState": "42000"
   },
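For reference, a minimal sketch of the escape hatch the corrected message describes, assuming the named conf can be supplied like any other string conf at session build time (the builder call below is illustrative, not part of this commit):

import org.apache.spark.sql.SparkSession

// Sketch only: the error text above names this conf as the switch that
// disables the incompatible-view check; here it is set on a local session.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("pipelines-view-check-demo")
  .config("pipelines.incompatibleViewCheck.enabled", "false")
  .getOrCreate()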

project/SparkBuild.scala

Lines changed: 2 additions & 2 deletions
@@ -896,12 +896,12 @@ object SparkDeclarativePipelines {
       val guavaVersion =
         SbtPomKeys.effectivePom.value.getProperties.get(
           "connect.guava.version").asInstanceOf[String]
-      val guavaFailureaccessVersion =
+      val guavaFailureAccessVersion =
         SbtPomKeys.effectivePom.value.getProperties.get(
           "guava.failureaccess.version").asInstanceOf[String]
       Seq(
         "com.google.guava" % "guava" % guavaVersion,
-        "com.google.guava" % "failureaccess" % guavaFailureaccessVersion,
+        "com.google.guava" % "failureaccess" % guavaFailureAccessVersion,
         "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf"
       )
     },

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/Language.scala

Lines changed: 4 additions & 2 deletions
@@ -19,6 +19,8 @@ package org.apache.spark.sql.pipelines
 
 sealed trait Language {}
 
-case class Python() extends Language {}
+object Language {
+  case class Python() extends Language {}
+  case class Sql() extends Language {}
+}
 
-case class Sql() extends Language {}
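As a rough illustration of what the nesting changes at call sites, a hedged sketch of matching on the trait once both variants live in the Language companion object (the describe helper is hypothetical, not from the commit):

import org.apache.spark.sql.pipelines.Language

// Hypothetical helper: with Python and Sql nested in the companion object,
// call sites reference them as Language.Python() / Language.Sql(), and the
// sealed trait still lets the compiler check the match for exhaustiveness.
def describe(lang: Language): String = lang match {
  case Language.Python() => "flow declared through the Python API"
  case Language.Sql()    => "flow declared through SQL"
}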

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/Flow.scala

Lines changed: 13 additions & 37 deletions
@@ -90,15 +90,15 @@ trait FlowFunction extends Logging {
 /**
  * Holds the [[DataFrame]] returned by a [[FlowFunction]] along with the inputs used to
  * construct it.
- * @param usedBatchInputs the identifiers of the complete inputs read by the flow
- * @param usedStreamingInputs the identifiers of the incremental inputs read by the flow
+ * @param batchInputs the complete inputs read by the flow
+ * @param streamingInputs the incremental inputs read by the flow
  * @param usedExternalInputs the identifiers of the external inputs read by the flow
  * @param dataFrame the [[DataFrame]] expression executed by the flow if the flow can be resolved
  */
 case class FlowFunctionResult(
     requestedInputs: Set[TableIdentifier],
-    usedBatchInputs: Set[ResolvedInput],
-    usedStreamingInputs: Set[ResolvedInput],
+    batchInputs: Set[ResolvedInput],
+    streamingInputs: Set[ResolvedInput],
     usedExternalInputs: Set[String],
     dataFrame: Try[DataFrame],
     sqlConf: Map[String, String],

@@ -113,12 +113,6 @@ case class FlowFunctionResult(
     (batchInputs ++ streamingInputs).map(_.input.identifier)
   }
 
-  /** Names of [[Input]]s read completely by this [[Flow]]. */
-  def batchInputs: Set[ResolvedInput] = usedBatchInputs
-
-  /** Names of [[Input]]s read incrementally by this [[Flow]]. */
-  def streamingInputs: Set[ResolvedInput] = usedStreamingInputs
-
   /** Returns errors that occurred when attempting to analyze this [[Flow]]. */
   def failure: Seq[Throwable] = {
     dataFrame.failed.toOption.toSeq

@@ -129,35 +123,17 @@ case class FlowFunctionResult(
 }
 
 /** A [[Flow]] whose output schema and dependencies aren't known. */
-class UnresolvedFlow(
-    val identifier: TableIdentifier,
-    val destinationIdentifier: TableIdentifier,
-    val func: FlowFunction,
-    val currentCatalog: Option[String],
-    val currentDatabase: Option[String],
-    val sqlConf: Map[String, String],
-    val comment: Option[String] = None,
+case class UnresolvedFlow(
+    identifier: TableIdentifier,
+    destinationIdentifier: TableIdentifier,
+    func: FlowFunction,
+    currentCatalog: Option[String],
+    currentDatabase: Option[String],
+    sqlConf: Map[String, String],
+    comment: Option[String] = None,
     override val once: Boolean,
     override val origin: QueryOrigin
-) extends Flow {
-  def copy(
-      identifier: TableIdentifier = identifier,
-      destinationIdentifier: TableIdentifier = destinationIdentifier,
-      sqlConf: Map[String, String] = sqlConf
-  ): UnresolvedFlow = {
-    new UnresolvedFlow(
-      identifier = identifier,
-      destinationIdentifier = destinationIdentifier,
-      func = func,
-      currentCatalog = currentCatalog,
-      currentDatabase = currentDatabase,
-      sqlConf = sqlConf,
-      comment = comment,
-      once = once,
-      origin = origin
-    )
-  }
-}
+) extends Flow
 
 /**
  * A [[Flow]] whose flow function has been invoked, meaning either:
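To make the deletion above concrete: once UnresolvedFlow is a case class, Scala synthesizes copy() with per-field defaults, which is what the hand-written copy method was emulating. A cut-down, self-contained analogue follows; MiniFlow and the field values are placeholders, not from the commit:

import org.apache.spark.sql.catalyst.TableIdentifier

// Placeholder analogue of UnresolvedFlow, only to show the synthesized copy().
case class MiniFlow(
    identifier: TableIdentifier,
    sqlConf: Map[String, String],
    once: Boolean = false)

val original = MiniFlow(TableIdentifier("events"), Map.empty)
// Override just the fields that change; everything else is carried over,
// mirroring what the removed copy(identifier, destinationIdentifier, sqlConf) did.
val rescoped = original.copy(sqlConf = Map("spark.sql.ansi.enabled" -> "true"))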

sql/pipelines/src/main/scala/org/apache/spark/sql/pipelines/graph/FlowAnalysis.scala

Lines changed: 6 additions & 15 deletions
@@ -17,23 +17,14 @@
 
 package org.apache.spark.sql.pipelines.graph
 
-import scala.util.Try
-
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{CTESubstitution, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
-import org.apache.spark.sql.classic.{DataFrame, Dataset, DataStreamReader, SparkSession}
-import org.apache.spark.sql.pipelines.{AnalysisWarning, Sql}
-import org.apache.spark.sql.pipelines.graph.GraphIdentifierManager.{
-  ExternalDatasetIdentifier,
-  InternalDatasetIdentifier
-}
-import org.apache.spark.sql.pipelines.util.{
-  BatchReadOptions,
-  InputReadOptions,
-  StreamingReadOptions
-}
+import org.apache.spark.sql.classic.{DataFrame, DataStreamReader, Dataset, SparkSession}
+import org.apache.spark.sql.pipelines.{AnalysisWarning, Language}
+import org.apache.spark.sql.pipelines.graph.GraphIdentifierManager.{ExternalDatasetIdentifier, InternalDatasetIdentifier}
+import org.apache.spark.sql.pipelines.util.{BatchReadOptions, InputReadOptions, StreamingReadOptions}
 
 object FlowAnalysis {
   def createFlowFunctionFromLogicalPlan(plan: LogicalPlan): FlowFunction = {

@@ -115,7 +106,7 @@ object FlowAnalysis {
           name = IdentifierHelper.toQuotedString(u.multipartIdentifier),
           spark.readStream,
           streamingReadOptions = StreamingReadOptions(
-            apiLanguage = Sql()
+            apiLanguage = Language.Sql()
           )
         ).queryExecution.analyzed
 
@@ -124,7 +115,7 @@ object FlowAnalysis {
         readBatchInput(
           context,
           name = IdentifierHelper.toQuotedString(u.multipartIdentifier),
-          batchReadOptions = BatchReadOptions(apiLanguage = Sql())
+          batchReadOptions = BatchReadOptions(apiLanguage = Language.Sql())
         ).queryExecution.analyzed
       }
       Dataset.ofRows(spark, resolvedPlan)
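Assuming the read-option constructors keep the shape shown in this hunk (constructible from apiLanguage alone), a non-SQL call site would select the other variant the same way; the snippet below is illustrative, not from the commit:

import org.apache.spark.sql.pipelines.Language
import org.apache.spark.sql.pipelines.util.{BatchReadOptions, StreamingReadOptions}

// Sketch: after this change, the only difference at such call sites is that the
// language variant is qualified through the Language companion object.
val streamingOpts = StreamingReadOptions(apiLanguage = Language.Python())
val batchOpts = BatchReadOptions(apiLanguage = Language.Python())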
