Commit 2460893

yaooqinn authored and dongjoon-hyun committed
[SPARK-52704][SQL][FOLLOWUP] Move session state utilities from trait FileFormat to SessionStateHelper
### What changes were proposed in this pull request?

Move session state utilities from the trait FileFormat to SessionStateHelper.

### Why are the changes needed?

To address the comments from #51398 (review).

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #51411 from yaooqinn/SPARK-52704-F.

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
1 parent c6e8645 · commit 2460893

6 files changed (+63, −27 lines)

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala

Lines changed: 3 additions & 17 deletions
```diff
@@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
 import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.internal.{SessionState, SQLConf}
+import org.apache.spark.sql.internal.{SessionStateHelper, SQLConf}
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
 
@@ -235,20 +235,6 @@ trait FileFormat {
    */
   def fileConstantMetadataExtractors: Map[String, PartitionedFile => Any] =
     FileFormat.BASE_METADATA_EXTRACTORS
-
-  protected def sessionState(sparkSession: SparkSession): SessionState = {
-    sparkSession.sessionState
-  }
-
-  protected def sqlConf(sparkSession: SparkSession): SQLConf = {
-    sessionState(sparkSession).conf
-  }
-
-  protected def hadoopConf(
-      sparkSession: SparkSession,
-      options: Map[String, String]): Configuration = {
-    sessionState(sparkSession).newHadoopConfWithOptions(options)
-  }
 }
 
 object FileFormat {
@@ -370,15 +356,15 @@ object FileFormat {
 /**
  * The base class file format that is based on text file.
  */
-abstract class TextBasedFileFormat extends FileFormat {
+abstract class TextBasedFileFormat extends FileFormat with SessionStateHelper {
   private var codecFactory: CompressionCodecFactory = _
 
   override def isSplitable(
       sparkSession: SparkSession,
       options: Map[String, String],
       path: Path): Boolean = {
     if (codecFactory == null) {
-      codecFactory = new CompressionCodecFactory(hadoopConf(sparkSession, options))
+      codecFactory = new CompressionCodecFactory(getHadoopConf(sparkSession, options))
     }
     val codec = codecFactory.getCodec(path)
     codec == null || codec.isInstanceOf[SplittableCompressionCodec]
```
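
With the protected helpers removed from `FileFormat`, the same utilities now arrive through the `SessionStateHelper` mixin that `TextBasedFileFormat` extends. A minimal sketch of how a subclass picks them up (the class and method below are hypothetical, not part of this patch):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.TextBasedFileFormat

// Hypothetical format: TextBasedFileFormat now mixes in SessionStateHelper,
// so getSqlConf and getHadoopConf are inherited without any extra wiring.
abstract class MyLineFormat extends TextBasedFileFormat {
  // Reads a session-scoped setting the same way the CSV/JSON/XML formats do.
  protected def sessionTimeZone(sparkSession: SparkSession): String =
    getSqlConf(sparkSession).sessionLocalTimeZone
}
```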

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -168,7 +168,7 @@ case class CSVFileFormat() extends TextBasedFileFormat with DataSourceRegister {
   private def getCsvOptions(
       sparkSession: SparkSession,
       options: Map[String, String]): CSVOptions = {
-    val conf = sqlConf(sparkSession)
+    val conf = getSqlConf(sparkSession)
     new CSVOptions(
       options,
       conf.csvColumnPruning,
```

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -136,7 +136,7 @@ case class JsonFileFormat() extends TextBasedFileFormat with DataSourceRegister
       spark: SparkSession,
       options: Map[String, String],
       inRead: Boolean = true): JSONOptions = {
-    val conf = sqlConf(spark)
+    val conf = getSqlConf(spark)
     if (inRead) {
       new JSONOptionsInRead(options, conf.sessionLocalTimeZone, conf.columnNameOfCorruptRecord)
     } else {
```

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala

Lines changed: 4 additions & 7 deletions
```diff
@@ -30,11 +30,11 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeSet, Expression, Expr
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes
-import org.apache.spark.sql.connector.read.{Batch, InputPartition, Scan, Statistics, SupportsReportStatistics}
+import org.apache.spark.sql.connector.read._
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.execution.PartitionedFileUtil
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.internal.{SessionState, SQLConf}
+import org.apache.spark.sql.internal.{SessionStateHelper, SQLConf}
 import org.apache.spark.sql.internal.connector.SupportsMetadata
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types.StructType
@@ -113,10 +113,7 @@ trait FileScan extends Scan
 
   override def hashCode(): Int = getClass.hashCode()
 
-  override def conf: SQLConf = {
-    val sessionState: SessionState = sparkSession.sessionState
-    sessionState.conf
-  }
+  override def conf: SQLConf = SessionStateHelper.getSqlConf(sparkSession)
 
   val maxMetadataValueLength = conf.maxMetadataStringLength
 
@@ -177,7 +174,7 @@ trait FileScan extends Scan
     if (splitFiles.length == 1) {
       val path = splitFiles(0).toPath
       if (!isSplitable(path) && splitFiles(0).length >
-        sparkSession.sparkContext.conf.get(IO_WARNING_LARGEFILETHRESHOLD)) {
+        SessionStateHelper.getSparkConf(sparkSession).get(IO_WARNING_LARGEFILETHRESHOLD)) {
         logWarning(log"Loading one large unsplittable file ${MDC(PATH, path.toString)} with only " +
           log"one partition, the reason is: ${MDC(REASON, getFileUnSplittableReason(path))}")
       }
```
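
Call sites that do not extend the trait can go through the companion `object SessionStateHelper extends SessionStateHelper`, which is exactly what the `conf` override above does. A small hypothetical sketch of that style:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.SessionStateHelper

// Hypothetical helper class: uses the companion object instead of the mixin.
class ScanDiagnostics(spark: SparkSession) {
  // Same SQLConf value FileScan reads for maxMetadataValueLength.
  def maxMetadataLength: Int =
    SessionStateHelper.getSqlConf(spark).maxMetadataStringLength

  // getSparkConf exposes the SparkContext-level conf, the one consulted
  // for IO_WARNING_LARGEFILETHRESHOLD above.
  def appName: String =
    SessionStateHelper.getSparkConf(spark).get("spark.app.name")
}
```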

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlFileFormat.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -42,7 +42,7 @@ case class XmlFileFormat() extends TextBasedFileFormat with DataSourceRegister {
   private def getXmlOptions(
       sparkSession: SparkSession,
       parameters: Map[String, String]): XmlOptions = {
-    val conf = sqlConf(sparkSession)
+    val conf = getSqlConf(sparkSession)
     new XmlOptions(parameters, conf.sessionLocalTimeZone, conf.columnNameOfCorruptRecord, true)
   }
```
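
The CSV, JSON, and XML changes are the same mechanical rename from the old protected `sqlConf` to the inherited `getSqlConf`; behavior is unchanged, so session-scoped settings still flow into the option builders. A usage sketch under assumed settings and an assumed input path:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// sessionLocalTimeZone, read via getSqlConf in the builders above, honors this.
spark.conf.set("spark.sql.session.timeZone", "UTC")

// Hypothetical path; CSV, JSON, and XML reads all pick up the session conf.
val df = spark.read.option("multiLine", "true").json("/tmp/events.json")
```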

sql/core/src/main/scala/org/apache/spark/sql/internal/SessionStateHelper.scala

Lines changed: 53 additions & 0 deletions
```diff
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.internal
+
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.SparkSession
+
+/**
+ * Helper trait to access session state related configurations and utilities.
+ * It also provides type annotations for IDEs to build indexes.
+ */
+trait SessionStateHelper {
+  private def sessionState(sparkSession: SparkSession): SessionState = {
+    sparkSession.sessionState
+  }
+
+  private def sparkContext(sparkSession: SparkSession): SparkContext = {
+    sparkSession.sparkContext
+  }
+
+  def getSparkConf(sparkSession: SparkSession): SparkConf = {
+    sparkContext(sparkSession).conf
+  }
+
+  def getSqlConf(sparkSession: SparkSession): SQLConf = {
+    sessionState(sparkSession).conf
+  }
+
+  def getHadoopConf(
+      sparkSession: SparkSession,
+      options: Map[String, String]): Configuration = {
+    sessionState(sparkSession).newHadoopConfWithOptions(options)
+  }
+}
+
+object SessionStateHelper extends SessionStateHelper
```
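
A brief usage sketch of the new helper (the local master and the `fs.defaultFS` override are illustrative assumptions): `getHadoopConf` builds the session's Hadoop configuration with the caller's options applied on top, via `newHadoopConfWithOptions`.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.SessionStateHelper

val spark = SparkSession.builder().master("local[*]").getOrCreate()

// Caller-supplied options are layered over the session's base Hadoop conf.
val hadoopConf =
  SessionStateHelper.getHadoopConf(spark, Map("fs.defaultFS" -> "file:///"))
assert(hadoopConf.get("fs.defaultFS") == "file:///")

// The SQL and Spark confs are reachable the same way.
val sqlConf = SessionStateHelper.getSqlConf(spark)
val sparkConf = SessionStateHelper.getSparkConf(spark)
```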
