
Commit 89dfef0: Resolve merge conflict
2 parents: fd5e76e + 5724c71

File tree: 274 files changed (+9,673 / -2,900 lines)


.github/workflows/build_and_test.yml

Lines changed: 19 additions & 0 deletions
@@ -122,6 +122,7 @@ jobs:
           \"tpcds-1g\": \"$tpcds\",
           \"docker-integration-tests\": \"$docker\",
           \"lint\" : \"true\",
+          \"java17\" : \"true\",
           \"java25\" : \"true\",
           \"docs\" : \"$docs\",
           \"yarn\" : \"$yarn\",
@@ -920,6 +921,24 @@ jobs:
     - name: R linter
       run: ./dev/lint-r
 
+  java17:
+    needs: [precondition]
+    if: fromJson(needs.precondition.outputs.required).java17 == 'true'
+    name: Java 17 build with Maven
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/setup-java@v4
+      with:
+        distribution: zulu
+        java-version: 17
+    - name: Build with Maven
+      run: |
+        export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install
+
   java25:
     needs: [precondition]
     if: fromJson(needs.precondition.outputs.required).java25 == 'true'

.github/workflows/build_python_ps_minimum.yml

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ jobs:
       envs: >-
         {
           "PYSPARK_IMAGE_TO_TEST": "python-ps-minimum",
-          "PYTHON_TO_TEST": "python3.9"
+          "PYTHON_TO_TEST": "python3.10"
         }
       jobs: >-
         {

.github/workflows/release.yml

Lines changed: 28 additions & 6 deletions
@@ -91,10 +91,23 @@ jobs:
   release:
     name: Release Apache Spark
     runs-on: ubuntu-latest
-    # Do not allow dispatching this workflow manually in the main repo.
-    # and skip this workflow in forked repository when running as a
-    # scheduled job (dryrun).
-    if: ${{ (github.repository == 'apache/spark') != (inputs.branch != '' && inputs.release-version != '') }}
+    # Allow workflow to run only in the following cases:
+    # 1. In the apache/spark repository:
+    #    - Only allow dry runs (i.e., both 'branch' and 'release-version' inputs are empty).
+    # 2. In forked repositories:
+    #    - Allow real runs when both 'branch' and 'release-version' are provided.
+    #    - Allow dry runs only if manually dispatched (not on a schedule).
+    if: |
+      (
+        github.repository == 'apache/spark' &&
+        inputs.branch == '' &&
+        inputs.release-version == ''
+      ) || (
+        github.repository != 'apache/spark' &&
+        (
+          (inputs.branch != '' && inputs.release-version != '') || github.event_name == 'workflow_dispatch'
+        )
+      )
     steps:
       - name: Checkout Spark repository
         uses: actions/checkout@v4
@@ -227,9 +240,18 @@ jobs:
           cp "$file" "$file.bak"
           for pattern in "${PATTERNS[@]}"; do
             [ -n "$pattern" ] || continue  # Skip empty patterns
-            escaped_pattern=$(printf '%s\n' "$pattern" | sed 's/[\/&]/\\&/g')
-            sed -i "s/${escaped_pattern}/***/g" "$file"
+
+            # Safely escape special characters for sed
+            escaped_pattern=${pattern//\\/\\\\}         # Escape backslashes
+            escaped_pattern=${escaped_pattern//\//\\/}  # Escape forward slashes
+            escaped_pattern=${escaped_pattern//&/\\&}   # Escape &
+            escaped_pattern=${escaped_pattern//$'\n'/}  # Remove newlines
+            escaped_pattern=${escaped_pattern//$'\r'/}  # Remove carriage returns (optional)
+
+            # Redact the pattern
+            sed -i.bak "s/${escaped_pattern}/***/g" "$file"
           done
+          rm -f "$file.bak"
         done
 
         # Zip logs/output
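The rewritten redaction loop replaces the old `printf | sed` escaping with plain bash parameter expansions, so the secret never passes through a subshell or an extra sed invocation. A rough Scala rendering of the same escaping rules, shown only to make the substitution order explicit (the `escapeForSed` helper name is ours, not part of the workflow):

```scala
object SedEscape {
  // Mirror the bash parameter expansions above: backslashes first,
  // then the s/// delimiter, then '&' (the whole-match marker in a
  // sed replacement), and finally strip CR/LF so the pattern stays
  // on one line.
  def escapeForSed(pattern: String): String =
    pattern
      .replace("\\", "\\\\")
      .replace("/", "\\/")
      .replace("&", "\\&")
      .replace("\n", "")
      .replace("\r", "")

  def main(args: Array[String]): Unit =
    println(escapeForSed("""token/with&odd\chars"""))  // token\/with\&odd\\chars
}
```

Escaping backslashes before the other characters matters: doing it last would double the backslashes that the earlier substitutions just introduced.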

common/network-common/pom.xml

Lines changed: 0 additions & 4 deletions
@@ -83,10 +83,6 @@
     </dependency>
     <!-- Netty End -->
 
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <dependency>
       <groupId>${leveldbjni.group}</groupId>
       <artifactId>leveldbjni-all</artifactId>

common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java

Lines changed: 3 additions & 2 deletions
@@ -33,7 +33,6 @@
 import io.netty.channel.ChannelOption;
 import io.netty.channel.EventLoopGroup;
 import io.netty.channel.socket.SocketChannel;
-import org.apache.commons.lang3.SystemUtils;
 
 import org.apache.spark.internal.SparkLogger;
 import org.apache.spark.internal.SparkLoggerFactory;
@@ -105,11 +104,13 @@ private void init(String hostToBind, int portToBind) {
     EventLoopGroup workerGroup = NettyUtils.createEventLoop(ioMode, conf.serverThreads(),
       conf.getModuleName() + "-server");
 
+    String name = System.getProperty("os.name");
+    boolean isNotWindows = 7 > name.length() || !name.regionMatches(true, 0, "Windows", 0, 7);
     bootstrap = new ServerBootstrap()
       .group(bossGroup, workerGroup)
       .channel(NettyUtils.getServerChannelClass(ioMode))
       .option(ChannelOption.ALLOCATOR, pooledAllocator)
-      .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS)
+      .option(ChannelOption.SO_REUSEADDR, isNotWindows)
       .childOption(ChannelOption.ALLOCATOR, pooledAllocator);
 
     this.metrics = new NettyMemoryMetrics(
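The inlined `regionMatches` check reproduces what commons-lang3's `SystemUtils.IS_OS_WINDOWS` provided: a case-insensitive test of whether `os.name` starts with "Windows", consistent with the dependency removal in common/network-common/pom.xml above. A minimal Scala sketch of the same predicate (the `isWindows` helper name is ours, not part of the patch):

```scala
object OsCheck {
  // Case-insensitive prefix test on os.name, mirroring the inlined
  // replacement for commons-lang3's SystemUtils.IS_OS_WINDOWS.
  def isWindows: Boolean = {
    val name = System.getProperty("os.name")
    name != null && name.length >= 7 && name.regionMatches(true, 0, "Windows", 0, 7)
  }

  def main(args: Array[String]): Unit =
    // TransportServer enables SO_REUSEADDR only when this is false.
    println(s"isWindows = $isWindows, SO_REUSEADDR = ${!isWindows}")
}
```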

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 20 additions & 1 deletion
@@ -4411,6 +4411,18 @@
     ],
     "sqlState" : "42809"
   },
+  "NOT_A_SCALAR_FUNCTION" : {
+    "message" : [
+      "<functionName> appears as a scalar expression here, but the function was defined as a table function. Please update the query to move the function call into the FROM clause, or redefine <functionName> as a scalar function instead."
+    ],
+    "sqlState" : "42887"
+  },
+  "NOT_A_TABLE_FUNCTION" : {
+    "message" : [
+      "<functionName> appears as a table function here, but the function was defined as a scalar function. Please update the query to move the function call outside the FROM clause, or redefine <functionName> as a table function instead."
+    ],
+    "sqlState" : "42887"
+  },
   "NOT_NULL_ASSERT_VIOLATION" : {
     "message" : [
       "NULL value appeared in non-nullable field: <walkedTypePath>If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (such as java.lang.Integer instead of int/scala.Int)."
@@ -4918,6 +4930,13 @@
     ],
     "sqlState" : "22023"
   },
+  "RUN_EMPTY_PIPELINE" : {
+    "message" : [
+      "Pipelines are expected to have at least one non-temporary dataset defined (tables, persisted views) but no non-temporary datasets were found in your pipeline.",
+      "Please verify that you have included the expected source files, and that your source code includes table definitions (e.g., CREATE MATERIALIZED VIEW in SQL code, @sdp.table in python code)."
+    ],
+    "sqlState" : "42617"
+  },
   "SCALAR_FUNCTION_NOT_COMPATIBLE" : {
     "message" : [
       "ScalarFunction <scalarFunc> not overrides method 'produceResult(InternalRow)' with custom implementation."
@@ -8676,7 +8695,7 @@
   },
   "_LEGACY_ERROR_TEMP_2250" : {
     "message" : [
-      "Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting <autoBroadcastJoinThreshold> to -1 or increase the spark driver memory by setting <driverMemory> to a higher value<analyzeTblMsg>"
+      "Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting <autoBroadcastJoinThreshold> to -1 or increase the spark driver memory by setting <driverMemory> to a higher value<analyzeTblMsg> or apply the shuffle sort merge join hint as described in the Spark documentation: https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-hints.html#join-hints."
     ]
   },
   "_LEGACY_ERROR_TEMP_2251" : {

common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala

Lines changed: 3 additions & 0 deletions
@@ -71,6 +71,8 @@ private[spark] object LogKeys {
   case object ALIGNED_TO_TIME extends LogKey
   case object ALPHA extends LogKey
   case object ANALYSIS_ERROR extends LogKey
+  case object ANTLR_DFA_CACHE_DELTA extends LogKey
+  case object ANTLR_DFA_CACHE_SIZE extends LogKey
   case object APP_ATTEMPT_ID extends LogKey
   case object APP_ATTEMPT_SHUFFLE_MERGE_ID extends LogKey
   case object APP_DESC extends LogKey
@@ -209,6 +211,7 @@ private[spark] object LogKeys {
   case object DIFF_DELTA extends LogKey
   case object DIVISIBLE_CLUSTER_INDICES_SIZE extends LogKey
   case object DRIVER_ID extends LogKey
+  case object DRIVER_JVM_MEMORY extends LogKey
   case object DRIVER_MEMORY_SIZE extends LogKey
   case object DRIVER_STATE extends LogKey
   case object DROPPED_PARTITIONS extends LogKey

common/utils/src/main/scala/org/apache/spark/util/ClosureCleaner.scala

Lines changed: 1 addition & 2 deletions
@@ -24,7 +24,6 @@ import java.lang.reflect.{Field, Modifier}
 import scala.collection.mutable.{Map, Queue, Set, Stack}
 import scala.jdk.CollectionConverters._
 
-import org.apache.commons.lang3.ClassUtils
 import org.apache.xbean.asm9.{ClassReader, ClassVisitor, Handle, MethodVisitor, Type}
 import org.apache.xbean.asm9.Opcodes._
 import org.apache.xbean.asm9.tree.{ClassNode, MethodNode}
@@ -619,7 +618,7 @@ private[spark] object IndylambdaScalaClosures extends Logging {
   def getSerializationProxy(maybeClosure: AnyRef): Option[SerializedLambda] = {
     def isClosureCandidate(cls: Class[_]): Boolean = {
       // TODO: maybe lift this restriction to support other functional interfaces in the future
-      val implementedInterfaces = ClassUtils.getAllInterfaces(cls).asScala
+      val implementedInterfaces = SparkClassUtils.getAllInterfaces(cls)
       implementedInterfaces.exists(_.getName.startsWith("scala.Function"))
     }
 
common/utils/src/main/scala/org/apache/spark/util/SparkClassUtils.scala

Lines changed: 28 additions & 0 deletions
@@ -18,6 +18,7 @@ package org.apache.spark.util
 
 import java.util.Random
 
+import scala.collection.mutable.LinkedHashSet
 import scala.util.Try
 
 private[spark] trait SparkClassUtils {
@@ -136,6 +137,33 @@ private[spark] trait SparkClassUtils {
       }
     }
   }
+
+  /**
+   * Gets a list of all interfaces implemented by the given class and its superclasses.
+   */
+  def getAllInterfaces(cls: Class[_]): List[Class[_]] = {
+    if (cls == null) {
+      return null
+    }
+    val interfacesFound = LinkedHashSet[Class[_]]()
+    getAllInterfacesHelper(cls, interfacesFound)
+    interfacesFound.toList
+  }
+
+  private def getAllInterfacesHelper(
+      clazz: Class[_],
+      interfacesFound: LinkedHashSet[Class[_]]): Unit = {
+    var currentClass = clazz
+    while (currentClass != null) {
+      val interfaces = currentClass.getInterfaces
+      for (i <- interfaces) {
+        if (interfacesFound.add(i)) {
+          getAllInterfacesHelper(i, interfacesFound)
+        }
+      }
+      currentClass = currentClass.getSuperclass
+    }
+  }
 }
 
 private[spark] object SparkClassUtils extends SparkClassUtils
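The new `getAllInterfaces` appears to replicate commons-lang3's `ClassUtils.getAllInterfaces` (down to returning `null` for `null` input), which is what lets ClosureCleaner.scala above drop that dependency. The recursion into superinterfaces is what makes the closure check work: a Scala lambda's immediate interface can be a specialized variant, with `scala.Function1` only appearing further up the hierarchy. A standalone sketch demonstrating that, with the traversal copied for illustration:

```scala
import scala.collection.mutable.LinkedHashSet

object InterfaceWalk {
  // Standalone copy of the traversal added above, for illustration only.
  def allInterfaces(cls: Class[_]): List[Class[_]] = {
    val found = LinkedHashSet[Class[_]]()
    def walk(c: Class[_]): Unit = {
      var cur = c
      while (cur != null) {
        cur.getInterfaces.foreach { i =>
          if (found.add(i)) walk(i) // also collect superinterfaces
        }
        cur = cur.getSuperclass
      }
    }
    walk(cls)
    found.toList
  }

  def main(args: Array[String]): Unit = {
    val f: Int => Int = _ + 1
    // ClosureCleaner's isClosureCandidate does exactly this lookup.
    println(allInterfaces(f.getClass).exists(_.getName.startsWith("scala.Function")))
  }
}
```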

core/benchmarks/ZStandardBenchmark-jdk21-results.txt

Lines changed: 28 additions & 28 deletions
@@ -2,48 +2,48 @@
 Benchmark ZStandardCompressionCodec
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Benchmark ZStandardCompressionCodec:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 --------------------------------------------------------------------------------------------------------------------------------------
-Compression 10000 times at level 1 without buffer pool            657            673          15          0.0       65664.9       1.0X
-Compression 10000 times at level 2 without buffer pool            718            726          13          0.0       71830.9       0.9X
-Compression 10000 times at level 3 without buffer pool            815            819           7          0.0       81453.1       0.8X
-Compression 10000 times at level 1 with buffer pool               598            600           1          0.0       59809.3       1.1X
-Compression 10000 times at level 2 with buffer pool               637            639           2          0.0       63710.0       1.0X
-Compression 10000 times at level 3 with buffer pool               754            757           3          0.0       75403.8       0.9X
+Compression 10000 times at level 1 without buffer pool            646            668          20          0.0       64639.2       1.0X
+Compression 10000 times at level 2 without buffer pool            715            716           2          0.0       71496.2       0.9X
+Compression 10000 times at level 3 without buffer pool            810            818           7          0.0       81013.5       0.8X
+Compression 10000 times at level 1 with buffer pool               603            604           0          0.0       60335.0       1.1X
+Compression 10000 times at level 2 with buffer pool               638            641           3          0.0       63817.7       1.0X
+Compression 10000 times at level 3 with buffer pool               739            740           1          0.0       73912.1       0.9X
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Benchmark ZStandardCompressionCodec:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------------------------
-Decompression 10000 times from level 1 without buffer pool            832            835           2          0.0       83247.0       1.0X
-Decompression 10000 times from level 2 without buffer pool            833            840           8          0.0       83277.8       1.0X
-Decompression 10000 times from level 3 without buffer pool            833            833           0          0.0       83280.6       1.0X
-Decompression 10000 times from level 1 with buffer pool               753            755           2          0.0       75333.7       1.1X
-Decompression 10000 times from level 2 with buffer pool               751            752           1          0.0       75115.3       1.1X
-Decompression 10000 times from level 3 with buffer pool               753            754           2          0.0       75254.0       1.1X
+Decompression 10000 times from level 1 without buffer pool            830            833           4          0.0       83030.7       1.0X
+Decompression 10000 times from level 2 without buffer pool            832            833           1          0.0       83236.0       1.0X
+Decompression 10000 times from level 3 without buffer pool            832            833           1          0.0       83183.1       1.0X
+Decompression 10000 times from level 1 with buffer pool               758            759           1          0.0       75813.5       1.1X
+Decompression 10000 times from level 2 with buffer pool               758            758           1          0.0       75767.1       1.1X
+Decompression 10000 times from level 3 with buffer pool               757            758           1          0.0       75652.4       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Parallel Compression at level 3:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Parallel Compression with 0 workers                  77             78           1          0.0      601317.6       1.0X
-Parallel Compression with 1 workers                  64             70           4          0.0      499456.2       1.2X
-Parallel Compression with 2 workers                  53             56           3          0.0      410610.6       1.5X
-Parallel Compression with 4 workers                  45             47           1          0.0      350847.8       1.7X
-Parallel Compression with 8 workers                  47             49           1          0.0      370647.8       1.6X
-Parallel Compression with 16 workers                 50             52           1          0.0      390524.8       1.5X
+Parallel Compression with 0 workers                  66             66           0          0.0      512667.9       1.0X
+Parallel Compression with 1 workers                  56             58           2          0.0      435183.0       1.2X
+Parallel Compression with 2 workers                  46             47           1          0.0      356034.8       1.4X
+Parallel Compression with 4 workers                  41             43           1          0.0      318331.7       1.6X
+Parallel Compression with 8 workers                  44             46           1          0.0      342564.5       1.5X
+Parallel Compression with 16 workers                 48             51           2          0.0      371266.4       1.4X
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Parallel Compression at level 9:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Parallel Compression with 0 workers                 243            244           2          0.0     1894990.4       1.0X
-Parallel Compression with 1 workers                 299            300           2          0.0     2335128.6       0.8X
-Parallel Compression with 2 workers                 167            177          11          0.0     1308212.2       1.4X
-Parallel Compression with 4 workers                 161            163           3          0.0     1254638.0       1.5X
-Parallel Compression with 8 workers                 166            170           4          0.0     1299104.0       1.5X
-Parallel Compression with 16 workers                167            170           2          0.0     1301666.4       1.5X
+Parallel Compression with 0 workers                 236            237           1          0.0     1847245.2       1.0X
+Parallel Compression with 1 workers                 251            252           2          0.0     1961753.5       0.9X
+Parallel Compression with 2 workers                 141            148           4          0.0     1100274.2       1.7X
+Parallel Compression with 4 workers                 129            133           3          0.0     1009465.5       1.8X
+Parallel Compression with 8 workers                 135            139           3          0.0     1054496.8       1.8X
+Parallel Compression with 16 workers                135            139           6          0.0     1051577.1       1.8X
 
 