
Commit 89dfef0: Resolve merge conflict
2 parents: fd5e76e + 5724c71

File tree: 274 files changed (+9,673 / -2,900 lines)


.github/workflows/build_and_test.yml

Lines changed: 19 additions & 0 deletions
@@ -122,6 +122,7 @@ jobs:
           \"tpcds-1g\": \"$tpcds\",
           \"docker-integration-tests\": \"$docker\",
           \"lint\" : \"true\",
+          \"java17\" : \"true\",
           \"java25\" : \"true\",
           \"docs\" : \"$docs\",
           \"yarn\" : \"$yarn\",
@@ -920,6 +921,24 @@ jobs:
     - name: R linter
       run: ./dev/lint-r
 
+  java17:
+    needs: [precondition]
+    if: fromJson(needs.precondition.outputs.required).java17 == 'true'
+    name: Java 17 build with Maven
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+    steps:
+    - uses: actions/checkout@v4
+    - uses: actions/setup-java@v4
+      with:
+        distribution: zulu
+        java-version: 17
+    - name: Build with Maven
+      run: |
+        export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install
+
   java25:
     needs: [precondition]
     if: fromJson(needs.precondition.outputs.required).java25 == 'true'

.github/workflows/build_python_ps_minimum.yml

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ jobs:
       envs: >-
         {
           "PYSPARK_IMAGE_TO_TEST": "python-ps-minimum",
-          "PYTHON_TO_TEST": "python3.9"
+          "PYTHON_TO_TEST": "python3.10"
         }
       jobs: >-
         {

.github/workflows/release.yml

Lines changed: 28 additions & 6 deletions
@@ -91,10 +91,23 @@ jobs:
   release:
     name: Release Apache Spark
     runs-on: ubuntu-latest
-    # Do not allow dispatching this workflow manually in the main repo.
-    # and skip this workflow in forked repository when running as a
-    # scheduled job (dryrun).
-    if: ${{ (github.repository == 'apache/spark') != (inputs.branch != '' && inputs.release-version != '') }}
+    # Allow workflow to run only in the following cases:
+    # 1. In the apache/spark repository:
+    #    - Only allow dry runs (i.e., both 'branch' and 'release-version' inputs are empty).
+    # 2. In forked repositories:
+    #    - Allow real runs when both 'branch' and 'release-version' are provided.
+    #    - Allow dry runs only if manually dispatched (not on a schedule).
+    if: |
+      (
+        github.repository == 'apache/spark' &&
+        inputs.branch == '' &&
+        inputs.release-version == ''
+      ) || (
+        github.repository != 'apache/spark' &&
+        (
+          (inputs.branch != '' && inputs.release-version != '') || github.event_name == 'workflow_dispatch'
+        )
+      )
     steps:
       - name: Checkout Spark repository
         uses: actions/checkout@v4
@@ -227,9 +240,18 @@ jobs:
           cp "$file" "$file.bak"
           for pattern in "${PATTERNS[@]}"; do
             [ -n "$pattern" ] || continue  # Skip empty patterns
-            escaped_pattern=$(printf '%s\n' "$pattern" | sed 's/[\/&]/\\&/g')
-            sed -i "s/${escaped_pattern}/***/g" "$file"
+
+            # Safely escape special characters for sed
+            escaped_pattern=${pattern//\\/\\\\}         # Escape backslashes
+            escaped_pattern=${escaped_pattern//\//\\/}  # Escape forward slashes
+            escaped_pattern=${escaped_pattern//&/\\&}   # Escape &
+            escaped_pattern=${escaped_pattern//$'\n'/}  # Remove newlines
+            escaped_pattern=${escaped_pattern//$'\r'/}  # Remove carriage returns (optional)
+
+            # Redact the pattern
+            sed -i.bak "s/${escaped_pattern}/***/g" "$file"
           done
+          rm -f "$file.bak"
         done
 
         # Zip logs/output
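The rewritten redaction loop replaces the old `printf | sed` escaping with plain bash parameter expansions, so the secret never passes through a subshell or an extra sed invocation. A rough Scala rendering of the same escaping rules, shown only to make the substitution order explicit (the `escapeForSed` helper name is ours, not part of the workflow):

```scala
object SedEscape {
  // Mirror the bash parameter expansions above: backslashes first,
  // then the s/// delimiter, then '&' (the whole-match marker in a
  // sed replacement), and finally strip CR/LF so the pattern stays
  // on one line.
  def escapeForSed(pattern: String): String =
    pattern
      .replace("\\", "\\\\")
      .replace("/", "\\/")
      .replace("&", "\\&")
      .replace("\n", "")
      .replace("\r", "")

  def main(args: Array[String]): Unit =
    println(escapeForSed("""token/with&odd\chars"""))  // token\/with\&odd\\chars
}
```

Escaping backslashes before the other characters matters: doing it last would double the backslashes that the earlier substitutions just introduced.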

common/network-common/pom.xml

Lines changed: 0 additions & 4 deletions
@@ -83,10 +83,6 @@
     </dependency>
     <!-- Netty End -->
 
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-lang3</artifactId>
-    </dependency>
     <dependency>
       <groupId>${leveldbjni.group}</groupId>
       <artifactId>leveldbjni-all</artifactId>

common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java

Lines changed: 3 additions & 2 deletions
@@ -33,7 +33,6 @@
 import io.netty.channel.ChannelOption;
 import io.netty.channel.EventLoopGroup;
 import io.netty.channel.socket.SocketChannel;
-import org.apache.commons.lang3.SystemUtils;
 
 import org.apache.spark.internal.SparkLogger;
 import org.apache.spark.internal.SparkLoggerFactory;
@@ -105,11 +104,13 @@ private void init(String hostToBind, int portToBind) {
     EventLoopGroup workerGroup = NettyUtils.createEventLoop(ioMode, conf.serverThreads(),
       conf.getModuleName() + "-server");
 
+    String name = System.getProperty("os.name");
+    boolean isNotWindows = 7 > name.length() || !name.regionMatches(true, 0, "Windows", 0, 7);
     bootstrap = new ServerBootstrap()
       .group(bossGroup, workerGroup)
       .channel(NettyUtils.getServerChannelClass(ioMode))
       .option(ChannelOption.ALLOCATOR, pooledAllocator)
-      .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS)
+      .option(ChannelOption.SO_REUSEADDR, isNotWindows)
       .childOption(ChannelOption.ALLOCATOR, pooledAllocator);
 
     this.metrics = new NettyMemoryMetrics(
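The inlined `regionMatches` check reproduces what commons-lang3's `SystemUtils.IS_OS_WINDOWS` provided: a case-insensitive test of whether `os.name` starts with "Windows", consistent with the dependency removal in common/network-common/pom.xml above. A minimal Scala sketch of the same predicate (the `isWindows` helper name is ours, not part of the patch):

```scala
object OsCheck {
  // Case-insensitive prefix test on os.name, mirroring the inlined
  // replacement for commons-lang3's SystemUtils.IS_OS_WINDOWS.
  def isWindows: Boolean = {
    val name = System.getProperty("os.name")
    name != null && name.length >= 7 && name.regionMatches(true, 0, "Windows", 0, 7)
  }

  def main(args: Array[String]): Unit =
    // TransportServer enables SO_REUSEADDR only when this is false.
    println(s"isWindows = $isWindows, SO_REUSEADDR = ${!isWindows}")
}
```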

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 20 additions & 1 deletion
@@ -4411,6 +4411,18 @@
     ],
     "sqlState" : "42809"
   },
+  "NOT_A_SCALAR_FUNCTION" : {
+    "message" : [
+      "<functionName> appears as a scalar expression here, but the function was defined as a table function. Please update the query to move the function call into the FROM clause, or redefine <functionName> as a scalar function instead."
+    ],
+    "sqlState" : "42887"
+  },
+  "NOT_A_TABLE_FUNCTION" : {
+    "message" : [
+      "<functionName> appears as a table function here, but the function was defined as a scalar function. Please update the query to move the function call outside the FROM clause, or redefine <functionName> as a table function instead."
+    ],
+    "sqlState" : "42887"
+  },
   "NOT_NULL_ASSERT_VIOLATION" : {
     "message" : [
       "NULL value appeared in non-nullable field: <walkedTypePath>If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (such as java.lang.Integer instead of int/scala.Int)."
@@ -4918,6 +4930,13 @@
     ],
     "sqlState" : "22023"
   },
+  "RUN_EMPTY_PIPELINE" : {
+    "message" : [
+      "Pipelines are expected to have at least one non-temporary dataset defined (tables, persisted views) but no non-temporary datasets were found in your pipeline.",
+      "Please verify that you have included the expected source files, and that your source code includes table definitions (e.g., CREATE MATERIALIZED VIEW in SQL code, @sdp.table in python code)."
+    ],
+    "sqlState" : "42617"
+  },
   "SCALAR_FUNCTION_NOT_COMPATIBLE" : {
     "message" : [
       "ScalarFunction <scalarFunc> not overrides method 'produceResult(InternalRow)' with custom implementation."
@@ -8676,7 +8695,7 @@
   },
   "_LEGACY_ERROR_TEMP_2250" : {
     "message" : [
-      "Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting <autoBroadcastJoinThreshold> to -1 or increase the spark driver memory by setting <driverMemory> to a higher value<analyzeTblMsg>"
+      "Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting <autoBroadcastJoinThreshold> to -1 or increase the spark driver memory by setting <driverMemory> to a higher value<analyzeTblMsg> or apply the shuffle sort merge join hint as described in the Spark documentation: https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-hints.html#join-hints."
     ]
   },
   "_LEGACY_ERROR_TEMP_2251" : {

common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala

Lines changed: 3 additions & 0 deletions
@@ -71,6 +71,8 @@ private[spark] object LogKeys {
   case object ALIGNED_TO_TIME extends LogKey
   case object ALPHA extends LogKey
   case object ANALYSIS_ERROR extends LogKey
+  case object ANTLR_DFA_CACHE_DELTA extends LogKey
+  case object ANTLR_DFA_CACHE_SIZE extends LogKey
   case object APP_ATTEMPT_ID extends LogKey
   case object APP_ATTEMPT_SHUFFLE_MERGE_ID extends LogKey
   case object APP_DESC extends LogKey
@@ -209,6 +211,7 @@ private[spark] object LogKeys {
   case object DIFF_DELTA extends LogKey
   case object DIVISIBLE_CLUSTER_INDICES_SIZE extends LogKey
   case object DRIVER_ID extends LogKey
+  case object DRIVER_JVM_MEMORY extends LogKey
   case object DRIVER_MEMORY_SIZE extends LogKey
   case object DRIVER_STATE extends LogKey
   case object DROPPED_PARTITIONS extends LogKey

common/utils/src/main/scala/org/apache/spark/util/ClosureCleaner.scala

Lines changed: 1 addition & 2 deletions
@@ -24,7 +24,6 @@ import java.lang.reflect.{Field, Modifier}
 import scala.collection.mutable.{Map, Queue, Set, Stack}
 import scala.jdk.CollectionConverters._
 
-import org.apache.commons.lang3.ClassUtils
 import org.apache.xbean.asm9.{ClassReader, ClassVisitor, Handle, MethodVisitor, Type}
 import org.apache.xbean.asm9.Opcodes._
 import org.apache.xbean.asm9.tree.{ClassNode, MethodNode}
@@ -619,7 +618,7 @@ private[spark] object IndylambdaScalaClosures extends Logging {
   def getSerializationProxy(maybeClosure: AnyRef): Option[SerializedLambda] = {
     def isClosureCandidate(cls: Class[_]): Boolean = {
       // TODO: maybe lift this restriction to support other functional interfaces in the future
-      val implementedInterfaces = ClassUtils.getAllInterfaces(cls).asScala
+      val implementedInterfaces = SparkClassUtils.getAllInterfaces(cls)
       implementedInterfaces.exists(_.getName.startsWith("scala.Function"))
     }
 
common/utils/src/main/scala/org/apache/spark/util/SparkClassUtils.scala

Lines changed: 28 additions & 0 deletions
@@ -18,6 +18,7 @@ package org.apache.spark.util
 
 import java.util.Random
 
+import scala.collection.mutable.LinkedHashSet
 import scala.util.Try
 
 private[spark] trait SparkClassUtils {
@@ -136,6 +137,33 @@ private[spark] trait SparkClassUtils {
       }
     }
   }
+
+  /**
+   * Gets a list of all interfaces implemented by the given class and its superclasses.
+   */
+  def getAllInterfaces(cls: Class[_]): List[Class[_]] = {
+    if (cls == null) {
+      return null
+    }
+    val interfacesFound = LinkedHashSet[Class[_]]()
+    getAllInterfacesHelper(cls, interfacesFound)
+    interfacesFound.toList
+  }
+
+  private def getAllInterfacesHelper(
+      clazz: Class[_],
+      interfacesFound: LinkedHashSet[Class[_]]): Unit = {
+    var currentClass = clazz
+    while (currentClass != null) {
+      val interfaces = currentClass.getInterfaces
+      for (i <- interfaces) {
+        if (interfacesFound.add(i)) {
+          getAllInterfacesHelper(i, interfacesFound)
+        }
+      }
+      currentClass = currentClass.getSuperclass
+    }
+  }
 }
 
 private[spark] object SparkClassUtils extends SparkClassUtils
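The new `getAllInterfaces` appears to replicate commons-lang3's `ClassUtils.getAllInterfaces` (down to returning `null` for `null` input), which is what lets ClosureCleaner.scala above drop that dependency. The recursion into superinterfaces is what makes the closure check work: a Scala lambda's immediate interface can be a specialized variant, with `scala.Function1` only appearing further up the hierarchy. A standalone sketch demonstrating that, with the traversal copied for illustration:

```scala
import scala.collection.mutable.LinkedHashSet

object InterfaceWalk {
  // Standalone copy of the traversal added above, for illustration only.
  def allInterfaces(cls: Class[_]): List[Class[_]] = {
    val found = LinkedHashSet[Class[_]]()
    def walk(c: Class[_]): Unit = {
      var cur = c
      while (cur != null) {
        cur.getInterfaces.foreach { i =>
          if (found.add(i)) walk(i) // also collect superinterfaces
        }
        cur = cur.getSuperclass
      }
    }
    walk(cls)
    found.toList
  }

  def main(args: Array[String]): Unit = {
    val f: Int => Int = _ + 1
    // ClosureCleaner's isClosureCandidate does exactly this lookup.
    println(allInterfaces(f.getClass).exists(_.getName.startsWith("scala.Function")))
  }
}
```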

core/benchmarks/ZStandardBenchmark-jdk21-results.txt

Lines changed: 28 additions & 28 deletions
@@ -2,48 +2,48 @@
 Benchmark ZStandardCompressionCodec
 ================================================================================================
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Benchmark ZStandardCompressionCodec:                    Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 --------------------------------------------------------------------------------------------------------------------------------------
-Compression 10000 times at level 1 without buffer pool            657            673          15          0.0       65664.9       1.0X
-Compression 10000 times at level 2 without buffer pool            718            726          13          0.0       71830.9       0.9X
-Compression 10000 times at level 3 without buffer pool            815            819           7          0.0       81453.1       0.8X
-Compression 10000 times at level 1 with buffer pool               598            600           1          0.0       59809.3       1.1X
-Compression 10000 times at level 2 with buffer pool               637            639           2          0.0       63710.0       1.0X
-Compression 10000 times at level 3 with buffer pool               754            757           3          0.0       75403.8       0.9X
+Compression 10000 times at level 1 without buffer pool            646            668          20          0.0       64639.2       1.0X
+Compression 10000 times at level 2 without buffer pool            715            716           2          0.0       71496.2       0.9X
+Compression 10000 times at level 3 without buffer pool            810            818           7          0.0       81013.5       0.8X
+Compression 10000 times at level 1 with buffer pool               603            604           0          0.0       60335.0       1.1X
+Compression 10000 times at level 2 with buffer pool               638            641           3          0.0       63817.7       1.0X
+Compression 10000 times at level 3 with buffer pool               739            740           1          0.0       73912.1       0.9X
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Benchmark ZStandardCompressionCodec:                        Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------------------------
-Decompression 10000 times from level 1 without buffer pool            832            835           2          0.0       83247.0       1.0X
-Decompression 10000 times from level 2 without buffer pool            833            840           8          0.0       83277.8       1.0X
-Decompression 10000 times from level 3 without buffer pool            833            833           0          0.0       83280.6       1.0X
-Decompression 10000 times from level 1 with buffer pool               753            755           2          0.0       75333.7       1.1X
-Decompression 10000 times from level 2 with buffer pool               751            752           1          0.0       75115.3       1.1X
-Decompression 10000 times from level 3 with buffer pool               753            754           2          0.0       75254.0       1.1X
+Decompression 10000 times from level 1 without buffer pool            830            833           4          0.0       83030.7       1.0X
+Decompression 10000 times from level 2 without buffer pool            832            833           1          0.0       83236.0       1.0X
+Decompression 10000 times from level 3 without buffer pool            832            833           1          0.0       83183.1       1.0X
+Decompression 10000 times from level 1 with buffer pool               758            759           1          0.0       75813.5       1.1X
+Decompression 10000 times from level 2 with buffer pool               758            758           1          0.0       75767.1       1.1X
+Decompression 10000 times from level 3 with buffer pool               757            758           1          0.0       75652.4       1.1X
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Parallel Compression at level 3:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Parallel Compression with 0 workers                  77             78           1          0.0      601317.6       1.0X
-Parallel Compression with 1 workers                  64             70           4          0.0      499456.2       1.2X
-Parallel Compression with 2 workers                  53             56           3          0.0      410610.6       1.5X
-Parallel Compression with 4 workers                  45             47           1          0.0      350847.8       1.7X
-Parallel Compression with 8 workers                  47             49           1          0.0      370647.8       1.6X
-Parallel Compression with 16 workers                 50             52           1          0.0      390524.8       1.5X
+Parallel Compression with 0 workers                  66             66           0          0.0      512667.9       1.0X
+Parallel Compression with 1 workers                  56             58           2          0.0      435183.0       1.2X
+Parallel Compression with 2 workers                  46             47           1          0.0      356034.8       1.4X
+Parallel Compression with 4 workers                  41             43           1          0.0      318331.7       1.6X
+Parallel Compression with 8 workers                  44             46           1          0.0      342564.5       1.5X
+Parallel Compression with 16 workers                 48             51           2          0.0      371266.4       1.4X
 
-OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1014-azure
+OpenJDK 64-Bit Server VM 21.0.7+6-LTS on Linux 6.11.0-1018-azure
 AMD EPYC 7763 64-Core Processor
 Parallel Compression at level 9:          Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
 ------------------------------------------------------------------------------------------------------------------------
-Parallel Compression with 0 workers                 243            244           2          0.0     1894990.4       1.0X
-Parallel Compression with 1 workers                 299            300           2          0.0     2335128.6       0.8X
-Parallel Compression with 2 workers                 167            177          11          0.0     1308212.2       1.4X
-Parallel Compression with 4 workers                 161            163           3          0.0     1254638.0       1.5X
-Parallel Compression with 8 workers                 166            170           4          0.0     1299104.0       1.5X
-Parallel Compression with 16 workers                167            170           2          0.0     1301666.4       1.5X
+Parallel Compression with 0 workers                 236            237           1          0.0     1847245.2       1.0X
+Parallel Compression with 1 workers                 251            252           2          0.0     1961753.5       0.9X
+Parallel Compression with 2 workers                 141            148           4          0.0     1100274.2       1.7X
+Parallel Compression with 4 workers                 129            133           3          0.0     1009465.5       1.8X
+Parallel Compression with 8 workers                 135            139           3          0.0     1054496.8       1.8X
+Parallel Compression with 16 workers                135            139           6          0.0     1051577.1       1.8X
 
 