# syntax=docker/dockerfile:1.8.1@sha256:e87caa74dcb7d46cd820352bfea12591f3dba3ddc4285e19c7dcd13359f7cefd

- FROM stackable/image/java-devel as builder
+ # hadoop-builder: Provides Hadoop libraries
+ FROM stackable/image/hadoop AS hadoop-builder
+
+ # hbase-builder: Provides HBase libraries
+ FROM stackable/image/hbase AS hbase-builder
+
+ # spark-source-builder: Download the Spark source code into
+ # /stackable/spark and apply the patches
+ FROM stackable/image/java-devel as spark-source-builder

ARG PRODUCT
- ARG HADOOP_LONG_VERSION
- ARG AWS_JAVA_SDK_BUNDLE
- ARG AZURE_STORAGE
- ARG AZURE_KEYVAULT_CORE
- ARG JACKSON_DATAFORMAT_XML
- ARG STAX2_API
- ARG WOODSTOX_CORE
- ARG JMX_EXPORTER
- ARG TARGETARCH
- ARG TINI

RUN <<EOF
microdnf update

WORKDIR /stackable

- COPY --chown=stackable:stackable spark-k8s/stackable/patches/apply_patches.sh /stackable/spark-${PRODUCT}/patches/apply_patches.sh
- COPY --chown=stackable:stackable spark-k8s/stackable/patches/${PRODUCT} /stackable/spark-${PRODUCT}/patches/${PRODUCT}
+ RUN <<EOF
+ curl https://repo.stackable.tech/repository/packages/spark/spark-${PRODUCT}.tgz \
+ | tar xz
+ ln -s spark-${PRODUCT} spark
+ EOF
+
+ WORKDIR /stackable/spark
+
+ COPY --chown=stackable:stackable \
+ spark-k8s/stackable/patches/apply_patches.sh \
+ patches/apply_patches.sh
+ COPY --chown=stackable:stackable \
+ spark-k8s/stackable/patches/${PRODUCT} \
+ patches/${PRODUCT}
+
+ RUN patches/apply_patches.sh ${PRODUCT}
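The patch set applied here is exactly what the two COPY steps above placed under patches/${PRODUCT}; a quick sketch for inspecting it inside the spark-source-builder stage (purely illustrative, not part of the build):

    # List the patch files apply_patches.sh will work through for this Spark version
    ls /stackable/spark/patches/${PRODUCT}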
+
+
+ # hbase-connectors-builder: Build the Spark HBase connector and copy
+ # required JARs into /stackable/spark/jars
+ FROM stackable/image/java-devel as hbase-connectors-builder
+
+ ARG PRODUCT
+ ARG HADOOP
+ ARG HBASE
+ ARG HBASE_CONNECTOR
+
+ WORKDIR /stackable
+
+ # Download the hbase-connectors source code
+ RUN <<EOF
+ curl https://repo.stackable.tech/repository/packages/hbase-connectors/hbase-connectors_${HBASE_CONNECTOR}.tar.gz \
+ | tar xz
+ ln -s hbase-connectors-rel-${HBASE_CONNECTOR} hbase-connectors
+ EOF
+
+ # Copy the pom.xml file from the patched Spark source code to read the
+ # versions used by Spark. The pom.xml defines child modules which are
+ # not required and not copied, therefore mvn must be called with the
+ # parameter --non-recursive.
+ COPY --chown=stackable:stackable --from=spark-source-builder \
+ /stackable/spark/pom.xml \
+ spark/
+
+ WORKDIR /stackable/hbase-connectors/spark
+
+ RUN <<EOF
+ # Building the hbase-connectors with JDK 17 is not yet supported, see
+ # https://github.com/apache/hbase-connectors/pull/132.
+ # As there are no JDK profiles, access to the non-public elements must
+ # be enabled with --add-opens, see https://openjdk.org/jeps/403 and
+ # https://openjdk.org/jeps/261#Breaking-encapsulation.
+ export JDK_JAVA_OPTIONS="\
+ --add-opens java.base/java.lang=ALL-UNNAMED \
+ --add-opens java.base/java.util=ALL-UNNAMED"
+
+ # Get the Scala version used by Spark
+ SCALA_VERSION=$( \
+ mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
+ org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+ -DforceStdout \
+ -Dexpression='project.properties(scala.version)' )
+
+ # Get the Scala binary version used by Spark
+ SCALA_BINARY_VERSION=$( \
+ mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
+ org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
+ -DforceStdout \
+ -Dexpression='project.properties(scala.binary.version)' )
+
+ # Build the Spark HBase connector
+ # Skip the tests because the MiniHBaseCluster does not get ready for
+ # whatever reason:
+ # Caused by: java.lang.RuntimeException: Master not active after 30000ms
+ # at org.apache.hadoop.hbase.util.JVMClusterUtil.waitForEvent(JVMClusterUtil.java:221)
+ # at org.apache.hadoop.hbase.util.JVMClusterUtil.startup(JVMClusterUtil.java:177)
+ # at org.apache.hadoop.hbase.LocalHBaseCluster.startup(LocalHBaseCluster.java:407)
+ # at org.apache.hadoop.hbase.MiniHBaseCluster.init(MiniHBaseCluster.java:250)
+ mvn \
+ --batch-mode \
+ --no-transfer-progress \
+ --define spark.version="${PRODUCT}" \
+ --define scala.version="${SCALA_VERSION}" \
+ --define scala.binary.version="${SCALA_BINARY_VERSION}" \
+ --define hadoop-three.version="${HADOOP}" \
+ --define hbase.version="${HBASE}" \
+ --define skipTests \
+ clean package
+ EOF
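The --add-opens workaround above relies on the java launcher (JDK 9+) reading JDK_JAVA_OPTIONS and prepending it to every JVM that Maven starts. A minimal sketch of that mechanism outside the build (the echoed note goes to stderr):

    export JDK_JAVA_OPTIONS="--add-opens java.base/java.lang=ALL-UNNAMED"
    java -version
    # NOTE: Picked up JDK_JAVA_OPTIONS: --add-opens java.base/java.lang=ALL-UNNAMED
    # ...followed by the usual version banner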
+
+ WORKDIR /stackable/spark/jars
+
+ RUN <<EOF
+ ln -s /stackable/hbase-connectors/spark/hbase-spark/target/hbase-spark-${HBASE_CONNECTOR}.jar
+
+ # Download log4j-slf4j-impl-x.x.x.jar containing the StaticLoggerBinder
+ # which is required by the connector.
+ # Spark contains only log4j-slf4j2-impl-x.x.x.jar but not
+ # log4j-slf4j-impl-x.x.x.jar. It is okay to have both JARs in the
+ # classpath as long as they have the same version.
+ mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
+ dependency:copy \
+ -Dartifact=org.apache.logging.log4j:log4j-slf4j-impl:'${log4j.version}' \
+ -DoutputDirectory=.
+ EOF
+
+
+ # spark-builder: Build Spark into /stackable/spark-${PRODUCT}/dist,
+ # download additional JARs and perform checks, like log4shell check.
+ FROM stackable/image/java-devel as spark-builder
+
+ ARG PRODUCT
+ ARG HADOOP
+ ARG HBASE
+ ARG AWS_JAVA_SDK_BUNDLE
+ ARG AZURE_STORAGE
+ ARG AZURE_KEYVAULT_CORE
+ ARG JACKSON_DATAFORMAT_XML
+ ARG STAX2_API
+ ARG WOODSTOX_CORE
+ ARG JMX_EXPORTER
+ ARG TARGETARCH
+ ARG TINI
+
+ WORKDIR /stackable/spark-${PRODUCT}
+
+ COPY --chown=stackable:stackable --from=spark-source-builder \
+ /stackable/spark/ \
+ ./

# >>> Build spark
# Compiling the tests takes a lot of time, so we skip them
@@ -37,12 +162,9 @@ COPY --chown=stackable:stackable spark-k8s/stackable/patches/${PRODUCT} /stackab
#
# This will download its own version of Maven because the UBI version is too old:
# 134.0 [ERROR] Detected Maven Version: 3.6.3 is not in the allowed range [3.8.8,)
- RUN curl https://repo.stackable.tech/repository/packages/spark/spark-${PRODUCT}.tgz | tar -xzf - \
- && cd spark-${PRODUCT} \
- && ./patches/apply_patches.sh ${PRODUCT} \
- && export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" \
+ RUN export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g" \
&& ./dev/make-distribution.sh \
- -Dhadoop.version="$HADOOP_LONG_VERSION" \
+ -Dhadoop.version="$HADOOP" \
-Dmaven.test.skip=true \
-DskipTests \
-P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
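dev/make-distribution.sh writes the runnable distribution to dist/ inside the source tree, which is the layout the following stages copy from. A hedged sanity check of that assumption:

    ls /stackable/spark-${PRODUCT}/dist        # expected: bin/, sbin/, jars/, examples/, ...
    ls /stackable/spark-${PRODUCT}/dist/jars | wc -l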
@@ -55,12 +177,40 @@ RUN curl -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/
# We download these under dist so that log4shell checks them
WORKDIR /stackable/spark-${PRODUCT}/dist/jars

- # Download various modules for Hadoop (e.g. support for s3a:// and abfs://)
- RUN curl -O https://repo.stackable.tech/repository/packages/aws/hadoop-aws-${HADOOP_LONG_VERSION}.jar \
- && curl -O https://repo.stackable.tech/repository/packages/aws/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar \
- && curl -O https://repo.stackable.tech/repository/packages/azure/hadoop-azure-${HADOOP_LONG_VERSION}.jar \
- && curl -O https://repo.stackable.tech/repository/packages/azure/azure-storage-${AZURE_STORAGE}.jar \
- && curl -O https://repo.stackable.tech/repository/packages/azure/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar
+ # Copy modules required for s3a://
+ COPY --from=hadoop-builder --chown=stackable:stackable \
+ /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar \
+ /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar \
+ ./
+
+ # Copy modules required for abfs://
+ COPY --from=hadoop-builder --chown=stackable:stackable \
+ /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar \
+ /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar \
+ /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar \
+ ./
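With hadoop-aws plus the AWS SDK bundle (and the Azure counterparts) sitting in dist/jars, a job can reach object storage via the s3a:// or abfs:// schemes. A hypothetical submit sketch — endpoint, credentials, image name, application class and jar are placeholders, not values defined by this Dockerfile:

    spark-submit \
      --master k8s://https://kubernetes.default.svc \
      --conf spark.kubernetes.container.image=spark-k8s:example \
      --conf spark.hadoop.fs.s3a.endpoint=https://s3.example.com \
      --conf spark.hadoop.fs.s3a.access.key=EXAMPLE_ACCESS_KEY \
      --conf spark.hadoop.fs.s3a.secret.key=EXAMPLE_SECRET_KEY \
      --class org.example.ReadFromS3 \
      local:///stackable/spark/examples/jars/my-app.jar s3a://example-bucket/input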
+
+ # Copy the HBase connector including required modules
+ COPY --from=hbase-connectors-builder --chown=stackable:stackable \
+ /stackable/spark/jars/* \
+ ./
+
+ # Copy modules required to access HBase
+ COPY --from=hbase-builder --chown=stackable:stackable \
+ /stackable/hbase/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${HBASE}.jar \
+ /stackable/hbase/lib/shaded-clients/hbase-shaded-mapreduce-${HBASE}.jar \
+ ./
+ # Copy modules required to access HBase if $HBASE == 2.4.x
+ COPY --from=hbase-builder --chown=stackable:stackable \
+ /stackable/hbase/lib/client-facing-thirdparty/htrace-core4-*-incubating.jar \
+ /stackable/hbase/lib/client-facing-thirdparty/slf4j-reload4j-*.jar \
+ ./
+ # Copy modules required to access HBase if $HBASE == 2.6.x
+ COPY --from=hbase-builder --chown=stackable:stackable \
+ /stackable/hbase/lib/client-facing-thirdparty/opentelemetry-api-*.jar \
+ /stackable/hbase/lib/client-facing-thirdparty/opentelemetry-context-*.jar \
+ /stackable/hbase/lib/client-facing-thirdparty/opentelemetry-semconv-*-alpha.jar \
+ ./
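The two version-specific COPY blocks above exist because the client-facing third-party JARs shipped by HBase differ between release lines (htrace-core4/slf4j-reload4j on 2.4.x, opentelemetry-* on 2.6.x). A small sketch to see which globs will match for a given hbase-builder image:

    # Run against the hbase-builder stage, e.g. via `docker run --rm <hbase-image>`
    ls /stackable/hbase/lib/client-facing-thirdparty/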
WORKDIR /stackable/spark-${PRODUCT}/dist/extra-jars
@@ -93,6 +243,7 @@ COPY shared/log4shell_scanner /bin/log4shell_scanner
RUN /bin/log4shell_scanner s /stackable/spark-${PRODUCT}/dist
# ===

+
FROM stackable/image/java-base as final

ARG PRODUCT
@@ -115,12 +266,12 @@ RUN microdnf update && \
hostname \
# required for spark startup scripts
procps \
- python${PYTHON} \
- python${PYTHON}-pip \
+ "python${PYTHON}" \
+ "python${PYTHON}-pip" \
zip \
# This is needed by the Spark UI to display process information using jps and jmap
# Copying the binaries from the builder stage failed.
- java-${JAVA_VERSION}-openjdk-devel \
+ "java-${JAVA_VERSION}-openjdk-devel" \
&& microdnf clean all \
&& rm -rf /var/cache/yum
@@ -134,10 +285,10 @@ ENV PATH=$SPARK_HOME:$PATH:/bin:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$HOME/.local/b
ENV PYSPARK_PYTHON=/usr/bin/python
ENV PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
- COPY --chown=stackable:stackable --from=builder /stackable/spark-${PRODUCT}/dist /stackable/spark
- COPY --chown=stackable:stackable --from=builder /stackable/spark-${PRODUCT}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}.cdx.json
- COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx
- COPY --from=builder /usr/bin/tini /usr/bin/tini
+ COPY --chown=stackable:stackable --from=spark-builder /stackable/spark-${PRODUCT}/dist /stackable/spark
+ COPY --chown=stackable:stackable --from=spark-builder /stackable/spark-${PRODUCT}/assembly/target/bom.json /stackable/spark/spark-${PRODUCT}.cdx.json
+ COPY --chown=stackable:stackable --from=spark-builder /stackable/jmx /stackable/jmx
+ COPY --from=spark-builder /usr/bin/tini /usr/bin/tini
RUN ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar \
# Symlink example jar, so that we can easily use it in tests