# hadolint global ignore=DL3038,DL4006

# hadolint ignore=DL3006
- FROM stackable/image/java-devel AS builder
+ FROM stackable/image/java-devel AS hadoop-builder

ARG PRODUCT
ARG ASYNC_PROFILER
@@ -25,25 +25,31 @@ COPY hadoop/stackable/fuse_dfs_wrapper /stackable/fuse_dfs_wrapper
# At the same time a new HDFS Operator will still work with older images which do not have the symlink to the versionless jar.
# After one of our next releases (23.11 or 24.x) we should update the operator to point at the non-versioned symlink (jmx_prometheus_javaagent.jar)
# And then we can also remove the symlink to 0.16.1 from this Dockerfile.
- RUN curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \
- chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" && \
- ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar && \
- ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar
+ RUN <<EOF
+ curl --fail "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ chmod -x "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar"
+ ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar
+ ln -s /stackable/jmx/jmx_prometheus_javaagent.jar /stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar
+ # TODO: Can the 0.16.1 symlink go once the operator points at the versionless jar?
- RUN ARCH="${TARGETARCH/amd64/x64}" && \
- curl --fail -L "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC . && \
- ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler
+ ARCH="${TARGETARCH/amd64/x64}"
+ curl --fail -L "https://repo.stackable.tech/repository/packages/async-profiler/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}.tar.gz" | tar -xzC .
+ ln -s "/stackable/async-profiler-${ASYNC_PROFILER}-${TARGETOS}-${ARCH}" /stackable/async-profiler

# This Protobuf version is the exact version as used in the Hadoop Dockerfile
# See https://github.com/apache/hadoop/blob/trunk/dev-support/docker/pkg-resolver/install-protobuf.sh
# (this was hardcoded in the Dockerfile in earlier versions of Hadoop, make sure to look at the exact version in GitHub)
- WORKDIR /opt/protobuf-src
- RUN curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-${PROTOBUF}.tar.gz -o /opt/protobuf.tar.gz && \
- tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner && \
- ./configure --prefix=/opt/protobuf && \
- make "-j$(nproc)" && \
- make install && \
- rm -rf /opt/protobuf-src
+ # At the time of writing we could save around 350 MB if we included this in the later RUN statement and deleted it afterwards
+ mkdir /opt/protobuf-src
+ cd /opt/protobuf-src
+ curl --fail -L -s -S https://repo.stackable.tech/repository/packages/protobuf/protobuf-java-${PROTOBUF}.tar.gz -o /opt/protobuf.tar.gz
+ tar xzf /opt/protobuf.tar.gz --strip-components 1 --no-same-owner
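+ # Build protobuf from source and install it into /opt/protobuf (exposed below via PROTOBUF_HOME and PATH)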
+ ./configure --prefix=/opt/protobuf
+ make "-j$(nproc)"
+ make install
+ rm -rf /opt/protobuf-src
+ rm -f /opt/protobuf.tar.gz
+ EOF
ENV PROTOBUF_HOME=/opt/protobuf
ENV PATH="${PATH}:/opt/protobuf/bin"
@@ -56,6 +62,7 @@ RUN microdnf update && \
microdnf clean all && \
rm -rf /var/cache/yum
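+ # Run the remaining build steps as the unprivileged stackable user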
+ USER stackable
WORKDIR /stackable
COPY hadoop/stackable/patches /stackable/patches
@@ -65,123 +72,6 @@ COPY hadoop/stackable/patches /stackable/patches
# Also skip building the yarn, mapreduce and minicluster modules: this will result in the modules being excluded, but not all
# jar files will be stripped if they are needed elsewhere, e.g. share/hadoop/yarn will not be part of the build, but yarn jars
# will still exist in share/hadoop/tools as they are needed by the resource estimator tool. Such jars are removed in a later step.
- RUN curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC . && \
- patches/apply_patches.sh ${PRODUCT} && \
- cd hadoop-${PRODUCT}-src && \
- mvn clean package -Pdist,native -pl '!hadoop-tools/hadoop-pipes,!hadoop-yarn-project,!hadoop-mapreduce-project,!hadoop-minicluster' -Drequire.fuse=true -DskipTests -Dmaven.javadoc.skip=true && \
- cp -r hadoop-dist/target/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT} && \
- # HDFS fuse-dfs is not part of the regular dist output, so we need to copy it in ourselves
- cp hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/fuse-dfs/fuse_dfs /stackable/hadoop-${PRODUCT}/bin && \
- rm -rf /stackable/hadoop-${PRODUCT}-src
-
- # For earlier versions this script removes the .class file that contains the
- # vulnerable code.
- # TODO: This can be restricted to target only versions which do not honor the environment
- # variable that has been set above but this has not currently been implemented
- COPY shared/log4shell.sh /bin
- RUN /bin/log4shell.sh "/stackable/hadoop-${PRODUCT}"
-
- # Ensure no vulnerable files are left over
- # This will currently report vulnerable files being present, as it also alerts on
- # SocketNode.class, which we do not remove with our scripts.
- # Further investigation is needed to determine whether this should also be removed.
- COPY shared/log4shell_1.6.1-log4shell_Linux_x86_64 /bin/log4shell_scanner_x86_64
- COPY shared/log4shell_1.6.1-log4shell_Linux_aarch64 /bin/log4shell_scanner_aarch64
- COPY shared/log4shell_scanner /bin/log4shell_scanner
- RUN /bin/log4shell_scanner s "/stackable/hadoop-${PRODUCT}"
- # ===
-
- FROM stackable/image/java-devel AS hdfs-utils-builder
-
- ARG HDFS_UTILS
- ARG PRODUCT
-
- WORKDIR /stackable
-
- # The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
- # The topology provider provides rack awareness functionality for HDFS by allowing users to specify Kubernetes
- # labels to build a rackID from.
- # Starting with hdfs-utils version 0.3.0 the topology provider is no longer a standalone jar and is included in hdfs-utils.
-
- RUN curl --fail -L "https://github.com/stackabletech/hdfs-utils/archive/refs/tags/v${HDFS_UTILS}.tar.gz" | tar -xzC . && \
- cd hdfs-utils-${HDFS_UTILS} && \
- mvn clean package -P hadoop-${PRODUCT} -DskipTests -Dmaven.javadoc.skip=true && \
- mkdir -p /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib && \
- cp target/hdfs-utils-$HDFS_UTILS.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar && \
- rm -rf /stackable/hdfs-utils-main
-
- FROM stackable/image/java-base AS final
-
- ARG PRODUCT
- ARG RELEASE
- ARG HDFS_UTILS
-
- LABEL name="Apache Hadoop" \
- maintainer="info@stackable.tech" \
- vendor="Stackable GmbH" \
- version="${PRODUCT}" \
- release="${RELEASE}" \
- summary="The Stackable image for Apache Hadoop." \
- description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."
-
- # fuse is required for fusermount (called by fuse_dfs)
- # fuse-libs is required for fuse_dfs (not included in fuse)
- # openssl -> not sure
- RUN microdnf update && \
- microdnf install \
- fuse \
- fuse-libs \
- # tar is required for `kubectl cp` which can be used to copy the log files
- # or profiler flamegraph from the Pod
- tar && \
- microdnf clean all && \
- rm -rf /var/cache/yum
-
- COPY hadoop/licenses /licenses
-
- # Without this, fuse_dfs does not work
- # It allows non-root users (as we are) to mount a FUSE device and let other users access it
- RUN echo "user_allow_other" > /etc/fuse.conf
-
- USER stackable
- WORKDIR /stackable
-
- COPY --chown=stackable:stackable --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
- COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx/
- COPY --chown=stackable:stackable --from=builder /stackable/async-profiler /stackable/async-profiler/
- COPY --chown=stackable:stackable --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
- RUN ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
-
- COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin
-
- ENV HOME=/stackable
- ENV LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server
- ENV PATH="${PATH}:/stackable/hadoop/bin"
- ENV HADOOP_HOME=/stackable/hadoop
- ENV HADOOP_CONF_DIR=/stackable/config
- ENV ASYNC_PROFILER_HOME=/stackable/async-profiler
- # The following 2 env-vars are required for common scripts even if the respective libraries are never used.
- # HADOOP_HOME is often used internally if HADOOP_YARN_HOME/HADOOP_MAPRED_HOME are not set, although
- # a subdirectory is also required in (at least)
- # hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh
- # if HADOOP_YARN_HOME does not exist at all, so we set it here to a sensible default.
- ENV HADOOP_YARN_HOME=/stackable/hadoop
- ENV HADOOP_MAPRED_HOME=/stackable/hadoop
-
- # Remove unneeded binaries:
- # - code sources
- # - mapreduce/yarn binaries that were built as cross-project dependencies
- # - minicluster (only used for testing) and test .jars
- # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
- RUN rm -rf /stackable/hadoop/share/hadoop/common/sources/ && \
- rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/ && \
- rm -rf /stackable/hadoop/share/hadoop/tools/sources/ && \
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar && \
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar && \
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar && \
- find . -name 'hadoop-minicluster-*.jar' -type f -delete && \
- find . -name 'hadoop-client-minicluster-*.jar' -type f -delete && \
- find . -name 'hadoop-*tests.jar' -type f -delete
-
- WORKDIR /stackable/hadoop
- CMD ["echo", "This image is not meant to be 'run' directly."]
+ RUN <<EOF
+ curl --fail -L "https://repo.stackable.tech/repository/packages/hadoop/hadoop-${PRODUCT}-src.tar.gz" | tar -xzC .
+ EOF