
Commit fba55d4

Adjust Hadoop
1 parent 0b07019 commit fba55d4

File tree

1 file changed: +44, -34 lines

hadoop/Dockerfile

Lines changed: 44 additions & 34 deletions
@@ -9,6 +9,7 @@ ARG PROTOBUF
 ARG TARGETARCH
 ARG TARGETOS

+USER ${STACKABLE_USER_UID}
 WORKDIR /stackable

 COPY hadoop/stackable/jmx /stackable/jmx
@@ -92,6 +93,7 @@ FROM stackable/image/java-devel AS hdfs-utils-builder
 ARG HDFS_UTILS
 ARG PRODUCT

+USER ${STACKABLE_USER_UID}
 WORKDIR /stackable

 # The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
@@ -120,35 +122,58 @@ LABEL name="Apache Hadoop" \
 summary="The Stackable image for Apache Hadoop." \
 description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."

+COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
+COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/jmx /stackable/jmx/
+COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/async-profiler /stackable/async-profiler/
+COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
+COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin
+
+
 # fuse is required for fusermount (called by fuse_dfs)
 # fuse-libs is required for fuse_dfs (not included in fuse)
 # openssl -> not sure
-RUN microdnf update && \
-    microdnf install \
-    fuse \
-    fuse-libs \
-    # tar is required for `kubectl cp` which can be used to copy the log files
-    # or profiler flamegraph from the Pod
-    tar && \
-    microdnf clean all && \
-    rm -rf /var/cache/yum
+RUN <<EOF
+microdnf update
+microdnf install \
+    fuse \
+    fuse-libs \
+    # tar is required for `kubectl cp` which can be used to copy the log files
+    # or profiler flamegraph from the Pod
+    # It is already installed in the base image but leaving here for documentation purposes
+    tar
+microdnf clean all
+rm -rf /var/cache/yum
+
+ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop

-COPY hadoop/licenses /licenses
+# Remove unneeded binaries:
+# - code sources
+# - mapreduce/yarn binaries that were built as cross-project dependencies
+# - minicluster (only used for testing) and test .jars
+# - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
+rm -rf /stackable/hadoop/share/hadoop/common/sources/
+rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
+rm -rf /stackable/hadoop/share/hadoop/tools/sources/
+rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
+rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
+rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
+find . -name 'hadoop-minicluster-*.jar' -type f -delete
+find . -name 'hadoop-client-minicluster-*.jar' -type f -delete
+find . -name 'hadoop-*tests.jar' -type f -delete

 # Without this fuse_dfs does not work
 # It is so non-root users (as we are) can mount a FUSE device and let other users access it
-RUN echo "user_allow_other" > /etc/fuse.conf
+echo "user_allow_other" > /etc/fuse.conf

-USER stackable
-WORKDIR /stackable
+# All files and folders owned by root to support running as arbitrary users
+# This is best practice as all container users will belong to the root group (0)
+chown -R ${STACKABLE_USER_UID}:0 /stackable
+chmod -R g=u /stackable
+EOF

-COPY --chown=stackable:stackable --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
-COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx/
-COPY --chown=stackable:stackable --from=builder /stackable/async-profiler /stackable/async-profiler/
-COPY --chown=stackable:stackable --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
-RUN ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
+COPY hadoop/licenses /licenses

-COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin
+USER stackable

 ENV HOME=/stackable
 ENV LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server
@@ -164,20 +189,5 @@ ENV ASYNC_PROFILER_HOME=/stackable/async-profiler
 ENV HADOOP_YARN_HOME=/stackable/hadoop
 ENV HADOOP_MAPRED_HOME=/stackable/hadoop

-# Remove unneeded binaries:
-# - code sources
-# - mapreduce/yarn binaries that were built as cross-project dependencies
-# - minicluster (only used for testing) and test .jars
-# - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
-RUN rm -rf /stackable/hadoop/share/hadoop/common/sources/ && \
-    rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/ && \
-    rm -rf /stackable/hadoop/share/hadoop/tools/sources/ && \
-    rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar && \
-    rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar && \
-    rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar && \
-    find . -name 'hadoop-minicluster-*.jar' -type f -delete && \
-    find . -name 'hadoop-client-minicluster-*.jar' -type f -delete && \
-    find . -name 'hadoop-*tests.jar' -type f -delete
-
 WORKDIR /stackable/hadoop
 CMD ["echo", "This image is not meant to be 'run' directly."]
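
Note on the two patterns used in the diff above: the heredoc form of RUN replaces `&& \` chaining with a single multi-line script (it requires building with BuildKit and the Dockerfile 1.4+ syntax), and copying files with a fixed UID plus group 0 followed by `chmod -R g=u` is the usual way to support running under an arbitrary UID, since such users belong to the root group (0) and then inherit the owner's permissions through the group bits. A minimal standalone sketch of both ideas follows; the base image, UID, and paths are placeholders for illustration and are not taken from this Dockerfile:

# syntax=docker/dockerfile:1.4
# Placeholder base image and UID, for illustration only.
FROM registry.access.redhat.com/ubi9/ubi-minimal AS example
ARG APP_UID=1000

# One heredoc RUN produces one layer; no `&& \` chaining is needed.
RUN <<EOF
microdnf update
microdnf install tar
microdnf clean all
rm -rf /var/cache/yum
EOF

# Files owned by a fixed UID with group 0, and group permissions equal to
# the owner's, so an arbitrary runtime UID in group 0 can still use them.
COPY --chown=${APP_UID}:0 app/ /opt/app/
RUN chmod -R g=u /opt/app

USER ${APP_UID}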
