@@ -9,6 +9,7 @@ ARG PROTOBUF
9
9
ARG TARGETARCH
10
10
ARG TARGETOS
11
11
12
+ USER ${STACKABLE_USER_UID}
12
13
WORKDIR /stackable
13
14
14
15
COPY hadoop/stackable/jmx /stackable/jmx
@@ -92,6 +93,7 @@ FROM stackable/image/java-devel AS hdfs-utils-builder
92
93
ARG HDFS_UTILS
93
94
ARG PRODUCT
94
95
96
+ USER ${STACKABLE_USER_UID}
95
97
WORKDIR /stackable
96
98
97
99
# The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
@@ -120,35 +122,58 @@ LABEL name="Apache Hadoop" \
120
122
summary="The Stackable image for Apache Hadoop." \
121
123
description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."
122
124
125
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
126
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/jmx /stackable/jmx/
127
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/async-profiler /stackable/async-profiler/
128
+ COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
129
+ # The /stackable/hadoop symlink is only created by the RUN step further down, so it does
+ # not exist yet at this point. Copy the wrapper into the versioned directory directly;
+ # copying to /stackable/hadoop/bin here would create a real /stackable/hadoop directory
+ # and break the later `ln -s`.
+ COPY --chown=${STACKABLE_USER_UID}:0 hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop-${PRODUCT}/bin/
130
+
131
+
123
132
# fuse is required for fusermount (called by fuse_dfs)
124
133
# fuse-libs is required for fuse_dfs (not included in fuse)
125
134
# openssl: unclear whether it is still required (it is not part of the install list below)
126
- RUN microdnf update && \
127
- microdnf install \
128
- fuse \
129
- fuse-libs \
130
- # tar is required for `kubectl cp` which can be used to copy the log files
131
- # or profiler flamegraph from the Pod
132
- tar && \
133
- microdnf clean all && \
134
- rm -rf /var/cache/yum
135
+ RUN <<EOF
+ # Heredoc lines are not implicitly chained with &&: abort the build on the first failure.
+ set -e
+
+ # tar is required for `kubectl cp` which can be used to copy the log files
+ # or profiler flamegraph from the Pod
+ # It is already installed in the base image but leaving here for documentation purposes
+ # NOTE: keep comments out of the package list below. Inside a heredoc the shell sees them,
+ # so a comment line would end the backslash-continued command early.
+ microdnf update
+ microdnf install \
+     fuse \
+     fuse-libs \
+     tar
+ microdnf clean all
+ rm -rf /var/cache/yum
+
+ ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
+
+ # Remove unneeded binaries:
+ # - code sources
+ # - mapreduce/yarn binaries that were built as cross-project dependencies
+ # - minicluster (only used for testing) and test .jars
+ # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
+ rm -rf /stackable/hadoop/share/hadoop/common/sources/
+ rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
+ rm -rf /stackable/hadoop/share/hadoop/tools/sources/
+ rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
+ rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
+ rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
+ # Search /stackable explicitly instead of "." so the result does not depend on the
+ # working directory inherited from the base image.
+ find /stackable -name 'hadoop-minicluster-*.jar' -type f -delete
+ find /stackable -name 'hadoop-client-minicluster-*.jar' -type f -delete
+ find /stackable -name 'hadoop-*tests.jar' -type f -delete

# Without this fuse_dfs does not work
# It is so non-root users (as we are) can mount a FUSE device and let other users access it
- RUN echo "user_allow_other" > /etc/fuse.conf
+ echo "user_allow_other" > /etc/fuse.conf

- USER stackable
- WORKDIR /stackable
+ # All files and folders owned by root to support running as arbitrary users
+ # This is best practice as all container users will belong to the root group (0)
+ chown -R ${STACKABLE_USER_UID}:0 /stackable
+ chmod -R g=u /stackable
+ EOF
144
173
145
- COPY --chown=stackable:stackable --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
146
- COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx/
147
- COPY --chown=stackable:stackable --from=builder /stackable/async-profiler /stackable/async-profiler/
148
- COPY --chown=stackable:stackable --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
149
- RUN ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
174
+ COPY hadoop/licenses /licenses
150
175
151
- COPY hadoop/ stackable/fuse_dfs_wrapper /stackable/hadoop/bin
176
+ # Use the numeric UID, matching the builder stages and the ownership set on /stackable,
+ # so that runtimes can verify the container does not run as root.
+ USER ${STACKABLE_USER_UID}
152
177
153
178
ENV HOME=/stackable
154
179
ENV LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server
@@ -164,20 +189,5 @@ ENV ASYNC_PROFILER_HOME=/stackable/async-profiler
164
189
ENV HADOOP_YARN_HOME=/stackable/hadoop
165
190
ENV HADOOP_MAPRED_HOME=/stackable/hadoop
166
191
167
- # Remove unneeded binaries:
168
- # - code sources
169
- # - mapreduce/yarn binaries that were built as cross-project dependencies
170
- # - minicluster (only used for testing) and test .jars
171
- # - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
172
- RUN rm -rf /stackable/hadoop/share/hadoop/common/sources/ && \
173
- rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/ && \
174
- rm -rf /stackable/hadoop/share/hadoop/tools/sources/ && \
175
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar && \
176
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar && \
177
- rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar && \
178
- find . -name 'hadoop-minicluster-*.jar' -type f -delete && \
179
- find . -name 'hadoop-client-minicluster-*.jar' -type f -delete && \
180
- find . -name 'hadoop-*tests.jar' -type f -delete
181
-
182
192
WORKDIR /stackable/hadoop
183
193
CMD ["echo" , "This image is not meant to be 'run' directly." ]
0 commit comments