Skip to content

Make uid/gid configurable & change group of files #849

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 41 additions & 37 deletions airflow/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ARG GIT_SYNC

# For updated versions check https://github.com/kubernetes/git-sync/releases
# which should contain an image location (e.g. registry.k8s.io/git-sync/git-sync:v3.6.8)
FROM oci.stackable.tech/sdp/git-sync:${GIT_SYNC} as gitsync-image
FROM oci.stackable.tech/sdp/git-sync:${GIT_SYNC} AS gitsync-image

FROM stackable/image/statsd_exporter AS statsd_exporter-builder

Expand All @@ -13,7 +13,6 @@ FROM stackable/image/vector AS airflow-build-image
ARG PRODUCT
ARG PYTHON
ARG TARGETARCH
ARG TARGETOS

COPY airflow/constraints-${PRODUCT}-python${PYTHON}.txt /tmp/constraints.txt

Expand Down Expand Up @@ -59,7 +58,7 @@ ARG PYTHON
ARG RELEASE
ARG TINI
ARG TARGETARCH
ARG TARGETOS
ARG STACKABLE_USER_UID

LABEL name="Apache Airflow" \
maintainer="info@stackable.tech" \
Expand All @@ -70,51 +69,56 @@ LABEL name="Apache Airflow" \
description="This image is deployed by the Stackable Operator for Apache Airflow."

COPY airflow/licenses /licenses

# Update image and install python
RUN microdnf update && \
microdnf install \
ca-certificates \
cyrus-sasl \
git \
libpq \
openldap \
openldap-clients \
openssh-clients \
openssl-libs \
openssl-pkcs11 \
python${PYTHON} \
socat \
unixODBC && \
microdnf clean all && \
rm -rf /var/cache/yum
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/utils/entrypoint.sh /entrypoint.sh
COPY --chown=${STACKABLE_USER_UID}:0 airflow/stackable/utils/run-airflow.sh /run-airflow.sh

ENV HOME=/stackable
ENV AIRFLOW_USER_HOME_DIR=/stackable
ENV PATH=$PATH:/bin:$HOME/app/bin
ENV AIRFLOW_HOME=$HOME/airflow

# Update image and install python
RUN <<EOF
microdnf update
microdnf install \
ca-certificates \
cyrus-sasl \
git \
libpq \
openldap \
openldap-clients \
openssh-clients \
openssl-libs \
openssl-pkcs11 \
python${PYTHON} \
socat \
unixODBC
microdnf clean all
rm -rf /var/cache/yum

# Get the correct `tini` binary for our architecture.
# It is used as an init alternative in the entrypoint
RUN mkdir -pv ${AIRFLOW_HOME} && \
mkdir -pv ${AIRFLOW_HOME}/dags && \
mkdir -pv ${AIRFLOW_HOME}/logs && \
chown --recursive stackable:stackable ${AIRFLOW_HOME} && \
curl --fail -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}"

COPY airflow/stackable/utils/entrypoint.sh /entrypoint.sh
COPY airflow/stackable/utils/run-airflow.sh /run-airflow.sh
RUN chmod a+x /entrypoint.sh && \
chmod a+x /run-airflow.sh && \
chmod +x /usr/bin/tini

COPY --from=airflow-build-image --chown=stackable:stackable /stackable/ ${HOME}/
COPY --from=gitsync-image --chown=stackable:stackable /git-sync /stackable/git-sync

USER stackable
curl --fail -o /usr/bin/tini "https://repo.stackable.tech/repository/packages/tini/tini-${TINI}-${TARGETARCH}"
chmod a+x /entrypoint.sh
chmod a+x /run-airflow.sh
chmod +x /usr/bin/tini

mkdir -pv ${AIRFLOW_HOME}
mkdir -pv ${AIRFLOW_HOME}/dags
mkdir -pv ${AIRFLOW_HOME}/logs

# All files and folders are owned by the root group (0) to support running as arbitrary users.
# This is best practice, as all container users will belong to the root group (0).
chown -R ${STACKABLE_USER_UID}:0 /stackable
chmod -R g=u /stackable
EOF

USER ${STACKABLE_USER_UID}
WORKDIR /stackable

COPY --from=airflow-build-image --chown=${STACKABLE_USER_UID}:0 /stackable/ ${HOME}/
COPY --from=gitsync-image --chown=${STACKABLE_USER_UID}:0 /git-sync /stackable/git-sync

ENTRYPOINT ["/usr/bin/tini", "--", "/run-airflow.sh"]
CMD []

Expand Down
7 changes: 7 additions & 0 deletions conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,10 @@
"ignore-error": "true",
},
]

args = {
"STACKABLE_USER_NAME": "stackable",
"STACKABLE_USER_UID": "1000",
"STACKABLE_USER_GID": "1000",
"DELETE_CACHES": "true"
}
33 changes: 18 additions & 15 deletions druid/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ microdnf clean all
rm -rf /var/cache/yum
EOF

USER stackable
USER ${STACKABLE_USER_UID}
WORKDIR /stackable

COPY --chown=stackable:stackable druid/stackable/patches/apply_patches.sh /stackable/apache-druid-${PRODUCT}-src/patches/apply_patches.sh
COPY --chown=stackable:stackable druid/stackable/patches/${PRODUCT} /stackable/apache-druid-${PRODUCT}-src/patches/${PRODUCT}
COPY --chown=stackable:0 druid/stackable/patches/apply_patches.sh /stackable/apache-druid-${PRODUCT}-src/patches/apply_patches.sh
COPY --chown=stackable:0 druid/stackable/patches/${PRODUCT} /stackable/apache-druid-${PRODUCT}-src/patches/${PRODUCT}

# Cache mounts are owned by root by default
# We need to explicitly give the uid to use which is hardcoded to "1000" in stackable-base
Expand All @@ -44,9 +44,9 @@ COPY --chown=stackable:stackable druid/stackable/patches/${PRODUCT} /stackable/a
# with a "directory not empty" error on the first builder to finish, as other builders
# are still working in the cache directory.

RUN --mount=type=cache,id=maven-${PRODUCT},uid=1000,target=/stackable/.m2/repository \
--mount=type=cache,id=npm-${PRODUCT},uid=1000,target=/stackable/.npm \
--mount=type=cache,id=cache-${PRODUCT},uid=1000,target=/stackable/.cache \
RUN --mount=type=cache,id=maven-${PRODUCT},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository \
--mount=type=cache,id=npm-${PRODUCT},uid=${STACKABLE_USER_UID},target=/stackable/.npm \
--mount=type=cache,id=cache-${PRODUCT},uid=${STACKABLE_USER_UID},target=/stackable/.cache \
<<EOF
curl --fail -L "https://repo.stackable.tech/repository/packages/druid/apache-druid-${PRODUCT}-src.tar.gz" | tar -xzC .
cd apache-druid-${PRODUCT}-src
Expand Down Expand Up @@ -78,6 +78,7 @@ FROM stackable/image/java-base AS final

ARG PRODUCT
ARG RELEASE
ARG STACKABLE_USER_UID

ARG NAME="Apache Druid"
ARG DESCRIPTION="This image is deployed by the Stackable Operator for Apache Druid"
Expand All @@ -101,27 +102,29 @@ LABEL io.openshift.tags="ubi9,stackable,druid,sdp"
LABEL io.k8s.description="${DESCRIPTION}"
LABEL io.k8s.display-name="${NAME}"


COPY --chown=${STACKABLE_USER_UID}:0 --from=druid-builder /stackable/apache-druid-${PRODUCT} /stackable/apache-druid-${PRODUCT}
COPY --chown=${STACKABLE_USER_UID}:0 druid/stackable/bin /stackable/bin
COPY --chown=${STACKABLE_USER_UID}:0 druid/licenses /licenses

RUN <<EOF
microdnf update
microdnf clean all
rpm -qa --qf "%{NAME}-%{VERSION}-%{RELEASE}\n" | sort > /stackable/package_manifest.txt
rm -rf /var/cache/yum
EOF

USER stackable
WORKDIR /stackable

COPY --chown=stackable:stackable --from=druid-builder /stackable/apache-druid-${PRODUCT} /stackable/apache-druid-${PRODUCT}
COPY --chown=stackable:stackable druid/stackable/bin /stackable/bin
COPY --chown=stackable:stackable druid/licenses /licenses

RUN <<EOF
ln -s /stackable/apache-druid-${PRODUCT} /stackable/druid

# Force to overwrite the existing 'run-druid'
ln -sf /stackable/bin/run-druid /stackable/druid/bin/run-druid

# All files and folders are owned by the root group (0) to support running as arbitrary users.
# This is best practice, as all container users will belong to the root group (0).
chown -R ${STACKABLE_USER_UID}:0 /stackable
chmod -R g=u /stackable
EOF

USER ${STACKABLE_USER_UID}
ENV PATH="${PATH}":/stackable/druid/bin

WORKDIR /stackable/druid
Expand Down
78 changes: 44 additions & 34 deletions hadoop/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ ARG PROTOBUF
ARG TARGETARCH
ARG TARGETOS

USER ${STACKABLE_USER_UID}
WORKDIR /stackable

COPY hadoop/stackable/jmx /stackable/jmx
Expand Down Expand Up @@ -92,6 +93,7 @@ FROM stackable/image/java-devel AS hdfs-utils-builder
ARG HDFS_UTILS
ARG PRODUCT

USER ${STACKABLE_USER_UID}
WORKDIR /stackable

# The Stackable HDFS utils contain an OPA authorizer, group mapper & topology provider.
Expand Down Expand Up @@ -120,35 +122,58 @@ LABEL name="Apache Hadoop" \
summary="The Stackable image for Apache Hadoop." \
description="This image is deployed by the Stackable Operator for Apache Hadoop / HDFS."

COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/jmx /stackable/jmx/
COPY --chown=${STACKABLE_USER_UID}:0 --from=builder /stackable/async-profiler /stackable/async-profiler/
COPY --chown=${STACKABLE_USER_UID}:0 --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin


# fuse is required for fusermount (called by fuse_dfs)
# fuse-libs is required for fuse_dfs (not included in fuse)
# openssl -> not sure
RUN microdnf update && \
microdnf install \
fuse \
fuse-libs \
# tar is required for `kubectl cp` which can be used to copy the log files
# or profiler flamegraph from the Pod
tar && \
microdnf clean all && \
rm -rf /var/cache/yum
RUN <<EOF
microdnf update
microdnf install \
fuse \
fuse-libs \
# tar is required for `kubectl cp` which can be used to copy the log files
# or profiler flamegraph from the Pod
# It is already installed in the base image but leaving here for documentation purposes
tar
microdnf clean all
rm -rf /var/cache/yum

ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop

COPY hadoop/licenses /licenses
# Remove unneeded binaries:
# - code sources
# - mapreduce/yarn binaries that were built as cross-project dependencies
# - minicluster (only used for testing) and test .jars
# - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
rm -rf /stackable/hadoop/share/hadoop/common/sources/
rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/
rm -rf /stackable/hadoop/share/hadoop/tools/sources/
rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar
find . -name 'hadoop-minicluster-*.jar' -type f -delete
find . -name 'hadoop-client-minicluster-*.jar' -type f -delete
find . -name 'hadoop-*tests.jar' -type f -delete

# Without this fuse_dfs does not work
# It is so non-root users (as we are) can mount a FUSE device and let other users access it
RUN echo "user_allow_other" > /etc/fuse.conf
echo "user_allow_other" > /etc/fuse.conf

USER stackable
WORKDIR /stackable
# All files and folders are owned by the root group (0) to support running as arbitrary users.
# This is best practice, as all container users will belong to the root group (0).
chown -R ${STACKABLE_USER_UID}:0 /stackable
chmod -R g=u /stackable
EOF

COPY --chown=stackable:stackable --from=builder /stackable/hadoop-${PRODUCT} /stackable/hadoop-${PRODUCT}/
COPY --chown=stackable:stackable --from=builder /stackable/jmx /stackable/jmx/
COPY --chown=stackable:stackable --from=builder /stackable/async-profiler /stackable/async-profiler/
COPY --chown=stackable:stackable --from=hdfs-utils-builder /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar /stackable/hadoop-${PRODUCT}/share/hadoop/common/lib/hdfs-utils-${HDFS_UTILS}.jar
RUN ln -s /stackable/hadoop-${PRODUCT} /stackable/hadoop
COPY hadoop/licenses /licenses

COPY hadoop/stackable/fuse_dfs_wrapper /stackable/hadoop/bin
USER stackable

ENV HOME=/stackable
ENV LD_LIBRARY_PATH=/stackable/hadoop/lib/native:/usr/lib/jvm/jre/lib/server
Expand All @@ -164,20 +189,5 @@ ENV ASYNC_PROFILER_HOME=/stackable/async-profiler
ENV HADOOP_YARN_HOME=/stackable/hadoop
ENV HADOOP_MAPRED_HOME=/stackable/hadoop

# Remove unneeded binaries:
# - code sources
# - mapreduce/yarn binaries that were built as cross-project dependencies
# - minicluster (only used for testing) and test .jars
# - json-io: this is a transitive dependency pulled in by cedarsoft/java-utils/json-io and is excluded in 3.4.0. See CVE-2023-34610.
RUN rm -rf /stackable/hadoop/share/hadoop/common/sources/ && \
rm -rf /stackable/hadoop/share/hadoop/hdfs/sources/ && \
rm -rf /stackable/hadoop/share/hadoop/tools/sources/ && \
rm -rf /stackable/hadoop/share/hadoop/tools/lib/json-io-*.jar && \
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-mapreduce-client-*.jar && \
rm -rf /stackable/hadoop/share/hadoop/tools/lib/hadoop-yarn-server*.jar && \
find . -name 'hadoop-minicluster-*.jar' -type f -delete && \
find . -name 'hadoop-client-minicluster-*.jar' -type f -delete && \
find . -name 'hadoop-*tests.jar' -type f -delete

WORKDIR /stackable/hadoop
CMD ["echo", "This image is not meant to be 'run' directly."]
20 changes: 12 additions & 8 deletions stackable-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ FROM registry.access.redhat.com/ubi9/ubi-minimal@sha256:104cf11d890aeb7dd5728b7d

# intentionally unused
ARG PRODUCT
ARG STACKABLE_USER_UID
ARG STACKABLE_USER_GID
ARG STACKABLE_USER_NAME

# Sets the default shell to Bash with strict error handling and robust pipeline processing.
# "-e": Exits immediately if a command exits with a non-zero status
Expand Down Expand Up @@ -102,21 +105,22 @@ microdnf install \
###
# Added only temporarily to create the user and group, removed again below
microdnf install shadow-utils
groupadd --gid 1000 --system stackable
groupadd --gid ${STACKABLE_USER_GID} --system ${STACKABLE_USER_NAME}

# The --no-log-init is required to work around a bug/problem in Go/Docker when very large UIDs are used
# See https://github.com/moby/moby/issues/5419#issuecomment-41478290 for more context
# Making this a system user prevents a mail dir from being created, expiry of passwords etc. but it will warn:
# useradd warning: stackable's uid 1000 is greater than SYS_UID_MAX 999
# We can safely ignore this warning, to get rid of the warning we could change /etc/login.defs but that does not seem worth it
# We'll leave the home directory hardcoded to /stackable because I don't want to deal with which chars might be valid and which might not in user name vs. directory
useradd \
--no-log-init \
--gid stackable \
--uid 1000 \
--gid ${STACKABLE_USER_GID} \
--uid ${STACKABLE_USER_UID} \
--system \
--create-home \
--home-dir /stackable \
stackable
${STACKABLE_USER_NAME}
microdnf remove shadow-utils
microdnf clean all

Expand All @@ -132,8 +136,8 @@ microdnf clean all

echo -e "if [ -f ~/.bashrc ]; then\n\tsource ~/.bashrc\nfi" >> /stackable/.profile

chown stackable:stackable /stackable/.bashrc
chown stackable:stackable /stackable/.profile
chown ${STACKABLE_USER_UID}:0 /stackable/.bashrc
chown ${STACKABLE_USER_UID}:0 /stackable/.profile

# CVE-2023-37920: Remove "e-Tugra" root certificates
# e-Tugra's root certificates were subject to an investigation prompted by reporting of security issues in their systems
Expand All @@ -156,8 +160,8 @@ if [ "$(trust list --filter=ca-anchors | grep -c 'E-Tugra')" != "0" ]; then
fi
EOF

COPY --from=product-utils-builder --chown=stackable:stackable /config-utils/target/release/config-utils /stackable/config-utils
COPY --from=product-utils-builder --chown=stackable:stackable /config-utils/config-utils.cdx.xml /stackable/config-utils.cdx.xml
COPY --from=product-utils-builder --chown=${STACKABLE_USER_UID}:0 /config-utils/target/release/config-utils /stackable/config-utils
COPY --from=product-utils-builder --chown=${STACKABLE_USER_UID}:0 /config-utils/config-utils.cdx.xml /stackable/config-utils.cdx.xml
ENV PATH="${PATH}:/stackable"

# These labels have mostly been superseded by the OpenContainers spec annotations below but it doesn't hurt to include them
Expand Down
Loading