Skip to content

Commit e419329

Browse files
authored
add 2.0.1 dockerfiles (#285)
* add 2.0.1 dockerfiles * update sagemaker package name * add * to sagemaker package name * add comment unpin awscli * update 2.0.1 license * remove dead code
1 parent cd05883 commit e419329

File tree

4 files changed

+584
-0
lines changed

4 files changed

+584
-0
lines changed

docker/2.0.1/py2/Dockerfile.cpu

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# CPU image: built on top of stock Ubuntu 18.04.
FROM ubuntu:18.04

LABEL maintainer="Amazon AI"

# Keep debconf from prompting so package installation never waits on user
# input, and export the training module reference
# (NOTE(review): presumably read by the SageMaker container tooling - confirm).
ENV DEBIAN_FRONTEND=noninteractive \
    DEBCONF_NONINTERACTIVE_SEEN=true \
    SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
9+
10+
# Intel MKL / OpenMP runtime settings used by TensorFlow. Background reading:
# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
ENV KMP_AFFINITY=granularity=fine,compact,1,0 \
    KMP_BLOCKTIME=1 \
    KMP_SETTINGS=0

# Python runtime behaviour: skip .pyc files, unbuffered stdio, UTF-8 I/O and locale.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
22+
23+
# Interpreter name, apt package that provides pip, and the pip executable
# used by the installation steps further down.
ARG PYTHON=python
ARG PYTHON_PIP=python-pip
ARG PIP=pip

# TensorFlow 2.0.1 CPU wheel (cp27) to install, and the build-context glob of
# the framework support tarball that is copied in and pip-installed.
ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.0.1/AmazonLinux/cpu/final/tensorflow-2.0.1-cp27-cp27mu-manylinux2010_x86_64.whl
ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_training*.tar.gz
29+
30+
# OS-level build and runtime tooling (package list kept in alphabetical order).
# libgtk2.0-dev is installed as a dependent library for OpenCV.
# The apt package lists are removed in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    libgtk2.0-dev \
    openssh-client \
    openssh-server \
    software-properties-common \
    vim \
    wget \
    zlib1g-dev \
 && rm -rf /var/lib/apt/lists/*
44+
45+
# Build Open MPI 4.0.1 from the upstream release tarball and install it with
# the default configure prefix. The scratch directory under /tmp is deleted
# in the same layer so the sources do not end up in the image.
RUN mkdir /tmp/openmpi \
 && cd /tmp/openmpi \
 && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
 && tar -xzf openmpi-4.0.1.tar.gz \
 && cd openmpi-4.0.1 \
 && ./configure --enable-orterun-prefix-by-default \
 && make -j "$(nproc)" all \
 && make install \
 && ldconfig \
 && rm -rf /tmp/openmpi
56+
57+
# Swap mpirun for a two-line bash shim that forwards every argument to the
# real binary (renamed to mpirun.real) with --allow-run-as-root added, so
# running as root works by default.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
 && printf '%s\n' \
      '#!/bin/bash' \
      'mpirun.real --allow-run-as-root "$@"' \
      > /usr/local/bin/mpirun \
 && chmod a+x /usr/local/bin/mpirun
62+
63+
# Append system-wide Open MPI MCA defaults: binding policy "none" and
# mapping policy "slot".
RUN printf '%s\n' \
      "hwloc_base_binding_policy = none" \
      "rmaps_base_mapping_policy = slot" \
      >> /usr/local/etc/openmpi-mca-params.conf
65+
66+
# Put the Open MPI directories on the loader and executable search paths.
# The previous LD_LIBRARY_PATH is appended only when it is non-empty: an
# unconditional ":$LD_LIBRARY_PATH" leaves a trailing ":" when the variable is
# unset, and the dynamic loader treats an empty entry as the current working
# directory.
ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
ENV PATH=/usr/local/openmpi/bin/:$PATH
68+
69+
# SSH login fix: downgrade pam_loginuid from "required" to "optional" in the
# sshd PAM configuration. Otherwise the user is kicked off after login.
RUN sed -i \
    -e 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' \
    /etc/pam.d/sshd
71+
72+
# Generate a passphrase-less RSA key pair for root, authorize that key for
# login, and disable strict host key checking for every host in root's SSH
# client config. The sshd runtime directory is created here as well.
RUN mkdir -p /root/.ssh/ /var/run/sshd \
 && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
 && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
 && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config
78+
79+
WORKDIR /

# Install the Python interpreter and pip packages named by the PYTHON and
# PYTHON_PIP build args. The apt package lists are removed in the same layer;
# deleting them in a later layer would not shrink the image.
# NOTE(review): recommended packages are deliberately still installed here -
# later pip steps may rely on build dependencies they pull in; confirm before
# adding --no-install-recommends.
RUN apt-get update \
 && apt-get install -y \
    ${PYTHON} \
    ${PYTHON_PIP} \
 && rm -rf /var/lib/apt/lists/*
84+
85+
# Copy the framework support tarball from the build context into the
# working directory.
COPY ${FRAMEWORK_SUPPORT_INSTALLABLE} .

# Upgrade pip and setuptools before any other Python package is installed.
RUN ${PIP} install --no-cache-dir --upgrade pip setuptools

# Some TF tools expect a "python" binary, so expose the chosen interpreter
# under /usr/local/bin.
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
93+
94+
# Python dependencies, installed by four pip invocations inside one layer:
#   1. The pinned scientific / SageMaker packages. PyYAML==5.1.2 avoids a
#      conflict with the latest awscli and python-dateutil==2.8.0 satisfies the
#      botocore associated with it; awscli itself is left unpinned.
#   2. TensorFlow from TF_URL, installed separately afterwards so that the
#      library version is not overwritten by the previous step.
#   3. The framework support tarball, which is deleted right after installing.
#   4. Horovod.
RUN ${PIP} install --no-cache-dir --upgrade \
    numpy==1.16.5 \
    scipy==1.2.2 \
    scikit-learn==0.20.4 \
    pandas==0.24.2 \
    Pillow==6.2.1 \
    h5py==2.10.0 \
    keras_applications==1.0.8 \
    keras_preprocessing==1.1.0 \
    requests==2.22.0 \
    keras==2.3.1 \
    python-dateutil==2.8.0 \
    PyYAML==5.1.2 \
    awscli \
    mpi4py==3.0.3 \
    opencv-python==4.2.0.32 \
    "cryptography>=2.3" \
    "sagemaker-tensorflow>=2.0,<2.1" \
 && ${PIP} install --no-cache-dir --upgrade ${TF_URL} \
 && ${PIP} install --no-cache-dir --upgrade ${FRAMEWORK_SUPPORT_INSTALLABLE} \
 && rm -f ${FRAMEWORK_SUPPORT_INSTALLABLE} \
 && ${PIP} install --no-cache-dir --upgrade horovod==0.18.2
123+
124+
# Download the license text into the image root.
# -f makes curl exit non-zero on an HTTP error, so the build fails instead of
# silently saving the error response as /license.txt; -sS hides the progress
# meter but still prints errors; -L follows redirects.
RUN curl -fsSL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0.1/license.txt -o /license.txt
125+
126+
# Default to a bash shell. The path is absolute so the command does not depend
# on the working directory in effect when the container starts (a relative
# "bin/bash" only resolves while the working directory is "/").
CMD ["/bin/bash"]

docker/2.0.1/py2/Dockerfile.gpu

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
# GPU image: built on top of the nvidia/cuda 10.0 "base" image for Ubuntu 18.04.
FROM nvidia/cuda:10.0-base-ubuntu18.04

LABEL maintainer="Amazon AI"

# Keep debconf from prompting so package installation never waits on user
# input, and export the training module reference
# (NOTE(review): presumably read by the SageMaker container tooling - confirm).
ENV DEBIAN_FRONTEND=noninteractive \
    DEBCONF_NONINTERACTIVE_SEEN=true \
    SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
9+
10+
# Python runtime behaviour: skip .pyc files, unbuffered stdio, UTF-8 I/O and locale.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
15+
16+
# Interpreter name, apt package that provides pip, and the pip executable
# used by the installation steps further down.
ARG PYTHON=python
ARG PYTHON_PIP=python-pip
ARG PIP=pip

# TensorFlow 2.0.1 GPU wheel (cp27) to install, and the build-context glob of
# the framework support tarball that is copied in and pip-installed.
ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/2.0.1/AmazonLinux/gpu/final/tensorflow_gpu-2.0.1-cp27-cp27mu-manylinux2010_x86_64.whl
ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_training*.tar.gz
22+
23+
# CUDA 10.0 libraries, cuDNN, NCCL, the TensorRT runtime and general tooling,
# installed by three apt passes inside a single layer:
#   1. CUDA / cuDNN / NCCL packages plus build and SSH tooling (alphabetical).
#      TensorFlow doesn't require libnccl anymore but Open MPI still depends
#      on it; libgtk2.0-dev is a dependent library for OpenCV.
#   2. nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0, which adds a new
#      apt list containing the libnvinfer library.
#   3. After another 'apt-get update' to retrieve that list, libnvinfer5.
#      libnvinfer-dev is not installed since nothing here builds against
#      TensorRT.
# Afterwards the libnvinfer_plugin, libnvcaffe_parser and libnvparsers
# libraries are deleted, the apt lists are removed, and the sshd runtime
# directory is created.
# NOTE(review): --allow-unauthenticated lets apt install packages whose
# signatures cannot be verified - confirm it is still required.
RUN apt-get update \
 && apt-get install -y --no-install-recommends --allow-unauthenticated \
    build-essential \
    ca-certificates \
    cuda-command-line-tools-10-0 \
    cuda-cublas-dev-10-0 \
    cuda-cudart-dev-10-0 \
    cuda-cufft-dev-10-0 \
    cuda-curand-dev-10-0 \
    cuda-cusolver-dev-10-0 \
    cuda-cusparse-dev-10-0 \
    curl \
    git \
    libcudnn7=7.5.1.10-1+cuda10.0 \
    libfreetype6-dev \
    libgomp1 \
    libgtk2.0-dev \
    libhdf5-serial-dev \
    libnccl-dev=2.4.7-1+cuda10.0 \
    libnccl2=2.4.7-1+cuda10.0 \
    libpng-dev \
    libzmq3-dev \
    openssh-client \
    openssh-server \
    vim \
    wget \
    zlib1g-dev \
 && apt-get update \
 && apt-get install -y --no-install-recommends --allow-unauthenticated \
    nvinfer-runtime-trt-repo-ubuntu1804-5.0.2-ga-cuda10.0 \
 && apt-get update \
 && apt-get install -y --no-install-recommends --allow-unauthenticated \
    libnvinfer5=5.0.2-1+cuda10.0 \
 && rm /usr/lib/x86_64-linux-gnu/libnvinfer_plugin* \
       /usr/lib/x86_64-linux-gnu/libnvcaffe_parser* \
       /usr/lib/x86_64-linux-gnu/libnvparsers* \
 && rm -rf /var/lib/apt/lists/* \
 && mkdir -p /var/run/sshd
66+
67+
# Build Open MPI 4.0.1 from the upstream release tarball and install it with
# the default configure prefix. The scratch directory under /tmp is deleted
# in the same layer so the sources do not end up in the image.
RUN mkdir /tmp/openmpi \
 && cd /tmp/openmpi \
 && curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
 && tar -xzf openmpi-4.0.1.tar.gz \
 && cd openmpi-4.0.1 \
 && ./configure --enable-orterun-prefix-by-default \
 && make -j "$(nproc)" all \
 && make install \
 && ldconfig \
 && rm -rf /tmp/openmpi
78+
79+
# Install the Python interpreter and pip packages named by the PYTHON and
# PYTHON_PIP build args. The apt package lists are removed in the same layer;
# deleting them in a later layer would not shrink the image.
# NOTE(review): recommended packages are deliberately still installed here -
# later pip steps may rely on build dependencies they pull in; confirm before
# adding --no-install-recommends.
RUN apt-get update \
 && apt-get install -y \
    ${PYTHON} \
    ${PYTHON_PIP} \
 && rm -rf /var/lib/apt/lists/*
82+
83+
# Swap mpirun for a two-line bash shim that forwards every argument to the
# real binary (renamed to mpirun.real) with --allow-run-as-root added, so
# running as root works by default.
RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
 && printf '%s\n' \
      '#!/bin/bash' \
      'mpirun.real --allow-run-as-root "$@"' \
      > /usr/local/bin/mpirun \
 && chmod a+x /usr/local/bin/mpirun
88+
89+
# Configure Open MPI to run with good defaults, equivalent to passing
# "--bind-to none --map-by slot" on the command line. Only these two settings
# are written to the system-wide MCA parameter file.
RUN printf '%s\n' \
      "hwloc_base_binding_policy = none" \
      "rmaps_base_mapping_policy = slot" \
      >> /usr/local/etc/openmpi-mca-params.conf
93+
94+
# Default NCCL parameters: append NCCL_DEBUG=INFO to /etc/nccl.conf.
RUN printf '%s\n' 'NCCL_DEBUG=INFO' >> /etc/nccl.conf
96+
97+
# Put the Open MPI library directory on the loader search path. The previous
# LD_LIBRARY_PATH is appended only when it is non-empty: an unconditional
# ":$LD_LIBRARY_PATH" leaves a trailing ":" when the variable is unset, and
# the dynamic loader treats an empty entry as the current working directory.
ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}

# Executable search path: NVIDIA tools first, then Open MPI, then whatever
# PATH held before (same resulting order as prepending them one at a time).
ENV PATH=/usr/local/nvidia/bin:/usr/local/openmpi/bin/:$PATH
100+
101+
# SSH login fix: make sure the sshd runtime directory exists and downgrade
# pam_loginuid from "required" to "optional" in the sshd PAM configuration.
# Otherwise the user is kicked off after login.
RUN mkdir -p /var/run/sshd \
 && sed -i \
    -e 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' \
    /etc/pam.d/sshd
104+
105+
# Generate a passphrase-less RSA key pair for root, authorize that key for
# login, and disable strict host key checking for every host in root's SSH
# client config.
RUN mkdir -p /root/.ssh/ \
 && ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
 && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
 && printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config
110+
111+
WORKDIR /

# Upgrade pip and setuptools before any other Python package is installed.
RUN ${PIP} install --no-cache-dir --upgrade pip setuptools

# Some TF tools expect a "python" binary, so expose the chosen interpreter
# under /usr/local/bin.
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python
119+
120+
# Copy the framework support tarball from the build context into the
# working directory.
COPY ${FRAMEWORK_SUPPORT_INSTALLABLE} .

# Python dependencies, installed by three pip invocations inside one layer:
#   1. The pinned scientific / SageMaker packages. PyYAML==5.1.2 avoids a
#      conflict with the latest awscli and python-dateutil==2.8.0 satisfies the
#      botocore associated with it; awscli itself is left unpinned.
#   2. TensorFlow from TF_URL, installed separately afterwards so that the
#      library version is not overwritten by the previous step.
#   3. The framework support tarball, which is deleted right after installing.
RUN ${PIP} install --no-cache-dir --upgrade \
    numpy==1.16.5 \
    scipy==1.2.2 \
    scikit-learn==0.20.4 \
    pandas==0.24.2 \
    Pillow==6.2.1 \
    h5py==2.10.0 \
    keras_applications==1.0.8 \
    keras_preprocessing==1.1.0 \
    requests==2.22.0 \
    keras==2.3.1 \
    python-dateutil==2.8.0 \
    PyYAML==5.1.2 \
    awscli \
    mpi4py==3.0.3 \
    opencv-python==4.2.0.32 \
    "cryptography>=2.3" \
    "sagemaker-tensorflow>=2.0,<2.1" \
 && ${PIP} install --no-cache-dir --upgrade ${TF_URL} \
 && ${PIP} install --no-cache-dir --upgrade ${FRAMEWORK_SUPPORT_INSTALLABLE} \
 && rm -f ${FRAMEWORK_SUPPORT_INSTALLABLE}
149+
150+
# Install Horovod 0.18.2 with HOROVOD_GPU_ALLREDUCE=NCCL and
# HOROVOD_WITH_TENSORFLOW=1 set for the pip invocation. The CUDA stubs
# directory is handed to ldconfig only temporarily for this step; the closing
# plain ldconfig runs again without that directory.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs \
 && HOROVOD_GPU_ALLREDUCE=NCCL \
    HOROVOD_WITH_TENSORFLOW=1 \
    ${PIP} install --no-cache-dir horovod==0.18.2 \
 && ldconfig
154+
155+
# Allow OpenSSH to talk to containers without asking for confirmation:
# rebuild the system-wide client config without any existing
# StrictHostKeyChecking lines, append "StrictHostKeyChecking no", and move the
# new file into place.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
 && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
 && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
159+
160+
# Download the license text into the image root.
# -f makes curl exit non-zero on an HTTP error, so the build fails instead of
# silently saving the error response as /license.txt; -sS hides the progress
# meter but still prints errors; -L follows redirects.
RUN curl -fsSL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-2.0.1/license.txt -o /license.txt
161+
162+
# Default to a bash shell. The path is absolute so the command does not depend
# on the working directory in effect when the container starts (a relative
# "bin/bash" only resolves while the working directory is "/").
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)