Commit 8800bcf

Satish615 (Satish Gollaprolu) and ajaykarpur authored
add dockerfiles for tf 1.15.2 py37 containers (#356)
* add dockerfiles for tf 1.15.2 py37 containers
* update tensorflow 1.15.2 custom binaries for py37
* build docker images using --no-cache
* add sagemaker binaries for py37 support

Co-authored-by: Satish Gollaprolu <sgollapr@amazon.com>
Co-authored-by: Ajay Karpur <akarpur@amazon.com>
1 parent 2e2fbeb commit 8800bcf

File tree

4 files changed: +366 -1 lines changed

buildspec.yml

Lines changed: 37 additions & 0 deletions
@@ -79,6 +79,25 @@ phases:
- build_cmd="docker build --cache-from $PROD_IMAGE:$prod_tag -f $GPU_DOCKERFILE -t $PREPROD_IMAGE:$GPU_TAG_PY3 . "
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"

+# build py37 images
+
+# prepare build context
+- build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py37"
+- cp $build_artifacts/*.py $build_dir/
+- cd $build_dir
+
+# build cpu image
+- prod_tag="$FRAMEWORK_VERSION-cpu-py37"
+- CPU_TAG_PY37="$prod_tag-$build_id"
+- build_cmd="docker build --no-cache -f $CPU_DOCKERFILE -t $PREPROD_IMAGE:$CPU_TAG_PY37 . "
+- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
+
+# build gpu image
+- prod_tag="$FRAMEWORK_VERSION-gpu-py37"
+- GPU_TAG_PY37="$prod_tag-$build_id"
+- build_cmd="docker build --no-cache -f $GPU_DOCKERFILE -t $PREPROD_IMAGE:$GPU_TAG_PY37 . "
+- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
+
# push images to ecr
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
- push_cmd="docker push $PREPROD_IMAGE:$CPU_TAG_PY2"
@@ -89,6 +108,10 @@ phases:
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
- push_cmd="docker push $PREPROD_IMAGE:$GPU_TAG_PY3"
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
+- push_cmd="docker push $PREPROD_IMAGE:$CPU_TAG_PY37"
+- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
+- push_cmd="docker push $PREPROD_IMAGE:$GPU_TAG_PY37"
+- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"

# launch remote gpu instance
- instance_type='p2.xlarge'
@@ -98,6 +121,8 @@ phases:
# run cpu integration tests
- py3_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu --durations 10"
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
+- py37_cmd="IGNORE_COVERAGE=- tox -e py37 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY37 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu --durations 10"
+- execute-command-if-has-matching-changes "$py37_cmd" "test/" "docker/*" "buildspec.yml"
- py2_cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu --durations 10"
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"

@@ -111,11 +136,19 @@ phases:
- py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"

+- cmd="IGNORE_COVERAGE=- tox -e py37 -- test/integration/local -n 4 --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY37 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu --durations 10"
+- py37_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
+- execute-command-if-has-matching-changes "$py37_cmd" "test/" "docker/*" "buildspec.yml"
+
# run sagemaker tests
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu --durations 10"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu --durations 10"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
+- test_cmd="IGNORE_COVERAGE=- tox -e py37 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY37 --py-version 3 --processor cpu --durations 10"
+- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
+- test_cmd="IGNORE_COVERAGE=- tox -e py37 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY37 --py-version 3 --processor gpu --durations 10"
+- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
- test_cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu --durations 10"
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
- test_cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu --durations 10"
@@ -135,3 +168,7 @@ phases:
- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY3"
- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
+- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY37"
+- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
+- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY37"
+- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
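
The new py37 steps mirror the existing py2/py3 flow: compose an image tag from the framework version, processor, Python version, and build id; build with --no-cache; push to ECR; run the tox py37 local, remote GPU, and SageMaker test suites against that tag; and delete the tags from the test repository at the end. As a rough illustration only — the account id, region, repository name, and build id below are hypothetical stand-ins for the $PREPROD_IMAGE and $build_id values supplied by the build environment — the new CPU build and push steps expand to something like:

    docker build --no-cache -f Dockerfile.cpu -t 123456789012.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-scriptmode:1.15.2-cpu-py37-abc123 .
    docker push 123456789012.dkr.ecr.us-west-2.amazonaws.com/sagemaker-tensorflow-scriptmode:1.15.2-cpu-py37-abc123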

docker/1.15.2/py37/Dockerfile.cpu

Lines changed: 141 additions & 0 deletions
@@ -0,0 +1,141 @@
+FROM ubuntu:18.04
+
+LABEL maintainer="Amazon AI"
+
+# Prevent docker build get stopped by requesting user interaction
+ENV DEBIAN_FRONTEND=noninteractive
+ENV DEBCONF_NONINTERACTIVE_SEEN=true
+# Set environment variables for MKL
+# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
+ENV KMP_AFFINITY=granularity=fine,compact,1,0
+ENV KMP_BLOCKTIME=1
+ENV KMP_SETTINGS=0
+# Python won’t try to write .pyc or .pyo files on the import of source modules
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+# See http://bugs.python.org/issue19846
+ENV PYTHONIOENCODING=UTF-8
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+# Specify the location of module that contains the training logic for SageMaker
+# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
+ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
+
+# Define framework-related package sources
+ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/cpu/final/tensorflow_cpu-1.15.2-cp37-cp37m-manylinux2010_x86_64.whl
+ARG PYTHON=python3
+ARG PYTHON_PIP=python3-pip
+ARG PIP=pip3
+ARG PYTHON_VERSION=3.7.7
+
+RUN apt-get update \
+&& apt-get install -y --no-install-recommends \
+build-essential \
+ca-certificates \
+curl \
+git \
+openssh-client \
+openssh-server \
+vim \
+wget \
+zlib1g-dev \
+&& rm -rf /var/lib/apt/lists/* \
+&& apt-get clean
+
+# Install Open MPI
+RUN mkdir /tmp/openmpi \
+&& cd /tmp/openmpi \
+&& curl -fSsL -O https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.1.tar.gz \
+&& tar zxf openmpi-4.0.1.tar.gz \
+&& cd openmpi-4.0.1 \
+&& ./configure --enable-orterun-prefix-by-default \
+&& make -j $(nproc) all \
+&& make install \
+&& ldconfig \
+&& rm -rf /tmp/openmpi
+
+# Create a wrapper for OpenMPI to allow running as root by default
+RUN mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real \
+&& echo '#!/bin/bash' > /usr/local/bin/mpirun \
+&& echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun \
+&& chmod a+x /usr/local/bin/mpirun
+
+RUN echo "hwloc_base_binding_policy = none" >> /usr/local/etc/openmpi-mca-params.conf \
+&& echo "rmaps_base_mapping_policy = slot" >> /usr/local/etc/openmpi-mca-params.conf
+
+ENV LD_LIBRARY_PATH=/usr/local/openmpi/lib:$LD_LIBRARY_PATH
+ENV PATH=/usr/local/openmpi/bin/:$PATH
+
+# SSH login fix. Otherwise user is kicked off after login
+RUN sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd
+
+# Create SSH key.
+RUN mkdir -p /root/.ssh/ \
+&& mkdir -p /var/run/sshd \
+&& ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
+&& cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
+&& printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config
+
+WORKDIR /
+
+RUN apt-get update \
+&& apt-get install -y --no-install-recommends \
+libbz2-dev \
+libc6-dev \
+libffi-dev \
+libgdbm-dev \
+libncursesw5-dev \
+libreadline-gplv2-dev \
+libsqlite3-dev \
+libssl-dev \
+tk-dev \
+&& rm -rf /var/lib/apt/lists/* \
+&& apt-get clean
+
+RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz \
+&& tar -xvf Python-$PYTHON_VERSION.tgz \
+&& cd Python-$PYTHON_VERSION \
+&& ./configure && make && make install \
+&& make && make install && rm -rf ../Python-$PYTHON_VERSION*
+
+RUN ${PIP} --no-cache-dir install --upgrade \
+pip \
+setuptools
+
+# Some TF tools expect a "python" binary
+RUN ln -s $(which python3) /usr/local/bin/python \
+&& ln -s $(which pip3) /usr/bin/pip
+
+RUN ${PIP} install --no-cache-dir -U \
+numpy==1.17.4 \
+scipy==1.2.2 \
+scikit-learn==0.20.3 \
+pandas==0.24.2 \
+Pillow==7.0.0 \
+h5py==2.10.0 \
+keras_applications==1.0.8 \
+keras_preprocessing==1.1.0 \
+requests==2.22.0 \
+smdebug==0.7.2 \
+sagemaker==1.56.1 \
+sagemaker-experiments==0.1.7 \
+mpi4py==3.0.2 \
+"cryptography>=2.3" \
+"sagemaker-tensorflow>=1.15,<1.16" \
+sagemaker-tensorflow-training==10.1.0 \
+# Let's install TensorFlow separately in the end to avoid
+# the library version to be overwritten
+&& ${PIP} install --force-reinstall --no-cache-dir -U \
+${TF_URL} \
+&& ${PIP} install --force-reinstall --no-cache-dir -U \
+horovod==0.18.2 \
+&& ${PIP} install --no-cache-dir -U \
+awscli
+
+ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+
+RUN chmod +x /usr/local/bin/deep_learning_container.py
+
+RUN curl https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
+
+CMD ["bin/bash"]
