Skip to content

Commit fd684cb

Browse files
authored
feature: install sagemaker-tensorflow-toolkit from PyPI. (#335)
1 parent e07703b commit fd684cb

File tree

6 files changed

+36
-60
lines changed

6 files changed

+36
-60
lines changed

README.rst

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -114,17 +114,6 @@ Before building "final" images:
114114
Build your "base" image. Make sure it is named and tagged in accordance with your "final"
115115
Dockerfile. Skip this step if you want to build an image for TensorFlow version 1.9.0 or above.
116116

117-
Then prepare the SageMaker TensorFlow Container python package in the image folder like below:
118-
119-
::
120-
121-
# Create the SageMaker TensorFlow Container Python package.
122-
cd sagemaker-tensorflow-containers
123-
python setup.py sdist
124-
125-
#. Copy your Python package to "final" Dockerfile directory that you are building.
126-
cp dist/sagemaker_tensorflow_container-<package_version>.tar.gz docker/<tensorflow_version>/final/py2
127-
128117
To build "final" Docker images for versions 1.6 and above, first download the appropriate TensorFlow pip wheel, then pass its location as a build argument. The wheels can be obtained from PyPI. For example, the files for 1.6.0 are here:
129118

130119
https://pypi.org/project/tensorflow/1.6.0/#files

buildspec.yml

Lines changed: 28 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,13 @@ phases:
2323
build:
2424
commands:
2525
# install
26-
- pip3 install -U -e .
2726
- pip3 install -U -e .[test]
2827

2928
# run the flake8 and twine tox environments
3029
- tox -e flake8,twine
3130

32-
# run unit tests
33-
- tox -e py36,py27 test/unit
34-
35-
# Create pip archive
3631
- root_dir=$(pwd)
3732
- build_id="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
38-
- python3 setup.py sdist
39-
- tar_name=$(ls dist)
4033

4134
# Find build artifacts
4235
- build_artifacts=$root_dir/docker/build_artifacts
@@ -48,48 +41,54 @@ phases:
4841

4942
# prepare build context
5043
- build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py2"
51-
- cp $root_dir/dist/$tar_name $build_dir
5244
- cp $build_artifacts/*.py $build_dir/
5345
- cd $build_dir
5446

5547
# build cpu image
5648
- prod_tag="$FRAMEWORK_VERSION-cpu-py2"
5749
- CPU_TAG_PY2="$prod_tag-$build_id"
5850
- docker pull $PROD_IMAGE:$prod_tag
59-
- docker build --cache-from $PROD_IMAGE:$prod_tag -f $CPU_DOCKERFILE -t $PREPROD_IMAGE:$CPU_TAG_PY2 .
51+
- build_cmd="docker build --cache-from $PROD_IMAGE:$prod_tag -f $CPU_DOCKERFILE -t $PREPROD_IMAGE:$CPU_TAG_PY2 . "
52+
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
6053

6154
# build gpu image
6255
- prod_tag="$FRAMEWORK_VERSION-gpu-py2"
6356
- GPU_TAG_PY2="$prod_tag-$build_id"
6457
- docker pull $PROD_IMAGE:$prod_tag
65-
- docker build --cache-from $PROD_IMAGE:$prod_tag -f $GPU_DOCKERFILE -t $PREPROD_IMAGE:$GPU_TAG_PY2 .
58+
- build_cmd="docker build --cache-from $PROD_IMAGE:$prod_tag -f $GPU_DOCKERFILE -t $PREPROD_IMAGE:$GPU_TAG_PY2 . "
59+
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
6660

6761
# build py3 images
6862

6963
# prepare build context
7064
- build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py3"
71-
- cp $root_dir/dist/$tar_name $build_dir
7265
- cp $build_artifacts/*.py $build_dir/
7366
- cd $build_dir
7467

7568
# build cpu image
7669
- prod_tag="$FRAMEWORK_VERSION-cpu-py3"
7770
- CPU_TAG_PY3="$prod_tag-$build_id"
7871
- docker pull $PROD_IMAGE:$prod_tag
79-
- docker build --cache-from $PROD_IMAGE:$prod_tag -f $CPU_DOCKERFILE -t $PREPROD_IMAGE:$CPU_TAG_PY3 .
72+
- build_cmd="docker build --cache-from $PROD_IMAGE:$prod_tag -f $CPU_DOCKERFILE -t $PREPROD_IMAGE:$CPU_TAG_PY3 . "
73+
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
8074

8175
# build gpu image
8276
- prod_tag="$FRAMEWORK_VERSION-gpu-py3"
8377
- GPU_TAG_PY3="$prod_tag-$build_id"
8478
- docker pull $PROD_IMAGE:$prod_tag
85-
- docker build --cache-from $PROD_IMAGE:$prod_tag -f $GPU_DOCKERFILE -t $PREPROD_IMAGE:$GPU_TAG_PY3 .
79+
- build_cmd="docker build --cache-from $PROD_IMAGE:$prod_tag -f $GPU_DOCKERFILE -t $PREPROD_IMAGE:$GPU_TAG_PY3 . "
80+
- execute-command-if-has-matching-changes "$build_cmd" "test/" "docker/*" "buildspec.yml"
8681

8782
# push images to ecr
8883
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
8984
# Assign push_cmd before every guarded call: each push then goes through the same
# execute-command-if-has-matching-changes guard (same path filters) as the matching docker build.
- push_cmd="docker push $PREPROD_IMAGE:$CPU_TAG_PY2"
85+
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
9086
- push_cmd="docker push $PREPROD_IMAGE:$GPU_TAG_PY2"
87+
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
9188
- push_cmd="docker push $PREPROD_IMAGE:$CPU_TAG_PY3"
89+
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
9290
- push_cmd="docker push $PREPROD_IMAGE:$GPU_TAG_PY3"
91+
- execute-command-if-has-matching-changes "$push_cmd" "test/" "docker/*" "buildspec.yml"
9392

9493
# launch remote gpu instance
9594
- instance_type='p2.xlarge'
@@ -98,37 +97,41 @@ phases:
9897

9998
# run cpu integration tests
10099
- py3_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu --durations 10"
101-
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
100+
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
102101
- py2_cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu --durations 10"
103-
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
102+
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
104103

105104
# run gpu integration tests
106105
- printf "$SETUP_CMDS" > $SETUP_FILE
107106
- cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -n 4 --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu --durations 10"
108107
- py3_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
109-
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
108+
- execute-command-if-has-matching-changes "$py3_cmd" "test/" "docker/*" "buildspec.yml"
110109

111110
- cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/local -n 4 --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu --durations 10"
112111
- py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
113-
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
112+
- execute-command-if-has-matching-changes "$py2_cmd" "test/" "docker/*" "buildspec.yml"
114113

115114
# run sagemaker tests
116115
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu --durations 10"
117-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
116+
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
118117
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu --durations 10"
119-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
118+
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
120119
- test_cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu --durations 10"
121-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
120+
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
122121
- test_cmd="IGNORE_COVERAGE=- tox -e py27 -- test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu --durations 10"
123-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
122+
- execute-command-if-has-matching-changes "$test_cmd" "test/" "docker/*" "buildspec.yml"
124123

125124
finally:
126125
# shut down remote gpu instance
127126
- cleanup-gpu-instances
128127
- cleanup-key-pairs
129128

130129
# remove ecr image
131-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY2
132-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY2
133-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY3
134-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY3
130+
- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY2"
131+
- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
132+
- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY2"
133+
- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
134+
- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY3"
135+
- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"
136+
- delete_cmd="aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY3"
137+
- execute-command-if-has-matching-changes "$delete_cmd" "test/" "docker/*" "buildspec.yml"

docker/1.15.2/py2/Dockerfile.cpu

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ ENV LC_ALL=C.UTF-8
2222
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
2323

2424
# Define framework-related package sources
25-
ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_training*.tar.gz
2625
ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/cpu/final/tensorflow-1.15.2-cp27-cp27mu-manylinux2010_x86_64.whl
2726

2827
RUN apt-get update \
@@ -80,8 +79,6 @@ RUN apt-get update \
8079
python \
8180
python-pip
8281

83-
COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
84-
8582
RUN pip --no-cache-dir install --upgrade \
8683
pip \
8784
setuptools
@@ -103,13 +100,12 @@ RUN pip install --no-cache-dir -U \
103100
mpi4py==3.0.2 \
104101
"cryptography>=2.3" \
105102
"sagemaker-tensorflow>=1.15,<1.16" \
103+
"sagemaker-tensorflow-training>=2,<3" \
106104
# Install TensorFlow separately at the end so that the library version is not overwritten
107105
&& pip install --force-reinstall --no-cache-dir -U \
108106
${TF_URL} \
109107
&& pip install --no-cache-dir -U \
110-
$FRAMEWORK_SUPPORT_INSTALLABLE \
111-
awscli\
112-
&& rm -f $FRAMEWORK_SUPPORT_INSTALLABLE \
108+
awscli \
113109
&& pip install --no-cache-dir -U \
114110
horovod==0.18.2
115111

docker/1.15.2/py2/Dockerfile.gpu

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ ENV LC_ALL=C.UTF-8
1919
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
2020

2121
# Define framework-related package sources
22-
ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_training*.tar.gz
2322
ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp27-cp27mu-manylinux2010_x86_64.whl
2423

2524
RUN apt-get update \
@@ -120,8 +119,6 @@ RUN pip --no-cache-dir install --upgrade \
120119
# Some TF tools expect a "python" binary
121120
RUN ln -s $(which python) /usr/local/bin/python
122121

123-
COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
124-
125122
RUN pip install --no-cache-dir -U \
126123
numpy==1.16.5 \
127124
scipy==1.2.2 \
@@ -136,13 +133,12 @@ RUN pip install --no-cache-dir -U \
136133
mpi4py==3.0.2 \
137134
"cryptography>=2.3" \
138135
"sagemaker-tensorflow>=1.15,<1.16" \
136+
"sagemaker-tensorflow-training>=2,<3" \
139137
# Install TensorFlow separately at the end so that the library version is not overwritten
140138
&& pip install --force-reinstall --no-cache-dir -U \
141139
${TF_URL} \
142140
&& pip install --no-cache-dir -U \
143-
$FRAMEWORK_SUPPORT_INSTALLABLE \
144-
awscli\
145-
&& rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
141+
awscli
146142

147143
# Install Horovod, temporarily using CUDA stubs
148144
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs \

docker/1.15.2/py3/Dockerfile.cpu

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ ENV LC_ALL=C.UTF-8
2222
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
2323

2424
# Define framework-related package sources
25-
ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_training*.tar.gz
2625
ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/cpu/final/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl
2726

2827
RUN apt-get update \
@@ -78,8 +77,6 @@ RUN mkdir -p /root/.ssh/ \
7877

7978
WORKDIR /
8079

81-
COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
82-
8380
RUN pip3 --no-cache-dir install --upgrade \
8481
pip \
8582
setuptools
@@ -105,16 +102,15 @@ RUN pip install --no-cache-dir -U \
105102
mpi4py==3.0.2 \
106103
"cryptography>=2.3" \
107104
"sagemaker-tensorflow>=1.15,<1.16" \
105+
"sagemaker-tensorflow-training>=2,<3" \
108106
# Install TensorFlow separately at the end so that
109107
# the library version is not overwritten
110108
&& pip install --force-reinstall --no-cache-dir -U \
111109
${TF_URL} \
112110
&& pip install --force-reinstall --no-cache-dir -U \
113111
horovod==0.18.2 \
114112
&& pip install --no-cache-dir -U \
115-
$FRAMEWORK_SUPPORT_INSTALLABLE \
116-
awscli\
117-
&& rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
113+
awscli
118114

119115
ADD https://raw.githubusercontent.com/aws/aws-deep-learning-containers-utils/master/deep_learning_container.py /usr/local/bin/deep_learning_container.py
120116

docker/1.15.2/py3/Dockerfile.gpu

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ ENV LC_ALL=C.UTF-8
1919
ENV SAGEMAKER_TRAINING_MODULE=sagemaker_tensorflow_container.training:main
2020

2121
# Define framework-related package sources
22-
ARG FRAMEWORK_SUPPORT_INSTALLABLE=sagemaker_tensorflow_training*.tar.gz
2322
ARG TF_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com/1.15.2/AmazonLinux/gpu/final/tensorflow_gpu-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl
2423

2524
RUN apt-get update \
@@ -123,8 +122,6 @@ RUN pip3 --no-cache-dir install --upgrade \
123122
RUN ln -s $(which python3) /usr/local/bin/python \
124123
&& ln -s $(which pip3) /usr/bin/pip
125124

126-
COPY $FRAMEWORK_SUPPORT_INSTALLABLE .
127-
128125
RUN pip install --no-cache-dir -U \
129126
numpy==1.17.4 \
130127
scipy==1.2.2 \
@@ -142,14 +139,13 @@ RUN pip install --no-cache-dir -U \
142139
mpi4py==3.0.2 \
143140
"cryptography>=2.3" \
144141
"sagemaker-tensorflow>=1.15,<1.16" \
142+
"sagemaker-tensorflow-training>=2,<3" \
145143
# Install TensorFlow separately at the end so that
146144
# the library version is not overwritten
147145
&& pip install --force-reinstall --no-cache-dir -U \
148146
${TF_URL} \
149147
&& pip install --no-cache-dir -U \
150-
$FRAMEWORK_SUPPORT_INSTALLABLE \
151-
awscli\
152-
&& rm -f $FRAMEWORK_SUPPORT_INSTALLABLE
148+
awscli
153149

154150
# Install Horovod, temporarily using CUDA stubs
155151
RUN ldconfig /usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs \

0 commit comments

Comments
 (0)