Commit a990210 (merge of parents da7babd and a00aa70)

36 files changed: +1067 −157 lines

.gitignore

Lines changed: 2 additions & 0 deletions

@@ -3,8 +3,10 @@ dist
 **/*.egg-info
 .DS_Store
 .idea/
+.cache/
 *.iml
 **/.ipynb_checkpoints
 **/.python-version
 .tox
 *~
+.coverage

MANIFEST.in

Lines changed: 8 additions & 1 deletion

@@ -1,3 +1,10 @@
+recursive-include src/sagemaker_tensorflow_container *
+
 include VERSION
 include LICENSE
-include README.md
+include README.rst
+
+prune test
+
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
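
A quick way to sanity-check the new manifest rules is to build the sdist and inspect what actually gets packaged. A minimal sketch, assuming a standard setuptools checkout:

    # build the source distribution, then list its contents
    python3 setup.py sdist
    tar -tzf dist/sagemaker_tensorflow_training-*.tar.gz | sort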

README.rst

Lines changed: 13 additions & 9 deletions

@@ -56,6 +56,10 @@ The Docker images are built from the Dockerfiles specified in
 The Docker files are grouped based on TensorFlow version and separated
 based on Python version and processor type.
 
+The Docker files for TensorFlow 2.0 are available in the
+`tf-2 <https://github.com/aws/sagemaker-tensorflow-container/tree/tf-2>`__ branch, in
+`docker/2.0.0/ <https://github.com/aws/sagemaker-tensorflow-container/tree/tf-2/docker/2.0.0>`__.
+
 The Docker images, used to run training & inference jobs, are built from
 both corresponding "base" and "final" Dockerfiles.
 
@@ -203,7 +207,7 @@ Running integration tests require `Docker <https://www.docker.com/>`__ and `AWS
 credentials <https://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/setup-credentials.html>`__,
 as the integration tests make calls to a couple AWS services. The integration and functional
 tests require configurations specified within their respective
-`conftest.py <https://github.com/aws/sagemaker-tensorflow-containers/blob/master/test/integ/conftest.py>`__.
+`conftest.py <https://github.com/aws/sagemaker-tensorflow-containers/blob/master/test/integration/conftest.py>`__. Make sure to update the account-id and region at a minimum.
 
 Integration tests on GPU require `Nvidia-Docker <https://github.com/NVIDIA/nvidia-docker>`__.
 
@@ -218,18 +222,18 @@ If you want to run local integration tests, then use:
 
     # Required arguments for integration tests are found in test/integ/conftest.py
 
-    pytest test/integ --docker-base-name <your_docker_image> \
-        --tag <your_docker_image_tag> \
-        --framework-version <tensorflow_version> \
-        --processor <cpu_or_gpu>
+    pytest test/integration --docker-base-name <your_docker_image> \
+        --tag <your_docker_image_tag> \
+        --framework-version <tensorflow_version> \
+        --processor <cpu_or_gpu>
 
 ::
 
     # Example
-    pytest test/integ --docker-base-name preprod-tensorflow \
-        --tag 1.0 \
-        --framework-version 1.4.1 \
-        --processor cpu
+    pytest test/integration --docker-base-name preprod-tensorflow \
+        --tag 1.0 \
+        --framework-version 1.4.1 \
+        --processor cpu
 
 Functional Tests
 ~~~~~~~~~~~~~~~~

buildspec-deploy.yml

Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+version: 0.2
+
+phases:
+  build:
+    commands:
+      - PACKAGE_FILE="$CODEBUILD_SRC_DIR_ARTIFACT_1/sagemaker_tensorflow_training-*.tar.gz"
+
+      # publish to pypi
+      - publish-pypi-package $PACKAGE_FILE
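
Note that the asterisk in PACKAGE_FILE is stored literally at assignment time and only expands when the variable is used unquoted, as in the publish step. A minimal shell sketch of that behavior (paths illustrative):

    PACKAGE_FILE="dist/sagemaker_tensorflow_training-*.tar.gz"
    echo "$PACKAGE_FILE"   # prints the literal pattern, '*' intact
    ls $PACKAGE_FILE       # unquoted use: the shell expands the glob here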

buildspec-release.yml

Lines changed: 15 additions & 73 deletions

@@ -1,92 +1,34 @@
 version: 0.2
 
-env:
-  variables:
-    FRAMEWORK_VERSION: '1.13.1'
-    GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
-    SETUP_FILE: 'setup_cmds.sh'
-    SETUP_CMDS: '#!/bin/bash\npip install --upgrade pip\npip install -U -e .\npip install -U -e .[test]'
-
 phases:
-  pre_build:
-    commands:
-      - start-dockerd
-      - ACCOUNT=$(aws --region $AWS_DEFAULT_REGION sts --endpoint-url https://sts.$AWS_DEFAULT_REGION.amazonaws.com get-caller-identity --query 'Account' --output text)
-
   build:
     commands:
      # prepare the release (update versions, changelog etc.)
-      - git-release --min-version 2.0.0 --prepare
+      - git-release --prepare
 
-      # run linter
-      - tox -e flake8
+      # run linters
+      - tox -e flake8,twine
 
       # run unit tests
-      - tox -e py36,py27 test/unit
-
-      # Create pip archive
-      - build_dir="docker/$FRAMEWORK_VERSION"
-      - python3 setup.py sdist
-      - tar_name=$(ls dist)
-      - cp dist/$tar_name $build_dir
-
-      # Build all images
-      - python3 scripts/build_all.py --account $ACCOUNT --region $AWS_DEFAULT_REGION
+      - AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
+        AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
+        tox -e py27,py36 -- test/unit
 
-      # Publish all images
-      - python3 scripts/publish_all.py --account $ACCOUNT --region $AWS_DEFAULT_REGION
+      # run local integ tests
+      #- $(aws ecr get-login --no-include-email --region us-west-2)
+      #- IGNORE_COVERAGE=- tox -e py27,py36 -- test/integ/local
 
-      - base_name="$ACCOUNT.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/sagemaker-tensorflow-scriptmode"
-      # run local cpu integ tests
-      - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --docker-base-name $base_name --framework-version $FRAMEWORK_VERSION --processor cpu
-      - IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --docker-base-name $base_name --py-version 2 --framework-version $FRAMEWORK_VERSION --processor cpu
+      # run sagemaker integ tests
+      #- IGNORE_COVERAGE=- tox -e py27,py36 -- test/integ/sagemaker
 
-      # launch remote gpu instance
-      - prefix='ml.'
-      - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
-      - create-key-pair
-      - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu
-      - printf "$SETUP_CMDS" > $SETUP_FILE
-
-      - py2_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --docker-base-name $base_name --py-version 2 --framework-version $FRAMEWORK_VERSION --processor gpu"
-      - remote-test --github-repo sagemaker-tensorflow-container --setup-file $SETUP_FILE --branch script-mode --test-cmd "$py2_cmd"
-      - py3_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --docker-base-name $base_name --framework-version $FRAMEWORK_VERSION --processor gpu"
-      - remote-test --github-repo sagemaker-tensorflow-container --branch script-mode --test-cmd "$py3_cmd" --skip-setup
-
-      - IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n 24 --region $AWS_DEFAULT_REGION --account-id 142577830533 --docker-base-name sagemaker-tensorflow-scriptmode --framework-version $FRAMEWORK_VERSION --processor cpu,gpu --py-version 2,3
-
-      - |
-        echo '[{
-          "repository": "sagemaker-tensorflow-scriptmode",
-          "tags": [{
-            "source": "1.13.1-cpu-py2",
-            "dest": ["1.13.1-cpu-py2", "1.13-cpu-py2", "1.13.1-cpu-py2-'${CODEBUILD_BUILD_ID#*:}'"]
-          },{
-            "source": "1.13.1-cpu-py3",
-            "dest": ["1.13.1-cpu-py3", "1.13-cpu-py3", "1.13.1-cpu-py3-'${CODEBUILD_BUILD_ID#*:}'"]
-          },{
-            "source": "1.13.1-gpu-py2",
-            "dest": ["1.13.1-gpu-py2", "1.13-gpu-py2", "1.13.1-gpu-py2-'${CODEBUILD_BUILD_ID#*:}'"]
-          },{
-            "source": "1.13.1-gpu-py3",
-            "dest": ["1.13.1-gpu-py3", "1.13-gpu-py3", "1.13.1-gpu-py3-'${CODEBUILD_BUILD_ID#*:}'"]
-          }],
-          "test": [
-            "IGNORE_COVERAGE=- tox -e py36 -- -m deploy_test test/integration/sagemaker -n 4 --region {region} --account-id {aws-id} --instance-type {cpu-instance-type} --docker-base-name sagemaker-tensorflow-scriptmode --framework-version 1.13.1 --processor cpu --py-version 2,3",
-            "IGNORE_COVERAGE=- tox -e py36 -- -m deploy_test test/integration/sagemaker -n 4 --region {region} --account-id {aws-id} --docker-base-name sagemaker-tensorflow-scriptmode --framework-version 1.13.1 --processor gpu --py-version 2,3"
-          ]
-        }]' > deployments.json
+      # generate the distribution package
+      - python3 setup.py sdist
 
       # publish the release to github
       - git-release --publish
 
-    finally:
-      # shut down remote gpu instance
-      - cleanup-gpu-instances
-      - cleanup-key-pairs
-
 artifacts:
   files:
-    - deployments.json
+    - dist/sagemaker_tensorflow_training-*.tar.gz
   name: ARTIFACT_1
+  discard-paths: yes
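
The unit-test step above blanks the AWS credential variables inline so the tests cannot accidentally reach AWS. Prefixing a command with VAR= assignments overrides those variables for that single command only; a minimal sketch:

    # the empty assignments apply only to the child process
    AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= \
        python3 -c 'import os; print(repr(os.environ["AWS_ACCESS_KEY_ID"]))'
    # prints '' -- the parent shell's own value, if any, is untouched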

buildspec.yml

Lines changed: 60 additions & 49 deletions

@@ -2,11 +2,7 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.13.1'
-    CPU_FRAMEWORK_BINARY: 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/cpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl'
-    CPU_PY_VERSION: '3'
-    GPU_FRAMEWORK_BINARY: 'https://s3-us-west-2.amazonaws.com/tensorflow-aws/1.13/AmazonLinux/gpu/latest-patch-latest-patch/tensorflow-1.13.1-cp36-cp36m-linux_x86_64.whl'
-    GPU_PY_VERSION: '3'
+    FRAMEWORK_VERSION: '1.15.0'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-tensorflow-container'
     SETUP_FILE: 'setup_cmds.sh'
@@ -28,86 +24,101 @@ phases:
       - pip3 install -U -e .[test]
 
       # run flake8
-      - tox -e flake8
+      - tox -e flake8,twine
 
       # run unit tests
       - tox -e py36,py27 test/unit
 
       # Create pip archive
-      - build_dir="docker/$FRAMEWORK_VERSION"
+      - root_dir=$(pwd)
       - build_id="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
       - python3 setup.py sdist
       - tar_name=$(ls dist)
-      - cp dist/$tar_name $build_dir
 
-      # build cpu image
-      - cpu_dockerfile="Dockerfile.cpu"
+      # Find build artifacts
+      - build_artifacts=$root_dir/docker/artifacts
 
-      # Download framework binary
-      - cpu_fw_binary=$(basename $CPU_FRAMEWORK_BINARY)
-      - wget -O $build_dir/$cpu_fw_binary $CPU_FRAMEWORK_BINARY
-
-      - CPU_TAG="$FRAMEWORK_VERSION-cpu-py$CPU_PY_VERSION-$build_id"
+      # build py2 images
 
+      # prepare build context
+      - build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py2"
+      - cp $root_dir/dist/$tar_name $build_dir
+      - cp $build_artifacts/* $build_dir/
       - cd $build_dir
-      - docker build -f $cpu_dockerfile --build-arg framework_support_installable=$tar_name --build-arg py_version=$CPU_PY_VERSION --build-arg framework_installable=$cpu_fw_binary -t $PREPROD_IMAGE:$CPU_TAG .
-      - cd ../../
+
+      # build cpu image
+      - cpu_dockerfile="Dockerfile.cpu"
+      - CPU_TAG_PY2="$FRAMEWORK_VERSION-cpu-py2-$build_id"
+      - docker build -f $cpu_dockerfile -t $PREPROD_IMAGE:$CPU_TAG_PY2 .
 
       # build gpu image
       - gpu_dockerfile="Dockerfile.gpu"
+      - GPU_TAG_PY2="$FRAMEWORK_VERSION-gpu-py2-$build_id"
+      - docker build -f $gpu_dockerfile -t $PREPROD_IMAGE:$GPU_TAG_PY2 .
 
-      # Download framework binary
-      - gpu_fw_binary=$(basename $GPU_FRAMEWORK_BINARY)
-      - wget -O $build_dir/$gpu_fw_binary $GPU_FRAMEWORK_BINARY
-
-      - GPU_TAG="$FRAMEWORK_VERSION-gpu-py$GPU_PY_VERSION-$build_id"
+      # build py3 images
 
+      # prepare build context
+      - build_dir="$root_dir/docker/$FRAMEWORK_VERSION/py3"
+      - cp $root_dir/dist/$tar_name $build_dir
+      - cp $build_artifacts/* $build_dir/
       - cd $build_dir
-      - docker build -f $gpu_dockerfile --build-arg framework_support_installable=$tar_name --build-arg py_version=$GPU_PY_VERSION --build-arg framework_installable=$gpu_fw_binary -t $PREPROD_IMAGE:$GPU_TAG .
-      - cd ../../
+
+      # build cpu image
+      - cpu_dockerfile="Dockerfile.cpu"
+      - CPU_TAG_PY3="$FRAMEWORK_VERSION-cpu-py3-$build_id"
+      - docker build -f $cpu_dockerfile -t $PREPROD_IMAGE:$CPU_TAG_PY3 .
+
+      # build gpu image
+      - gpu_dockerfile="Dockerfile.gpu"
+      - GPU_TAG_PY3="$FRAMEWORK_VERSION-gpu-py3-$build_id"
+      - docker build -f $gpu_dockerfile -t $PREPROD_IMAGE:$GPU_TAG_PY3 .
 
       # push images to ecr
       - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker push $PREPROD_IMAGE:$CPU_TAG
-      - docker push $PREPROD_IMAGE:$GPU_TAG
+      - docker push $PREPROD_IMAGE:$CPU_TAG_PY2
+      - docker push $PREPROD_IMAGE:$GPU_TAG_PY2
+      - docker push $PREPROD_IMAGE:$CPU_TAG_PY3
+      - docker push $PREPROD_IMAGE:$GPU_TAG_PY3
 
       # launch remote gpu instance
       - instance_type='p2.xlarge'
      - create-key-pair
       - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu
 
       # run cpu integration tests
-      - |
-        if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-          pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG --framework-version $FRAMEWORK_VERSION --py-version $CPU_PY_VERSION --processor cpu
-        else
-          echo "skipping cpu integration tests"
-        fi
+      - py3_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor cpu"
+      - py2_cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $CPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor cpu"
+      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
 
       # run gpu integration tests
-      - |
-        if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-          printf "$SETUP_CMDS" > $SETUP_FILE
-          cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG --framework-version $FRAMEWORK_VERSION --py-version $GPU_PY_VERSION --processor gpu"
-          remote-test --github-repo $GITHUB_REPO --test-cmd "$cmd" --setup-file $SETUP_FILE --pr-number "$PR_NUM"
-        else
-          echo "skipping gpu integration tests"
-        fi
+      - printf "$SETUP_CMDS" > $SETUP_FILE
+      - cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY2 --framework-version $FRAMEWORK_VERSION --py-version 2 --processor gpu"
+      - py3_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
+      - execute-command-if-has-matching-changes "$py3_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+
+      - cmd="pytest test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $PREPROD_IMAGE --tag $GPU_TAG_PY3 --framework-version $FRAMEWORK_VERSION --py-version 3 --processor gpu"
+      - py2_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
+      - execute-command-if-has-matching-changes "$py2_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
 
       # run sagemaker tests
-      - |
-        if has-matching-changes "test/" "tests/" "src/*.py" "docker/*" "buildspec.yml"; then
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG --py-version $CPU_PY_VERSION --processor cpu
-          pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG --py-version $GPU_PY_VERSION --processor gpu
-        else
-          echo "skipping sagemaker tests"
-        fi
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY2 --py-version 2 --processor cpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY2 --py-version 2 --processor gpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $CPU_TAG_PY3 --py-version 3 --processor cpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
+      - test_cmd="pytest test/integration/sagemaker -n 8 --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --tag $GPU_TAG_PY3 --py-version 3 --processor gpu"
+      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "docker/*" "buildspec.yml"
 
     finally:
       # shut down remote gpu instance
       - cleanup-gpu-instances
       - cleanup-key-pairs
 
       # remove ecr image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY2
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY2
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$CPU_TAG_PY3
+      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GPU_TAG_PY3
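
The image tags embed a sanitized build ID because docker tags may not contain ':', while CodeBuild build IDs do. A small sketch of the tag construction, with a hypothetical build ID:

    # CodeBuild IDs look like "project:uuid"; sed swaps ':' for '-'
    CODEBUILD_BUILD_ID="sagemaker-tensorflow-container:0123abcd"   # hypothetical value
    build_id="$(echo $CODEBUILD_BUILD_ID | sed -e 's/:/-/g')"
    echo "1.15.0-gpu-py3-$build_id"
    # -> 1.15.0-gpu-py3-sagemaker-tensorflow-container-0123abcd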
