Skip to content

Commit a58d124

Browse files
Fix/ci (#419)
* deprecation: drop py2 support, Update python and other CI * update image uris * update tf requirement * test only py39 * fix tox * correct docker image URI * Trigger CI * fix tf dependency * test with tf 2.8 * test with tf.25 and py37 * change everything to 2.5.0 * fix tox.ini * fix: pin protobuf version * fix: dlc name * fix: don't touch docker dir * revert changes in docker/ * fix: revert more changes * test: use p3 instances * fix: protobuf version * update: bump pip version * reduce protobuf version * fix: use py37 * update: use py38 * install tfio, separate tf builds * fix: install ssh * no-op to retrigger cb * Revert "no-op to retrigger cb" This reverts commit 9582aee. * update: use python 3.8 for ec2 env * trigger ci * install ssh in gen gpu * fix nvidia gpg * use mnist custom; update instance type; reduce reruns * fix gpg issue again * reduce horovod tests; fix test_mnist flake8 issues * fix: use p3.16 and skip gen * protobuf and use p3.2xl Co-authored-by: Satish Pasumarthi <spasuma@amazon.com>
1 parent 777d9fc commit a58d124

23 files changed

+257
-119
lines changed

.coveragerc_py36 renamed to .coveragerc_py38

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,4 @@ partial_branches =
1717

1818
show_missing = True
1919

20-
fail_under = 90
20+
fail_under = 90

.coveragerc_py27 renamed to .coveragerc_py39

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,16 @@ timid = True
55
[report]
66
exclude_lines =
77
pragma: no cover
8-
pragma: py2 no cover
9-
if six.PY3
10-
elif six.PY3
8+
pragma: py3 no cover
9+
if six.PY2
10+
elif six.PY2
1111

1212
partial_branches =
1313
pragma: no cover
14-
pragma: py2 no cover
14+
pragma: py3 no cover
1515
if six.PY3
1616
elif six.PY3
1717

1818
show_missing = True
1919

20-
fail_under = 75
20+
fail_under = 90

buildspec-dlc-cpu-tests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 0.2
22

33
env:
44
variables:
5-
FRAMEWORK_VERSION: '2.3.1'
5+
FRAMEWORK_VERSION: '2.7.1'
66
CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
77
ECR_REPO: 'sagemaker-test'
88

@@ -27,11 +27,11 @@ phases:
2727
- TEST_OPTS=" --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
2828

2929
# run local CPU integration tests (build and push the image to ECR repo)
30-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image $TEST_OPTS"
30+
- test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/local --build-image --push-image $TEST_OPTS"
3131
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-dlc-cpu-tests.yml"
3232

3333
# run CPU sagemaker integration tests
34-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n auto --reruns 3 --reruns-delay 15 --instance-type $CPU_INSTANCE_TYPE $TEST_OPTS"
34+
- test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker -n auto --reruns 1 --reruns-delay 15 --instance-type $CPU_INSTANCE_TYPE $TEST_OPTS"
3535
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-dlc-cpu-tests.yml"
3636
finally:
3737
# remove ECR image

buildspec-dlc-gpu-tests.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ version: 0.2
22

33
env:
44
variables:
5-
FRAMEWORK_VERSION: '2.3.1'
6-
GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
5+
FRAMEWORK_VERSION: '2.7.1'
6+
GPU_INSTANCE_TYPE: 'ml.p3.2xlarge'
77
ECR_REPO: 'sagemaker-test'
88
GITHUB_REPO: 'sagemaker-tensorflow-training-toolkit'
99
DLC_ACCOUNT: '763104351884'
1010
SETUP_FILE: 'setup_cmds.sh'
11-
SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip==20.3.1\npython3.6 -m pip install -U .\npython3.6 -m pip install -U .[test]'
11+
SETUP_CMDS: '#!/bin/bash\npython3 -m pip install --upgrade pip==21.3.1\npython3 -m pip install -U .\npython3 -m pip install -U .[test]'
1212

1313
phases:
1414
pre_build:
@@ -32,7 +32,7 @@ phases:
3232
- TEST_OPTS=" --dockerfile-type dlc.gpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
3333

3434
# build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
35-
- python3.6 setup.py sdist
35+
- python3 setup.py sdist
3636
- build_dir="test/container/$FRAMEWORK_VERSION"
3737
- $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
3838
- docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
@@ -48,12 +48,12 @@ phases:
4848

4949
# run dlc gpu local tests on remote host
5050
- printf "$SETUP_CMDS" > $SETUP_FILE
51-
- dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local $TEST_OPTS"
52-
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
51+
- dlc_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/local $TEST_OPTS"
52+
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --python-version 3.8"
5353
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-dlc-gpu-tests.yml"
5454

5555
# run GPU sagemaker integration tests
56-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n auto --reruns 3 --reruns-delay 15 --instance-type $GPU_INSTANCE_TYPE $TEST_OPTS"
56+
- test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker -n auto --reruns 1 --reruns-delay 15 --instance-type $GPU_INSTANCE_TYPE $TEST_OPTS"
5757
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-dlc-gpu-tests.yml"
5858
finally:
5959
# shut down remote GPU instance

buildspec-gen-cpu-tests.yml

Lines changed: 3 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ version: 0.2
22

33
env:
44
variables:
5-
FRAMEWORK_VERSION: '2.3.1'
5+
FRAMEWORK_VERSION: '2.7.1'
66
CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
77
ECR_REPO: 'sagemaker-test'
88

@@ -17,22 +17,6 @@ phases:
1717

1818
build:
1919
commands:
20+
# no-op tests to prioritize dlc tests
2021
- TOX_PARALLEL_NO_SPINNER=1
21-
- PY_COLORS=0
22-
23-
# define tags
24-
- GEN_CPU_TAG="$FRAMEWORK_VERSION-gen-cpu-$BUILD_ID"
25-
26-
# establish common test options
27-
- TEST_OPTS=" --dockerfile-type tf --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GEN_CPU_TAG"
28-
29-
# run local CPU integration tests (build and push the image to ECR repo)
30-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image $TEST_OPTS"
31-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-gen-cpu-tests.yml"
32-
33-
# run CPU sagemaker integration tests
34-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n auto --reruns 3 --reruns-delay 15 --instance-type $CPU_INSTANCE_TYPE $TEST_OPTS"
35-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-gen-cpu-tests.yml"
36-
finally:
37-
# remove ECR image
38-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GEN_CPU_TAG
22+
- PY_COLORS=0

buildspec-gen-gpu-tests.yml

Lines changed: 5 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ version: 0.2
22

33
env:
44
variables:
5-
FRAMEWORK_VERSION: '2.3.1'
6-
GPU_INSTANCE_TYPE: 'ml.p2.xlarge'
5+
FRAMEWORK_VERSION: '2.7.1'
6+
GPU_INSTANCE_TYPE: 'ml.p3.16xlarge'
77
ECR_REPO: 'sagemaker-test'
88
GITHUB_REPO: 'sagemaker-tensorflow-training-toolkit'
99
DLC_ACCOUNT: '763104351884'
1010
SETUP_FILE: 'setup_cmds.sh'
11-
SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip==20.3.1\npython3.6 -m pip install -U .\npython3.6 -m pip install -U .[test]'
11+
SETUP_CMDS: '#!/bin/bash\npython3 -m pip install --upgrade pip==21.3.1\npython3 -m pip install -U .\npython3 -m pip install -U .[test]'
1212

1313
phases:
1414
pre_build:
@@ -22,42 +22,6 @@ phases:
2222

2323
build:
2424
commands:
25+
# no-op tests to prioritize dlc tests
2526
- TOX_PARALLEL_NO_SPINNER=1
26-
- PY_COLORS=0
27-
28-
# define tags
29-
- GEN_GPU_TAG="$FRAMEWORK_VERSION-gen-gpu-$BUILD_ID"
30-
31-
# establish common test options
32-
- TEST_OPTS=" --dockerfile-type tf --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --account-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GEN_GPU_TAG"
33-
34-
# build Generic GPU image on build host instead of GPU instance
35-
- python3.6 setup.py sdist
36-
- build_dir="test/container/$FRAMEWORK_VERSION"
37-
- docker build -f "$build_dir/Dockerfile.tf" -t $PREPROD_IMAGE:$GEN_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
38-
# push Generic GPU image to ECR
39-
- $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
40-
- docker push $PREPROD_IMAGE:$GEN_GPU_TAG
41-
42-
# launch remote GPU instance
43-
- prefix='ml.'
44-
- instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
45-
- create-key-pair
46-
- launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
47-
48-
# run generic gpu local tests on remote host
49-
- printf "$SETUP_CMDS" > $SETUP_FILE
50-
- generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local $TEST_OPTS"
51-
- test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
52-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-gen-gpu-tests.yml"
53-
54-
# run GPU sagemaker integration tests
55-
- test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker -n auto --reruns 3 --reruns-delay 15 --instance-type $GPU_INSTANCE_TYPE $TEST_OPTS"
56-
- execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-gen-gpu-tests.yml"
57-
finally:
58-
# shut down remote GPU instance
59-
- cleanup-gpu-instances
60-
- cleanup-key-pairs
61-
62-
# remove ECR image
63-
- aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GEN_GPU_TAG
27+
- PY_COLORS=0

buildspec-release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ phases:
1212
# run unit tests
1313
- AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
1414
AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
15-
tox -e py36,py37 --parallel all -- test/unit
15+
tox -e py38 --parallel all -- test/unit
1616

1717
# publish the release to github
1818
- git-release --publish

buildspec.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ phases:
1010
- tox -e flake8,twine
1111

1212
# run unit tests
13-
- tox -e py36,py37 --parallel all test/unit
13+
- tox -e py38 --parallel all test/unit
Binary file not shown.

setup.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def read_version():
4545
"botocore==1.19.34",
4646
"requests-mock",
4747
"awscli==1.18.194",
48+
"protobuf>=3.20,<3.21"
4849
]
4950

5051
if sys.version_info.major > 2:
@@ -53,26 +54,27 @@ def read_version():
5354
setup(
5455
name="sagemaker_tensorflow_training",
5556
version=read_version(),
56-
description="Open source library for creating "
57-
"TensorFlow containers to run on Amazon SageMaker.",
57+
description="Open source library for using "
58+
"TensorFlow to train models on Amazon SageMaker.",
5859
packages=find_packages(where="src", exclude=("test",)),
5960
package_dir={"": "src"},
6061
py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")],
6162
long_description=read("README.rst"),
6263
author="Amazon Web Services",
63-
url="https://github.com/aws/sagemaker-tensorflow-containers",
64+
url="https://github.com/aws/sagemaker-tensorflow-training-toolkit",
6465
license="Apache License 2.0",
6566
classifiers=[
6667
"Development Status :: 5 - Production/Stable",
6768
"Intended Audience :: Developers",
6869
"Natural Language :: English",
6970
"License :: OSI Approved :: Apache Software License",
7071
"Programming Language :: Python",
71-
"Programming Language :: Python :: 3.6",
7272
"Programming Language :: Python :: 3.7",
73+
"Programming Language :: Python :: 3.8",
74+
"Programming Language :: Python :: 3.9",
7375
],
7476
install_requires=[
75-
"sagemaker-training>=3.7.1",
77+
"sagemaker-training>=4.1.0",
7678
"numpy",
7779
"scipy",
7880
"sklearn",

test/conftest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,12 @@
6262
def pytest_addoption(parser):
6363
parser.addoption("--build-image", "-B", action="store_true")
6464
parser.addoption("--push-image", "-P", action="store_true")
65-
parser.addoption("--dockerfile-type", "-T", choices=["dlc.cpu", "dlc.gpu", "tf"], default="tf")
65+
parser.addoption("--dockerfile-type", "-T", choices=["dlc.cpu", "dlc.gpu", "tf.gpu", "tf.cpu"], default="tf.cpu")
6666
parser.addoption("--dockerfile", "-D", default=None)
6767
parser.addoption("--docker-base-name", default="sagemaker-tensorflow-training")
6868
parser.addoption("--tag", default=None)
6969
parser.addoption("--region", default="us-west-2")
70-
parser.addoption("--framework-version", default="2.2.0")
70+
parser.addoption("--framework-version", default="2.5.0")
7171
parser.addoption("--processor", default="cpu", choices=["cpu", "gpu", "cpu,gpu"])
7272
parser.addoption("--py-version", default="3", choices=["2", "3", "2,3"])
7373
parser.addoption("--account-id", default="142577830533")
@@ -158,7 +158,7 @@ def account_id(request):
158158
@pytest.fixture
159159
def instance_type(request, processor):
160160
provided_instance_type = request.config.getoption("--instance-type")
161-
default_instance_type = "ml.c4.xlarge" if processor == "cpu" else "ml.p2.xlarge"
161+
default_instance_type = "ml.c4.xlarge" if processor == "cpu" else "ml.p3.2xlarge"
162162
return provided_instance_type if provided_instance_type is not None else default_instance_type
163163

164164

test/container/2.2.0/Dockerfile.dlc.cpu

Lines changed: 0 additions & 6 deletions
This file was deleted.

test/container/2.2.0/Dockerfile.dlc.gpu

Lines changed: 0 additions & 6 deletions
This file was deleted.

test/container/2.3.1/Dockerfile.tf

Lines changed: 0 additions & 7 deletions
This file was deleted.

test/container/2.3.1/Dockerfile.dlc.cpu renamed to test/container/2.7.1/Dockerfile.dlc.cpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ARG region
2-
FROM 763104351884.dkr.ecr.$region.amazonaws.com/tensorflow-training:2.3.1-cpu-py37
2+
FROM 763104351884.dkr.ecr.$region.amazonaws.com/tensorflow-training:2.7.1-cpu-py38-ubuntu20.04-sagemaker
33

44
COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
55
RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \

test/container/2.3.1/Dockerfile.dlc.gpu renamed to test/container/2.7.1/Dockerfile.dlc.gpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
ARG region
2-
FROM 763104351884.dkr.ecr.$region.amazonaws.com/tensorflow-training:2.3.1-gpu-py37-cu110-ubuntu18.04
2+
FROM 763104351884.dkr.ecr.$region.amazonaws.com/tensorflow-training:2.7.1-gpu-py38-cu112-ubuntu20.04-sagemaker
33

44
COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
55
RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
FROM tensorflow/tensorflow:2.3.0-gpu
1+
FROM tensorflow/tensorflow:2.7.1
22

33
ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main
44

55
COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
66
RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \
77
rm /sagemaker_tensorflow_training.tar.gz
8+
RUN pip install --no-cache-dir tensorflow-io
9+
RUN apt-get update && apt-get install -y --no-install-recommends openssh-server && mkdir -p /var/run/sshd
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
FROM tensorflow/tensorflow:2.7.1-gpu
2+
3+
ENV SAGEMAKER_TRAINING_MODULE sagemaker_tensorflow_container.training:main
4+
5+
COPY dist/sagemaker_tensorflow_training-*.tar.gz /sagemaker_tensorflow_training.tar.gz
6+
RUN pip install --upgrade --no-cache-dir /sagemaker_tensorflow_training.tar.gz && \
7+
rm /sagemaker_tensorflow_training.tar.gz
8+
RUN pip install --no-cache-dir tensorflow-io
9+
RUN apt-key del 7fa2af80 \
10+
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
11+
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub \
12+
&& apt-get update \
13+
&& apt-get install -y --no-install-recommends openssh-server && mkdir -p /var/run/sshd

test/integration/local/test_horovod.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_distributed_training_horovod_gpu(
3535

3636
@pytest.mark.skip_gpu
3737
@pytest.mark.skip_generic
38-
@pytest.mark.parametrize("instances, processes", [(1, 2), (2, 1), (2, 2), (5, 2)])
38+
@pytest.mark.parametrize("instances, processes", [(2, 2)])
3939
def test_distributed_training_horovod_cpu(
4040
instances, processes, sagemaker_local_session, image_uri, tmpdir, framework_version
4141
):

test/integration/local/test_training.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def py_full_version(py_version): # noqa: F811
2727
if py_version == "2":
2828
return "2.7"
2929
else:
30-
return "3.6"
30+
return "3.8"
3131

3232

3333
@pytest.mark.skip_gpu
@@ -46,13 +46,13 @@ def test_mnist_cpu(sagemaker_local_session, image_uri, tmpdir, framework_version
4646
_assert_files_exist_in_tar(output_path, ["my_model.h5"])
4747

4848

49-
@pytest.mark.skip_gpu
49+
@pytest.mark.skip
5050
def test_distributed_training_cpu_no_ps(
5151
sagemaker_local_session, image_uri, tmpdir, framework_version
5252
):
5353
output_path = "file://{}".format(tmpdir)
5454
run_tf_training(
55-
script=os.path.join(RESOURCE_PATH, "mnist", "mnist_estimator.py"),
55+
script=os.path.join(RESOURCE_PATH, "mnist", "mnist_custom.py"),
5656
instance_type="local",
5757
instance_count=2,
5858
sagemaker_local_session=sagemaker_local_session,
@@ -66,11 +66,11 @@ def test_distributed_training_cpu_no_ps(
6666
_assert_files_exist_in_tar(output_path, TF_CHECKPOINT_FILES)
6767

6868

69-
@pytest.mark.skip_gpu
69+
@pytest.mark.skip
7070
def test_distributed_training_cpu_ps(sagemaker_local_session, image_uri, tmpdir, framework_version):
7171
output_path = "file://{}".format(tmpdir)
7272
run_tf_training(
73-
script=os.path.join(RESOURCE_PATH, "mnist", "mnist_estimator.py"),
73+
script=os.path.join(RESOURCE_PATH, "mnist", "mnist_custom.py"),
7474
instance_type="local",
7575
instance_count=2,
7676
sagemaker_local_session=sagemaker_local_session,

0 commit comments

Comments
 (0)