Skip to content

Commit 541a724

Browse files
authored
fix: use unique name for integration job hyperparameter tuning job (#204)
1 parent 0b0ddb7 commit 541a724

File tree

6 files changed

+46
-20
lines changed

6 files changed

+46
-20
lines changed

.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[flake8]
2-
application_import_names = sagemaker_tensorflow_container, test
2+
application_import_names = sagemaker_tensorflow_container, test, utils
33
import-order-style = google

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ dist
66
*.iml
77
**/.ipynb_checkpoints
88
**/.python-version
9-
.tox
9+
.tox
10+
*~

test/integration/sagemaker/test_horovod.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
1+
# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License"). You
44
# may not use this file except in compliance with the License. A copy of
@@ -17,6 +17,8 @@
1717
import sagemaker
1818
from sagemaker.tensorflow import TensorFlow
1919

20+
from utils import unique_name_from_base
21+
2022
RESOURCE_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'resources')
2123

2224

@@ -41,7 +43,7 @@ def test_distributed_training_horovod(sagemaker_session,
4143
'sagemaker_mpi_num_of_processes_per_host': 1},
4244
sagemaker_session=sagemaker_session)
4345

44-
estimator.fit()
46+
estimator.fit(job_name=unique_name_from_base('test-tf-horovod'))
4547

4648
model_data_source = sagemaker.local.data.get_data_source_instance(
4749
estimator.model_data, sagemaker_session)

test/integration/sagemaker/test_mnist.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
1+
# Copyright 2017-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License"). You
44
# may not use this file except in compliance with the License. A copy of
@@ -19,6 +19,7 @@
1919
from six.moves.urllib.parse import urlparse
2020

2121
from sagemaker_tensorflow_container.training import SAGEMAKER_PARAMETER_SERVER_ENABLED
22+
from utils import unique_name_from_base
2223

2324

2425
def test_mnist(sagemaker_session, ecr_image, instance_type, framework_version):
@@ -31,12 +32,11 @@ def test_mnist(sagemaker_session, ecr_image, instance_type, framework_version):
3132
sagemaker_session=sagemaker_session,
3233
image_name=ecr_image,
3334
framework_version=framework_version,
34-
py_version='py3',
35-
base_job_name='test-sagemaker-mnist')
35+
script_mode=True)
3636
inputs = estimator.sagemaker_session.upload_data(
3737
path=os.path.join(resource_path, 'mnist', 'data'),
3838
key_prefix='scriptmode/mnist')
39-
estimator.fit(inputs)
39+
estimator.fit(inputs, job_name=unique_name_from_base('test-sagemaker-mnist'))
4040
_assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
4141

4242

@@ -50,12 +50,11 @@ def test_distributed_mnist_no_ps(sagemaker_session, ecr_image, instance_type, fr
5050
sagemaker_session=sagemaker_session,
5151
image_name=ecr_image,
5252
framework_version=framework_version,
53-
py_version='py3',
54-
base_job_name='test-tf-sm-distributed-mnist')
53+
script_mode=True)
5554
inputs = estimator.sagemaker_session.upload_data(
5655
path=os.path.join(resource_path, 'mnist', 'data'),
5756
key_prefix='scriptmode/mnist')
58-
estimator.fit(inputs)
57+
estimator.fit(inputs, job_name=unique_name_from_base('test-tf-sm-distributed-mnist'))
5958
_assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
6059

6160

@@ -70,12 +69,11 @@ def test_distributed_mnist_ps(sagemaker_session, ecr_image, instance_type, frame
7069
sagemaker_session=sagemaker_session,
7170
image_name=ecr_image,
7271
framework_version=framework_version,
73-
py_version='py3',
74-
base_job_name='test-tf-sm-distributed-mnist')
72+
script_mode=True)
7573
inputs = estimator.sagemaker_session.upload_data(
7674
path=os.path.join(resource_path, 'mnist', 'data-distributed'),
7775
key_prefix='scriptmode/mnist-distributed')
78-
estimator.fit(inputs)
76+
estimator.fit(inputs, job_name=unique_name_from_base('test-tf-sm-distributed-mnist'))
7977
_assert_checkpoint_exists(sagemaker_session.boto_region_name, estimator.model_dir, 0)
8078
_assert_s3_file_exists(sagemaker_session.boto_region_name, estimator.model_data)
8179

@@ -104,9 +102,9 @@ def test_s3_plugin(sagemaker_session, ecr_image, instance_type, region, framewor
104102
sagemaker_session=sagemaker_session,
105103
image_name=ecr_image,
106104
framework_version=framework_version,
107-
py_version='py3',
108-
base_job_name='test-tf-sm-s3-mnist')
109-
estimator.fit('s3://sagemaker-sample-data-{}/tensorflow/mnist'.format(region))
105+
script_mode=True)
106+
estimator.fit('s3://sagemaker-sample-data-{}/tensorflow/mnist'.format(region),
107+
job_name=unique_name_from_base('test-tf-sm-s3-mnist'))
110108
_assert_s3_file_exists(region, estimator.model_data)
111109
_assert_checkpoint_exists(region, estimator.model_dir, 200)
112110

test/integration/sagemaker/test_tuning_model_dir.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from sagemaker.tensorflow import TensorFlow
1818
from sagemaker.tuner import HyperparameterTuner, IntegerParameter
1919

20+
from utils import unique_name_from_base
21+
2022

2123
def test_model_dir_with_training_job_name(sagemaker_session, ecr_image, instance_type, framework_version):
2224
resource_path = os.path.join(os.path.dirname(__file__), '../..', 'resources')
@@ -36,9 +38,8 @@ def test_model_dir_with_training_job_name(sagemaker_session, ecr_image, instance
3638
hyperparameter_ranges={'arbitrary_value': IntegerParameter(0, 1)},
3739
metric_definitions=[{'Name': 'accuracy', 'Regex': 'accuracy=([01])'}],
3840
max_jobs=1,
39-
max_parallel_jobs=1,
40-
base_tuning_job_name='test-tf-tuning-model-dir')
41+
max_parallel_jobs=1)
4142

4243
# User script has logic to check for the correct model_dir
43-
tuner.fit()
44+
tuner.fit(job_name=unique_name_from_base('test-tf-model-dir', max_length=32))
4445
tuner.wait()

test/integration/sagemaker/utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
from __future__ import absolute_import
14+
15+
import random
16+
import time
17+
18+
19+
def unique_name_from_base(base, max_length=63):
    """Build a name from ``base`` that is unlikely to collide across runs.

    The result has the form ``<base>-<timestamp>-<suffix>``, where
    ``<timestamp>`` is the current Unix time in whole seconds and
    ``<suffix>`` is a random 4-digit hexadecimal string. ``base`` is
    truncated on the right when needed so the whole name fits within
    ``max_length`` characters.

    Args:
        base (str): Human-readable prefix for the generated name.
        max_length (int): Maximum length of the returned name (default: 63).

    Returns:
        str: The generated name, at most ``max_length`` characters long.

    Raises:
        ValueError: If ``max_length`` is too small to hold the two ``-``
            separators, the timestamp and the random suffix.
    """
    unique = '%04x' % random.randrange(16**4)  # 4-digit hex
    ts = str(int(time.time()))
    # Two characters are reserved for the '-' separators.
    available_length = max_length - 2 - len(ts) - len(unique)
    if available_length < 0:
        # A negative slice bound would drop characters from the END of
        # ``base`` rather than cap its length, silently producing a name
        # longer than ``max_length``.
        raise ValueError(
            'max_length={} is too small; at least {} characters are needed '
            'for the timestamp and random suffix'.format(
                max_length, max_length - available_length))
    trimmed = base[:available_length]
    return '{}-{}-{}'.format(trimmed, ts, unique)

0 commit comments

Comments
 (0)