Skip to content

Commit d7a76ff

Browse files
committed
backmerged
2 parents 6035770 + c6a3a2b commit d7a76ff

25 files changed

+743
-69
lines changed

ads/common/auth.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ def create_signer(
416416
>>> auth = ads.auth.create_signer(auth_type="security_token", config=config) # security token authentication created based on provided config
417417
"""
418418
if signer or signer_callable:
419-
configuration = ads.telemetry.update_oci_client_config()
419+
configuration = ads.telemetry.update_oci_client_config(config)
420420
if signer_callable:
421421
signer = signer_callable(**signer_kwargs)
422422
signer_dict = {
@@ -479,7 +479,7 @@ def default_signer(client_kwargs: Optional[Dict] = None) -> Dict:
479479
"""
480480
auth_state = AuthState()
481481
if auth_state.oci_signer or auth_state.oci_signer_callable:
482-
configuration = ads.telemetry.update_oci_client_config()
482+
configuration = ads.telemetry.update_oci_client_config(auth_state.oci_config)
483483
signer = auth_state.oci_signer
484484
if auth_state.oci_signer_callable:
485485
signer_kwargs = auth_state.oci_signer_kwargs or {}

ads/model/artifact_uploader.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,11 @@ class LargeArtifactUploader(ArtifactUploader):
128128
The OCI Object Storage URI where model artifacts will be copied to.
129129
The `bucket_uri` is only necessary for uploading large artifacts which
130130
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
131+
131132
.. versionadded:: 2.8.10
132-
If artifact_path is object storage path to a zip archive, bucket_uri will be ignored.
133+
134+
If artifact_path is object storage path to a zip archive, bucket_uri will be ignored.
135+
133136
dsc_model: OCIDataScienceModel
134137
The data science model instance.
135138
overwrite_existing_artifact: bool
@@ -173,8 +176,11 @@ def __init__(
173176
The OCI Object Storage URI where model artifacts will be copied to.
174177
The `bucket_uri` is only necessary for uploading large artifacts from local which
175178
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
179+
176180
.. versionadded:: 2.8.10
177-
If `artifact_path` is object storage path to a zip archive, `bucket_uri` will be ignored.
181+
182+
If `artifact_path` is object storage path to a zip archive, `bucket_uri` will be ignored.
183+
178184
auth: (Dict, optional). Defaults to `None`.
179185
The default authentication is set using `ads.set_auth` API.
180186
If you need to override the default, use the `ads.common.auth.api_keys` or

ads/model/datascience_model.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,11 @@ def create(self, **kwargs) -> "DataScienceModel":
549549
The OCI Object Storage URI where model artifacts will be copied to.
550550
The `bucket_uri` is only necessary for uploading large artifacts which
551551
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
552+
552553
.. versionadded:: 2.8.10
553-
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
554+
555+
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
556+
554557
overwrite_existing_artifact: (bool, optional). Defaults to `True`.
555558
Overwrite target bucket artifact if exists.
556559
remove_existing_artifact: (bool, optional). Defaults to `True`.
@@ -639,8 +642,11 @@ def upload_artifact(
639642
The OCI Object Storage URI where model artifacts will be copied to.
640643
The `bucket_uri` is only necessary for uploading large artifacts which
641644
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
645+
642646
.. versionadded:: 2.8.10
643-
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
647+
648+
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
649+
644650
auth: (Dict, optional). Defaults to `None`.
645651
The default authentication is set using `ads.set_auth` API.
646652
If you need to override the default, use the `ads.common.auth.api_keys` or

ads/opctl/distributed/cli.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,14 @@ def commands():
3030
"-f",
3131
help="Distributed training framework type",
3232
type=click.Choice(
33-
["dask", "horovod-tensorflow", "horovod-pytorch", "pytorch", "tensorflow"]
33+
[
34+
"dask",
35+
"horovod-tensorflow",
36+
"horovod-pytorch",
37+
"pytorch",
38+
"tensorflow",
39+
"ray",
40+
]
3441
),
3542
default=None,
3643
required=True,

docs/source/release_notes.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22
Release Notes
33
=============
44

5+
2.8.11
6+
------
7+
Release date: October 18, 2023
8+
9+
* Added support to mount file systems in Data Science notebook sessions and jobs.
10+
* Added support to cancel all job runs in the ADS ``api`` and ``opctl`` commands.
11+
* Updated ``ads.set_auth()`` to use both ``config`` and ``signer`` when provided.
12+
* Fixed a bug when initializing distributed training artifacts with "Ray" framework.
13+
514
2.8.10
615
------
716
Release date: September 27, 2023

docs/source/user_guide/jobs/infra_and_runtime.rst

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,61 @@ see also `ADS Logging <../logging/logging.html>`_.
152152

153153
With logging configured, you can call :py:meth:`~ads.jobs.DataScienceJobRun.watch` method to stream the logs.
154154

155+
156+
Mounting File Systems
157+
---------------------
158+
159+
Data Science Job supports mounting multiple types of file systems,
160+
see `Data Science Job Mounting File Systems <https://docs.oracle.com/en-us/iaas/data-science/using/jobs-create.htm>`_. A maximum number of 5 file systems are
161+
allowed to be mounted for each Data Science Job. You can specify a list of file systems to be mounted
162+
by calling :py:meth:`~ads.jobs.DataScienceJob.with_storage_mount()`. For each file system to be mounted,
163+
you need to pass a dictionary with ``src`` and ``dest`` as keys. For example, you can pass
164+
``<mount_target_ip_address>@<export_path>`` as the value for ``src`` to mount OCI File Storage and you can also
165+
pass ``oci://<bucket_name>@<namespace>/<prefix>`` to mount OCI Object Storage. The value of
166+
``dest`` indicates the path and directory to which you want to mount the file system and must be in the
167+
format as ``<destination_path>/<destination_directory_name>``. The ``<destination_directory_name>`` is required
168+
while the ``<destination_path>`` is optional. The ``<destination_path>`` must start with character ``/`` if provided.
169+
If not, the file systems will be mounted to ``/mnt/<destination_directory_name>`` by default.
170+
171+
172+
.. tabs::
173+
174+
.. code-tab:: python
175+
:caption: Python
176+
177+
from ads.jobs import DataScienceJob
178+
179+
infrastructure = (
180+
DataScienceJob()
181+
.with_log_group_id("<log_group_ocid>")
182+
.with_log_id("<log_ocid>")
183+
.with_storage_mount(
184+
{
185+
"src" : "<mount_target_ip_address>@<export_path>",
186+
"dest" : "<destination_path>/<destination_directory_name>"
187+
}, # mount oci file storage to path "<destination_path>/<destination_directory_name>"
188+
{
189+
"src" : "oci://<bucket_name>@<namespace>/<prefix>",
190+
"dest" : "<destination_directory_name>"
191+
} # mount oci object storage to path "/mnt/<destination_directory_name>"
192+
)
193+
)
194+
195+
.. code-tab:: yaml
196+
:caption: YAML
197+
198+
kind: infrastructure
199+
type: dataScienceJob
200+
spec:
201+
logGroupId: <log_group_ocid>
202+
logId: <log_ocid>
203+
storageMount:
204+
- src: <mount_target_ip_address>@<export_path>
205+
dest: <destination_path>/<destination_directory_name>
206+
- src: oci://<bucket_name>@<namespace>/<prefix>
207+
dest: <destination_directory_name>
208+
209+
155210
Runtime
156211
=======
157212

docs/source/user_guide/jobs/tabs/infra_config.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@
2222
.with_shape_config_details(memory_in_gbs=16, ocpus=1)
2323
# Minimum/Default block storage size is 50 (GB).
2424
.with_block_storage_size(50)
25+
# A maximum number of 5 file systems are allowed to be mounted for a job.
26+
.with_storage_mount(
27+
{
28+
"src" : "<mount_target_ip_address>@<export_path>",
29+
"dest" : "<destination_path>/<destination_directory_name>"
30+
}, # mount oci file storage to path "<destination_path>/<destination_directory_name>"
31+
{
32+
"src" : "oci://<bucket_name>@<namespace>/<prefix>",
33+
"dest" : "<destination_directory_name>"
34+
} # mount oci object storage to path "/mnt/<destination_directory_name>"
35+
)
2536
)
2637

2738
.. code-tab:: yaml
@@ -40,3 +51,8 @@
4051
ocpus: 1
4152
shapeName: VM.Standard.E3.Flex
4253
subnetId: <subnet_ocid>
54+
storageMount:
55+
- src: <mount_target_ip_address>@<export_path>
56+
dest: <destination_path>/<destination_directory_name>
57+
- src: oci://<bucket_name>@<namespace>/<prefix>
58+
dest: <destination_directory_name>

docs/source/user_guide/jobs/tabs/llama2_full.rst

Lines changed: 16 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -14,42 +14,32 @@
1414
.with_compartment_id("<compartment_ocid>")
1515
.with_project_id("<project_ocid>")
1616
.with_subnet_id("<subnet_ocid>")
17-
.with_shape_name("VM.GPU.A10.1")
17+
.with_shape_name("VM.GPU.A10.2")
1818
.with_block_storage_size(256)
1919
)
2020
.with_runtime(
2121
PyTorchDistributedRuntime()
2222
# Specify the service conda environment by slug name.
23-
.with_service_conda("pytorch20_p39_gpu_v1")
23+
.with_service_conda("pytorch20_p39_gpu_v2")
2424
.with_git(
2525
url="https://github.com/facebookresearch/llama-recipes.git",
26-
commit="03faba661f079ee1ecaeb66deaa6bdec920a7bab"
26+
commit="1aecd00924738239f8d86f342b36bacad180d2b3"
2727
)
2828
.with_dependency(
2929
pip_pkg=" ".join([
30-
"'accelerate>=0.21.0'",
31-
"appdirs",
32-
"loralib",
33-
"bitsandbytes==0.39.1",
34-
"black",
35-
"'black[jupyter]'",
36-
"datasets",
37-
"fire",
38-
"'git+https://github.com/huggingface/peft.git'",
39-
"'transformers>=4.31.0'",
40-
"sentencepiece",
41-
"py7zr",
42-
"scipy",
43-
"optimum"
30+
"--extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0",
31+
"git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4",
32+
"appdirs==1.4.4",
33+
"llama-recipes==0.0.1",
34+
"py7zr==0.20.6",
4435
])
4536
)
4637
.with_output("/home/datascience/outputs", "oci://bucket@namespace/outputs/$JOB_RUN_OCID")
4738
.with_command(" ".join([
48-
"torchrun llama_finetuning.py",
39+
"torchrun examples/finetuning.py",
4940
"--enable_fsdp",
5041
"--pure_bf16",
5142
"--batch_size_training 1",
52-
"--micro_batch_size 1",
5343
"--model_name $MODEL_NAME",
5444
"--dist_checkpoint_root_folder /home/datascience/outputs",
5545
"--dist_checkpoint_folder fine-tuned"
@@ -87,36 +77,26 @@
8777
spec:
8878
git:
8979
url: https://github.com/facebookresearch/llama-recipes.git
90-
commit: 03faba661f079ee1ecaeb66deaa6bdec920a7bab
80+
commit: 1aecd00924738239f8d86f342b36bacad180d2b3
9181
command: >-
9282
torchrun llama_finetuning.py
9383
--enable_fsdp
9484
--pure_bf16
9585
--batch_size_training 1
96-
--micro_batch_size 1
9786
--model_name $MODEL_NAME
9887
--dist_checkpoint_root_folder /home/datascience/outputs
9988
--dist_checkpoint_folder fine-tuned
10089
replicas: 2
10190
conda:
10291
type: service
103-
slug: pytorch20_p39_gpu_v1
92+
slug: pytorch20_p39_gpu_v2
10493
dependencies:
10594
pipPackages: >-
106-
'accelerate>=0.21.0'
107-
appdirs
108-
loralib
109-
bitsandbytes==0.39.1
110-
black
111-
'black[jupyter]'
112-
datasets
113-
fire
114-
'git+https://github.com/huggingface/peft.git'
115-
'transformers>=4.31.0'
116-
sentencepiece
117-
py7zr
118-
scipy
119-
optimum
95+
--extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0
96+
git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4
97+
llama-recipes==0.0.1
98+
appdirs==1.4.4
99+
py7zr==0.20.6
120100
outputDir: /home/datascience/outputs
121101
outputUri: oci://bucket@namespace/outputs/$JOB_RUN_OCID
122102
env:

docs/source/user_guide/jobs/tabs/quick_start_job.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,17 @@
2424
.with_shape_config_details(memory_in_gbs=16, ocpus=1)
2525
# Minimum/Default block storage size is 50 (GB).
2626
.with_block_storage_size(50)
27+
# A maximum number of 5 file systems are allowed to be mounted for a job.
28+
.with_storage_mount(
29+
{
30+
"src" : "<mount_target_ip_address>@<export_path>",
31+
"dest" : "<destination_path>/<destination_directory_name>"
32+
}, # mount oci file storage to path "<destination_path>/<destination_directory_name>"
33+
{
34+
"src" : "oci://<bucket_name>@<namespace>/<prefix>",
35+
"dest" : "<destination_directory_name>"
36+
} # mount oci object storage to path "/mnt/<destination_directory_name>"
37+
)
2738
)
2839
.with_runtime(
2940
PythonRuntime()
@@ -59,6 +70,11 @@
5970
ocpus: 1
6071
shapeName: VM.Standard.E3.Flex
6172
subnetId: <subnet_ocid>
73+
storageMount:
74+
- src: <mount_target_ip_address>@<export_path>
75+
dest: <destination_path>/<destination_directory_name>
76+
- src: oci://<bucket_name>@<namespace>/<prefix>
77+
dest: <destination_directory_name>
6278
runtime:
6379
kind: runtime
6480
type: python

docs/source/user_guide/model_registration/_template/deploy.rst

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@ You can use the ``.deploy()`` method to deploy a model. You must first save the
77

88
The ``.deploy()`` method returns a ``ModelDeployment`` object. Specify deployment attributes such as display name, instance type, number of instances, maximum router bandwidth, and logging groups. The API takes the following parameters:
99

10-
See `API documentation <../../ads.model.html#id1>`__ for more details about the parameters.
11-
10+
See :py:meth:`~ads.model.GenericModel.deploy` for more details about the parameters.
1211

1312
.. admonition:: Tips
1413
:class: note
1514

1615
* Providing ``deployment_access_log_id`` and ``deployment_predict_log_id`` helps in debugging your model inference setup.
17-
* Default Load Balancer configuration has bandwidth of 10 Mbps. `Refer service document to help you choose the right setup. <https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_create.htm>`_
18-
* Check for supported instance shapes `here <https://docs.oracle.com/en-us/iaas/data-science/using/overview.htm#supported-shapes>`_ .
16+
* Default Load Balancer configuration has bandwidth of 10 Mbps. `Refer to the service documentation to help you choose the right setup. <https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_create.htm>`_
17+
* Check for supported instance shapes `here <https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm>`_ .

0 commit comments

Comments
 (0)