Skip to content

Commit 435741b

Browse files
committed
Merge branch 'main' of https://github.com/oracle/accelerated-data-science into ODSC-47734/add_support_to_cancel_all_runs
2 parents 0ad891a + fa6f315 commit 435741b

File tree

7 files changed

+186
-69
lines changed

7 files changed

+186
-69
lines changed

.gitleaks.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ useDefault = true
1313
'''example-password''',
1414
'''this-is-not-the-secret''',
1515
'''<redacted>''',
16+
'''security_token''',
1617
# NVIDIA_GPGKEY_SUM from public documentation:
1718
# https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/centos7/base/Dockerfile
1819
'''d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87'''

ads/model/artifact_uploader.py

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Dict, Optional
1111

1212
from ads.common import utils
13+
from ads.common.object_storage_details import ObjectStorageDetails
1314
from ads.model.common import utils as model_utils
1415
from ads.model.service.oci_datascience_model import OCIDataScienceModel
1516

@@ -29,7 +30,10 @@ def __init__(self, dsc_model: OCIDataScienceModel, artifact_path: str):
2930
artifact_path: str
3031
The model artifact location.
3132
"""
32-
if not os.path.exists(artifact_path):
33+
if not (
34+
ObjectStorageDetails.is_oci_path(artifact_path)
35+
or os.path.exists(artifact_path)
36+
):
3337
raise ValueError(f"The `{artifact_path}` does not exist")
3438

3539
self.dsc_model = dsc_model
@@ -45,7 +49,7 @@ def upload(self):
4549
) as progress:
4650
self.progress = progress
4751
self.progress.update("Preparing model artifacts ZIP archive.")
48-
self._prepare_artiact_tmp_zip()
52+
self._prepare_artifact_tmp_zip()
4953
self.progress.update("Uploading model artifacts.")
5054
self._upload()
5155
self.progress.update(
@@ -55,22 +59,19 @@ def upload(self):
5559
except Exception:
5660
raise
5761
finally:
58-
self._remove_artiact_tmp_zip()
62+
self._remove_artifact_tmp_zip()
5963

60-
def _prepare_artiact_tmp_zip(self) -> str:
64+
def _prepare_artifact_tmp_zip(self) -> str:
6165
"""Prepares model artifacts ZIP archive.
6266
63-
Parameters
64-
----------
65-
progress: (TqdmProgressBar, optional). Defaults to `None`.
66-
The progress indicator.
67-
6867
Returns
6968
-------
7069
str
7170
Path to the model artifact ZIP archive.
7271
"""
73-
if os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith(
72+
if ObjectStorageDetails.is_oci_path(self.artifact_path):
73+
self.artifact_zip_path = self.artifact_path
74+
elif os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith(
7475
".zip"
7576
):
7677
self.artifact_zip_path = self.artifact_path
@@ -80,7 +81,7 @@ def _prepare_artiact_tmp_zip(self) -> str:
8081
)
8182
return self.artifact_zip_path
8283

83-
def _remove_artiact_tmp_zip(self):
84+
def _remove_artifact_tmp_zip(self):
8485
"""Removes the temporarily created artifact zip archive."""
8586
if (
8687
self.artifact_zip_path
@@ -112,7 +113,10 @@ class LargeArtifactUploader(ArtifactUploader):
112113
Attributes
113114
----------
114115
artifact_path: str
115-
The model artifact location.
116+
The model artifact location. Possible values are:
117+
- object storage path to zip archive. Example: `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`.
118+
- local path to zip archive. Example: `./mymodel.zip`.
119+
- local path to folder with artifacts. Example: `./mymodel`.
116120
artifact_zip_path: str
117121
The uri of the zip of model artifact.
118122
auth: dict
@@ -124,6 +128,8 @@ class LargeArtifactUploader(ArtifactUploader):
124128
The OCI Object Storage URI where model artifacts will be copied to.
125129
The `bucket_uri` is only necessary for uploading large artifacts which
126130
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
131+
.. versionadded:: 2.8.10
132+
If `artifact_path` is an object storage path to a zip archive, `bucket_uri` will be ignored.
127133
dsc_model: OCIDataScienceModel
128134
The data science model instance.
129135
overwrite_existing_artifact: bool
@@ -145,7 +151,7 @@ def __init__(
145151
self,
146152
dsc_model: OCIDataScienceModel,
147153
artifact_path: str,
148-
bucket_uri: str,
154+
bucket_uri: str = None,
149155
auth: Optional[Dict] = None,
150156
region: Optional[str] = None,
151157
overwrite_existing_artifact: Optional[bool] = True,
@@ -159,11 +165,16 @@ def __init__(
159165
dsc_model: OCIDataScienceModel
160166
The data science model instance.
161167
artifact_path: str
162-
The model artifact location.
163-
bucket_uri: str
168+
The model artifact location. Possible values are:
169+
- object storage path to zip archive. Example: `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`.
170+
- local path to zip archive. Example: `./mymodel.zip`.
171+
- local path to folder with artifacts. Example: `./mymodel`.
172+
bucket_uri: (str, optional). Defaults to `None`.
164173
The OCI Object Storage URI where model artifacts will be copied to.
165-
The `bucket_uri` is only necessary for uploading large artifacts which
174+
The `bucket_uri` is only necessary for uploading large local artifacts whose
166175
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
176+
.. versionadded:: 2.8.10
177+
If `artifact_path` is an object storage path to a zip archive, `bucket_uri` will be ignored.
167178
auth: (Dict, optional). Defaults to `None`.
168179
The default authentication is set using `ads.set_auth` API.
169180
If you need to override the default, use the `ads.common.auth.api_keys` or
@@ -179,11 +190,22 @@ def __init__(
179190
parallel_process_count: (int, optional).
180191
The number of worker processes to use in parallel for uploading individual parts of a multipart upload.
181192
"""
193+
self.auth = auth or dsc_model.auth
194+
if ObjectStorageDetails.is_oci_path(artifact_path):
195+
if not artifact_path.endswith(".zip"):
196+
raise ValueError(
197+
f"The `artifact_path={artifact_path}` is invalid."
198+
"The remote path for model artifact should be a zip archive, "
199+
"e.g. `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`."
200+
)
201+
if not utils.is_path_exists(uri=artifact_path, auth=self.auth):
202+
raise ValueError(f"The `{artifact_path}` does not exist.")
203+
bucket_uri = artifact_path
204+
182205
if not bucket_uri:
183206
raise ValueError("The `bucket_uri` must be provided.")
184207

185208
super().__init__(dsc_model=dsc_model, artifact_path=artifact_path)
186-
self.auth = auth or dsc_model.auth
187209
self.region = region or utils.extract_region(self.auth)
188210
self.bucket_uri = bucket_uri
189211
self.overwrite_existing_artifact = overwrite_existing_artifact
@@ -192,38 +214,38 @@ def __init__(
192214

193215
def _upload(self):
194216
"""Uploads model artifacts to the model catalog."""
195-
self.progress.update("Copying model artifact to the Object Storage bucket")
196-
197217
bucket_uri = self.bucket_uri
198-
bucket_uri_file_name = os.path.basename(bucket_uri)
199-
200-
if not bucket_uri_file_name:
201-
bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip")
202-
elif not bucket_uri.lower().endswith(".zip"):
203-
bucket_uri = f"{bucket_uri}.zip"
204-
205-
if not self.overwrite_existing_artifact and utils.is_path_exists(
206-
uri=bucket_uri, auth=self.auth
207-
):
208-
raise FileExistsError(
209-
f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or "
210-
"set `overwrite_existing_artifact` to `True` if you wish to overwrite."
211-
)
218+
self.progress.update("Copying model artifact to the Object Storage bucket")
219+
if not bucket_uri == self.artifact_zip_path:
220+
bucket_uri_file_name = os.path.basename(bucket_uri)
221+
222+
if not bucket_uri_file_name:
223+
bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip")
224+
elif not bucket_uri.lower().endswith(".zip"):
225+
bucket_uri = f"{bucket_uri}.zip"
226+
227+
if not self.overwrite_existing_artifact and utils.is_path_exists(
228+
uri=bucket_uri, auth=self.auth
229+
):
230+
raise FileExistsError(
231+
f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or "
232+
"set `overwrite_existing_artifact` to `True` if you wish to overwrite."
233+
)
212234

213-
try:
214-
utils.upload_to_os(
215-
src_uri=self.artifact_zip_path,
216-
dst_uri=bucket_uri,
217-
auth=self.auth,
218-
parallel_process_count=self._parallel_process_count,
219-
force_overwrite=self.overwrite_existing_artifact,
220-
progressbar_description="Copying model artifact to the Object Storage bucket.",
221-
)
222-
except Exception as ex:
223-
raise RuntimeError(
224-
f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`."
225-
f"See Exception: {ex}"
226-
)
235+
try:
236+
utils.upload_to_os(
237+
src_uri=self.artifact_zip_path,
238+
dst_uri=bucket_uri,
239+
auth=self.auth,
240+
parallel_process_count=self._parallel_process_count,
241+
force_overwrite=self.overwrite_existing_artifact,
242+
progressbar_description="Copying model artifact to the Object Storage bucket.",
243+
)
244+
except Exception as ex:
245+
raise RuntimeError(
246+
f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`."
247+
f"See Exception: {ex}"
248+
)
227249

228250
self.progress.update("Exporting model artifact to the model catalog")
229251
self.dsc_model.export_model_artifact(bucket_uri=bucket_uri, region=self.region)

ads/model/datascience_model.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import pandas
1313
from ads.common import utils
14+
from ads.common.object_storage_details import ObjectStorageDetails
1415
from ads.config import COMPARTMENT_OCID, PROJECT_OCID
1516
from ads.feature_engineering.schema import Schema
1617
from ads.jobs.builders.base import Builder
@@ -548,6 +549,8 @@ def create(self, **kwargs) -> "DataScienceModel":
548549
The OCI Object Storage URI where model artifacts will be copied to.
549550
The `bucket_uri` is only necessary for uploading large artifacts which
550551
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
552+
.. versionadded:: 2.8.10
553+
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
551554
overwrite_existing_artifact: (bool, optional). Defaults to `True`.
552555
Overwrite target bucket artifact if exists.
553556
remove_existing_artifact: (bool, optional). Defaults to `True`.
@@ -636,6 +639,8 @@ def upload_artifact(
636639
The OCI Object Storage URI where model artifacts will be copied to.
637640
The `bucket_uri` is only necessary for uploading large artifacts which
638641
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
642+
.. versionadded:: 2.8.10
643+
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
639644
auth: (Dict, optional). Defaults to `None`.
640645
The default authentication is set using `ads.set_auth` API.
641646
If you need to override the default, use the `ads.common.auth.api_keys` or
@@ -668,6 +673,13 @@ def upload_artifact(
668673
"timeout": timeout,
669674
}
670675

676+
if ObjectStorageDetails.is_oci_path(self.artifact):
677+
if bucket_uri and bucket_uri != self.artifact:
678+
logger.warn(
679+
"The `bucket_uri` will be ignored and the value of `self.artifact` will be used instead."
680+
)
681+
bucket_uri = self.artifact
682+
671683
if bucket_uri or utils.folder_size(self.artifact) > _MAX_ARTIFACT_SIZE_IN_BYTES:
672684
if not bucket_uri:
673685
raise ModelArtifactSizeError(

ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
HTML_PATH = os.path.join(_cwd, "resources", "template.html")
3131
CONFIG_PATH = os.path.join(_cwd, "resources", "config.yaml")
3232
PYTHON_VER_PATTERN = "^([3])(\.[6-9])(\.\d+)?$"
33-
PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/Ri7zFc_h91sxMdgnza9Qnqw3Ina8hf8wzDvEpAnUXMDOnUR1U1fpsaBUjUfgPgIq/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"
33+
PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"
3434

3535
TESTS = {
3636
"score_py": {
@@ -195,7 +195,9 @@ def check_runtime_yml(file_path) -> Tuple[bool, str]:
195195
return False, TESTS["runtime_path_exist"]["error_msg"]
196196
else:
197197
TESTS["runtime_path_exist"]["success"] = False
198-
return False, TESTS["runtime_path_exist"]["error_msg"]
198+
TESTS["runtime_path_exist"][
199+
"error_msg"
200+
] = "WARNING: Unable to validate if INFERENCE_ENV_PATH exists. Please check if you have internet access."
199201
else:
200202
bucket_name = env_path.username
201203
namespace = env_path.hostname

docs/source/release_notes.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,24 @@
22
Release Notes
33
=============
44

5+
2.8.10
6+
------
7+
Release date: September 27, 2023
8+
9+
* Improved the ``LargeArtifactUploader`` class to understand OCI paths to upload model artifacts to the model catalog by reference.
10+
* Removed ``ADSDataset`` runtime dependency on ``geopandas``.
11+
* Fixed a bug in the progress bar during model registration.
12+
* Fixed a bug where session variable could be referenced before assignment.
13+
* Fixed a bug with model artifact save.
14+
* Fixed a bug with pipelines step.
15+
516
2.8.9
617
-----
718
Release date: September 5, 2023
819

920
* Upgraded the ``scikit-learn`` dependency to ``>=1.0``.
1021
* Upgraded the ``pandas`` dependency to ``>1.2.1,<2.1`` to allow you to use ADS with pandas 2.0.
22+
* Implemented multi-part upload in the ``ArtifactUploader`` to upload model artifacts to the model catalog.
1123
* Fixed the "Attribute not found" error, when ``deploy()`` called twice in ``GenericModel``.
1224
* Fixed the fetch of the security token, when the relative path for the ``security_token_file`` is provided (used in session token-based authentication).
1325

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ build-backend = "flit_core.buildapi"
1919

2020
# Required
2121
name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below
22-
version = "2.8.9"
22+
version = "2.8.10"
2323

2424
# Optional
2525
description = "Oracle Accelerated Data Science SDK"
@@ -57,7 +57,7 @@ dependencies = [
5757
"asteval>=0.9.25",
5858
"cerberus>=1.3.4",
5959
"cloudpickle>=1.6.0",
60-
"fsspec>=0.8.7,<2023.9.1", # v2.9.1 introduced issues, releved by unit tests
60+
"fsspec>=0.8.7,<2023.9.1", # v2023.9.1 introduced issues, revealed by unit tests
6161
"gitpython>=3.1.2",
6262
"jinja2>=2.11.2",
6363
"matplotlib>=3.1.3",
@@ -109,7 +109,7 @@ onnx = [
109109
"lightgbm==3.3.1",
110110
"onnx>=1.12.0",
111111
"onnxmltools>=1.10.0",
112-
"onnxruntime>=1.10.0,<1.16", # v1.16 introduced issues https://github.com/microsoft/onnxruntime/issues/17631, releved by unit tests
112+
"onnxruntime>=1.10.0,<1.16", # v1.16 introduced issues https://github.com/microsoft/onnxruntime/issues/17631, revealed by unit tests
113113
"oracle_ads[viz]",
114114
"protobuf<=3.20",
115115
"skl2onnx>=1.10.4",

0 commit comments

Comments
 (0)