Skip to content

Commit da6eb53

Browse files
authored
ODSC-46642/pass model by reference (#342)
1 parent 6fd9b03 commit da6eb53

File tree

3 files changed

+166
-64
lines changed

3 files changed

+166
-64
lines changed

ads/model/artifact_uploader.py

Lines changed: 69 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing import Dict, Optional
1111

1212
from ads.common import utils
13+
from ads.common.object_storage_details import ObjectStorageDetails
1314
from ads.model.common import utils as model_utils
1415
from ads.model.service.oci_datascience_model import OCIDataScienceModel
1516

@@ -29,7 +30,10 @@ def __init__(self, dsc_model: OCIDataScienceModel, artifact_path: str):
2930
artifact_path: str
3031
The model artifact location.
3132
"""
32-
if not os.path.exists(artifact_path):
33+
if not (
34+
ObjectStorageDetails.is_oci_path(artifact_path)
35+
or os.path.exists(artifact_path)
36+
):
3337
raise ValueError(f"The `{artifact_path}` does not exist")
3438

3539
self.dsc_model = dsc_model
@@ -45,7 +49,7 @@ def upload(self):
4549
) as progress:
4650
self.progress = progress
4751
self.progress.update("Preparing model artifacts ZIP archive.")
48-
self._prepare_artiact_tmp_zip()
52+
self._prepare_artifact_tmp_zip()
4953
self.progress.update("Uploading model artifacts.")
5054
self._upload()
5155
self.progress.update(
@@ -55,22 +59,19 @@ def upload(self):
5559
except Exception:
5660
raise
5761
finally:
58-
self._remove_artiact_tmp_zip()
62+
self._remove_artifact_tmp_zip()
5963

60-
def _prepare_artiact_tmp_zip(self) -> str:
64+
def _prepare_artifact_tmp_zip(self) -> str:
6165
"""Prepares model artifacts ZIP archive.
6266
63-
Parameters
64-
----------
65-
progress: (TqdmProgressBar, optional). Defaults to `None`.
66-
The progress indicator.
67-
6867
Returns
6968
-------
7069
str
7170
Path to the model artifact ZIP archive.
7271
"""
73-
if os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith(
72+
if ObjectStorageDetails.is_oci_path(self.artifact_path):
73+
self.artifact_zip_path = self.artifact_path
74+
elif os.path.isfile(self.artifact_path) and self.artifact_path.lower().endswith(
7475
".zip"
7576
):
7677
self.artifact_zip_path = self.artifact_path
@@ -80,7 +81,7 @@ def _prepare_artiact_tmp_zip(self) -> str:
8081
)
8182
return self.artifact_zip_path
8283

83-
def _remove_artiact_tmp_zip(self):
84+
def _remove_artifact_tmp_zip(self):
8485
"""Removes the temporarily created artifact ZIP archive."""
8586
if (
8687
self.artifact_zip_path
@@ -112,7 +113,10 @@ class LargeArtifactUploader(ArtifactUploader):
112113
Attributes
113114
----------
114115
artifact_path: str
115-
The model artifact location.
116+
The model artifact location. Possible values are:
117+
- object storage path to zip archive. Example: `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`.
118+
- local path to zip archive. Example: `./mymodel.zip`.
119+
- local path to folder with artifacts. Example: `./mymodel`.
116120
artifact_zip_path: str
117121
The uri of the zip of model artifact.
118122
auth: dict
@@ -124,6 +128,8 @@ class LargeArtifactUploader(ArtifactUploader):
124128
The OCI Object Storage URI where model artifacts will be copied to.
125129
The `bucket_uri` is only necessary for uploading large artifacts whose
126130
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
131+
.. versionadded:: 2.8.10
132+
If `artifact_path` is an object storage path to a zip archive, `bucket_uri` will be ignored.
127133
dsc_model: OCIDataScienceModel
128134
The data science model instance.
129135
overwrite_existing_artifact: bool
@@ -145,7 +151,7 @@ def __init__(
145151
self,
146152
dsc_model: OCIDataScienceModel,
147153
artifact_path: str,
148-
bucket_uri: str,
154+
bucket_uri: str = None,
149155
auth: Optional[Dict] = None,
150156
region: Optional[str] = None,
151157
overwrite_existing_artifact: Optional[bool] = True,
@@ -159,11 +165,16 @@ def __init__(
159165
dsc_model: OCIDataScienceModel
160166
The data science model instance.
161167
artifact_path: str
162-
The model artifact location.
163-
bucket_uri: str
168+
The model artifact location. Possible values are:
169+
- object storage path to zip archive. Example: `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`.
170+
- local path to zip archive. Example: `./mymodel.zip`.
171+
- local path to folder with artifacts. Example: `./mymodel`.
172+
bucket_uri: (str, optional). Defaults to `None`.
164173
The OCI Object Storage URI where model artifacts will be copied to.
165-
The `bucket_uri` is only necessary for uploading large artifacts which
174+
The `bucket_uri` is only necessary for uploading large artifacts from local whose
166175
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
176+
.. versionadded:: 2.8.10
177+
If `artifact_path` is an object storage path to a zip archive, `bucket_uri` will be ignored.
167178
auth: (Dict, optional). Defaults to `None`.
168179
The default authentication is set using `ads.set_auth` API.
169180
If you need to override the default, use the `ads.common.auth.api_keys` or
@@ -179,11 +190,22 @@ def __init__(
179190
parallel_process_count: (int, optional).
180191
The number of worker processes to use in parallel for uploading individual parts of a multipart upload.
181192
"""
193+
self.auth = auth or dsc_model.auth
194+
if ObjectStorageDetails.is_oci_path(artifact_path):
195+
if not artifact_path.endswith(".zip"):
196+
raise ValueError(
197+
f"The `artifact_path={artifact_path}` is invalid."
198+
"The remote path for model artifact should be a zip archive, "
199+
"e.g. `oci://<bucket_name>@<namespace>/prefix/mymodel.zip`."
200+
)
201+
if not utils.is_path_exists(uri=artifact_path, auth=self.auth):
202+
raise ValueError(f"The `{artifact_path}` does not exist.")
203+
bucket_uri = artifact_path
204+
182205
if not bucket_uri:
183206
raise ValueError("The `bucket_uri` must be provided.")
184207

185208
super().__init__(dsc_model=dsc_model, artifact_path=artifact_path)
186-
self.auth = auth or dsc_model.auth
187209
self.region = region or utils.extract_region(self.auth)
188210
self.bucket_uri = bucket_uri
189211
self.overwrite_existing_artifact = overwrite_existing_artifact
@@ -192,38 +214,38 @@ def __init__(
192214

193215
def _upload(self):
194216
"""Uploads model artifacts to the model catalog."""
195-
self.progress.update("Copying model artifact to the Object Storage bucket")
196-
197217
bucket_uri = self.bucket_uri
198-
bucket_uri_file_name = os.path.basename(bucket_uri)
199-
200-
if not bucket_uri_file_name:
201-
bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip")
202-
elif not bucket_uri.lower().endswith(".zip"):
203-
bucket_uri = f"{bucket_uri}.zip"
204-
205-
if not self.overwrite_existing_artifact and utils.is_path_exists(
206-
uri=bucket_uri, auth=self.auth
207-
):
208-
raise FileExistsError(
209-
f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or "
210-
"set `overwrite_existing_artifact` to `True` if you wish to overwrite."
211-
)
218+
self.progress.update("Copying model artifact to the Object Storage bucket")
219+
if not bucket_uri == self.artifact_zip_path:
220+
bucket_uri_file_name = os.path.basename(bucket_uri)
221+
222+
if not bucket_uri_file_name:
223+
bucket_uri = os.path.join(bucket_uri, f"{self.dsc_model.id}.zip")
224+
elif not bucket_uri.lower().endswith(".zip"):
225+
bucket_uri = f"{bucket_uri}.zip"
226+
227+
if not self.overwrite_existing_artifact and utils.is_path_exists(
228+
uri=bucket_uri, auth=self.auth
229+
):
230+
raise FileExistsError(
231+
f"The bucket_uri=`{self.bucket_uri}` exists. Please use a new file name or "
232+
"set `overwrite_existing_artifact` to `True` if you wish to overwrite."
233+
)
212234

213-
try:
214-
utils.upload_to_os(
215-
src_uri=self.artifact_zip_path,
216-
dst_uri=bucket_uri,
217-
auth=self.auth,
218-
parallel_process_count=self._parallel_process_count,
219-
force_overwrite=self.overwrite_existing_artifact,
220-
progressbar_description="Copying model artifact to the Object Storage bucket.",
221-
)
222-
except Exception as ex:
223-
raise RuntimeError(
224-
f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`."
225-
f"See Exception: {ex}"
226-
)
235+
try:
236+
utils.upload_to_os(
237+
src_uri=self.artifact_zip_path,
238+
dst_uri=bucket_uri,
239+
auth=self.auth,
240+
parallel_process_count=self._parallel_process_count,
241+
force_overwrite=self.overwrite_existing_artifact,
242+
progressbar_description="Copying model artifact to the Object Storage bucket.",
243+
)
244+
except Exception as ex:
245+
raise RuntimeError(
246+
f"Failed to upload model artifact to the given Object Storage path `{self.bucket_uri}`."
247+
f"See Exception: {ex}"
248+
)
227249

228250
self.progress.update("Exporting model artifact to the model catalog")
229251
self.dsc_model.export_model_artifact(bucket_uri=bucket_uri, region=self.region)

ads/model/datascience_model.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
import pandas
1313
from ads.common import utils
14+
from ads.common.object_storage_details import ObjectStorageDetails
1415
from ads.config import COMPARTMENT_OCID, PROJECT_OCID
1516
from ads.feature_engineering.schema import Schema
1617
from ads.jobs.builders.base import Builder
@@ -548,6 +549,8 @@ def create(self, **kwargs) -> "DataScienceModel":
548549
The OCI Object Storage URI where model artifacts will be copied to.
549550
The `bucket_uri` is only necessary for uploading large artifacts whose
550551
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
552+
.. versionadded:: 2.8.10
553+
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
551554
overwrite_existing_artifact: (bool, optional). Defaults to `True`.
552555
Overwrite target bucket artifact if exists.
553556
remove_existing_artifact: (bool, optional). Defaults to `True`.
@@ -636,6 +639,8 @@ def upload_artifact(
636639
The OCI Object Storage URI where model artifacts will be copied to.
637640
The `bucket_uri` is only necessary for uploading large artifacts whose
638641
size is greater than 2GB. Example: `oci://<bucket_name>@<namespace>/prefix/`.
642+
.. versionadded:: 2.8.10
643+
If `artifact` is provided as an object storage path to a zip archive, `bucket_uri` will be ignored.
639644
auth: (Dict, optional). Defaults to `None`.
640645
The default authentication is set using `ads.set_auth` API.
641646
If you need to override the default, use the `ads.common.auth.api_keys` or
@@ -668,6 +673,13 @@ def upload_artifact(
668673
"timeout": timeout,
669674
}
670675

676+
if ObjectStorageDetails.is_oci_path(self.artifact):
677+
if bucket_uri and bucket_uri != self.artifact:
678+
logger.warn(
679+
"The `bucket_uri` will be ignored and the value of `self.artifact` will be used instead."
680+
)
681+
bucket_uri = self.artifact
682+
671683
if bucket_uri or utils.folder_size(self.artifact) > _MAX_ARTIFACT_SIZE_IN_BYTES:
672684
if not bucket_uri:
673685
raise ModelArtifactSizeError(

0 commit comments

Comments
 (0)