Skip to content

Commit 8c5866e

Browse files
authored
Interoperability with model catalog (#260)
2 parents 4952fca + 58bfaa9 commit 8c5866e

File tree

8 files changed

+66
-21
lines changed

8 files changed

+66
-21
lines changed

ads/feature_store/dataset.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,7 @@ def with_model_details(self, model_details: ModelDetails) -> "Dataset":
498498
"The argument `model_details` has to be of type `ModelDetails`"
499499
"but is of type: `{}`".format(type(model_details))
500500
)
501+
501502
return self.set_spec(self.CONST_MODEL_DETAILS, model_details.to_dict())
502503

503504
def add_models(self, model_details: ModelDetails) -> "Dataset":
@@ -516,9 +517,20 @@ def add_models(self, model_details: ModelDetails) -> "Dataset":
516517
if existing_model_details and existing_model_details.items:
517518
items = existing_model_details["items"]
518519
for item in items:
519-
model_details.items.append(item)
520+
if item not in model_details.items:
521+
model_details.items.append(item)
520522
self.with_model_details(model_details)
521-
return self.update()
523+
try:
524+
return self.update()
525+
except Exception as ex:
526+
logger.error(
527+
f"Dataset update Failed with : {type(ex)} with error message: {ex}"
528+
)
529+
if existing_model_details:
530+
self.with_model_details(ModelDetails().with_items(existing_model_details["items"]))
531+
else:
532+
self.with_model_details(ModelDetails().with_items([]))
533+
return self
522534

523535
def remove_models(self, model_details: ModelDetails) -> "Dataset":
524536
"""remove model details from the dataset, remove from the existing dataset model id list

ads/feature_store/docs/source/dataset.rst

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ Use the ``show()`` method on the ``Dataset`` instance to visualize the lineage o
306306

307307
The ``show()`` method takes the following optional parameter:
308308

309-
- ``rankdir: (str, optional)``. Defaults to ``LR``. The allowed values are ``TB`` or ``LR``. This parameter is applicable only for ``graph`` mode and it renders the direction of the graph as either top to bottom (TB) or left to right (LR).
309+
- ``rankdir: (str, optional)``. Defaults to ``LR``. The allowed values are ``TB`` or ``LR``. This parameter is applicable only for ``graph`` mode and it renders the direction of the graph as either top to bottom (TB) or left to right (LR).
310310

311311

312312
.. code-block:: python3
@@ -317,3 +317,16 @@ Below is an example of the output.
317317

318318
.. figure:: figures/dataset_lineage.png
319319
:width: 400
320+
321+
322+
Add Model Details
323+
=================
324+
325+
You can call the ``add_models()`` method of the ``Dataset`` instance to associate model IDs with the dataset.
326+
The ``.add_models()`` method takes the following parameter:
327+
328+
- ``model_details: ModelDetails``. ``ModelDetails`` takes ``items: List[str]`` as a parameter; pass the model IDs as the items.
329+
330+
.. code-block:: python3
331+
332+
dataset.add_models(ModelDetails().with_items(["ocid1.datasciencemodel.<unique_id>"]))

ads/feature_store/docs/source/quickstart.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@ Background reading to understand the concepts of Feature Store and OCI Data Scie
5555
compartment_id = "ocid1.compartment.<unique_id>"
5656
metastore_id = "ocid1.datacatalogmetastore.oc1.iad.<unique_id>"
5757
api_gateway_endpoint = "https://**.{region}.oci.customer-oci.com/20230101"
58+
os.environ["OCI_FS_SERVICE_ENDPOINT"] = api_gateway_endpoint
5859
59-
ads.set_auth(auth="user_principal", client_kwargs={"service_endpoint": api_gateway_endpoint})
60+
ads.set_auth(auth="api_key")
6061

6162
# step1: Create feature store
6263
feature_store_resource = (

ads/feature_store/mixin/oci_feature_store.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,19 @@
66

77
from ads.common.oci_mixin import OCIModelMixin
88
import oci.feature_store
9+
import os
910

1011

1112
class OCIFeatureStoreMixin(OCIModelMixin):
1213
@classmethod
1314
def init_client(
1415
cls, **kwargs
1516
) -> oci.feature_store.feature_store_client.FeatureStoreClient:
17+
# TODO: Getting the endpoint from authorizer
18+
fs_service_endpoint = os.environ.get("OCI_FS_SERVICE_ENDPOINT")
19+
if fs_service_endpoint:
20+
kwargs = {"service_endpoint": fs_service_endpoint}
21+
1622
client = cls._init_client(
1723
client=oci.feature_store.feature_store_client.FeatureStoreClient, **kwargs
1824
)

ads/feature_store/validation_output.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@ def to_pandas(self) -> pd.DataFrame:
2323
The validation output information as a pandas DataFrame.
2424
"""
2525
if self.content:
26-
validation_output_json = (
27-
json.loads(self.content)
28-
)
29-
profile_result = pd.json_normalize(validation_output_json.get("results")).transpose()
26+
validation_output_json = json.loads(self.content)
27+
profile_result = pd.json_normalize(
28+
validation_output_json.get("results")
29+
).transpose()
3030
return profile_result
3131

3232
def to_summary(self) -> pd.DataFrame:
@@ -39,9 +39,7 @@ def to_summary(self) -> pd.DataFrame:
3939
The validation output summary information as a pandas DataFrame.
4040
"""
4141
if self.content:
42-
validation_output_json = (
43-
json.loads(self.content)
44-
)
42+
validation_output_json = json.loads(self.content)
4543
profile_result = pd.json_normalize(validation_output_json).transpose()
4644
summary_df = profile_result.drop("results")
4745
return summary_df

ads/model/generic_model.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from ads.evaluations import EvaluatorMixin
3535
from ads.feature_engineering import ADSImage
3636
from ads.feature_engineering.schema import Schema
37+
from ads.feature_store.model_details import ModelDetails
3738
from ads.model.artifact import ModelArtifact
3839
from ads.model.common.utils import (
3940
_extract_locals,
@@ -65,7 +66,7 @@
6566
Framework,
6667
ModelCustomMetadata,
6768
ModelProvenanceMetadata,
68-
ModelTaxonomyMetadata,
69+
ModelTaxonomyMetadata, MetadataCustomCategory,
6970
)
7071
from ads.model.model_metadata_mixin import MetadataMixin
7172
from ads.model.model_properties import ModelProperties
@@ -1825,6 +1826,7 @@ def save(
18251826
remove_existing_artifact: Optional[bool] = True,
18261827
model_version_set: Optional[Union[str, ModelVersionSet]] = None,
18271828
version_label: Optional[str] = None,
1829+
featurestore_dataset=None,
18281830
**kwargs,
18291831
) -> str:
18301832
"""Saves model artifacts to the model catalog.
@@ -1856,6 +1858,8 @@ def save(
18561858
The model version set OCID, or model version set name, or `ModelVersionSet` instance.
18571859
version_label: (str, optional). Defaults to None.
18581860
The model version label.
1861+
featurestore_dataset: (Dataset, optional).
1862+
The feature store dataset
18591863
kwargs:
18601864
project_id: (str, optional).
18611865
Project OCID. If not specified, the value will be taken either
@@ -1937,6 +1941,15 @@ def save(
19371941
# variables in case of saving model in context of model version set.
19381942
model_version_set_id = _extract_model_version_set_id(model_version_set)
19391943

1944+
if featurestore_dataset:
1945+
dataset_details = {
1946+
"dataset-id": featurestore_dataset.id,
1947+
"dataset-name": featurestore_dataset.name
1948+
}
1949+
self.metadata_custom.add("featurestore.dataset", value=str(dataset_details),
1950+
category=MetadataCustomCategory.TRAINING_AND_VALIDATION_DATASETS,
1951+
description="feature store dataset", replace=True)
1952+
19401953
self.dsc_model = (
19411954
self.dsc_model.with_compartment_id(self.properties.compartment_id)
19421955
.with_project_id(self.properties.project_id)
@@ -1965,6 +1978,10 @@ def save(
19651978
.with_infrastructure(ModelDeploymentInfrastructure())
19661979
.with_runtime(ModelDeploymentContainerRuntime())
19671980
)
1981+
# Add the model id to the feature store dataset
1982+
if featurestore_dataset:
1983+
model_details = ModelDetails().with_items([self.model_id])
1984+
featurestore_dataset.add_models(model_details)
19681985

19691986
return self.model_id
19701987

tests/integration/feature_store/test_base.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,8 @@
2020
from ads.feature_store.statistics_config import StatisticsConfig
2121

2222

23-
client_kwargs = dict(
24-
retry_strategy=oci.retry.NoneRetryStrategy,
25-
service_endpoint=os.getenv("service_endpoint"),
26-
)
27-
ads.set_auth(client_kwargs=client_kwargs)
23+
ads.set_auth()
24+
os.environ["OCI_FS_SERVICE_ENDPOINT"] = os.getenv("service_endpoint")
2825

2926
try:
3027
from ads.feature_store.feature_store import FeatureStore
@@ -37,7 +34,9 @@
3734

3835
class FeatureStoreTestCase:
3936
# networks compartment in feature store
40-
TIME_NOW = str.format("{}_{}",datetime.utcnow().strftime("%Y_%m_%d_%H_%M_%S"),int(random()*1000))
37+
TIME_NOW = str.format(
38+
"{}_{}", datetime.utcnow().strftime("%Y_%m_%d_%H_%M_%S"), int(random() * 1000)
39+
)
4140
TENANCY_ID = "ocid1.tenancy.oc1..aaaaaaaa462hfhplpx652b32ix62xrdijppq2c7okwcqjlgrbknhgtj2kofa"
4241
COMPARTMENT_ID = "ocid1.tenancy.oc1..aaaaaaaa462hfhplpx652b32ix62xrdijppq2c7okwcqjlgrbknhgtj2kofa"
4342
METASTORE_ID = "ocid1.datacatalogmetastore.oc1.iad.amaaaaaabiudgxyap7tizm4gscwz7amu7dixz7ml3mtesqzzwwg3urvvdgua"

tests/integration/feature_store/test_dataset_validations.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,8 @@ def test_dataset_model_details(self):
9191
assert dataset.oci_dataset.id
9292

9393
dataset.materialise()
94-
updated_dataset = dataset.add_models(ModelDetails().with_items(["model_ocid"]))
95-
updated_dataset.show()
96-
assert updated_dataset.model_details is not None
94+
dataset.add_models(ModelDetails().with_items(["model_ocid_invalid"]))
95+
assert len(dataset.model_details.get("items")) == 0
9796
self.clean_up_dataset(dataset)
9897
self.clean_up_feature_group(fg)
9998
self.clean_up_entity(entity)

0 commit comments

Comments
 (0)