
Commit 40eac53

ODSC-45790: Improve error reporting. Improve get_features_df output. Handle manual feature group association with dataset to support complex queries
1 parent ba93f40 commit 40eac53

4 files changed: +60 -10 lines

ads/feature_store/dataset.py

Lines changed: 49 additions & 1 deletion
@@ -8,6 +8,11 @@

 import pandas
 from great_expectations.core import ExpectationSuite
+from oci.feature_store.models import (
+    DatasetFeatureGroupCollection,
+    DatasetFeatureGroupSummary,
+)
+
 from ads.common import utils
 from ads.common.oci_mixin import OCIModelMixin
 from ads.feature_store.common.enums import (
@@ -26,6 +31,7 @@
     OciExecutionStrategyProvider,
 )
 from ads.feature_store.feature import DatasetFeature
+from ads.feature_store.feature_group import FeatureGroup
 from ads.feature_store.feature_group_expectation import Expectation
 from ads.feature_store.feature_option_details import FeatureOptionDetails
 from ads.feature_store.service.oci_dataset import OCIDataset
@@ -113,6 +119,7 @@ class Dataset(Builder):
     CONST_ITEMS = "items"
     CONST_LAST_JOB_ID = "jobId"
     CONST_MODEL_DETAILS = "modelDetails"
+    CONST_FEATURE_GROUP = "datasetFeatureGroups"

     attribute_map = {
         CONST_ID: "id",
@@ -130,6 +137,7 @@ class Dataset(Builder):
         CONST_LIFECYCLE_STATE: "lifecycle_state",
         CONST_MODEL_DETAILS: "model_details",
         CONST_PARTITION_KEYS: "partition_keys",
+        CONST_FEATURE_GROUP: "dataset_feature_groups",
     }

     def __init__(self, spec: Dict = None, **kwargs) -> None:
@@ -503,6 +511,44 @@ def with_model_details(self, model_details: ModelDetails) -> "Dataset":

         return self.set_spec(self.CONST_MODEL_DETAILS, model_details.to_dict())

+    @property
+    def feature_groups(self) -> List["FeatureGroup"]:
+        collection: "DatasetFeatureGroupCollection" = self.get_spec(
+            self.CONST_FEATURE_GROUP
+        )
+        feature_groups: List["FeatureGroup"] = []
+        if collection:
+            for datasetFGSummary in collection.items:
+                feature_groups.append(
+                    FeatureGroup.from_id(datasetFGSummary.feature_group_id)
+                )
+
+        return feature_groups
+
+    @feature_groups.setter
+    def feature_groups(self, feature_groups: List["FeatureGroup"]):
+        self.with_feature_groups(feature_groups)
+
+    def with_feature_groups(self, feature_groups: List["FeatureGroup"]) -> "Dataset":
+        """Sets the feature groups for the dataset.
+
+        Parameters
+        ----------
+        feature_groups: List of feature groups
+        Returns
+        -------
+        Dataset
+            The Dataset instance (self).
+
+        """
+        collection: List["DatasetFeatureGroupSummary"] = []
+        for group in feature_groups:
+            collection.append(DatasetFeatureGroupSummary(feature_group_id=group.id))
+
+        return self.set_spec(
+            self.CONST_FEATURE_GROUP, DatasetFeatureGroupCollection(items=collection)
+        )
+
     @property
     def partition_keys(self) -> List[str]:
         return self.get_spec(self.CONST_PARTITION_KEYS)
@@ -560,7 +606,9 @@ def add_models(self, model_details: ModelDetails) -> "Dataset":
                 f"Dataset update Failed with : {type(ex)} with error message: {ex}"
             )
         if existing_model_details:
-            self.with_model_details(ModelDetails().with_items(existing_model_details["items"]))
+            self.with_model_details(
+                ModelDetails().with_items(existing_model_details["items"])
+            )
         else:
             self.with_model_details(ModelDetails().with_items([]))
         return self
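
Taken together, the new CONST_FEATURE_GROUP spec key, the feature_groups property, and the with_feature_groups builder let a caller associate feature groups with a dataset explicitly, which is what supports datasets built from complex queries whose lineage cannot be inferred automatically. A minimal usage sketch of the new API (the OCIDs and the with_name/with_query builder calls are illustrative assumptions, not part of this diff):

from ads.feature_store.dataset import Dataset
from ads.feature_store.feature_group import FeatureGroup

# Resolve the feature groups that back the query (hypothetical OCIDs).
fg_orders = FeatureGroup.from_id("ocid1.featuregroup.oc1..orders")
fg_customers = FeatureGroup.from_id("ocid1.featuregroup.oc1..customers")

# Associate them manually so the dataset records its source feature groups
# even though the query joins them in a way lineage tracking cannot follow.
dataset = (
    Dataset()
    .with_name("orders_by_customer")  # illustrative builder call
    .with_query(
        "SELECT o.*, c.segment FROM orders o JOIN customers c ON o.customer_id = c.id"
    )  # illustrative query
    .with_feature_groups([fg_orders, fg_customers])
)

# Reading the property back resolves each stored summary into a FeatureGroup.
for fg in dataset.feature_groups:
    print(fg.id)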

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 5 additions & 2 deletions
@@ -16,6 +16,7 @@
     show_validation_summary,
 )
 from ads.feature_store.execution_strategy.engine.spark_engine import SparkEngine
+import traceback

 try:
     from pyspark.sql import DataFrame
@@ -291,8 +292,9 @@ def _save_offline_dataframe(

         except Exception as ex:
             error_details = str(ex)
+            tb = traceback.format_exc()
             logger.error(
-                f"FeatureGroup Materialization Failed with : {type(ex)} with error message: {ex}"
+                f"FeatureGroup Materialization Failed with : {type(ex)} with error message: {ex} and stacktrace {tb}",
             )

             show_ingestion_summary(
@@ -427,8 +429,9 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):

         except Exception as ex:
             error_details = str(ex)
+            tb = traceback.format_exc()
             logger.error(
-                f"Dataset Materialization Failed with : {type(ex)} with error message: {ex}"
+                f"Dataset Materialization Failed with : {type(ex)} with error message: {ex} and stacktrace {tb}"
            )

             show_ingestion_summary(
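
Both handlers now apply the same pattern: capture traceback.format_exc() at the point of failure and include it in the log record, so the materialization log shows where inside the Spark job the error originated rather than only the exception message. A standalone sketch of the pattern (the materialize function and the parquet write are illustrative, not the ADS implementation):

import logging
import traceback

logger = logging.getLogger(__name__)

def materialize(dataframe, target_path):
    try:
        dataframe.write.mode("overwrite").parquet(target_path)  # illustrative write step
    except Exception as ex:
        # str(ex) alone loses the call site; format_exc() preserves the full stack trace.
        tb = traceback.format_exc()
        logger.error(
            f"Materialization Failed with : {type(ex)} with error message: {ex} and stacktrace {tb}"
        )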

ads/feature_store/feature_group.py

Lines changed: 0 additions & 1 deletion
@@ -748,7 +748,6 @@ def get_features_df(self) -> "pd.DataFrame":
                 {
                     "name": feature.feature_name,
                     "type": feature.feature_type,
-                    "feature_group_id": feature.feature_group_id,
                 }
             )
         return pd.DataFrame.from_records(records)
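
With the redundant feature_group_id column dropped, get_features_df returns one row per feature with just the name and type columns. A small sketch of the resulting shape (the feature names and types are made up for illustration):

import pandas as pd

# One row per feature, columns "name" and "type" only.
records = [
    {"name": "order_id", "type": "STRING"},
    {"name": "order_total", "type": "DOUBLE"},
]
print(pd.DataFrame.from_records(records))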

tests/unitary/with_extras/feature_store/test_dataset.py

Lines changed: 6 additions & 6 deletions
@@ -259,12 +259,12 @@ def test__to_oci_fs_entity(self, mock_load_key_file, mock_config_from_file):
     @patch.object(SparkSessionSingleton, "__init__", return_value=None)
     @patch.object(SparkSessionSingleton, "get_spark_session")
     def test_materialise(self, spark, get_spark_session, mock_update):
-        with patch.object(DatasetJob, "create") as mock_dataset_job:
-            with patch.object(FeatureStore, "from_id"):
-                with patch.object(DatasetJob, "_mark_job_complete"):
-                    mock_dataset_job.return_value = self.mock_dsc_dataset_job
-                    self.mock_dsc_dataset.with_id(DATASET_OCID)
-                    self.mock_dsc_dataset.materialise()
+        with patch.object(DatasetJob, "create") as mock_dataset_job:
+            with patch.object(FeatureStore, "from_id"):
+                with patch.object(DatasetJob, "_mark_job_complete"):
+                    mock_dataset_job.return_value = self.mock_dsc_dataset_job
+                    self.mock_dsc_dataset.with_id(DATASET_OCID)
+                    self.mock_dsc_dataset.materialise()

     @patch.object(SparkSessionSingleton, "__init__", return_value=None)
     @patch.object(SparkSessionSingleton, "get_spark_session")
