Skip to content

Commit dc85f55

Browse files
committed
Adding sql validation/pretty printing
1 parent aaed732 commit dc85f55

File tree

3 files changed

+60
-30
lines changed

3 files changed

+60
-30
lines changed

ads/feature_store/dataset.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8; -*-
3-
import json
43
import logging
54
from copy import deepcopy
65
from datetime import datetime
76
from typing import Dict, List, Union
87

98
import pandas
9+
import pandas as pd
1010
from great_expectations.core import ExpectationSuite
1111

1212
from ads import deprecated
@@ -159,6 +159,7 @@ def __init__(self, spec: Dict = None, **kwargs) -> None:
159159
super().__init__(spec=spec, **deepcopy(kwargs))
160160
# Specify oci Dataset instance
161161
self.dataset_job = None
162+
self._is_manual_association: bool = False
162163
self._spark_engine = None
163164
self.oci_dataset = self._to_oci_dataset(**kwargs)
164165
self.lineage = OCILineage(**kwargs)
@@ -191,6 +192,16 @@ def spark_engine(self):
191192
self._spark_engine = SparkEngine(get_metastore_id(self.feature_store_id))
192193
return self._spark_engine
193194

195+
@property
def is_manual_association(self):
    """Whether feature groups were associated with this dataset manually.

    The flag stored on the feature group collection in the spec takes
    precedence whenever that collection is present (truthy) and carries a
    non-``None`` value; otherwise the instance-level
    ``_is_manual_association`` attribute is returned.
    """
    spec_groups: DatasetFeatureGroupCollection = self.get_spec(
        self.CONST_FEATURE_GROUP
    )
    # No usable collection-level flag: fall back to the instance attribute.
    if not spec_groups or spec_groups.is_manual_association is None:
        return self._is_manual_association
    return spec_groups.is_manual_association
204+
194205
@property
195206
def kind(self) -> str:
196207
"""The kind of the object as showing in a YAML."""
@@ -572,8 +583,18 @@ def with_feature_groups(self, feature_groups: List["FeatureGroup"]) -> "Dataset"
572583
for group in feature_groups:
573584
collection.append(DatasetFeatureGroupSummary(feature_group_id=group.id))
574585

586+
self._is_manual_association = True
575587
return self.set_spec(
576-
self.CONST_FEATURE_GROUP, DatasetFeatureGroupCollection(items=collection)
588+
self.CONST_FEATURE_GROUP,
589+
DatasetFeatureGroupCollection(items=collection, is_manual_association=True),
590+
)
591+
592+
def feature_groups_to_df(self):
    """Return the dataset's feature groups as a pandas DataFrame.

    One row is produced per entry in ``self.feature_groups``, using the
    record returned by that entry's ``oci_feature_group.to_df_record()``.
    """
    rows = []
    for group in self.feature_groups:
        rows.append(group.oci_feature_group.to_df_record())
    return pd.DataFrame.from_records(rows)
578599

579600
@property
@@ -687,7 +708,7 @@ def show(self, rankdir: str = GraphOrientation.LEFT_RIGHT) -> None:
687708
f"Can't get lineage information for Feature group id {self.id}"
688709
)
689710

690-
def create(self, **kwargs) -> "Dataset":
711+
def create(self, validate_sql=True, **kwargs) -> "Dataset":
691712
"""Creates dataset resource.
692713
693714
!!! note "Lazy"
@@ -700,6 +721,8 @@ def create(self, **kwargs) -> "Dataset":
700721
kwargs
701722
Additional kwargs arguments.
702723
Can be any attribute that `oci.feature_store.models.Dataset` accepts.
724+
validate_sql:
725+
Boolean value indicating whether to validate sql before creating dataset
703726
704727
Returns
705728
-------
@@ -720,6 +743,9 @@ def create(self, **kwargs) -> "Dataset":
720743
if self.statistics_config is None:
721744
self.statistics_config = StatisticsConfig()
722745

746+
if validate_sql is True:
747+
self.spark_engine.sql(self.get_spec(self.CONST_QUERY))
748+
723749
payload = deepcopy(self._spec)
724750
payload.pop("id", None)
725751
logger.debug(f"Creating a dataset resource with payload {payload}")

ads/feature_store/feature_group.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def __init__(self, spec: Dict = None, **kwargs) -> None:
180180
# Specify oci FeatureGroup instance
181181
self.feature_group_job = None
182182
self._spark_engine = None
183-
self.oci_feature_group = self._to_oci_feature_group(**kwargs)
183+
self.oci_feature_group: OCIFeatureGroup = self._to_oci_feature_group(**kwargs)
184184
self.dsc_job = OCIFeatureGroupJob()
185185
self.lineage = OCILineage(**kwargs)
186186

@@ -940,7 +940,7 @@ def get_last_job(self) -> "FeatureGroupJob":
940940
return fg_job[0]
941941
return FeatureGroupJob.from_id(self.job_id)
942942

943-
def select(self, features: Optional[List[str]] = []) -> Query:
943+
def select(self, features: Optional[List[str]] = ()) -> Query:
944944
"""
945945
Selects a subset of features from the feature group and returns a Query object that can be used to view the
946946
resulting dataframe.
@@ -1165,28 +1165,9 @@ def list_df(cls, compartment_id: str = None, **kwargs) -> "pd.DataFrame":
11651165
for oci_feature_group in OCIFeatureGroup.list_resource(
11661166
compartment_id, **kwargs
11671167
):
1168-
records.append(
1169-
{
1170-
"id": oci_feature_group.id,
1171-
"name": oci_feature_group.name,
1172-
"description": oci_feature_group.description,
1173-
"time_created": oci_feature_group.time_created.strftime(
1174-
utils.date_format
1175-
),
1176-
"time_updated": oci_feature_group.time_updated.strftime(
1177-
utils.date_format
1178-
),
1179-
"lifecycle_state": oci_feature_group.lifecycle_state,
1180-
"created_by": f"...{oci_feature_group.created_by[-6:]}",
1181-
"compartment_id": f"...{oci_feature_group.compartment_id[-6:]}",
1182-
"primary_keys": oci_feature_group.primary_keys,
1183-
"feature_store_id": oci_feature_group.feature_store_id,
1184-
"entity_id": oci_feature_group.entity_id,
1185-
"input_feature_details": oci_feature_group.input_feature_details,
1186-
"expectation_details": oci_feature_group.expectation_details,
1187-
"statistics_config": oci_feature_group.statistics_config,
1188-
}
1189-
)
1168+
oci_feature_group: OCIFeatureGroup = oci_feature_group
1169+
records.append(oci_feature_group.to_df_record())
1170+
11901171
return pd.DataFrame.from_records(records)
11911172

11921173
@classmethod

ads/feature_store/service/oci_feature_group.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77
import datetime
8+
import pandas as pd
9+
from ads.common import utils
810

911
import oci
1012
from oci.feature_store.models import (
@@ -64,9 +66,9 @@ class OCIFeatureGroup(OCIFeatureStoreMixin, oci.feature_store.models.FeatureGrou
6466
Gets feature group by OCID.
6567
Examples
6668
--------
67-
>>> oci_feature_group = OCIFeatureGroup.from_id("<feature_group_id>")
68-
>>> oci_feature_group.description = "A brand new description"
69-
>>> oci_feature_group.delete()
69+
>>> self = OCIFeatureGroup.from_id("<feature_group_id>")
70+
>>> self.description = "A brand new description"
71+
>>> self.delete()
7072
"""
7173

7274
def create(self) -> "OCIFeatureGroup":
@@ -122,6 +124,27 @@ def delete(self):
122124
"""
123125
self.client.delete_feature_group(self.id)
124126

127+
def to_df(self):
    """Return this feature group as a single-row pandas DataFrame.

    The row is the dictionary produced by ``to_df_record()``.
    """
    record = self.to_df_record()
    return pd.DataFrame.from_records([record])
129+
130+
def to_df_record(self):
    """Build a flat dictionary describing this feature group.

    The result is suitable as one row for ``pandas.DataFrame.from_records``.
    Timestamps are rendered with ``utils.date_format``; ``created_by`` and
    ``compartment_id`` are shortened to ``"..."`` plus their last six
    characters.

    Any of those four attributes that is ``None`` is passed through as
    ``None`` instead of raising ``AttributeError``/``TypeError``.

    Returns
    -------
    dict
        Mapping of column name to value for this feature group.
    """

    def _format_time(moment):
        # A missing timestamp has no ``strftime``; keep it as None.
        return None if moment is None else moment.strftime(utils.date_format)

    def _shorten(ocid):
        # Slicing None would raise TypeError; keep a missing id as None.
        return None if ocid is None else f"...{ocid[-6:]}"

    return {
        "id": self.id,
        "name": self.name,
        "description": self.description,
        "time_created": _format_time(self.time_created),
        "time_updated": _format_time(self.time_updated),
        "lifecycle_state": self.lifecycle_state,
        "created_by": _shorten(self.created_by),
        "compartment_id": _shorten(self.compartment_id),
        "primary_keys": self.primary_keys,
        "feature_store_id": self.feature_store_id,
        "entity_id": self.entity_id,
        "input_feature_details": self.input_feature_details,
        "expectation_details": self.expectation_details,
        "statistics_config": self.statistics_config,
    }
147+
125148
@classmethod
126149
def from_id(cls, id: str) -> "OCIFeatureGroup":
127150
"""Gets feature group resource by id.

0 commit comments

Comments
 (0)