
Commit d2646a8

updated the documents

1 parent e18ad22 commit d2646a8

8 files changed: +68 −60 lines

ads/feature_store/common/spark_session_singleton.py

Lines changed: 3 additions & 2 deletions
@@ -75,8 +75,9 @@ def __init__(self, metastore_id: str = None):
                 "spark.hadoop.oracle.dcat.metastore.id", metastore_id
             ).config(
                 "spark.sql.warehouse.dir", metastore.default_managed_table_location
-            )\
-                .config("spark.driver.memory", "16G")
+            ).config(
+                "spark.driver.memory", "16G"
+            )
 
             if developer_enabled():
                 # Configure spark session with delta jars only in developer mode. In other cases,
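The reformat above swaps a backslash continuation for parenthesised .config() chaining, which is what black emits and what the rest of the builder already uses. A minimal standalone sketch of the same pattern; the metastore OCID and warehouse path are hypothetical placeholders:

from pyspark.sql import SparkSession

# Placeholder values; the real ones are resolved from the Data Catalog
# metastore inside SparkSessionSingleton.
spark = (
    SparkSession.builder.appName("feature-store-demo")
    .config("spark.hadoop.oracle.dcat.metastore.id", "ocid1.datacatalogmetastore.oc1..example")
    .config("spark.sql.warehouse.dir", "oci://bucket@namespace/warehouse")
    .config("spark.driver.memory", "16G")
    .getOrCreate()
)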

ads/feature_store/common/utils/feature_schema_mapper.py

Lines changed: 0 additions & 1 deletion
@@ -241,7 +241,6 @@ def map_feature_type_to_pandas(feature_type):
     raise TypeError(f"Feature Type {feature_type} is not supported for pandas")
 
 
-
 def map_spark_type_to_stats_data_type(spark_type):
     """Maps the spark data types to MLM library data types
     args:

ads/feature_store/common/utils/utility.py

Lines changed: 21 additions & 14 deletions
@@ -44,7 +44,7 @@
 
 
 def get_execution_engine_type(
-        data_frame: Union[DataFrame, pd.DataFrame]
+    data_frame: Union[DataFrame, pd.DataFrame]
 ) -> ExecutionEngine:
     """
     Determines the execution engine type for a given DataFrame.
@@ -84,7 +84,7 @@ def get_metastore_id(feature_store_id: str):
 
 
 def validate_delta_format_parameters(
-        timestamp: datetime = None, version_number: int = None, is_restore: bool = False
+    timestamp: datetime = None, version_number: int = None, is_restore: bool = False
 ):
     """
     Validate the user input provided as part of preview, restore APIs for ingested data, Ingested data is
@@ -118,9 +118,9 @@ def validate_delta_format_parameters(
 
 
 def get_features(
-        output_columns: List[dict],
-        parent_id: str,
-        entity_type: EntityType = EntityType.FEATURE_GROUP,
+    output_columns: List[dict],
+    parent_id: str,
+    entity_type: EntityType = EntityType.FEATURE_GROUP,
 ) -> List[Feature]:
     """
     Returns a list of features, given a list of output_columns and a feature_group_id.
@@ -155,7 +155,9 @@ def get_features(
 
 
 def get_schema_from_pandas_df(df: pd.DataFrame, feature_store_id: str):
-    spark = SparkSessionSingleton(get_metastore_id(feature_store_id)).get_spark_session()
+    spark = SparkSessionSingleton(
+        get_metastore_id(feature_store_id)
+    ).get_spark_session()
     converted_df = spark.createDataFrame(df)
     return get_schema_from_spark_df(converted_df)
 
@@ -174,7 +176,9 @@ def get_schema_from_spark_df(df: DataFrame):
     return schema_details
 
 
-def get_schema_from_df(data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str) -> List[dict]:
+def get_schema_from_df(
+    data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str
+) -> List[dict]:
     """
     Given a DataFrame, returns a list of dictionaries that describe its schema.
     If the DataFrame is a pandas DataFrame, it uses pandas methods to get the schema.
@@ -187,8 +191,7 @@ def get_schema_from_df(data_frame: Union[DataFrame, pd.DataFrame], feature_store
 
 
 def get_input_features_from_df(
-        data_frame: Union[DataFrame, pd.DataFrame],
-        feature_store_id: str
+    data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str
 ) -> List[FeatureDetail]:
     """
     Given a DataFrame, returns a list of FeatureDetail objects that represent its input features.
@@ -205,7 +208,7 @@ def get_input_features_from_df(
 
 
 def convert_expectation_suite_to_expectation(
-        expectation_suite: ExpectationSuite, expectation_type: ExpectationType
+    expectation_suite: ExpectationSuite, expectation_type: ExpectationType
 ):
     """
     Convert an ExpectationSuite object to an Expectation object with detailed rule information.
@@ -264,7 +267,7 @@ def largest_matching_subset_of_primary_keys(left_feature_group, right_feature_gr
 
 
 def convert_pandas_datatype_with_schema(
-        raw_feature_details: List[dict], input_df: pd.DataFrame
+    raw_feature_details: List[dict], input_df: pd.DataFrame
 ) -> pd.DataFrame:
     feature_detail_map = {}
     columns_to_remove = []
@@ -281,21 +284,25 @@ def convert_pandas_datatype_with_schema(
                 .where(pd.notnull(input_df[column]), None)
             )
         else:
-            logger.warning("column" + column + "doesn't exist in the input feature details")
+            logger.warning(
+                "column" + column + "doesn't exist in the input feature details"
+            )
             columns_to_remove.append(column)
     return input_df.drop(columns=columns_to_remove)
 
 
 def convert_spark_dataframe_with_schema(
-        raw_feature_details: List[dict], input_df: DataFrame
+    raw_feature_details: List[dict], input_df: DataFrame
 ) -> DataFrame:
     feature_detail_map = {}
     columns_to_remove = []
     for feature_details in raw_feature_details:
         feature_detail_map[feature_details.get("name")] = feature_details
     for column in input_df.columns:
         if column not in feature_detail_map.keys():
-            logger.warning("column" + column + "doesn't exist in the input feature details")
+            logger.warning(
+                "column" + column + "doesn't exist in the input feature details"
+            )
             columns_to_remove.append(column)
 
     return input_df.drop(*columns_to_remove)
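Together these helpers let a pandas frame drive schema inference: get_schema_from_pandas_df round-trips the frame through Spark, and get_input_features_from_df wraps the resulting schema in FeatureDetail objects. A hedged usage sketch, assuming a reachable metastore-backed Spark session; the OCID and columns are made up:

import pandas as pd
from ads.feature_store.common.utils.utility import get_input_features_from_df

df = pd.DataFrame({"user_id": [1, 2, 3], "spend": [10.5, 3.2, 7.8]})

# Infers one FeatureDetail per column; the pandas frame is converted to a
# Spark DataFrame internally before its schema is read.
features = get_input_features_from_df(df, "ocid1.featurestore.oc1..example")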

ads/feature_store/entity.py

Lines changed: 3 additions & 1 deletion
@@ -370,7 +370,9 @@ def create_feature_group(
         raw_feature_details = (
             input_feature_details
             if input_feature_details
-            else get_input_features_from_df(schema_details_dataframe, self.feature_store_id)
+            else get_input_features_from_df(
+                schema_details_dataframe, self.feature_store_id
+            )
         )
 
         self.oci_feature_group = self._build_feature_group(
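In practice the fallback means callers can omit explicit feature details and have them inferred from the DataFrame they pass in. A hedged usage sketch; the argument names are taken from the variables visible in this hunk, not a confirmed signature:

# Hypothetical call: with input_feature_details left out, the entity falls
# back to get_input_features_from_df on the supplied DataFrame.
feature_group = entity.create_feature_group(
    schema_details_dataframe=df,
    input_feature_details=None,
)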

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 4 additions & 4 deletions
@@ -38,9 +38,7 @@
 from ads.feature_store.transformation import Transformation
 
 from ads.feature_store.feature_statistics.statistics_service import StatisticsService
-from ads.feature_store.common.utils.utility import (
-    validate_input_feature_details
-)
+from ads.feature_store.common.utils.utility import validate_input_feature_details
 
 logger = logging.getLogger(__name__)
 
@@ -178,7 +176,9 @@ def _save_offline_dataframe(
         self.spark_engine.create_database(database)
 
         if not feature_group.is_infer_schema:
-            data_frame = validate_input_feature_details(feature_group.input_feature_details, data_frame)
+            data_frame = validate_input_feature_details(
+                feature_group.input_feature_details, data_frame
+            )
 
         # TODO: Get event timestamp column and apply filtering basis from and to timestamp
 
ads/feature_store/feature_group.py

Lines changed: 3 additions & 2 deletions
@@ -476,9 +476,10 @@ def with_input_feature_details(
     def with_schema_details_from_dataframe(
         self, data_frame: Union[DataFrame, pd.DataFrame]
     ) -> "FeatureGroup":
-
         if not self.feature_store_id:
-            raise ValueError("FeatureStore id must be set before calling `with_schema_details_from_dataframe`")
+            raise ValueError(
+                "FeatureStore id must be set before calling `with_schema_details_from_dataframe`"
+            )
 
         schema_details = get_schema_from_df(data_frame, self.feature_store_id)
         feature_details = []
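The guard exists because deriving schema details needs a Spark session, and that session is resolved through the feature store's metastore (see get_schema_from_df in utility.py above). A minimal sketch, assuming the builder exposes a with_feature_store_id setter and df is a pandas DataFrame; the OCID is hypothetical:

from ads.feature_store.feature_group import FeatureGroup

fg = (
    FeatureGroup()
    .with_feature_store_id("ocid1.featurestore.oc1..example")  # must be set first
    .with_schema_details_from_dataframe(df)  # raises ValueError otherwise
)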

ads/feature_store/feature_store_registrar.py

Lines changed: 33 additions & 35 deletions
@@ -73,11 +73,11 @@ def __init__(self):
 
 
 class _ModelBuilderHashDict(Generic[_ModelBuilderT]):
     def __init__(
-            self,
-            builders: Optional[List[_ModelBuilderT]],
-            hash_fn: Callable = lambda model: model.name
-            if model.attribute_map.get("name")
-            else model.display_name,
+        self,
+        builders: Optional[List[_ModelBuilderT]],
+        hash_fn: Callable = lambda model: model.name
+        if model.attribute_map.get("name")
+        else model.display_name,
     ):
         self.__hash_fn = hash_fn
         self._dict: Dict[str, _ModelBuilderT] = {}
@@ -125,10 +125,10 @@ def get(self, key: str) -> Union[None, _ModelBuilderT]:
 
 class _ElementMap(Generic[_ModelBuilderT]):
     def __init__(
-            self,
-            element_type: _ModelBuilderT,
-            element_dict: _ModelBuilderHashDict[_ModelBuilderT],
-            parent_child_map: Dict[str, "_ParentChildMap"] = None,
+        self,
+        element_type: _ModelBuilderT,
+        element_dict: _ModelBuilderHashDict[_ModelBuilderT],
+        parent_child_map: Dict[str, "_ParentChildMap"] = None,
     ):
         self.element_type = element_type
         self.element_dict = element_dict
@@ -140,9 +140,9 @@ def add_element_from_dict(self, config: dict) -> str:
 
 class _ParentChildMap:
     def __init__(
-            self,
-            child_mapping: "_ElementMap",
-            parent_child_hash_map: DefaultDict[str, List[str]],
+        self,
+        child_mapping: "_ElementMap",
+        parent_child_hash_map: DefaultDict[str, List[str]],
     ):
         self.child_mapping = child_mapping
         self.parent_child_hash_map = parent_child_hash_map
@@ -158,12 +158,12 @@ class FeatureStoreRegistrar:
     DATASET_SPEC = "dataset"
 
     def __init__(
-            self,
-            feature_store: FeatureStore = None,
-            entities: List[Entity] = None,
-            datasets: List[Dataset] = None,
-            feature_groups: List[FeatureGroup] = None,
-            transformations: List[Transformation] = None,
+        self,
+        feature_store: FeatureStore = None,
+        entities: List[Entity] = None,
+        datasets: List[Dataset] = None,
+        feature_groups: List[FeatureGroup] = None,
+        transformations: List[Transformation] = None,
     ):
         """
         Initialised feature registrar resource
@@ -189,7 +189,7 @@ def __init__(
         self._entity_map = {}
 
     def create(
-            self,
+        self,
     ) -> Tuple[
         FeatureStore,
         List[Entity],
@@ -248,12 +248,10 @@ def _create_transformations(self) -> List[Transformation]:
         for transformation in self._transformations.get_dict().values():
             transformation.feature_store_id = self._feature_store_id
             transformation.compartment_id = (
-                    transformation.compartment_id or self._root_compartment_id
+                transformation.compartment_id or self._root_compartment_id
             )
             # to encode to base64
-            transformation.source_code_function = (
-                transformation.source_code_function
-            )
+            transformation.source_code_function = transformation.source_code_function
         return self._transformations.create_models(self._progress)
 
     def _create_feature_groups(self) -> List[FeatureGroup]:
@@ -288,7 +286,7 @@ def _create_feature_groups(self) -> List[FeatureGroup]:
             feature_group.primary_keys = feature_group.primary_keys
             feature_group.feature_store_id = self._feature_store_id
             feature_group.compartment_id = (
-                    feature_group.compartment_id or self._root_compartment_id
+                feature_group.compartment_id or self._root_compartment_id
             )
 
         return self._feature_groups.create_models(self._progress)
@@ -394,11 +392,11 @@ def generate_yaml(cls, uri: str = "feature_store.yaml"):
 
     @classmethod
     def from_yaml(
-            cls,
-            yaml_string: str = None,
-            uri: str = None,
-            loader: callable = Loader,
-            **kwargs,
+        cls,
+        yaml_string: str = None,
+        uri: str = None,
+        loader: callable = Loader,
+        **kwargs,
     ) -> "FeatureStoreRegistrar":
         """Creates an object from YAML string provided or from URI location containing YAML string
 
@@ -445,11 +443,11 @@ def _find_yaml_definition_file() -> str:
 
     def _get_progress_steps_count(self) -> int:
         return (
-                self._feature_store.get_count()
-                + self._entities.get_count()
-                + self._transformations.get_count()
-                + self._feature_groups.get_count()
-                + self._datasets.get_count()
+            self._feature_store.get_count()
+            + self._entities.get_count()
+            + self._transformations.get_count()
+            + self._feature_groups.get_count()
+            + self._datasets.get_count()
         )
 
     @staticmethod
@@ -470,7 +468,7 @@ def _read_from_file(uri: str, **kwargs) -> str:
 
     @staticmethod
     def _populate_child_mappings(
-            parent_dict: dict, parent_map: _ElementMap, parent_hash: str
+        parent_dict: dict, parent_map: _ElementMap, parent_hash: str
     ):
         for key, elements in parent_dict.get("spec").items():
             if key in parent_map.parent_child_map:
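Aside from the reflowed signatures, from_yaml remains the public entry point for this class. A hedged usage sketch; the file name matches the generate_yaml default shown above, and create() returns the FeatureStore followed by lists of the created child resources, per the Tuple annotation in this diff:

from ads.feature_store.feature_store_registrar import FeatureStoreRegistrar

# Parse resource definitions from YAML, then create them in dependency order.
registrar = FeatureStoreRegistrar.from_yaml(uri="feature_store.yaml")
created = registrar.create()  # tuple: FeatureStore first, then created resources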

ads/feature_store/transformation.py

Lines changed: 1 addition & 1 deletion
@@ -498,4 +498,4 @@ def to_dict(self) -> Dict:
 
     def __repr__(self) -> str:
         """Displays the object as YAML."""
-        return self.to_yaml()
\ No newline at end of file
+        return self.to_yaml()
