Skip to content

Commit 9e5ab20

Browse files
committed
Code refactoring: propagate feature_store_id to schema helpers and simplify the transformation API
1 parent 85ae918 commit 9e5ab20

File tree

10 files changed

+81
-369
lines changed

10 files changed

+81
-369
lines changed

ads/feature_store/common/spark_session_singleton.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ def __init__(self, metastore_id: str = None):
6262
)
6363

6464
if not developer_enabled() and metastore_id:
65+
print("Not Developer Enabled")
6566
# Get the authentication credentials for the OCI data catalog service
6667
auth = copy.copy(ads.auth.default_signer())
6768

@@ -79,6 +80,7 @@ def __init__(self, metastore_id: str = None):
7980
.config("spark.driver.memory", "16G")
8081

8182
if developer_enabled():
83+
print("Developer Enabled")
8284
# Configure spark session with delta jars only in developer mode. In other cases,
8385
# jars should be part of the conda pack
8486
self.spark_session = configure_spark_with_delta_pip(

ads/feature_store/common/utils/transformation_query_validator.py

Lines changed: 0 additions & 96 deletions
This file was deleted.

ads/feature_store/common/utils/utility.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444

4545

4646
def get_execution_engine_type(
47-
data_frame: Union[DataFrame, pd.DataFrame]
47+
data_frame: Union[DataFrame, pd.DataFrame]
4848
) -> ExecutionEngine:
4949
"""
5050
Determines the execution engine type for a given DataFrame.
@@ -84,7 +84,7 @@ def get_metastore_id(feature_store_id: str):
8484

8585

8686
def validate_delta_format_parameters(
87-
timestamp: datetime = None, version_number: int = None, is_restore: bool = False
87+
timestamp: datetime = None, version_number: int = None, is_restore: bool = False
8888
):
8989
"""
9090
Validate the user input provided as part of preview, restore APIs for ingested data, Ingested data is
@@ -118,9 +118,9 @@ def validate_delta_format_parameters(
118118

119119

120120
def get_features(
121-
output_columns: List[dict],
122-
parent_id: str,
123-
entity_type: EntityType = EntityType.FEATURE_GROUP,
121+
output_columns: List[dict],
122+
parent_id: str,
123+
entity_type: EntityType = EntityType.FEATURE_GROUP,
124124
) -> List[Feature]:
125125
"""
126126
Returns a list of features, given a list of output_columns and a feature_group_id.
@@ -154,8 +154,8 @@ def get_features(
154154
return features
155155

156156

157-
def get_schema_from_pandas_df(df: pd.DataFrame):
158-
spark = SparkSessionSingleton().get_spark_session()
157+
def get_schema_from_pandas_df(df: pd.DataFrame, feature_store_id: str):
158+
spark = SparkSessionSingleton(get_metastore_id(feature_store_id)).get_spark_session()
159159
converted_df = spark.createDataFrame(df)
160160
return get_schema_from_spark_df(converted_df)
161161

@@ -174,27 +174,28 @@ def get_schema_from_spark_df(df: DataFrame):
174174
return schema_details
175175

176176

177-
def get_schema_from_df(data_frame: Union[DataFrame, pd.DataFrame]) -> List[dict]:
177+
def get_schema_from_df(data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str) -> List[dict]:
178178
"""
179179
Given a DataFrame, returns a list of dictionaries that describe its schema.
180180
If the DataFrame is a pandas DataFrame, it uses pandas methods to get the schema.
181181
If it's a PySpark DataFrame, it uses PySpark methods to get the schema.
182182
"""
183183
if isinstance(data_frame, pd.DataFrame):
184-
return get_schema_from_pandas_df(data_frame)
184+
return get_schema_from_pandas_df(data_frame, feature_store_id)
185185
else:
186186
return get_schema_from_spark_df(data_frame)
187187

188188

189189
def get_input_features_from_df(
190-
data_frame: Union[DataFrame, pd.DataFrame]
190+
data_frame: Union[DataFrame, pd.DataFrame],
191+
feature_store_id: str
191192
) -> List[FeatureDetail]:
192193
"""
193194
Given a DataFrame, returns a list of FeatureDetail objects that represent its input features.
194195
Each FeatureDetail object contains information about a single input feature, such as its name, data type, and
195196
whether it's categorical or numerical.
196197
"""
197-
schema_details = get_schema_from_df(data_frame)
198+
schema_details = get_schema_from_df(data_frame, feature_store_id)
198199
feature_details = []
199200

200201
for schema_detail in schema_details:
@@ -204,7 +205,7 @@ def get_input_features_from_df(
204205

205206

206207
def convert_expectation_suite_to_expectation(
207-
expectation_suite: ExpectationSuite, expectation_type: ExpectationType
208+
expectation_suite: ExpectationSuite, expectation_type: ExpectationType
208209
):
209210
"""
210211
Convert an ExpectationSuite object to an Expectation object with detailed rule information.
@@ -282,7 +283,7 @@ def convert_pandas_datatype_with_schema(
282283
else:
283284
logger.warning("column" + column + "doesn't exist in the input feature details")
284285
columns_to_remove.append(column)
285-
return input_df.drop(columns = columns_to_remove)
286+
return input_df.drop(columns=columns_to_remove)
286287

287288

288289
def convert_spark_dataframe_with_schema(

ads/feature_store/entity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ def create_feature_group(
370370
raw_feature_details = (
371371
input_feature_details
372372
if input_feature_details
373-
else get_input_features_from_df(schema_details_dataframe)
373+
else get_input_features_from_df(schema_details_dataframe, self.feature_store_id)
374374
)
375375

376376
self.oci_feature_group = self._build_feature_group(

ads/feature_store/feature_group.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,11 @@ def with_input_feature_details(
476476
def with_schema_details_from_dataframe(
477477
self, data_frame: Union[DataFrame, pd.DataFrame]
478478
) -> "FeatureGroup":
479-
schema_details = get_schema_from_df(data_frame)
479+
480+
if not self.feature_store_id:
481+
raise ValueError("FeatureStore id must be set before calling `with_schema_details_from_dataframe`")
482+
483+
schema_details = get_schema_from_df(data_frame, self.feature_store_id)
480484
feature_details = []
481485

482486
for schema_detail in schema_details:

ads/feature_store/feature_store.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,11 @@ def list_entities_df(
426426

427427
def _build_transformation(
428428
self,
429-
transformation_mode: TransformationMode,
430-
source_code_func=None,
429+
source_code_func,
430+
transformation_mode,
431431
display_name: str = None,
432432
description: str = None,
433433
compartment_id: str = None,
434-
sql_query: str = None,
435434
):
436435
transformation = (
437436
Transformation()
@@ -443,19 +442,17 @@ def _build_transformation(
443442
compartment_id if compartment_id else self.compartment_id
444443
)
445444
.with_feature_store_id(self.id)
446-
.with_transformation_query_input(sql_query)
447445
)
448446

449447
return transformation
450448

451449
def create_transformation(
452450
self,
451+
source_code_func,
453452
transformation_mode: TransformationMode,
454-
source_code_func=None,
455453
display_name: str = None,
456454
description: str = None,
457455
compartment_id: str = None,
458-
sql_query: str = None,
459456
) -> "Transformation":
460457
"""Creates transformation resource from feature store.
461458
@@ -471,9 +468,6 @@ def create_transformation(
471468
description for the entity.
472469
compartment_id: str
473470
compartment_id for the entity.
474-
sql_query: str
475-
inline sql query to be passed for transformation creation,
476-
Please ensure to use DATA_SOURCE_INPUT as FROM table name
477471
478472
Returns
479473
-------
@@ -486,12 +480,11 @@ def create_transformation(
486480
)
487481

488482
self.oci_transformation = self._build_transformation(
489-
transformation_mode,
490483
source_code_func,
484+
transformation_mode,
491485
display_name,
492486
description,
493487
compartment_id,
494-
sql_query,
495488
)
496489

497490
return self.oci_transformation.create()

0 commit comments

Comments (0)