Skip to content

Commit 5b5ce60

Browse files
addressed review comments and enable sql transformation for YAML
1 parent 2a57246 commit 5b5ce60

File tree

6 files changed

+22
-7
lines changed

6 files changed

+22
-7
lines changed

ads/feature_store/feature_store_registrar.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,9 @@ def _create_transformations(self) -> List[Transformation]:
250250
transformation.compartment_id = (
251251
transformation.compartment_id or self._root_compartment_id
252252
)
253-
# to encode to base64
254-
transformation.source_code_function = transformation.source_code_function
253+
if transformation.source_code_function:
254+
# to encode to base64
255+
transformation.source_code_function = transformation.source_code_function
255256
return self._transformations.create_models(self._progress)
256257

257258
def _create_feature_groups(self) -> List[FeatureGroup]:

ads/feature_store/templates/feature_store_template.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ spec:
1616
spec:
1717
displayName: '{entity_name}'
1818

19+
# we can pass sql query in case of spark based sql transformations by passing query inline in transformationQueryInput
1920
transformation: &transformation
2021
- kind: transformation
2122
spec:

ads/feature_store/transformation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ def create(self, **kwargs) -> "Transformation":
389389
raise ValueError(
390390
"Transformation display name and function name must be same."
391391
)
392-
elif self.transformation_mode == TransformationMode.SQL.value:
392+
elif self.transformation_mode.lower() == TransformationMode.SQL.value:
393393
TransformationQueryValidator.verify_sql_input(self.transformation_query_input,
394394
self.CONST_DATA_SOURCE_TRANSFORMATION_INPUT)
395395
# convert it to transformation function to ensure the integrity with backend

tests/data/feature_store_minimal.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ spec:
1616
- kind: transformation
1717
spec:
1818
displayName: transactions_df
19-
transformationMode: SPARK_SQL
19+
transformationMode: SQL
2020
sourceCode: '
2121
def transactions_df(transactions_batch):
2222
sql_query = f"""

tests/integration/feature_store/test_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def clean_up_transformation(transformation):
454454
try:
455455
transformation.delete()
456456
except Exception as ex:
457-
print("Failed to delete feature group: ", str(ex))
457+
print("Failed to delete transformation: ", str(ex))
458458
exit(1)
459459

460460
@staticmethod

tests/integration/feature_store/test_transformation.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from ads.feature_store.feature_group import FeatureGroup
1111
from ads.feature_store.common.enums import TransformationMode
1212
from tests.integration.feature_store.test_base import FeatureStoreTestCase
13+
from ads.feature_store.common.spark_session_singleton import SparkSessionSingleton
1314

1415

1516
class TestFeatureStoreTransformation(FeatureStoreTestCase):
@@ -18,7 +19,7 @@ class TestFeatureStoreTransformation(FeatureStoreTestCase):
1819
valid_spark_queries = [
1920
"SELECT requisitionId, length(title) As title_word_count,"
2021
" CASE When length(title) > 0 Then 0 Else 1 End As empty_title,"
21-
" length(description) As description_word_count," \
22+
" length(description) As description_word_count,"
2223
" length(designation) As designation_word_count FROM DATA_SOURCE_INPUT",
2324
"SELECT user_id, credit_score FROM DATA_SOURCE_INPUT",
2425
"SELECT country, city, zipcode, state FROM DATA_SOURCE_INPUT WHERE state in ('PR', 'AZ', 'FL') order by state",
@@ -104,7 +105,19 @@ def test_transformation_query_with_feature_group_job(self):
104105
)
105106
assert fg.oci_feature_group.id
106107

107-
fg.materialise(self.data)
108+
# convert pandas to spark dataframe to run SPARK SQL transformation mode
109+
spark = SparkSessionSingleton().get_spark_session()
110+
spark_df = spark.createDataFrame(self.data)
111+
# get item count
112+
item_count = spark_df.count()
113+
# materialise to delta table
114+
fg.materialise(spark_df)
115+
# read dataframe
116+
df = fg.select().read()
117+
# assert dataframe
118+
assert df
119+
# assert count
120+
assert df.count() == item_count
108121

109122
self.clean_up_feature_group(fg)
110123
self.clean_up_transformation(transformation)

0 commit comments

Comments
 (0)